Implement more general pruning

This commit is contained in:
Patrick Niklaus 2015-02-13 14:34:44 +01:00
parent 0c3102721c
commit d620da365e

View File

@ -26,6 +26,9 @@ or see http://www.gnu.org/licenses/agpl.txt.
#include "../data_structures/coordinate_calculation.hpp" #include "../data_structures/coordinate_calculation.hpp"
#include "../util/simple_logger.hpp" #include "../util/simple_logger.hpp"
#include <variant/variant.hpp>
#include <osrm/json_container.hpp>
#include <algorithm> #include <algorithm>
#include <iomanip> #include <iomanip>
#include <numeric> #include <numeric>
@ -87,24 +90,23 @@ template <class DataFacadeT> class MapMatching final
constexpr static const double log_sigma_z = std::log(sigma_z); constexpr static const double log_sigma_z = std::log(sigma_z);
constexpr static const double log_2_pi = std::log(2 * M_PI); constexpr static const double log_2_pi = std::log(2 * M_PI);
// Emission probability of the map-matching HMM: PDF of a zero-mean normal
// distribution with standard deviation sigma_z (the expected GPS noise),
// evaluated at `distance` — the distance between the measured coordinate
// and the candidate road position.
constexpr static double emission_probability(const double distance)
{
    // Square via self-multiplication instead of std::pow(x, 2.): cheaper,
    // and std::pow is not usable in constant expressions.
    const double normed_distance = distance / sigma_z;
    return (1. / (std::sqrt(2. * M_PI) * sigma_z)) *
           std::exp(-0.5 * normed_distance * normed_distance);
}
// Transition probability of the map-matching HMM: an exponential
// distribution with rate 1/beta evaluated at d_t (the difference between
// great-circle and routed distance of consecutive candidates).
constexpr static double transition_probability(const float d_t, const float beta)
{
    const double rate = 1. / beta;
    return rate * std::exp(-d_t / beta);
}
// Natural log of emission_probability, expanded analytically so small
// probabilities do not underflow:
//   -0.5 * (log(2*pi) + (distance/sigma_z)^2) - log(sigma_z)
constexpr static double log_emission_probability(const double distance)
{
    const double normed_distance = distance / sigma_z;
    return -0.5 * (log_2_pi + normed_distance * normed_distance) - log_sigma_z;
}
// Natural log of transition_probability, expanded analytically:
//   log((1/beta) * exp(-d_t/beta)) = -log(beta) - d_t/beta
constexpr static double log_transition_probability(const float d_t, const float beta)
{
    const float neg_log_beta = -std::log(beta);
    return neg_log_beta - d_t / beta;
}
@ -228,70 +230,65 @@ template <class DataFacadeT> class MapMatching final
return distance; return distance;
} }
public: struct HiddenMarkovModel
MapMatching(DataFacadeT *facade, SearchEngineData &engine_working_data)
: super(facade), engine_working_data(engine_working_data)
{ {
}
void operator()(const Matching::CandidateLists &timestamp_list,
const std::vector<FixedPointCoordinate> coordinate_list,
std::vector<PhantomNode>& matched_nodes,
float& matched_length,
JSON::Object& _debug_info) const
{
std::vector<bool> breakage(timestamp_list.size(), true);
BOOST_ASSERT(timestamp_list.size() > 0);
// TODO for the viterbi values we actually only need the current and last row
std::vector<std::vector<double>> viterbi; std::vector<std::vector<double>> viterbi;
std::vector<std::vector<std::size_t>> parents; std::vector<std::vector<std::size_t>> parents;
std::vector<std::vector<float>> segment_lengths; std::vector<std::vector<float>> path_lengths;
std::vector<std::vector<bool>> pruned; std::vector<std::vector<bool>> pruned;
std::vector<bool> breakage;
const Matching::CandidateLists& timestamp_list;
constexpr static double IMPOSSIBLE_LOG_PROB = -std::numeric_limits<double>::infinity();
constexpr static double MINIMAL_LOG_PROB = -std::numeric_limits<double>::max();
HiddenMarkovModel(const Matching::CandidateLists& timestamp_list)
: breakage(timestamp_list.size())
, timestamp_list(timestamp_list)
{
for (const auto& l : timestamp_list) for (const auto& l : timestamp_list)
{ {
viterbi.emplace_back(l.size(), -std::numeric_limits<double>::infinity()); viterbi.emplace_back(l.size());
parents.emplace_back(l.size(), 0); parents.emplace_back(l.size());
segment_lengths.emplace_back(l.size(), 0); path_lengths.emplace_back(l.size());
pruned.emplace_back(l.size(), true); pruned.emplace_back(l.size());
} }
JSON::Array _debug_states; clear(0);
for (const auto& l : timestamp_list) }
void clear(unsigned initial_timestamp)
{ {
JSON::Array _debug_timestamps; BOOST_ASSERT(viterbi.size() == parents.size()
for (unsigned i = 0; i < l.size(); i++) && parents.size() == path_lengths.size()
&& path_lengths.size() == pruned.size()
&& pruned.size() == breakage.size());
for (unsigned t = initial_timestamp; t < viterbi.size(); t++)
{ {
JSON::Object _debug_state; std::fill(viterbi[t].begin(), viterbi[t].end(), IMPOSSIBLE_LOG_PROB);
_debug_state.values["transitions"] = JSON::Array(); std::fill(parents[t].begin(), parents[t].end(), 0);
_debug_state.values["coordinate"] = makeJSONArray(l[i].first.location.lat / COORDINATE_PRECISION, std::fill(path_lengths[t].begin(), path_lengths[t].end(), 0);
l[i].first.location.lon / COORDINATE_PRECISION); std::fill(pruned[t].begin(), pruned[t].end(), true);
_debug_timestamps.values.push_back(_debug_state);
} }
_debug_states.values.push_back(_debug_timestamps); std::fill(breakage.begin()+initial_timestamp, breakage.end(), true);
} }
unsigned initial_timestamp = 0; unsigned initialize(unsigned initial_timestamp)
{
BOOST_ASSERT(initial_timestamp < timestamp_list.size());
do do
{ {
for (auto s = 0u; s < viterbi[initial_timestamp].size(); ++s) for (auto s = 0u; s < viterbi[initial_timestamp].size(); ++s)
{ {
// this might need to be squared as pi_s is also defined as the emission
// probability in the paper.
viterbi[initial_timestamp][s] = log_emission_probability(timestamp_list[initial_timestamp][s].second); viterbi[initial_timestamp][s] = log_emission_probability(timestamp_list[initial_timestamp][s].second);
parents[initial_timestamp][s] = s; parents[initial_timestamp][s] = s;
pruned[initial_timestamp][s] = viterbi[initial_timestamp][s] < -std::numeric_limits<double>::max(); pruned[initial_timestamp][s] = viterbi[initial_timestamp][s] < MINIMAL_LOG_PROB;
breakage[initial_timestamp] = breakage[initial_timestamp] && pruned[initial_timestamp][s]; breakage[initial_timestamp] = breakage[initial_timestamp] && pruned[initial_timestamp][s];
_debug_states.values[initial_timestamp]
.get<JSONVariantArray>().get().values[s]
.get<JSONVariantObject>().get().values["viterbi"] = makeJSONSafe(viterbi[initial_timestamp][s]);
_debug_states.values[initial_timestamp]
.get<JSONVariantArray>().get().values[s]
.get<JSONVariantObject>().get().values["pruned"] = static_cast<unsigned>(pruned[initial_timestamp][s]);
} }
++initial_timestamp; ++initial_timestamp;
@ -302,18 +299,70 @@ template <class DataFacadeT> class MapMatching final
BOOST_ASSERT(breakage[initial_timestamp] == false); BOOST_ASSERT(breakage[initial_timestamp] == false);
unsigned prev_unbroken_timestamp = initial_timestamp; return initial_timestamp;
}
};
public:
MapMatching(DataFacadeT *facade, SearchEngineData &engine_working_data)
: super(facade), engine_working_data(engine_working_data)
{
}
void operator()(const Matching::CandidateLists &timestamp_list,
const std::vector<FixedPointCoordinate> coordinate_list,
std::vector<PhantomNode>& matched_nodes,
float& matched_length,
JSON::Object& _debug_info) const
{
BOOST_ASSERT(timestamp_list.size() > 0);
HiddenMarkovModel model(timestamp_list);
unsigned initial_timestamp = model.initialize(0);
JSON::Array _debug_states;
for (unsigned t = 0; t < timestamp_list.size(); t++)
{
JSON::Array _debug_timestamps;
for (unsigned s = 0; s < timestamp_list[t].size(); s++)
{
JSON::Object _debug_state;
_debug_state.values["transitions"] = JSON::Array();
_debug_state.values["coordinate"] = makeJSONArray(timestamp_list[t][s].first.location.lat / COORDINATE_PRECISION,
timestamp_list[t][s].first.location.lon / COORDINATE_PRECISION);
if (t < initial_timestamp)
{
_debug_state.values["viterbi"] = makeJSONSafe(HiddenMarkovModel::IMPOSSIBLE_LOG_PROB);
_debug_state.values["pruned"] = 0u;
}
else if (t == initial_timestamp)
{
_debug_state.values["viterbi"] = makeJSONSafe(model.viterbi[t][s]);
_debug_state.values["pruned"] = static_cast<unsigned>(model.pruned[t][s]);
}
_debug_timestamps.values.push_back(_debug_state);
}
_debug_states.values.push_back(_debug_timestamps);
}
std::vector<unsigned> prev_unbroken_timestamps;
prev_unbroken_timestamps.reserve(timestamp_list.size());
prev_unbroken_timestamps.push_back(initial_timestamp);
for (auto t = initial_timestamp + 1; t < timestamp_list.size(); ++t) for (auto t = initial_timestamp + 1; t < timestamp_list.size(); ++t)
{ {
const auto& prev_viterbi = viterbi[prev_unbroken_timestamp]; unsigned prev_unbroken_timestamp = prev_unbroken_timestamps.back();
const auto& prev_pruned = pruned[prev_unbroken_timestamp]; const auto& prev_viterbi = model.viterbi[prev_unbroken_timestamp];
const auto& prev_pruned = model.pruned[prev_unbroken_timestamp];
const auto& prev_unbroken_timestamps_list = timestamp_list[prev_unbroken_timestamp]; const auto& prev_unbroken_timestamps_list = timestamp_list[prev_unbroken_timestamp];
const auto& prev_coordinate = coordinate_list[prev_unbroken_timestamp]; const auto& prev_coordinate = coordinate_list[prev_unbroken_timestamp];
auto& current_viterbi = viterbi[t]; auto& current_viterbi = model.viterbi[t];
auto& current_pruned = pruned[t]; auto& current_pruned = model.pruned[t];
auto& current_parents = parents[t]; auto& current_parents = model.parents[t];
auto& current_lengths = segment_lengths[t]; auto& current_lengths = model.path_lengths[t];
const auto& current_timestamps_list = timestamp_list[t]; const auto& current_timestamps_list = timestamp_list[t];
const auto& current_coordinate = coordinate_list[t]; const auto& current_coordinate = coordinate_list[t];
@ -361,14 +410,13 @@ template <class DataFacadeT> class MapMatching final
.get<JSONVariantObject>().get().values["transitions"] .get<JSONVariantObject>().get().values["transitions"]
.get<JSONVariantArray>().get().values.push_back(_debug_transistion); .get<JSONVariantArray>().get().values.push_back(_debug_transistion);
if (new_value > current_viterbi[s_prime]) if (new_value > current_viterbi[s_prime])
{ {
current_viterbi[s_prime] = new_value; current_viterbi[s_prime] = new_value;
current_parents[s_prime] = s; current_parents[s_prime] = s;
current_lengths[s_prime] = network_distance; current_lengths[s_prime] = network_distance;
current_pruned[s_prime] = false; current_pruned[s_prime] = false;
breakage[t] = false; model.breakage[t] = false;
} }
} }
} }
@ -383,23 +431,45 @@ template <class DataFacadeT> class MapMatching final
.get<JSONVariantObject>().get().values["pruned"] = static_cast<unsigned>(current_pruned[s_prime]); .get<JSONVariantObject>().get().values["pruned"] = static_cast<unsigned>(current_pruned[s_prime]);
} }
if (!breakage[t]) if (model.breakage[t])
{ {
prev_unbroken_timestamp = t; if (prev_unbroken_timestamps.size() > 1)
{
// remove both ends of the breakage
prev_unbroken_timestamps.pop_back();
}
// we reached the beginning of the trace, discard the whole beginning
else
{
model.clear(t);
model.initialize(t);
} }
} }
else
{
prev_unbroken_timestamps.push_back(t);
}
}
if (prev_unbroken_timestamps.size() < 1)
{
return;
}
unsigned last_unbroken_timestamp = prev_unbroken_timestamps.back();
// loop through the columns, and only compare the last entry // loop through the columns, and only compare the last entry
auto max_element_iter = std::max_element(viterbi[prev_unbroken_timestamp].begin(), viterbi[prev_unbroken_timestamp].end()); auto max_element_iter = std::max_element(model.viterbi[last_unbroken_timestamp].begin(),
auto parent_index = std::distance(viterbi[prev_unbroken_timestamp].begin(), max_element_iter); model.viterbi[last_unbroken_timestamp].end());
auto parent_index = std::distance(model.viterbi[last_unbroken_timestamp].begin(), max_element_iter);
std::deque<std::pair<std::size_t, std::size_t>> reconstructed_indices; std::deque<std::pair<std::size_t, std::size_t>> reconstructed_indices;
for (auto i = prev_unbroken_timestamp; i > initial_timestamp; --i) for (auto i = last_unbroken_timestamp; i > initial_timestamp; --i)
{ {
if (breakage[i]) if (model.breakage[i])
continue; continue;
reconstructed_indices.emplace_front(i, parent_index); reconstructed_indices.emplace_front(i, parent_index);
parent_index = parents[i][parent_index]; parent_index = model.parents[i][parent_index];
} }
reconstructed_indices.emplace_front(initial_timestamp, parent_index); reconstructed_indices.emplace_front(initial_timestamp, parent_index);
@ -411,7 +481,7 @@ template <class DataFacadeT> class MapMatching final
auto location_index = reconstructed_indices[i].second; auto location_index = reconstructed_indices[i].second;
matched_nodes[i] = timestamp_list[timestamp_index][location_index].first; matched_nodes[i] = timestamp_list[timestamp_index][location_index].first;
matched_length += segment_lengths[timestamp_index][location_index]; matched_length += model.path_lengths[timestamp_index][location_index];
_debug_states.values[timestamp_index] _debug_states.values[timestamp_index]
.get<JSONVariantArray>().get().values[location_index] .get<JSONVariantArray>().get().values[location_index]
@ -419,7 +489,7 @@ template <class DataFacadeT> class MapMatching final
} }
JSON::Array _debug_breakage; JSON::Array _debug_breakage;
for (auto b : breakage) { for (auto b : model.breakage) {
_debug_breakage.values.push_back(static_cast<unsigned>(b)); _debug_breakage.values.push_back(static_cast<unsigned>(b));
} }