/*
    open source routing machine
    Copyright (C) Dennis Luxen, others 2010

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU AFFERO General Public License as published by
the Free Software Foundation; either version 3 of the License, or
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
or see http://www.gnu.org/licenses/agpl.txt.
*/

#ifndef MAP_MATCHING_H
#define MAP_MATCHING_H

#include "routing_base.hpp"

#include "../data_structures/coordinate_calculation.hpp"
#include "../util/simple_logger.hpp"

#include <variant/variant.hpp>
#include <osrm/json_container.hpp>

#include <boost/assert.hpp>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <deque>
#include <iomanip>
#include <limits>
#include <numeric>
#include <utility>
#include <vector>

#include <fstream>

using JSONVariantArray = mapbox::util::recursive_wrapper<JSON::Array>;
using JSONVariantObject = mapbox::util::recursive_wrapper<JSON::Object>;

// clamps NaN and +/-infinity to the largest finite values, since JSON has no
// representation for non-finite numbers
template <typename T> T makeJSONSafe(T d)
{
    if (std::isnan(d) || std::numeric_limits<T>::infinity() == d)
    {
        return std::numeric_limits<T>::max();
    }
    if (-std::numeric_limits<T>::infinity() == d)
    {
        return -std::numeric_limits<T>::max();
    }

    return d;
}

// variadic helpers that build a JSON::Array from an arbitrary list of values
inline void appendToJSONArray(JSON::Array &a) {}

template <typename T, typename... Args>
void appendToJSONArray(JSON::Array &a, T value, Args... args)
{
    a.values.emplace_back(value);
    appendToJSONArray(a, args...);
}

template <typename... Args> JSON::Array makeJSONArray(Args... args)
{
    JSON::Array a;
    appendToJSONArray(a, args...);
    return a;
}

namespace Matching
{

struct SubMatching
{
    std::vector<PhantomNode> nodes;
    unsigned begin_idx;
    unsigned end_idx;
    double length;
    double confidence;
};

using CandidateList = std::vector<std::pair<PhantomNode, double>>;
using CandidateLists = std::vector<CandidateList>;
using SubMatchingList = std::vector<SubMatching>;
constexpr static const unsigned max_number_of_candidates = 20;
}
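
// For every sample of the input trace there is one CandidateList: up to
// max_number_of_candidates (PhantomNode, distance) pairs, where the second
// member is the distance between the measured coordinate and the candidate's
// position on the road network. That distance feeds the emission probability
// below.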

// implements a hidden markov model map matching algorithm, cf. [1] at the
// bottom of this file
template <class DataFacadeT> class MapMatching final
    : public BasicRoutingInterface<DataFacadeT, MapMatching<DataFacadeT>>
{
    using super = BasicRoutingInterface<DataFacadeT, MapMatching<DataFacadeT>>;
    using QueryHeap = SearchEngineData::QueryHeap;
    SearchEngineData &engine_working_data;

    // FIXME this value should be a table based on samples/meter (or samples/min)
    constexpr static const double beta = 10.0;
    // standard deviation of the GPS measurement noise in meters, cf. [1]
    constexpr static const double sigma_z = 4.07;
    constexpr static const double log_sigma_z = std::log(sigma_z);
    constexpr static const double log_2_pi = std::log(2 * M_PI);

    // p(z_t | r_t): Gaussian noise model over the distance between the
    // measurement and the matched position, cf. [1]
    constexpr static double emission_probability(const double distance)
    {
        return (1. / (std::sqrt(2. * M_PI) * sigma_z)) *
               std::exp(-0.5 * std::pow((distance / sigma_z), 2.));
    }

    // p(d_t): exponential model over the difference between network distance
    // and great-circle distance of consecutive samples, cf. [1]
    constexpr static double transition_probability(const float d_t, const float beta)
    {
        return (1. / beta) * std::exp(-d_t / beta);
    }

    constexpr static double log_emission_probability(const double distance)
    {
        return -0.5 * (log_2_pi + (distance / sigma_z) * (distance / sigma_z)) - log_sigma_z;
    }

    constexpr static double log_transition_probability(const float d_t, const float beta)
    {
        return -std::log(beta) - d_t / beta;
    }
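
    // Illustrative magnitudes (arithmetic added for orientation, not part of
    // the model): with sigma_z = 4.07, a candidate 10 m from the measurement
    // gets log_emission_probability(10)
    //   = -0.5 * (log(2*pi) + (10/4.07)^2) - log(4.07) ~= -5.34,
    // and with beta = 10 a distance difference of 100 m gives
    // log_transition_probability(100, 10) = -log(10) - 100/10 ~= -12.30.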

    // TODO: needs to be estimated from the input locations
    // FIXME These values seem wrong. Higher beta for more samples/minute?
    // It should be inversely proportional.
    // constexpr static const double beta = 1.;
    // samples/min and beta
    //  1  0.49037673
    //  2  0.82918373
    //  3  1.24364564
    //  4  1.67079581
    //  5  2.00719298
    //  6  2.42513007
    //  7  2.81248831
    //  8  3.15745473
    //  9  3.52645392
    // 10  4.09511775
    // 11  4.67319795
    // 12  5.41088180
    // 13  6.47666590
    // 14  6.29010734
    // 15  7.80752112
    // 16  8.09074504
    // 17  8.08550528
    // 18  9.09405065
    // 19 11.09090603
    // 20 11.87752824
    // 21 12.55107715
    // 22 15.82820829
    // 23 17.69496773
    // 24 18.07655652
    // 25 19.63438911
    // 26 25.40832185
    // 27 23.76001877
    // 28 28.43289797
    // 29 32.21683062
    // 30 34.56991141
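
    // A minimal sketch of the lookup that the FIXME above asks for, added as
    // an illustration only: beta_for_sample_rate is a hypothetical helper and
    // is not called by the matcher. It clamps the sample rate to the measured
    // range and indexes into the table from the comment block above.
    static double beta_for_sample_rate(const double samples_per_min)
    {
        // measured beta values for 1..30 samples/min, copied from the table above
        static const double beta_table[30] = {
            0.49037673,  0.82918373,  1.24364564,  1.67079581,  2.00719298,
            2.42513007,  2.81248831,  3.15745473,  3.52645392,  4.09511775,
            4.67319795,  5.41088180,  6.47666590,  6.29010734,  7.80752112,
            8.09074504,  8.08550528,  9.09405065,  11.09090603, 11.87752824,
            12.55107715, 15.82820829, 17.69496773, 18.07655652, 19.63438911,
            25.40832185, 23.76001877, 28.43289797, 32.21683062, 34.56991141};
        const unsigned index =
            static_cast<unsigned>(std::min(std::max(samples_per_min, 1.), 30.)) - 1;
        return beta_table[index];
    }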

    // computes the length of the shortest path between two phantom nodes by
    // running a bidirectional search and summing the great-circle distances
    // along the unpacked path
    double get_network_distance(const PhantomNode &source_phantom,
                                const PhantomNode &target_phantom) const
    {
        EdgeWeight upper_bound = INVALID_EDGE_WEIGHT;
        NodeID middle_node = SPECIAL_NODEID;
        EdgeWeight edge_offset = std::min(0, -source_phantom.GetForwardWeightPlusOffset());
        edge_offset = std::min(edge_offset, -source_phantom.GetReverseWeightPlusOffset());

        engine_working_data.InitializeOrClearFirstThreadLocalStorage(
            super::facade->GetNumberOfNodes());
        engine_working_data.InitializeOrClearSecondThreadLocalStorage(
            super::facade->GetNumberOfNodes());

        QueryHeap &forward_heap = *(engine_working_data.forward_heap_1);
        QueryHeap &reverse_heap = *(engine_working_data.reverse_heap_1);

        if (source_phantom.forward_node_id != SPECIAL_NODEID)
        {
            forward_heap.Insert(source_phantom.forward_node_id,
                                -source_phantom.GetForwardWeightPlusOffset(),
                                source_phantom.forward_node_id);
        }
        if (source_phantom.reverse_node_id != SPECIAL_NODEID)
        {
            forward_heap.Insert(source_phantom.reverse_node_id,
                                -source_phantom.GetReverseWeightPlusOffset(),
                                source_phantom.reverse_node_id);
        }

        if (target_phantom.forward_node_id != SPECIAL_NODEID)
        {
            reverse_heap.Insert(target_phantom.forward_node_id,
                                target_phantom.GetForwardWeightPlusOffset(),
                                target_phantom.forward_node_id);
        }
        if (target_phantom.reverse_node_id != SPECIAL_NODEID)
        {
            reverse_heap.Insert(target_phantom.reverse_node_id,
                                target_phantom.GetReverseWeightPlusOffset(),
                                target_phantom.reverse_node_id);
        }

        // search from s and t till new_min/(1+epsilon) > length_of_shortest_path
        while (0 < (forward_heap.Size() + reverse_heap.Size()))
        {
            if (0 < forward_heap.Size())
            {
                super::RoutingStep(
                    forward_heap, reverse_heap, &middle_node, &upper_bound, edge_offset, true);
            }
            if (0 < reverse_heap.Size())
            {
                super::RoutingStep(
                    reverse_heap, forward_heap, &middle_node, &upper_bound, edge_offset, false);
            }
        }

        double distance = std::numeric_limits<double>::max();
        if (upper_bound != INVALID_EDGE_WEIGHT)
        {
            std::vector<NodeID> packed_leg;
            super::RetrievePackedPathFromHeap(forward_heap, reverse_heap, middle_node, packed_leg);
            std::vector<PathData> unpacked_path;
            PhantomNodes nodes;
            nodes.source_phantom = source_phantom;
            nodes.target_phantom = target_phantom;
            super::UnpackPath(packed_leg, nodes, unpacked_path);

            FixedPointCoordinate previous_coordinate = source_phantom.location;
            FixedPointCoordinate current_coordinate;
            distance = 0;
            for (const auto &p : unpacked_path)
            {
                current_coordinate = super::facade->GetCoordinateOfNode(p.node);
                distance += coordinate_calculation::great_circle_distance(previous_coordinate,
                                                                          current_coordinate);
                previous_coordinate = current_coordinate;
            }
            distance += coordinate_calculation::great_circle_distance(previous_coordinate,
                                                                      target_phantom.location);
        }

        return distance;
    }
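
    // Note: the return value is in meters, and std::numeric_limits<double>::max()
    // signals that no path was found; such candidate pairs are discarded by the
    // d_t > 500 pruning in operator() below.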

    struct HiddenMarkovModel
    {
        std::vector<std::vector<double>> viterbi;
        std::vector<std::vector<std::size_t>> parents;
        std::vector<std::vector<float>> path_lengths;
        std::vector<std::vector<bool>> pruned;
        std::vector<bool> breakage;

        const Matching::CandidateLists &timestamp_list;

        constexpr static double IMPOSSIBLE_LOG_PROB = -std::numeric_limits<double>::infinity();
        constexpr static double MINIMAL_LOG_PROB = -std::numeric_limits<double>::max();

        HiddenMarkovModel(const Matching::CandidateLists &timestamp_list)
            : breakage(timestamp_list.size()), timestamp_list(timestamp_list)
        {
            for (const auto &l : timestamp_list)
            {
                viterbi.emplace_back(l.size());
                parents.emplace_back(l.size());
                path_lengths.emplace_back(l.size());
                pruned.emplace_back(l.size());
            }

            clear(0);
        }

        void clear(unsigned initial_timestamp)
        {
            BOOST_ASSERT(viterbi.size() == parents.size() &&
                         parents.size() == path_lengths.size() &&
                         path_lengths.size() == pruned.size() &&
                         pruned.size() == breakage.size());

            for (unsigned t = initial_timestamp; t < viterbi.size(); t++)
            {
                std::fill(viterbi[t].begin(), viterbi[t].end(), IMPOSSIBLE_LOG_PROB);
                std::fill(parents[t].begin(), parents[t].end(), 0);
                std::fill(path_lengths[t].begin(), path_lengths[t].end(), 0);
                std::fill(pruned[t].begin(), pruned[t].end(), true);
            }
            std::fill(breakage.begin() + initial_timestamp, breakage.end(), true);
        }

        // seeds the model at the first timestamp whose candidates are not all
        // pruned, and returns that timestamp
        unsigned initialize(unsigned initial_timestamp)
        {
            BOOST_ASSERT(initial_timestamp < timestamp_list.size());

            do
            {
                for (auto s = 0u; s < viterbi[initial_timestamp].size(); ++s)
                {
                    viterbi[initial_timestamp][s] =
                        log_emission_probability(timestamp_list[initial_timestamp][s].second);
                    parents[initial_timestamp][s] = s;
                    pruned[initial_timestamp][s] =
                        viterbi[initial_timestamp][s] < MINIMAL_LOG_PROB;

                    breakage[initial_timestamp] =
                        breakage[initial_timestamp] && pruned[initial_timestamp][s];
                }

                ++initial_timestamp;
                // the bounds check guards against traces whose candidates are
                // all pruned; without it the scan would run past the last
                // timestamp
            } while (initial_timestamp < breakage.size() && breakage[initial_timestamp - 1]);

            BOOST_ASSERT(initial_timestamp > 0 && initial_timestamp < viterbi.size());
            --initial_timestamp;

            BOOST_ASSERT(breakage[initial_timestamp] == false);

            return initial_timestamp;
        }
    };
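
    // Reading the tables above: viterbi[t][s] holds the log-probability of
    // the most likely candidate sequence ending in candidate s at timestamp t,
    // parents[t][s] the candidate index this sequence came from, path_lengths[t][s]
    // the network distance of that last transition, and breakage[t] whether
    // every candidate at t was pruned.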

  public:
    MapMatching(DataFacadeT *facade, SearchEngineData &engine_working_data)
        : super(facade), engine_working_data(engine_working_data)
    {
    }

    void operator()(const Matching::CandidateLists &timestamp_list,
                    const std::vector<FixedPointCoordinate> &coordinate_list,
                    Matching::SubMatchingList &sub_matchings,
                    JSON::Object &_debug_info) const
    {
        BOOST_ASSERT(timestamp_list.size() > 0);

        HiddenMarkovModel model(timestamp_list);

        unsigned initial_timestamp = model.initialize(0);

        JSON::Array _debug_states;
        for (unsigned t = 0; t < timestamp_list.size(); t++)
        {
            JSON::Array _debug_timestamps;
            for (unsigned s = 0; s < timestamp_list[t].size(); s++)
            {
                JSON::Object _debug_state;
                _debug_state.values["transitions"] = JSON::Array();
                _debug_state.values["coordinate"] =
                    makeJSONArray(timestamp_list[t][s].first.location.lat / COORDINATE_PRECISION,
                                  timestamp_list[t][s].first.location.lon / COORDINATE_PRECISION);
                if (t < initial_timestamp)
                {
                    _debug_state.values["viterbi"] =
                        makeJSONSafe(HiddenMarkovModel::IMPOSSIBLE_LOG_PROB);
                    _debug_state.values["pruned"] = 0u;
                }
                else if (t == initial_timestamp)
                {
                    _debug_state.values["viterbi"] = makeJSONSafe(model.viterbi[t][s]);
                    _debug_state.values["pruned"] = static_cast<unsigned>(model.pruned[t][s]);
                }
                _debug_timestamps.values.push_back(_debug_state);
            }
            _debug_states.values.push_back(_debug_timestamps);
        }
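
        // What follows is the Viterbi recursion over the candidate lattice:
        //   viterbi[t][s'] = max_s { viterbi[t-1][s]
        //                            + log_emission_probability(dist(t, s'))
        //                            + log_transition_probability(d_t, beta) }
        // where t-1 stands for the last unbroken timestamp and d_t is the
        // difference between network and great-circle distance, cf. [1].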

        std::vector<unsigned> split_points;
        std::vector<unsigned> prev_unbroken_timestamps;
        prev_unbroken_timestamps.reserve(timestamp_list.size());
        prev_unbroken_timestamps.push_back(initial_timestamp);
        for (auto t = initial_timestamp + 1; t < timestamp_list.size(); ++t)
        {
            unsigned prev_unbroken_timestamp = prev_unbroken_timestamps.back();
            const auto &prev_viterbi = model.viterbi[prev_unbroken_timestamp];
            const auto &prev_pruned = model.pruned[prev_unbroken_timestamp];
            const auto &prev_unbroken_timestamps_list = timestamp_list[prev_unbroken_timestamp];
            const auto &prev_coordinate = coordinate_list[prev_unbroken_timestamp];

            auto &current_viterbi = model.viterbi[t];
            auto &current_pruned = model.pruned[t];
            auto &current_parents = model.parents[t];
            auto &current_lengths = model.path_lengths[t];
            const auto &current_timestamps_list = timestamp_list[t];
            const auto &current_coordinate = coordinate_list[t];

            SimpleLogger().Write(logDEBUG) << " # " << prev_unbroken_timestamp << " -> " << t;

            // compute d_t for this timestamp and the next one
            for (auto s = 0u; s < prev_viterbi.size(); ++s)
            {
                if (prev_pruned[s])
                {
                    continue;
                }

                for (auto s_prime = 0u; s_prime < current_viterbi.size(); ++s_prime)
                {
                    // how likely is candidate s_prime at time t to be emitted?
                    const double emission_pr =
                        log_emission_probability(timestamp_list[t][s_prime].second);
                    double new_value = prev_viterbi[s] + emission_pr;
                    if (current_viterbi[s_prime] > new_value)
                    {
                        continue;
                    }

                    // get distance diff between loc1/2 and locs/s_prime
                    const auto network_distance =
                        get_network_distance(prev_unbroken_timestamps_list[s].first,
                                             current_timestamps_list[s_prime].first);
                    const auto great_circle_distance =
                        coordinate_calculation::great_circle_distance(prev_coordinate,
                                                                      current_coordinate);

                    const auto d_t = std::abs(network_distance - great_circle_distance);

                    // very low probability transition -> prune
                    if (d_t > 500)
                    {
                        continue;
                    }

                    const double transition_pr = log_transition_probability(d_t, beta);
                    new_value += transition_pr;

                    JSON::Object _debug_transition;
                    _debug_transition.values["to"] = makeJSONArray(t, s_prime);
                    _debug_transition.values["properties"] =
                        makeJSONArray(makeJSONSafe(prev_viterbi[s]),
                                      makeJSONSafe(emission_pr),
                                      makeJSONSafe(transition_pr),
                                      network_distance,
                                      great_circle_distance);
                    _debug_states.values[prev_unbroken_timestamp]
                        .get<JSONVariantArray>().get().values[s]
                        .get<JSONVariantObject>().get().values["transitions"]
                        .get<JSONVariantArray>().get().values.push_back(_debug_transition);

                    if (new_value > current_viterbi[s_prime])
                    {
                        current_viterbi[s_prime] = new_value;
                        current_parents[s_prime] = s;
                        current_lengths[s_prime] = network_distance;
                        current_pruned[s_prime] = false;
                        model.breakage[t] = false;
                    }
                }
            }

            for (auto s_prime = 0u; s_prime < current_viterbi.size(); ++s_prime)
            {
                _debug_states.values[t]
                    .get<JSONVariantArray>().get().values[s_prime]
                    .get<JSONVariantObject>().get().values["viterbi"] =
                    makeJSONSafe(current_viterbi[s_prime]);
                _debug_states.values[t]
                    .get<JSONVariantArray>().get().values[s_prime]
                    .get<JSONVariantObject>().get().values["pruned"] =
                    static_cast<unsigned>(current_pruned[s_prime]);
            }

            if (model.breakage[t])
            {
                SimpleLogger().Write(logDEBUG) << "Broken!";
                // TODO we actually don't need to go to the beginning.
                // with temporal information we can split after _n_
                // skipped states
                if (prev_unbroken_timestamps.size() > 1)
                {
                    // remove both ends of the breakage
                    prev_unbroken_timestamps.pop_back();
                }
                // we reached the beginning of the trace and it is still broken
                // -> split the trace here
                else
                {
                    split_points.push_back(t);
                    // note this preserves everything before t
                    model.clear(t);
                    model.initialize(t);
                    prev_unbroken_timestamps.push_back(t);
                }
            }
            else
            {
                prev_unbroken_timestamps.push_back(t);
            }
        }
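
        // Close the final run, then backtrack: for every run between two
        // split points, start at the best candidate of the last unbroken
        // timestamp and follow the parent pointers back to the run's start.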
        if (prev_unbroken_timestamps.size() > 1)
        {
            split_points.push_back(prev_unbroken_timestamps.back() + 1);
        }

        unsigned sub_matching_begin = initial_timestamp;
        for (const unsigned sub_matching_end : split_points)
        {
            Matching::SubMatching matching;

            // matchings that only consist of one candidate are invalid
            if (sub_matching_end - sub_matching_begin < 2)
            {
                sub_matching_begin = sub_matching_end;
                continue;
            }

            SimpleLogger().Write(logDEBUG) << sub_matching_begin << " -> " << sub_matching_end;

            matching.begin_idx = sub_matching_begin;
            matching.end_idx = sub_matching_end;

            // loop through the columns, and only compare the last entry
            auto max_element_iter = std::max_element(model.viterbi[sub_matching_end - 1].begin(),
                                                     model.viterbi[sub_matching_end - 1].end());

            auto parent_index =
                std::distance(model.viterbi[sub_matching_end - 1].begin(), max_element_iter);
            std::deque<std::pair<std::size_t, std::size_t>> reconstructed_indices;
            for (auto i = sub_matching_end - 1; i > sub_matching_begin; --i)
            {
                if (model.breakage[i])
                {
                    continue;
                }
                reconstructed_indices.emplace_front(i, parent_index);
                parent_index = model.parents[i][parent_index];
            }
            // the run starts at sub_matching_begin, which for later runs is a
            // split point, not the very first timestamp of the trace
            reconstructed_indices.emplace_front(sub_matching_begin, parent_index);

            matching.length = 0.;
            matching.nodes.resize(reconstructed_indices.size());
            for (auto i = 0u; i < reconstructed_indices.size(); ++i)
            {
                auto timestamp_index = reconstructed_indices[i].first;
                auto location_index = reconstructed_indices[i].second;

                matching.nodes[i] = timestamp_list[timestamp_index][location_index].first;
                matching.length += model.path_lengths[timestamp_index][location_index];

                _debug_states.values[timestamp_index]
                    .get<JSONVariantArray>().get().values[location_index]
                    .get<JSONVariantObject>().get().values["chosen"] = true;
            }

            sub_matchings.push_back(matching);

            sub_matching_begin = sub_matching_end;
        }

        JSON::Array _debug_breakage;
        for (auto b : model.breakage)
        {
            _debug_breakage.values.push_back(static_cast<unsigned>(b));
        }

        _debug_info.values["breakage"] = _debug_breakage;
        _debug_info.values["states"] = _debug_states;
    }
};
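
// Usage sketch (illustrative; DataFacade, facade and the candidate lookup are
// placeholders for whatever the calling plugin provides, they are not defined
// in this header):
//
//   SearchEngineData engine_working_data;
//   MapMatching<DataFacade> matcher(facade, engine_working_data);
//
//   Matching::CandidateLists candidates;           // one list per GPS sample
//   std::vector<FixedPointCoordinate> coordinates; // the raw GPS samples
//   // ... fill both, e.g. from a nearest-road-segment query ...
//
//   Matching::SubMatchingList sub_matchings;
//   JSON::Object debug_info;
//   matcher(candidates, coordinates, sub_matchings, debug_info);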

// [1] "Hidden Markov Map Matching Through Noise and Sparseness";
//     P. Newson and J. Krumm; 2009; ACM GIS

#endif /* MAP_MATCHING_H */