diff --git a/.github/workflows/osrm-backend.yml b/.github/workflows/osrm-backend.yml index b743ee942..10df684c4 100644 --- a/.github/workflows/osrm-backend.yml +++ b/.github/workflows/osrm-backend.yml @@ -723,14 +723,34 @@ jobs: make -j$(nproc) benchmarks cd .. make -C test/data + # we run benchmarks in tmpfs to avoid impact of disk IO + - name: Create folder for tmpfs + run: mkdir -p /opt/benchmarks - name: Run PR Benchmarks run: | - ./pr/scripts/ci/run_benchmarks.sh -f $(pwd)/pr -r $(pwd)/pr_results -s $(pwd)/pr -b $(pwd)/pr/build -o ~/data.osm.pbf -g ~/gps_traces.csv + sudo mount -t tmpfs -o size=4g none /opt/benchmarks + cp -rf pr/build /opt/benchmarks/build + mkdir -p /opt/benchmarks/test + cp -rf pr/test/data /opt/benchmarks/test/data + cp -rf pr/profiles /opt/benchmarks/profiles + + ./pr/scripts/ci/run_benchmarks.sh -f /opt/benchmarks -r $(pwd)/pr_results -s $(pwd)/pr -b /opt/benchmarks/build -o ~/data.osm.pbf -g ~/gps_traces.csv + sudo umount /opt/benchmarks - name: Run Base Benchmarks run: | - # we intentionally use scripts from PR branch to be able to update them and see results in the same PR - ./pr/scripts/ci/run_benchmarks.sh -f $(pwd)/base -r $(pwd)/base_results -s $(pwd)/pr -b $(pwd)/base/build -o ~/data.osm.pbf -g ~/gps_traces.csv + sudo mount -t tmpfs -o size=4g none /opt/benchmarks + cp -rf base/build /opt/benchmarks/build + mkdir -p /opt/benchmarks/test + cp -rf base/test/data /opt/benchmarks/test/data + cp -rf base/profiles /opt/benchmarks/profiles + # TODO: remove it when base branch will have this file at needed location + if [ ! -f /opt/benchmarks/test/data/portugal_to_korea.json ]; then + cp base/src/benchmarks/portugal_to_korea.json /opt/benchmarks/test/data/portugal_to_korea.json + fi + # we intentionally use scripts from PR branch to be able to update them and see results in the same PR + ./pr/scripts/ci/run_benchmarks.sh -f /opt/benchmarks -r $(pwd)/base_results -s $(pwd)/pr -b /opt/benchmarks/build -o ~/data.osm.pbf -g ~/gps_traces.csv + sudo umount /opt/benchmarks - name: Post Benchmark Results run: | python3 pr/scripts/ci/post_benchmark_results.py base_results pr_results diff --git a/CHANGELOG.md b/CHANGELOG.md index 073f4cd49..1495a57d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,8 @@ - CHANGED: Use node-api instead of NAN. [#6452](https://github.com/Project-OSRM/osrm-backend/pull/6452) - Misc: - CHANGED: Re-use priority queue in StaticRTree. [#6952](https://github.com/Project-OSRM/osrm-backend/pull/6952) + - CHANGED: Optimise encodePolyline function. [#6940](https://github.com/Project-OSRM/osrm-backend/pull/6940) + - CHANGED: Avoid reallocations in base64 encoding. [#6951](https://github.com/Project-OSRM/osrm-backend/pull/6951) - CHANGED: Get rid of unused Boost dependencies. [#6960](https://github.com/Project-OSRM/osrm-backend/pull/6960) - CHANGED: Apply micro-optimisation for Table & Trip APIs. [#6949](https://github.com/Project-OSRM/osrm-backend/pull/6949) - CHANGED: Apply micro-optimisation for Route API. [#6948](https://github.com/Project-OSRM/osrm-backend/pull/6948) diff --git a/include/engine/base64.hpp b/include/engine/base64.hpp index d1fa8753c..042aa6d59 100644 --- a/include/engine/base64.hpp +++ b/include/engine/base64.hpp @@ -47,24 +47,29 @@ namespace engine // Encodes a chunk of memory to Base64. inline std::string encodeBase64(const unsigned char *first, std::size_t size) { - std::vector bytes{first, first + size}; - BOOST_ASSERT(!bytes.empty()); + BOOST_ASSERT(size > 0); - std::size_t bytes_to_pad{0}; + std::string encoded; + encoded.reserve(((size + 2) / 3) * 4); - while (bytes.size() % 3 != 0) + auto padding = (3 - size % 3) % 3; + + BOOST_ASSERT(padding == 0 || padding == 1 || padding == 2); + + for (auto itr = detail::Base64FromBinary(first); itr != detail::Base64FromBinary(first + size); + ++itr) { - bytes_to_pad += 1; - bytes.push_back(0); + encoded.push_back(*itr); } - BOOST_ASSERT(bytes_to_pad == 0 || bytes_to_pad == 1 || bytes_to_pad == 2); - BOOST_ASSERT_MSG(0 == bytes.size() % 3, "base64 input data size is not a multiple of 3"); + for (size_t index = 0; index < padding; ++index) + { + encoded.push_back('='); + } - std::string encoded{detail::Base64FromBinary{bytes.data()}, - detail::Base64FromBinary{bytes.data() + (bytes.size() - bytes_to_pad)}}; + BOOST_ASSERT(encoded.size() == (size + 2) / 3 * 4); - return encoded.append(bytes_to_pad, '='); + return encoded; } // C++11 standard 3.9.1/1: Plain char, signed char, and unsigned char are three distinct types diff --git a/include/engine/polyline_compressor.hpp b/include/engine/polyline_compressor.hpp index b2a505764..af58b0422 100644 --- a/include/engine/polyline_compressor.hpp +++ b/include/engine/polyline_compressor.hpp @@ -12,7 +12,7 @@ namespace osrm::engine { namespace detail { -std::string encode(std::vector &numbers); +void encode(int number_to_encode, std::string &output); std::int32_t decode_polyline_integer(std::string::const_iterator &first, std::string::const_iterator last); } // namespace detail @@ -30,27 +30,24 @@ std::string encodePolyline(CoordVectorForwardIter begin, CoordVectorForwardIter return {}; } - std::vector delta_numbers; - BOOST_ASSERT(size > 0); - delta_numbers.reserve((size - 1) * 2); + std::string output; + // just a guess that we will need ~4 bytes per coordinate to avoid reallocations + output.reserve(size * 4); + int current_lat = 0; int current_lon = 0; - std::for_each( - begin, - end, - [&delta_numbers, ¤t_lat, ¤t_lon, coordinate_to_polyline]( - const util::Coordinate loc) - { - const int lat_diff = - std::round(static_cast(loc.lat) * coordinate_to_polyline) - current_lat; - const int lon_diff = - std::round(static_cast(loc.lon) * coordinate_to_polyline) - current_lon; - delta_numbers.emplace_back(lat_diff); - delta_numbers.emplace_back(lon_diff); - current_lat += lat_diff; - current_lon += lon_diff; - }); - return detail::encode(delta_numbers); + for (auto it = begin; it != end; ++it) + { + const int lat_diff = + std::round(static_cast(it->lat) * coordinate_to_polyline) - current_lat; + const int lon_diff = + std::round(static_cast(it->lon) * coordinate_to_polyline) - current_lon; + detail::encode(lat_diff, output); + detail::encode(lon_diff, output); + current_lat += lat_diff; + current_lon += lon_diff; + } + return output; } // Decodes geometry from polyline format diff --git a/scripts/ci/run_benchmarks.sh b/scripts/ci/run_benchmarks.sh index a0f32ece5..0d0324d13 100755 --- a/scripts/ci/run_benchmarks.sh +++ b/scripts/ci/run_benchmarks.sh @@ -64,7 +64,7 @@ function run_benchmarks_for_folder { echo "Running alias" $BENCHMARKS_FOLDER/alias-bench > "$RESULTS_FOLDER/alias.bench" echo "Running json-render-bench" - $BENCHMARKS_FOLDER/json-render-bench "$FOLDER/src/benchmarks/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench" + $BENCHMARKS_FOLDER/json-render-bench "$FOLDER/test/data/portugal_to_korea.json" > "$RESULTS_FOLDER/json-render.bench" echo "Running packedvector-bench" $BENCHMARKS_FOLDER/packedvector-bench > "$RESULTS_FOLDER/packedvector.bench" echo "Running rtree-bench" diff --git a/src/engine/polyline_compressor.cpp b/src/engine/polyline_compressor.cpp index 3d4500643..7435fed3f 100644 --- a/src/engine/polyline_compressor.cpp +++ b/src/engine/polyline_compressor.cpp @@ -10,9 +10,19 @@ namespace osrm::engine::detail // anonymous to keep TU local { -std::string encode(int number_to_encode) +void encode(int number_to_encode, std::string &output) { - std::string output; + if (number_to_encode < 0) + { + const unsigned binary = std::llabs(number_to_encode); + const unsigned twos = (~binary) + 1u; + const unsigned shl = twos << 1u; + number_to_encode = static_cast(~shl); + } + else + { + number_to_encode <<= 1u; + } while (number_to_encode >= 0x20) { const int next_value = (0x20 | (number_to_encode & 0x1f)) + 63; @@ -22,31 +32,6 @@ std::string encode(int number_to_encode) number_to_encode += 63; output += static_cast(number_to_encode); - return output; -} - -std::string encode(std::vector &numbers) -{ - std::string output; - for (auto &number : numbers) - { - if (number < 0) - { - const unsigned binary = std::llabs(number); - const unsigned twos = (~binary) + 1u; - const unsigned shl = twos << 1u; - number = static_cast(~shl); - } - else - { - number <<= 1u; - } - } - for (const int number : numbers) - { - output += encode(number); - } - return output; } // https://developers.google.com/maps/documentation/utilities/polylinealgorithm diff --git a/src/benchmarks/portugal_to_korea.json b/test/data/portugal_to_korea.json similarity index 100% rename from src/benchmarks/portugal_to_korea.json rename to test/data/portugal_to_korea.json diff --git a/unit_tests/engine/base64.cpp b/unit_tests/engine/base64.cpp index 3adb7db3e..60e5d73ca 100644 --- a/unit_tests/engine/base64.cpp +++ b/unit_tests/engine/base64.cpp @@ -74,4 +74,98 @@ BOOST_AUTO_TEST_CASE(hint_encoding_decoding_roundtrip_bytewise) reinterpret_cast(&decoded))); } +BOOST_AUTO_TEST_CASE(long_string_encoding) +{ + using namespace osrm::engine; + std::string long_string(1000, 'A'); // String of 1000 'A's + std::string encoded = encodeBase64(long_string); + BOOST_CHECK_EQUAL(decodeBase64(encoded), long_string); +} + +BOOST_AUTO_TEST_CASE(invalid_base64_decoding) +{ + using namespace osrm::engine; + BOOST_CHECK_THROW(decodeBase64("Invalid!"), std::exception); +} + +BOOST_AUTO_TEST_CASE(hint_serialization_size) +{ + using namespace osrm::engine; + using namespace osrm::util; + + const Coordinate coordinate; + const PhantomNode phantom; + const osrm::test::MockDataFacade facade{}; + + const SegmentHint hint{phantom, facade.GetCheckSum()}; + const auto base64 = hint.ToBase64(); + + BOOST_CHECK_EQUAL(base64.size(), 112); +} + +BOOST_AUTO_TEST_CASE(extended_roundtrip_tests) +{ + using namespace osrm::engine; + + std::vector test_strings = { + "Hello, World!", // Simple ASCII string + "1234567890", // Numeric string + "!@#$%^&*()_+", // Special characters + std::string(1000, 'A'), // Long repeating string + "¡Hola, mundo!", // Non-ASCII characters + "こんにちは、世界!", // Unicode characters + std::string("\x00\x01\x02\x03", 4), // Binary data + "a", // Single character + "ab", // Two characters + "abc", // Three characters (no padding in Base64) + std::string(190, 'x') // String that doesn't align with Base64 padding + }; + + for (const auto &test_str : test_strings) + { + std::string encoded = encodeBase64(test_str); + std::string decoded = decodeBase64(encoded); + BOOST_CHECK_EQUAL(decoded, test_str); + + // Additional checks + BOOST_CHECK(encoded.find_first_not_of( + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=") == + std::string::npos); + if (test_str.length() % 3 != 0) + { + BOOST_CHECK(encoded.back() == '='); + } + } +} + +BOOST_AUTO_TEST_CASE(roundtrip_with_url_safe_chars) +{ + using namespace osrm::engine; + + std::string original = "Hello+World/Nothing?Is:Impossible"; + std::string encoded = encodeBase64(original); + + // Replace '+' with '-' and '/' with '_' + std::replace(encoded.begin(), encoded.end(), '+', '-'); + std::replace(encoded.begin(), encoded.end(), '/', '_'); + + std::string decoded = decodeBase64(encoded); + BOOST_CHECK_EQUAL(decoded, original); +} + +BOOST_AUTO_TEST_CASE(roundtrip_stress_test) +{ + using namespace osrm::engine; + + std::string test_str; + for (int i = 0; i < 1000; ++i) + { + test_str += static_cast(i % 256); + } + + std::string encoded = encodeBase64(test_str); + std::string decoded = decodeBase64(encoded); + BOOST_CHECK_EQUAL(decoded, test_str); +} + BOOST_AUTO_TEST_SUITE_END() diff --git a/unit_tests/engine/polyline_compressor.cpp b/unit_tests/engine/polyline_compressor.cpp index fa05512a9..e2a9a5fd7 100644 --- a/unit_tests/engine/polyline_compressor.cpp +++ b/unit_tests/engine/polyline_compressor.cpp @@ -45,4 +45,96 @@ BOOST_AUTO_TEST_CASE(polyline6_test_case) decodePolyline<1000000>(encodePolyline<1000000>(coords.begin(), coords.end())).begin())); } +BOOST_AUTO_TEST_CASE(empty_polyline_test) +{ + using namespace osrm::engine; + using namespace osrm::util; + + std::vector empty_coords; + BOOST_CHECK_EQUAL(encodePolyline(empty_coords.begin(), empty_coords.end()), ""); + BOOST_CHECK(decodePolyline("").empty()); +} +BOOST_AUTO_TEST_CASE(polyline_single_point_test) +{ + using namespace osrm::engine; + using namespace osrm::util; + + const std::vector coords({{FixedLongitude{-122414000}, FixedLatitude{37776000}}}); + + const std::string encoded = encodePolyline(coords.begin(), coords.end()); + BOOST_CHECK_EQUAL(encoded, "_cqeFn~cjV"); + + const auto decoded = decodePolyline(encoded); + BOOST_CHECK_EQUAL(decoded.size(), 1); + BOOST_CHECK_EQUAL(decoded[0].lon, FixedLongitude{-122414000}); + BOOST_CHECK_EQUAL(decoded[0].lat, FixedLatitude{37776000}); +} + +BOOST_AUTO_TEST_CASE(polyline_multiple_points_test) +{ + using namespace osrm::engine; + using namespace osrm::util; + + const std::vector coords({{FixedLongitude{-122414000}, FixedLatitude{37776000}}, + {FixedLongitude{-122420000}, FixedLatitude{37779000}}, + {FixedLongitude{-122421000}, FixedLatitude{37780000}}}); + + const std::string encoded = encodePolyline(coords.begin(), coords.end()); + BOOST_CHECK_EQUAL(encoded, "_cqeFn~cjVwQnd@gEfE"); + + const auto decoded = decodePolyline(encoded); + BOOST_CHECK_EQUAL(decoded.size(), 3); + for (size_t i = 0; i < coords.size(); ++i) + { + BOOST_CHECK_EQUAL(decoded[i].lon, coords[i].lon); + BOOST_CHECK_EQUAL(decoded[i].lat, coords[i].lat); + } +} + +BOOST_AUTO_TEST_CASE(polyline_large_coordinate_difference_test) +{ + using namespace osrm::engine; + using namespace osrm::util; + + const std::vector coords({{FixedLongitude{-179000000}, FixedLatitude{-89000000}}, + {FixedLongitude{179000000}, FixedLatitude{89000000}}}); + + const std::string encoded = encodePolyline(coords.begin(), coords.end()); + BOOST_CHECK_EQUAL(encoded, "~xe~O~|oca@_sl}`@_{`hcA"); + + const auto decoded = decodePolyline(encoded); + BOOST_CHECK_EQUAL(decoded.size(), 2); + for (size_t i = 0; i < coords.size(); ++i) + { + BOOST_CHECK_EQUAL(decoded[i].lon, coords[i].lon); + BOOST_CHECK_EQUAL(decoded[i].lat, coords[i].lat); + } +} + +BOOST_AUTO_TEST_CASE(roundtrip) +{ + using namespace osrm::engine; + using namespace osrm::util; + + { + const auto encoded = "_chxEn`zvN\\\\]]"; + const auto decoded = decodePolyline(encoded); + const auto reencoded = encodePolyline(decoded.begin(), decoded.end()); + BOOST_CHECK_EQUAL(encoded, reencoded); + } + { + const auto encoded = + "gcneIpgxzRcDnBoBlEHzKjBbHlG`@`IkDxIiKhKoMaLwTwHeIqHuAyGXeB~Ew@fFjAtIzExF"; + const auto decoded = decodePolyline(encoded); + const auto reencoded = encodePolyline(decoded.begin(), decoded.end()); + BOOST_CHECK_EQUAL(encoded, reencoded); + } + { + const auto encoded = "_p~iF~ps|U_ulLnnqC_mqNvxq`@"; + const auto decoded = decodePolyline(encoded); + const auto reencoded = encodePolyline(decoded.begin(), decoded.end()); + BOOST_CHECK_EQUAL(encoded, reencoded); + } +} + BOOST_AUTO_TEST_SUITE_END()