Avoid reallocations in base64 encoding (#6951)

This commit is contained in:
Siarhei Fedartsou 2024-06-22 08:50:18 +02:00 committed by GitHub
parent e8da3d9231
commit 3d01d96036
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 111 additions and 11 deletions

View File

@ -24,6 +24,7 @@
- NodeJS:
- CHANGED: Use node-api instead of NAN. [#6452](https://github.com/Project-OSRM/osrm-backend/pull/6452)
- Misc:
- CHANGED: Avoid reallocations in base64 encoding. [#6951](https://github.com/Project-OSRM/osrm-backend/pull/6951)
- CHANGED: Get rid of unused Boost dependencies. [#6960](https://github.com/Project-OSRM/osrm-backend/pull/6960)
- CHANGED: Apply micro-optimisation for Table & Trip APIs. [#6949](https://github.com/Project-OSRM/osrm-backend/pull/6949)
- CHANGED: Apply micro-optimisation for Route API. [#6948](https://github.com/Project-OSRM/osrm-backend/pull/6948)

View File

@ -47,24 +47,29 @@ namespace engine
// Encodes a chunk of memory to Base64.
//
// `first` points at the first of `size` bytes to encode; `size` is expected to be
// non-zero (checked with BOOST_ASSERT). Returns the encoded text, padded with '='
// so that its length is a multiple of four characters.
inline std::string encodeBase64(const unsigned char *first, std::size_t size)
{
    BOOST_ASSERT(size > 0);

    // Reserve the final length up front so the appends below never reallocate.
    std::string encoded;
    encoded.reserve(((size + 2) / 3) * 4);

    // Number of '=' characters needed to complete the last group of four.
    const std::size_t padding = (3 - size % 3) % 3;
    BOOST_ASSERT(padding == 0 || padding == 1 || padding == 2);

    // Encode directly from the caller's buffer, one output character at a time.
    const auto last = detail::Base64FromBinary(first + size);
    for (auto itr = detail::Base64FromBinary(first); itr != last; ++itr)
    {
        encoded.push_back(*itr);
    }
    encoded.append(padding, '=');

    BOOST_ASSERT(encoded.size() == (size + 2) / 3 * 4);
    return encoded;
}
// C++11 standard 3.9.1/1: Plain char, signed char, and unsigned char are three distinct types

View File

@ -74,4 +74,98 @@ BOOST_AUTO_TEST_CASE(hint_encoding_decoding_roundtrip_bytewise)
reinterpret_cast<const unsigned char *>(&decoded)));
}
// Round-trips a 1000-character string of 'A's through encodeBase64 / decodeBase64.
BOOST_AUTO_TEST_CASE(long_string_encoding)
{
    using namespace osrm::engine;

    const std::string input(1000, 'A');
    const std::string base64 = encodeBase64(input);

    BOOST_CHECK_EQUAL(decodeBase64(base64), input);
}
// Decoding text that contains a character outside the Base64 alphabet ('!') must throw.
BOOST_AUTO_TEST_CASE(invalid_base64_decoding)
{
    using namespace osrm::engine;

    const std::string malformed{"Invalid!"};
    BOOST_CHECK_THROW(decodeBase64(malformed), std::exception);
}
// Checks the length of the Base64 text produced by SegmentHint::ToBase64() for a hint
// built from a default-constructed PhantomNode and the mock facade's checksum.
BOOST_AUTO_TEST_CASE(hint_serialization_size)
{
    using namespace osrm::engine;
    using namespace osrm::util;

    const PhantomNode phantom;
    const osrm::test::MockDataFacade<osrm::engine::routing_algorithms::ch::Algorithm> facade{};
    const SegmentHint hint{phantom, facade.GetCheckSum()};

    const auto base64 = hint.ToBase64();
    // Unsigned literal: size() is presumably unsigned, so this avoids a
    // signed/unsigned comparison inside BOOST_CHECK_EQUAL.
    BOOST_CHECK_EQUAL(base64.size(), 112u);
}
// Round-trips a variety of inputs (ASCII, non-ASCII text, embedded NUL bytes, lengths
// with and without padding) and checks the shape of the encoded text.
BOOST_AUTO_TEST_CASE(extended_roundtrip_tests)
{
    using namespace osrm::engine;

    // Every character that may legally appear in the encoded output.
    const std::string base64_alphabet =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

    const std::vector<std::string> inputs = {
        "Hello, World!",                    // Simple ASCII string
        "1234567890",                       // Numeric string
        "!@#$%^&*()_+",                     // Special characters
        std::string(1000, 'A'),             // Long repeating string
        "¡Hola, mundo!",                    // Non-ASCII characters
        "こんにちは、世界!",                // Unicode characters
        std::string("\x00\x01\x02\x03", 4), // Binary data
        "a",                                // Single character
        "ab",                               // Two characters
        "abc",                              // Three characters (no padding in Base64)
        std::string(190, 'x')               // String that doesn't align with Base64 padding
    };

    for (const std::string &input : inputs)
    {
        const std::string encoded = encodeBase64(input);
        const std::string decoded = decodeBase64(encoded);
        BOOST_CHECK_EQUAL(decoded, input);

        // The output must consist solely of alphabet characters and '=' padding.
        const bool alphabet_only =
            encoded.find_first_not_of(base64_alphabet) == std::string::npos;
        BOOST_CHECK(alphabet_only);

        // A length that is not a multiple of three must end in padding.
        const bool needs_padding = input.length() % 3 != 0;
        if (needs_padding)
        {
            BOOST_CHECK(encoded.back() == '=');
        }
    }
}
// Encodes a string, rewrites the output with the URL-safe characters and decodes it again.
// NOTE(review): the encoding of this particular ASCII input may not contain '+' or '/'
// at all, in which case the substitution below is a no-op - confirm that the test
// exercises what its name suggests.
BOOST_AUTO_TEST_CASE(roundtrip_with_url_safe_chars)
{
    using namespace osrm::engine;

    const std::string original = "Hello+World/Nothing?Is:Impossible";
    std::string encoded = encodeBase64(original);

    // Map '+' to '-' and '/' to '_' in place.
    for (char &symbol : encoded)
    {
        if (symbol == '+')
        {
            symbol = '-';
        }
        else if (symbol == '/')
        {
            symbol = '_';
        }
    }

    const std::string decoded = decodeBase64(encoded);
    BOOST_CHECK_EQUAL(decoded, original);
}
// Round-trips a 1000-byte buffer that cycles through every possible byte value.
BOOST_AUTO_TEST_CASE(roundtrip_stress_test)
{
    using namespace osrm::engine;

    // Byte at position p holds the value p % 256.
    std::string payload(1000, '\0');
    for (std::size_t position = 0; position < payload.size(); ++position)
    {
        payload[position] = static_cast<char>(position % 256);
    }

    const std::string encoded = encodeBase64(payload);
    const std::string decoded = decodeBase64(encoded);
    BOOST_CHECK_EQUAL(decoded, payload);
}
BOOST_AUTO_TEST_SUITE_END()