From 0f0db4c823184eb9d05811210333ff6e17f4a17e Mon Sep 17 00:00:00 2001 From: "Daniel J. Hofmann" Date: Thu, 24 Mar 2016 12:31:05 +0100 Subject: [PATCH] Provide correct Base64 implementation. Phew, this was painful. Turns out most hints out there on how to use the Boost serialization iterators are wrong. Here's why: transform_width<6, 8> needs an input stream whose length in bits is a common multiple of 6 and 8. That is, the padding needs to happen _before_ using the provided iterators, otherwise the behavior is undefined! See: http://www.boost.org/doc/libs/1_60_0/boost/archive/iterators/transform_width.hpp Thanks @mokob for pointing that out to me! We also need to manually append one padding char "=" to the encoded result for every byte we had to append to the input to conform to the rule above. Decoding then determines the number of padding chars by counting the "=" characters and uses that count to strip the trailing bytes from the decoded result. --- include/engine/base64.hpp | 57 ++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/include/engine/base64.hpp b/include/engine/base64.hpp index 1fc63b48d..648296183 100644 --- a/include/engine/base64.hpp +++ b/include/engine/base64.hpp @@ -2,6 +2,7 @@ #define OSRM_BASE64_HPP #include +#include #include #include @@ -16,12 +17,26 @@ // RFC 4648 "The Base16, Base32, and Base64 Data Encodings" // See: https://tools.ietf.org/html/rfc4648 -// Implementation adapted from: http://stackoverflow.com/a/28471421 +namespace detail +{ // The C++ standard guarantees none of this by default, but we need it in the following. 
static_assert(CHAR_BIT == 8u, "we assume a byte holds 8 bits"); static_assert(sizeof(char) == 1u, "we assume a char is one byte large"); +using Base64FromBinary = boost::archive::iterators::base64_from_binary< + boost::archive::iterators::transform_width>; + +using BinaryFromBase64 = boost::archive::iterators::transform_width< + boost::archive::iterators::binary_from_base64, + 8, // get a view of 8 bit + 6 // from a sequence of 6 bit + >; +} // ns detail + namespace osrm { namespace engine @@ -32,18 +47,24 @@ namespace engine // Encodes a chunk of memory to Base64. inline std::string encodeBase64(const unsigned char *first, std::size_t size) { - using namespace boost::archive::iterators; + std::vector bytes{first, first + size}; + BOOST_ASSERT(!bytes.empty()); - const std::string bytes{first, first + size}; + std::size_t bytes_to_pad{0}; - using Iter = base64_from_binary>; + while (bytes.size() % 3 != 0) + { + bytes_to_pad += 1; + bytes.push_back(0); + } - Iter view_first{begin(bytes)}; - Iter view_last{end(bytes)}; + BOOST_ASSERT(bytes_to_pad == 0 || bytes_to_pad == 1 || bytes_to_pad == 2); + BOOST_ASSERT_MSG(0 == bytes.size() % 3, "base64 input data size is not a multiple of 3"); - std::string encoded{view_first, view_last}; + std::string encoded{detail::Base64FromBinary{bytes.data()}, + detail::Base64FromBinary{bytes.data() + (bytes.size() - bytes_to_pad)}}; - return encoded.append((3 - size % 3) % 3, '='); + return encoded.append(bytes_to_pad, '='); } // C++11 standard 3.9.1/1: Plain char, signed char, and unsigned char are three distinct types @@ -78,22 +99,16 @@ template std::string encodeBase64Bytewise(const T &x) // Decodes into a chunk of memory that is at least as large as the input. 
template void decodeBase64(const std::string &encoded, OutputIter out) { - using namespace boost::archive::iterators; - using namespace boost::algorithm; + auto unpadded = encoded; - using Iter = transform_width, 8, 6>; + const auto num_padded = std::count(begin(encoded), end(encoded), '='); + std::replace(begin(unpadded), end(unpadded), '=', 'A'); // A_64 == \0 - Iter view_first{begin(encoded)}; - Iter view_last{end(encoded)}; + std::string decoded{detail::BinaryFromBase64{begin(unpadded)}, + detail::BinaryFromBase64{begin(unpadded) + unpadded.length()}}; - const auto null = [](const unsigned char c) - { - return c == '\0'; - }; - - const auto bytes = trim_right_copy_if(std::string{view_first, view_last}, null); - - boost::copy(bytes, out); + decoded.erase(end(decoded) - num_padded, end(decoded)); + std::copy(begin(decoded), end(decoded), out); } // Convenience specialization, filling string instead of byte-dumping into it.