Provide correct Base64 implementation.

Phew, this was painful. Turns out most hints out there on how to use the
Boost serialization iterators are wrong. Here's why:

    transform_width<6, 8>

needs an input stream whose length in bits is a common multiple of 6 and 8 (i.e. a multiple of 24 bits, or 3 bytes).

That is, the padding needs to happen _before_ using the provided
iterators, otherwise the behavior is undefined!

See: http://www.boost.org/doc/libs/1_60_0/boost/archive/iterators/transform_width.hpp

Thanks @mokob for pointing that out to me!

We also need to manually append one padding char "=" to the encoded
result for every byte we had to append to the input to conform to the
rule above.

Decoding then determines the number of padding chars by counting the "="
characters and uses that count to strip the trailing bytes from the decoded
result.
This commit is contained in:
Daniel J. Hofmann 2016-03-24 12:31:05 +01:00 committed by Patrick Niklaus
parent c3e7e96dce
commit 0f0db4c823

View File

@ -2,6 +2,7 @@
#define OSRM_BASE64_HPP
#include <string>
#include <vector>
#include <iterator>
#include <type_traits>
@ -16,12 +17,26 @@
// RFC 4648 "The Base16, Base32, and Base64 Data Encodings"
// See: https://tools.ietf.org/html/rfc4648
// Implementation adapted from: http://stackoverflow.com/a/28471421
namespace detail
{
// The iterator aliases below reinterpret memory as a stream of 8 bit chars.
// sizeof(char) == 1 always holds per the standard; CHAR_BIT == 8 is not guaranteed.
// Both are asserted so that a violated assumption fails at compile time.
static_assert(8u == CHAR_BIT, "we assume a byte holds 8 bits");
static_assert(1u == sizeof(char), "we assume a char is one byte large");

// Decoding direction: maps Base64 characters of a std::string to 6 bit values
// and regroups that sequence into 8 bit units.
using BinaryFromBase64 = boost::archive::iterators::transform_width<
    boost::archive::iterators::binary_from_base64<std::string::const_iterator>, 8, 6>;

// Encoding direction: regroups a sequence of 8 bit chars into 6 bit units
// and maps each unit to its Base64 character.
using Base64FromBinary = boost::archive::iterators::base64_from_binary<
    boost::archive::iterators::transform_width<const char *, 6, 8>>;
} // ns detail
namespace osrm
{
namespace engine
@ -32,18 +47,24 @@ namespace engine
// Encodes a chunk of memory to Base64.
//
// first: pointer to the first byte of the chunk; the bytes in [first, first + size) are copied.
// size:  number of bytes in the chunk.
// Returns the Base64 text with '=' characters appended as padding.
//
// NOTE(review): this listing appears to show the pre- and post-change lines of a diff without
// +/- markers: `bytes` and `encoded` are each declared twice and there are two return statements,
// so it does not compile as shown. Presumably the variant using `bytes_to_pad` and
// `detail::Base64FromBinary` is the post-change code - confirm against the repository.
inline std::string encodeBase64(const unsigned char *first, std::size_t size)
{
using namespace boost::archive::iterators;
std::vector<unsigned char> bytes{first, first + size};
// Empty input is only guarded by an assertion.
BOOST_ASSERT(!bytes.empty());
const std::string bytes{first, first + size};
std::size_t bytes_to_pad{0};
using Iter = base64_from_binary<transform_width<std::string::const_iterator, 6, 8>>;
// Append zero bytes until the length is a multiple of 3, counting how many were added.
while (bytes.size() % 3 != 0)
{
bytes_to_pad += 1;
bytes.push_back(0);
}
Iter view_first{begin(bytes)};
Iter view_last{end(bytes)};
BOOST_ASSERT(bytes_to_pad == 0 || bytes_to_pad == 1 || bytes_to_pad == 2);
BOOST_ASSERT_MSG(0 == bytes.size() % 3, "base64 input data size is not a multiple of 3");
std::string encoded{view_first, view_last};
// The end iterator sits bytes_to_pad before the end of the buffer, i.e. right after the
// original input bytes.
std::string encoded{detail::Base64FromBinary{bytes.data()},
detail::Base64FromBinary{bytes.data() + (bytes.size() - bytes_to_pad)}};
// Append the '=' padding: 0, 1 or 2 characters in either variant.
return encoded.append((3 - size % 3) % 3, '=');
return encoded.append(bytes_to_pad, '=');
}
// C++11 standard 3.9.1/1: Plain char, signed char, and unsigned char are three distinct types
@ -78,22 +99,16 @@ template <typename T> std::string encodeBase64Bytewise(const T &x)
// Decodes into a chunk of memory that is at least as large as the input.
//
// encoded: Base64 text; '=' characters are treated as padding.
// out:     output iterator that receives the decoded bytes.
//
// NOTE(review): this listing appears to show the pre- and post-change lines of a diff without
// +/- markers: two decoding paths and two copies into `out` are present. Presumably the variant
// using `unpadded`, `num_padded` and `detail::BinaryFromBase64` is the post-change code -
// confirm against the repository.
template <typename OutputIter> void decodeBase64(const std::string &encoded, OutputIter out)
{
using namespace boost::archive::iterators;
using namespace boost::algorithm;
auto unpadded = encoded;
using Iter = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
// Counts every '=' in the input, not only trailing ones.
const auto num_padded = std::count(begin(encoded), end(encoded), '=');
std::replace(begin(unpadded), end(unpadded), '=', 'A'); // A_64 == \0
Iter view_first{begin(encoded)};
Iter view_last{end(encoded)};
std::string decoded{detail::BinaryFromBase64{begin(unpadded)},
detail::BinaryFromBase64{begin(unpadded) + unpadded.length()}};
const auto null = [](const unsigned char c)
{
return c == '\0';
};
// Strips trailing '\0' bytes from the decoded text.
// NOTE(review): this would presumably also drop genuine trailing zero bytes of the payload.
const auto bytes = trim_right_copy_if(std::string{view_first, view_last}, null);
boost::copy(bytes, out);
// Drop one decoded byte per '=' counted above.
// NOTE(review): nothing here checks that num_padded <= decoded.size() - verify for malformed input.
decoded.erase(end(decoded) - num_padded, end(decoded));
std::copy(begin(decoded), end(decoded), out);
}
// Convenience specialization, filling string instead of byte-dumping into it.