Provide correct Base64 implementation.

Phew, this was painful. It turns out that most hints out there on how to use the Boost serialization iterators are wrong. Here's why: transform_width<6, 8> needs an input whose length in bits is a common multiple of 6 and 8 (i.e. a multiple of 24 bits, or 3 bytes). That is, the padding needs to happen _before_ using the provided iterators, otherwise the behavior is undefined! See: http://www.boost.org/doc/libs/1_60_0/boost/archive/iterators/transform_width.hpp. Thanks @mokob for pointing that out to me! We also need to manually append one padding char "=" to the encoded result for each byte we had to append to the input to conform to the rule above. Decoding then determines the number of padding chars by counting the "=" characters and uses that count to strip the corresponding trailing bytes from the decoded result.
2016-03-24 12:31:05 +01:00 · 2016-03-24 12:31:05 +01:00 · 0f0db4c823
commit 0f0db4c823
parent c3e7e96dce
1 changed files with 36 additions and 21 deletions
--- a/include/engine/base64.hpp
+++ b/include/engine/base64.hpp
@ -2,6 +2,7 @@
 #define OSRM_BASE64_HPP

 #include <string>
+#include <vector>
 #include <iterator>
 #include <type_traits>

@ -16,12 +17,26 @@

 // RFC 4648 "The Base16, Base32, and Base64 Data Encodings"
 // See: https://tools.ietf.org/html/rfc4648
-// Implementation adapted from: http://stackoverflow.com/a/28471421

+namespace detail
+{
 // The C++ standard guarantees none of this by default, but we need it in the following.
 static_assert(CHAR_BIT == 8u, "we assume a byte holds 8 bits");
 static_assert(sizeof(char) == 1u, "we assume a char is one byte large");

+using Base64FromBinary = boost::archive::iterators::base64_from_binary<
+    boost::archive::iterators::transform_width<const char *, // sequence of chars
+                                               6,            // get view of 6 bit
+                                               8             // from sequence of 8 bit
+                                               >>;
+
+using BinaryFromBase64 = boost::archive::iterators::transform_width<
+    boost::archive::iterators::binary_from_base64<std::string::const_iterator>,
+    8, // get a view of 8 bit
+    6  // from a sequence of 6 bit
+    >;
+} // ns detail
+
 namespace osrm
 {
 namespace engine
@ -32,18 +47,24 @@ namespace engine
 // Encodes a chunk of memory to Base64.
 inline std::string encodeBase64(const unsigned char *first, std::size_t size)
 {
-    using namespace boost::archive::iterators;
+    std::vector<unsigned char> bytes{first, first + size};
+    BOOST_ASSERT(!bytes.empty());

-    const std::string bytes{first, first + size};
+    std::size_t bytes_to_pad{0};

-    using Iter = base64_from_binary<transform_width<std::string::const_iterator, 6, 8>>;
+    while (bytes.size() % 3 != 0)
+    {
+        bytes_to_pad += 1;
+        bytes.push_back(0);
+    }

-    Iter view_first{begin(bytes)};
-    Iter view_last{end(bytes)};
+    BOOST_ASSERT(bytes_to_pad == 0 || bytes_to_pad == 1 || bytes_to_pad == 2);
+    BOOST_ASSERT_MSG(0 == bytes.size() % 3, "base64 input data size is not a multiple of 3");

-    std::string encoded{view_first, view_last};
+    std::string encoded{detail::Base64FromBinary{bytes.data()},
+                        detail::Base64FromBinary{bytes.data() + (bytes.size() - bytes_to_pad)}};

-    return encoded.append((3 - size % 3) % 3, '=');
+    return encoded.append(bytes_to_pad, '=');
 }

 // C++11 standard 3.9.1/1: Plain char, signed char, and unsigned char are three distinct types
@ -78,22 +99,16 @@ template <typename T> std::string encodeBase64Bytewise(const T &x)
 // Decodes into a chunk of memory that is at least as large as the input.
 template <typename OutputIter> void decodeBase64(const std::string &encoded, OutputIter out)
 {
-    using namespace boost::archive::iterators;
-    using namespace boost::algorithm;
+    auto unpadded = encoded;

-    using Iter = transform_width<binary_from_base64<std::string::const_iterator>, 8, 6>;
+    const auto num_padded = std::count(begin(encoded), end(encoded), '=');
+    std::replace(begin(unpadded), end(unpadded), '=', 'A'); // A_64 == \0

-    Iter view_first{begin(encoded)};
-    Iter view_last{end(encoded)};
+    std::string decoded{detail::BinaryFromBase64{begin(unpadded)},
+                        detail::BinaryFromBase64{begin(unpadded) + unpadded.length()}};

-    const auto null = [](const unsigned char c)
-    {
-        return c == '\0';
-    };
-
-    const auto bytes = trim_right_copy_if(std::string{view_first, view_last}, null);
-
-    boost::copy(bytes, out);
+    decoded.erase(end(decoded) - num_padded, end(decoded));
+    std::copy(begin(decoded), end(decoded), out);
 }

 // Convenience specialization, filling string instead of byte-dumping into it.