Copy upstream code as of Chromium 39d53

This is needed for the libchrome r1075024 uprev, in which base/base64.cc
uses the new function modp_b64_encode_data (see crrev.com/1074948).

BUG=None
TEST=emerge modp_b64

Change-Id: I4fefddd140d431ee6fca4d27ffa8199372e4bebb
Reviewed-on: https://chromium-review.googlesource.com/c/aosp/platform/external/modp_b64/+/4060326
Tested-by: Grace Cham <hscham@chromium.org>
Commit-Queue: Grace Cham <hscham@chromium.org>
Reviewed-by: Manoj Gupta <manojgupta@chromium.org>
diff --git a/modp_b64.cc b/modp_b64.cc
index fdb8a40..2901a8f 100644
--- a/modp_b64.cc
+++ b/modp_b64.cc
@@ -45,39 +45,11 @@
 /* public header */
 #include "modp_b64.h"
 
-/*
- * If you are ripping this out of the library, comment out the next
- * line and uncomment the next lines as approrpiate
- */
-//#include "config.h"
-
-/* if on motoral, sun, ibm; uncomment this */
-/* #define WORDS_BIGENDIAN 1 */
-/* else for Intel, Amd; uncomment this */
-/* #undef WORDS_BIGENDIAN */
-
 #include "modp_b64_data.h"
 
 #define BADCHAR 0x01FFFFFF
 
-/**
- * you can control if we use padding by commenting out this
- * next line.  However, I highly recommend you use padding and not
- * using it should only be for compatability with a 3rd party.
- * Also, 'no padding' is not tested!
- */
-#define DOPAD 1
-
-/*
- * if we aren't doing padding
- * set the pad character to NULL
- */
-#ifndef DOPAD
-#undef CHARPAD
-#define CHARPAD '\0'
-#endif
-
-size_t modp_b64_encode(char* dest, const char* str, size_t len)
+size_t modp_b64_encode_data(char* dest, const char* str, size_t len)
 {
     size_t i = 0;
     uint8_t* p = (uint8_t*) dest;
@@ -113,97 +85,42 @@
         *p++ = CHARPAD;
     }
 
-    *p = '\0';
     return p - (uint8_t*)dest;
 }
 
-#ifdef WORDS_BIGENDIAN   /* BIG ENDIAN -- SUN / IBM / MOTOROLA */
-int modp_b64_decode(char* dest, const char* src, int len)
-{
-    if (len == 0) return 0;
-
-#ifdef DOPAD
-    /* if padding is used, then the message must be at least
-       4 chars and be a multiple of 4.
-       there can be at most 2 pad chars at the end */
-    if (len < 4 || (len % 4 != 0)) return MODP_B64_ERROR;
-    if (src[len-1] == CHARPAD) {
-        len--;
-        if (src[len -1] == CHARPAD) {
-            len--;
-        }
-    }
-#endif  /* DOPAD */
-
-    size_t i;
-    int leftover = len % 4;
-    size_t chunks = (leftover == 0) ? len / 4 - 1 : len /4;
-
-    uint8_t* p = (uint8_t*) dest;
-    uint32_t x = 0;
-    uint32_t* destInt = (uint32_t*) p;
-    uint32_t* srcInt = (uint32_t*) src;
-    uint32_t y = *srcInt++;
-    for (i = 0; i < chunks; ++i) {
-        x = d0[y >> 24 & 0xff] | d1[y >> 16 & 0xff] |
-            d2[y >> 8 & 0xff] | d3[y & 0xff];
-
-        if (x >= BADCHAR)  return MODP_B64_ERROR;
-        *destInt = x << 8;
-        p += 3;
-        destInt = (uint32_t*)p;
-        y = *srcInt++;
-    }
-
-    switch (leftover) {
-    case 0:
-        x = d0[y >> 24 & 0xff] | d1[y >> 16 & 0xff] |
-            d2[y >>  8 & 0xff] | d3[y & 0xff];
-        if (x >= BADCHAR)  return MODP_B64_ERROR;
-        *p++ = ((uint8_t*)&x)[1];
-        *p++ = ((uint8_t*)&x)[2];
-        *p = ((uint8_t*)&x)[3];
-        return (chunks+1)*3;
-    case 1:
-        x = d3[y >> 24];
-        *p =  (uint8_t)x;
-        break;
-    case 2:
-        x = d3[y >> 24] *64 + d3[(y >> 16) & 0xff];
-        *p =  (uint8_t)(x >> 4);
-        break;
-    default:  /* case 3 */
-        x = (d3[y >> 24] *64 + d3[(y >> 16) & 0xff])*64 +
-            d3[(y >> 8) & 0xff];
-        *p++ = (uint8_t) (x >> 10);
-        *p = (uint8_t) (x >> 2);
-        break;
-    }
-
-    if (x >= BADCHAR) return MODP_B64_ERROR;
-    return 3*chunks + (6*leftover)/8;
+size_t modp_b64_encode(char* dest, const char* str, size_t len) {
+  size_t output_size = modp_b64_encode_data(dest, str, len);
+  dest[output_size] = '\0';
+  return output_size;
 }
 
-#else /* LITTLE  ENDIAN -- INTEL AND FRIENDS */
-
-size_t modp_b64_decode(char* dest, const char* src, size_t len)
-{
-    if (len == 0) return 0;
-
-#ifdef DOPAD
-    /*
-     * if padding is used, then the message must be at least
-     * 4 chars and be a multiple of 4
-     */
-    if (len < 4 || (len % 4 != 0)) return MODP_B64_ERROR; /* error */
-    /* there can be at most 2 pad chars at the end */
-    if (src[len-1] == CHARPAD) {
+size_t do_decode_padding(const char* src, size_t len, ModpDecodePolicy policy) {
+  if (policy == ModpDecodePolicy::kNoPaddingValidation) {
+    while (len > 0 && src[len - 1] == CHARPAD) {
         len--;
-        if (src[len -1] == CHARPAD) {
-            len--;
-        }
     }
-#endif
+  } else {
+    const size_t remainder = len % 4;
+    if (policy == ModpDecodePolicy::kStrict && (remainder != 0 || len < 4))
+      return MODP_B64_ERROR;
+    if (remainder == 0) {
+      if (src[len - 1] == CHARPAD) {
+        len--;
+        if (src[len - 1] == CHARPAD) {
+          len--;
+        }
+      }
+    }
+  }
+  return len % 4 == 1 ? MODP_B64_ERROR : len;
+}
+
+size_t modp_b64_decode(char* dest, const char* src, size_t len, ModpDecodePolicy policy)
+{
+    if (len == 0 ||
+        (len = do_decode_padding(src, len, policy)) == MODP_B64_ERROR) {
+      return len;
+    }
 
     size_t i;
     int leftover = len % 4;
@@ -229,7 +146,6 @@
         *p++ =  ((uint8_t*)(&x))[1];
         *p =    ((uint8_t*)(&x))[2];
         return (chunks+1)*3;
-        break;
     case 1:  /* with padding this is an impossible case */
         x = d0[y[0]];
         *p = *((uint8_t*)(&x)); // i.e. first char/byte in int
@@ -249,5 +165,3 @@
 
     return 3*chunks + (6*leftover)/8;
 }
-
-#endif  /* if bigendian / else / endif */
diff --git a/modp_b64/modp_b64.h b/modp_b64/modp_b64.h
index 0a2669d..9d11f81 100644
--- a/modp_b64/modp_b64.h
+++ b/modp_b64/modp_b64.h
@@ -37,17 +37,16 @@
  * len contains the number of bytes in the src
  * dest should be allocated by the caller to contain
  *   at least modp_b64_encode_len(len) bytes (see below)
- *   This will contain the null-terminated b64 encoded result
- * returns length of the destination string plus the ending null byte
- *    i.e.  the result will be equal to strlen(dest) + 1
+ *   This will contain the (non-null terminated) b64 bytes.
+ * returns length of the destination string.
  *
  * Example
- * 
+ *
  * \code
  * char* src = ...;
  * int srclen = ...; //the length of number of bytes in src
  * char* dest = (char*) malloc(modp_b64_encode_len);
- * int len = modp_b64_encode(dest, src, sourcelen);
+ * int len = modp_b64_encode_data(dest, src, srclen);
  * if (len == -1) {
  *   printf("Error\n");
  * } else {
@@ -56,6 +55,17 @@
  * \endcode
  *
  */
+size_t modp_b64_encode_data(char* dest, const char* str, size_t len);
+
+/**
+ * Same as modp_b64_encode_data, but additionally sets a null terminator at the
+ * end of `dest` (i.e. at dest[output_size]).
+ * Like modp_b64_encode_data, returns the length of the destination string (i.e.
+ * not counting the null terminator).
+ *
+ * TODO(csharrison): Consider removing this once all callers migrate to
+ * modp_b64_encode_data.
+ */
 size_t modp_b64_encode(char* dest, const char* str, size_t len);
 
 /**
@@ -79,11 +89,27 @@
  * if (len == -1) { error }
  * \endcode
  */
-size_t modp_b64_decode(char* dest, const char* src, size_t len);
+enum class ModpDecodePolicy {
+  // src length must be divisible by 4, with a max of 2 pad chars.
+  kStrict,
+
+  // Matches the infra spec: https://infra.spec.whatwg.org/#forgiving-base64
+  // _except_ for ignoring whitespace (Step 1).
+  kForgiving,
+
+  // src length % 4 must not equal 1, after stripping all pad chars.
+  // Accepts any number of pad chars.
+  kNoPaddingValidation,
+};
+size_t modp_b64_decode(
+    char* dest,
+    const char* src,
+    size_t len,
+    ModpDecodePolicy policy = ModpDecodePolicy::kStrict);
 
 /**
- * The maximum input that can be passed into modp_b64_encode. Lengths beyond
- * this will overflow modp_b64_encode_len.
+ * The maximum input that can be passed into modp_b64_encode{_data}.
+ * Lengths beyond this will overflow modp_b64_encode_len.
  *
  * This works because modp_b64_encode_len(A) computes:
  *     ceiling[max_len / 3] * 4 + 1
@@ -91,23 +117,27 @@
  *   = floor[(SIZE_MAX-1)/4] * 4 + 1
  *  <= SIZE_MAX-1 + 1
  *   = SIZE_MAX
+ *
+ * Note: technically modp_b64_encode_data can take one extra byte, but for
+ * simplicity the bound is shared between the two functions.
  */
 #define MODP_B64_MAX_INPUT_LEN ((SIZE_MAX - 1) / 4 * 3)
 
 /**
  * Given a source string of length len, this returns the amount of
- * memory the destination string should have.
+ * memory the destination string should have, for modp_b64_encode_data and
+ * modp_b64_encode, respectively.
  *
  * remember, this is integer math
  * 3 bytes turn into 4 chars
- * ceiling[len / 3] * 4 + 1
+ * ceiling[len / 3] * 4 (modp_b64_encode_len adds 1 for the null terminator)
  *
- * +1 is for any extra null.
  *
- * WARNING: This expression will overflow if the A is above
+ * WARNING: These expressions will overflow if A is above
  * MODP_B64_MAX_INPUT_LEN. The caller must check this bound first.
  */
-#define modp_b64_encode_len(A) ((A+2)/3 * 4 + 1)
+#define modp_b64_encode_data_len(A) ((A + 2) / 3 * 4)
+#define modp_b64_encode_len(A) (modp_b64_encode_data_len(A) + 1)
 
 /**
  * Given a base64 string of length len,
@@ -120,69 +150,10 @@
  */
 #define modp_b64_decode_len(A) (A / 4 * 3 + 2)
 
-/**
- * Will return the strlen of the output from encoding.
- * This may be less than the required number of bytes allocated.
- *
- * This allows you to 'deserialized' a struct
- * \code
- * char* b64encoded = "...";
- * int len = strlen(b64encoded);
- *
- * struct datastuff foo;
- * if (modp_b64_encode_strlen(sizeof(struct datastuff)) != len) {
- *    // wrong size
- *    return false;
- * } else {
- *    // safe to do;
- *    if (modp_b64_decode((char*) &foo, b64encoded, len) == -1) {
- *      // bad characters
- *      return false;
- *    }
- * }
- * // foo is filled out now
- * \endcode
- */
-#define modp_b64_encode_strlen(A) ((A + 2)/ 3 * 4)
-
 #define MODP_B64_ERROR ((size_t)-1)
 
 #ifdef __cplusplus
 }
-
-#include <string>
-
-inline std::string& modp_b64_encode(std::string& s)
-{
-    std::string x(modp_b64_encode_len(s.size()), '\0');
-    size_t d = modp_b64_encode(const_cast<char*>(x.data()), s.data(), (int)s.size());
-    x.erase(d, std::string::npos);
-    s.swap(x);
-    return s;
-}
-
-/**
- * base 64 decode a string (self-modifing)
- * On failure, the string is empty.
- *
- * This function is for C++ only (duh)
- *
- * \param[in,out] s the string to be decoded
- * \return a reference to the input string
- */
-inline std::string& modp_b64_decode(std::string& s)
-{
-    std::string x(modp_b64_decode_len(s.size()), '\0');
-    size_t d = modp_b64_decode(const_cast<char*>(x.data()), s.data(), (int)s.size());
-    if (d == MODP_B64_ERROR) {
-        x.clear();
-    } else {
-        x.erase(d, std::string::npos);
-    }
-    s.swap(x);
-    return s;
-}
-
 #endif /* __cplusplus */
 
 #endif /* MODP_B64 */