Source/base64.c

   1 /* base64.c -- Encode binary data using printable characters.
   2    Copyright (C) 1999, 2000, 2001, 2004, 2005, 2006 Free Software
   3    Foundation, Inc.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software Foundation,
  17    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
  18
  19 /* Written by Simon Josefsson.  Partially adapted from GNU MailUtils
  20  * (mailbox/filter_trans.c, as of 2004-11-28).  Improved by review
  21  * from Paul Eggert, Bruno Haible, and Stepan Kasal.
  22  *
  23  * See also RFC 3548 <http://www.ietf.org/rfc/rfc3548.txt>.
  24  *
  25  * Be careful with error checking.  Here is how you would typically
  26  * use these functions:
  27  *
  28  * bool ok = base64_decode_alloc (in, inlen, &out, &outlen);
  29  * if (!ok)
  30  *   FAIL: input was not valid base64
  31  * if (out == NULL)
  32  *   FAIL: memory allocation error
  33  * OK: data in OUT/OUTLEN
  34  *
  35  * size_t outlen = base64_encode_alloc (in, inlen, &out);
  36  * if (out == NULL && outlen == 0 && inlen != 0)
  37  *   FAIL: input too long
  38  * if (out == NULL)
  39  *   FAIL: memory allocation error
  40  * OK: data in OUT/OUTLEN.
  41  *
  42  */
  43
  44 /* Get prototype. */
  45 #include "base64.h"
  46
  47 /* Get malloc. */
  48 #include <stdlib.h>
  49
  50 /* Get UCHAR_MAX. */
  51 #include <limits.h>
  52
  53 /* C89 compliant way to cast 'char' to 'unsigned char'. */
  54 static inline unsigned char
  55 to_uchar (char ch)
  56 {
  57   return ch;
  58 }
  59
  60 /* Base64 encode IN array of size INLEN into OUT array of size OUTLEN.
  61    If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as
  62    possible.  If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero
  63    terminate the output buffer. */
  64 void
  65 base64_encode (const char *restrict in, size_t inlen,
  66                char *restrict out, size_t outlen)
  67 {
  68   static const char b64str[64] =
  69     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  70
  71   while (inlen && outlen)
  72     {
  73       *out++ = b64str[(to_uchar (in[0]) >> 2) & 0x3f];
  74       if (!--outlen)
  75         break;
  76       *out++ = b64str[((to_uchar (in[0]) << 4)
  77                        + (--inlen ? to_uchar (in[1]) >> 4 : 0))
  78                       & 0x3f];
  79       if (!--outlen)
  80         break;
  81       *out++ =
  82         (inlen
  83          ? b64str[((to_uchar (in[1]) << 2)
  84                    + (--inlen ? to_uchar (in[2]) >> 6 : 0))
  85                   & 0x3f]
  86          : '=');
  87       if (!--outlen)
  88         break;
  89       *out++ = inlen ? b64str[to_uchar (in[2]) & 0x3f] : '=';
  90       if (!--outlen)
  91         break;
  92       if (inlen)
  93         inlen--;
  94       if (inlen)
  95         in += 3;
  96     }
  97
  98   if (outlen)
  99     *out = '\0';
 100 }
 101
 102 /* Allocate a buffer and store zero terminated base64 encoded data
 103    from array IN of size INLEN, returning BASE64_LENGTH(INLEN), i.e.,
 104    the length of the encoded data, excluding the terminating zero.  On
 105    return, the OUT variable will hold a pointer to newly allocated
 106    memory that must be deallocated by the caller.  If output string
 107    length would overflow, 0 is returned and OUT is set to NULL.  If
 108    memory allocation failed, OUT is set to NULL, and the return value
 109    indicates length of the requested memory block, i.e.,
 110    BASE64_LENGTH(inlen) + 1. */
 111 size_t
 112 base64_encode_alloc (const char *in, size_t inlen, char **out)
 113 {
 114   size_t outlen = 1 + BASE64_LENGTH (inlen);
 115
 116   /* Check for overflow in outlen computation.
 117    *
 118    * If there is no overflow, outlen >= inlen.
 119    *
 120    * If the operation (inlen + 2) overflows then it yields at most +1, so
 121    * outlen is 0.
 122    *
 123    * If the multiplication overflows, we lose at least half of the
 124    * correct value, so the result is < ((inlen + 2) / 3) * 2, which is
 125    * less than (inlen + 2) * 0.66667, which is less than inlen as soon as
 126    * (inlen > 4).
 127    */
 128   if (inlen > outlen)
 129     {
 130       *out = NULL;
 131       return 0;
 132     }
 133
 134   *out = malloc (outlen);
 135   if (!*out)
 136     return outlen;
 137
 138   base64_encode (in, inlen, *out, outlen);
 139
 140   return outlen - 1;
 141 }
 142
 143 /* With this approach this file works independent of the charset used
 144    (think EBCDIC).  However, it does assume that the characters in the
 145    Base64 alphabet (A-Za-z0-9+/) are encoded in 0..255.  POSIX
 146    1003.1-2001 require that char and unsigned char are 8-bit
 147    quantities, though, taking care of that problem.  But this may be a
 148    potential problem on non-POSIX C99 platforms.
 149
 150    IBM C V6 for AIX mishandles "#define B64(x) ...'x'...", so use "_"
 151    as the formal parameter rather than "x".  */
 152 #define B64(_)                                  \
 153   ((_) == 'A' ? 0                               \
 154    : (_) == 'B' ? 1                             \
 155    : (_) == 'C' ? 2                             \
 156    : (_) == 'D' ? 3                             \
 157    : (_) == 'E' ? 4                             \
 158    : (_) == 'F' ? 5                             \
 159    : (_) == 'G' ? 6                             \
 160    : (_) == 'H' ? 7                             \
 161    : (_) == 'I' ? 8                             \
 162    : (_) == 'J' ? 9                             \
 163    : (_) == 'K' ? 10                            \
 164    : (_) == 'L' ? 11                            \
 165    : (_) == 'M' ? 12                            \
 166    : (_) == 'N' ? 13                            \
 167    : (_) == 'O' ? 14                            \
 168    : (_) == 'P' ? 15                            \
 169    : (_) == 'Q' ? 16                            \
 170    : (_) == 'R' ? 17                            \
 171    : (_) == 'S' ? 18                            \
 172    : (_) == 'T' ? 19                            \
 173    : (_) == 'U' ? 20                            \
 174    : (_) == 'V' ? 21                            \
 175    : (_) == 'W' ? 22                            \
 176    : (_) == 'X' ? 23                            \
 177    : (_) == 'Y' ? 24                            \
 178    : (_) == 'Z' ? 25                            \
 179    : (_) == 'a' ? 26                            \
 180    : (_) == 'b' ? 27                            \
 181    : (_) == 'c' ? 28                            \
 182    : (_) == 'd' ? 29                            \
 183    : (_) == 'e' ? 30                            \
 184    : (_) == 'f' ? 31                            \
 185    : (_) == 'g' ? 32                            \
 186    : (_) == 'h' ? 33                            \
 187    : (_) == 'i' ? 34                            \
 188    : (_) == 'j' ? 35                            \
 189    : (_) == 'k' ? 36                            \
 190    : (_) == 'l' ? 37                            \
 191    : (_) == 'm' ? 38                            \
 192    : (_) == 'n' ? 39                            \
 193    : (_) == 'o' ? 40                            \
 194    : (_) == 'p' ? 41                            \
 195    : (_) == 'q' ? 42                            \
 196    : (_) == 'r' ? 43                            \
 197    : (_) == 's' ? 44                            \
 198    : (_) == 't' ? 45                            \
 199    : (_) == 'u' ? 46                            \
 200    : (_) == 'v' ? 47                            \
 201    : (_) == 'w' ? 48                            \
 202    : (_) == 'x' ? 49                            \
 203    : (_) == 'y' ? 50                            \
 204    : (_) == 'z' ? 51                            \
 205    : (_) == '0' ? 52                            \
 206    : (_) == '1' ? 53                            \
 207    : (_) == '2' ? 54                            \
 208    : (_) == '3' ? 55                            \
 209    : (_) == '4' ? 56                            \
 210    : (_) == '5' ? 57                            \
 211    : (_) == '6' ? 58                            \
 212    : (_) == '7' ? 59                            \
 213    : (_) == '8' ? 60                            \
 214    : (_) == '9' ? 61                            \
 215    : (_) == '+' ? 62                            \
 216    : (_) == '/' ? 63                            \
 217    : -1)
 218
 219 static const signed char b64[0x100] = {
 220   B64 (0), B64 (1), B64 (2), B64 (3),
 221   B64 (4), B64 (5), B64 (6), B64 (7),
 222   B64 (8), B64 (9), B64 (10), B64 (11),
 223   B64 (12), B64 (13), B64 (14), B64 (15),
 224   B64 (16), B64 (17), B64 (18), B64 (19),
 225   B64 (20), B64 (21), B64 (22), B64 (23),
 226   B64 (24), B64 (25), B64 (26), B64 (27),
 227   B64 (28), B64 (29), B64 (30), B64 (31),
 228   B64 (32), B64 (33), B64 (34), B64 (35),
 229   B64 (36), B64 (37), B64 (38), B64 (39),
 230   B64 (40), B64 (41), B64 (42), B64 (43),
 231   B64 (44), B64 (45), B64 (46), B64 (47),
 232   B64 (48), B64 (49), B64 (50), B64 (51),
 233   B64 (52), B64 (53), B64 (54), B64 (55),
 234   B64 (56), B64 (57), B64 (58), B64 (59),
 235   B64 (60), B64 (61), B64 (62), B64 (63),
 236   B64 (64), B64 (65), B64 (66), B64 (67),
 237   B64 (68), B64 (69), B64 (70), B64 (71),
 238   B64 (72), B64 (73), B64 (74), B64 (75),
 239   B64 (76), B64 (77), B64 (78), B64 (79),
 240   B64 (80), B64 (81), B64 (82), B64 (83),
 241   B64 (84), B64 (85), B64 (86), B64 (87),
 242   B64 (88), B64 (89), B64 (90), B64 (91),
 243   B64 (92), B64 (93), B64 (94), B64 (95),
 244   B64 (96), B64 (97), B64 (98), B64 (99),
 245   B64 (100), B64 (101), B64 (102), B64 (103),
 246   B64 (104), B64 (105), B64 (106), B64 (107),
 247   B64 (108), B64 (109), B64 (110), B64 (111),
 248   B64 (112), B64 (113), B64 (114), B64 (115),
 249   B64 (116), B64 (117), B64 (118), B64 (119),
 250   B64 (120), B64 (121), B64 (122), B64 (123),
 251   B64 (124), B64 (125), B64 (126), B64 (127),
 252   B64 (128), B64 (129), B64 (130), B64 (131),
 253   B64 (132), B64 (133), B64 (134), B64 (135),
 254   B64 (136), B64 (137), B64 (138), B64 (139),
 255   B64 (140), B64 (141), B64 (142), B64 (143),
 256   B64 (144), B64 (145), B64 (146), B64 (147),
 257   B64 (148), B64 (149), B64 (150), B64 (151),
 258   B64 (152), B64 (153), B64 (154), B64 (155),
 259   B64 (156), B64 (157), B64 (158), B64 (159),
 260   B64 (160), B64 (161), B64 (162), B64 (163),
 261   B64 (164), B64 (165), B64 (166), B64 (167),
 262   B64 (168), B64 (169), B64 (170), B64 (171),
 263   B64 (172), B64 (173), B64 (174), B64 (175),
 264   B64 (176), B64 (177), B64 (178), B64 (179),
 265   B64 (180), B64 (181), B64 (182), B64 (183),
 266   B64 (184), B64 (185), B64 (186), B64 (187),
 267   B64 (188), B64 (189), B64 (190), B64 (191),
 268   B64 (192), B64 (193), B64 (194), B64 (195),
 269   B64 (196), B64 (197), B64 (198), B64 (199),
 270   B64 (200), B64 (201), B64 (202), B64 (203),
 271   B64 (204), B64 (205), B64 (206), B64 (207),
 272   B64 (208), B64 (209), B64 (210), B64 (211),
 273   B64 (212), B64 (213), B64 (214), B64 (215),
 274   B64 (216), B64 (217), B64 (218), B64 (219),
 275   B64 (220), B64 (221), B64 (222), B64 (223),
 276   B64 (224), B64 (225), B64 (226), B64 (227),
 277   B64 (228), B64 (229), B64 (230), B64 (231),
 278   B64 (232), B64 (233), B64 (234), B64 (235),
 279   B64 (236), B64 (237), B64 (238), B64 (239),
 280   B64 (240), B64 (241), B64 (242), B64 (243),
 281   B64 (244), B64 (245), B64 (246), B64 (247),
 282   B64 (248), B64 (249), B64 (250), B64 (251),
 283   B64 (252), B64 (253), B64 (254), B64 (255)
 284 };
 285
 286 #if UCHAR_MAX == 255
 287 # define uchar_in_range(c) true
 288 #else
 289 # define uchar_in_range(c) ((c) <= 255)
 290 #endif
 291
 292 /* Return true if CH is a character from the Base64 alphabet, and
 293    false otherwise.  Note that '=' is padding and not considered to be
 294    part of the alphabet.  */
 295 bool
 296 isbase64 (char ch)
 297 {
 298   return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)];
 299 }
 300
 301 /* Decode base64 encoded input array IN of length INLEN to output
 302    array OUT that can hold *OUTLEN bytes.  Return true if decoding was
 303    successful, i.e. if the input was valid base64 data, false
 304    otherwise.  If *OUTLEN is too small, as many bytes as possible will
 305    be written to OUT.  On return, *OUTLEN holds the length of decoded
 306    bytes in OUT.  Note that as soon as any non-alphabet characters are
 307    encountered, decoding is stopped and false is returned.  This means
 308    that, when applicable, you must remove any line terminators that is
 309    part of the data stream before calling this function.  */
 310 bool
 311 base64_decode (const char *restrict in, size_t inlen,
 312                char *restrict out, size_t *outlen)
 313 {
 314   size_t outleft = *outlen;
 315
 316   while (inlen >= 2)
 317     {
 318       if (!isbase64 (in[0]) || !isbase64 (in[1]))
 319         break;
 320
 321       if (outleft)
 322         {
 323           *out++ = ((b64[to_uchar (in[0])] << 2)
 324                     | (b64[to_uchar (in[1])] >> 4));
 325           outleft--;
 326         }
 327
 328       if (inlen == 2)
 329         break;
 330
 331       if (in[2] == '=')
 332         {
 333           if (inlen != 4)
 334             break;
 335
 336           if (in[3] != '=')
 337             break;
 338
 339         }
 340       else
 341         {
 342           if (!isbase64 (in[2]))
 343             break;
 344
 345           if (outleft)
 346             {
 347               *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0)
 348                         | (b64[to_uchar (in[2])] >> 2));
 349               outleft--;
 350             }
 351
 352           if (inlen == 3)
 353             break;
 354
 355           if (in[3] == '=')
 356             {
 357               if (inlen != 4)
 358                 break;
 359             }
 360           else
 361             {
 362               if (!isbase64 (in[3]))
 363                 break;
 364
 365               if (outleft)
 366                 {
 367                   *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0)
 368                             | b64[to_uchar (in[3])]);
 369                   outleft--;
 370                 }
 371             }
 372         }
 373
 374       in += 4;
 375       inlen -= 4;
 376     }
 377
 378   *outlen -= outleft;
 379
 380   if (inlen != 0)
 381     return false;
 382
 383   return true;
 384 }
 385
 386 /* Allocate an output buffer in *OUT, and decode the base64 encoded
 387    data stored in IN of size INLEN to the *OUT buffer.  On return, the
 388    size of the decoded data is stored in *OUTLEN.  OUTLEN may be NULL,
 389    if the caller is not interested in the decoded length.  *OUT may be
 390    NULL to indicate an out of memory error, in which case *OUTLEN
 391    contains the size of the memory block needed.  The function returns
 392    true on successful decoding and memory allocation errors.  (Use the
 393    *OUT and *OUTLEN parameters to differentiate between successful
 394    decoding and memory error.)  The function returns false if the
 395    input was invalid, in which case *OUT is NULL and *OUTLEN is
 396    undefined. */
 397 bool
 398 base64_decode_alloc (const char *in, size_t inlen, char **out,
 399                      size_t *outlen)
 400 {
 401   /* This may allocate a few bytes too much, depending on input,
 402      but it's not worth the extra CPU time to compute the exact amount.
 403      The exact amount is 3 * inlen / 4, minus 1 if the input ends
 404      with "=" and minus another 1 if the input ends with "==".
 405      Dividing before multiplying avoids the possibility of overflow.  */
 406   size_t needlen = 3 * (inlen / 4) + 2;
 407
 408   *out = malloc (needlen);
 409   if (!*out)
 410     return true;
 411
 412   if (!base64_decode (in, inlen, *out, &needlen))
 413     {
 414       free (*out);
 415       *out = NULL;
 416       return false;
 417     }
 418
 419   if (outlen)
 420     *outlen = needlen;
 421
 422   return true;
 423 }