11 but WITHOUT ANY WARRANTY; without even the implied warranty of |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 GNU General Public License for more details. |
12 GNU General Public License for more details. |
14 |
13 |
15 You should have received a copy of the GNU General Public License |
14 You should have received a copy of the GNU General Public License |
16 along with this program; if not, write to the Free Software Foundation, |
15 along with this program; if not, see <http://www.gnu.org/licenses/>. */ |
17 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ |
|
18 |
16 |
19 /* Written by Simon Josefsson. Partially adapted from GNU MailUtils |
17 /* Written by Simon Josefsson. Partially adapted from GNU MailUtils |
20 * (mailbox/filter_trans.c, as of 2004-11-28). Improved by review |
18 * (mailbox/filter_trans.c, as of 2004-11-28). Improved by review |
21 * from Paul Eggert, Bruno Haible, and Stepan Kasal. |
19 * from Paul Eggert, Bruno Haible, and Stepan Kasal. |
22 * |
20 * |
23 * See also RFC 3548 <http://www.ietf.org/rfc/rfc3548.txt>. |
21 * See also RFC 4648 <http://www.ietf.org/rfc/rfc4648.txt>. |
24 * |
22 * |
25 * Be careful with error checking. Here is how you would typically |
23 * Be careful with error checking. Here is how you would typically |
26 * use these functions: |
24 * use these functions: |
27 * |
25 * |
28 * bool ok = base64_decode_alloc (in, inlen, &out, &outlen); |
26 * bool ok = base64_decode_alloc (in, inlen, &out, &outlen); |
61 If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as |
61 If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as |
62 possible. If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero |
62 possible. If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero |
63 terminate the output buffer. */ |
63 terminate the output buffer. */ |
/* Base64-encode the INLEN bytes at IN into OUT, which has room for
   OUTLEN bytes.

   Output is produced one character at a time and stops as soon as
   OUTLEN characters have been written, so a buffer that is too small
   receives a truncated encoding.  A final input group of one or two
   bytes is padded with '=' to a full four-character group.  If room
   remains in OUT once the input is exhausted, the output is
   terminated with a NUL byte; otherwise no terminator is written.  */
void
base64_encode (const char *restrict in, size_t inlen,
               char *restrict out, size_t outlen)
{
  static const char b64str[64] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  while (inlen && outlen)
    {
      /* Character 1: the top six bits of input byte 0.  */
      *out++ = b64str[(to_uchar (in[0]) >> 2) & 0x3f];
      if (!--outlen)
        break;

      /* Character 2: the low two bits of byte 0 followed by the top
         four bits of byte 1, or zero bits if byte 0 was the last.  */
      *out++ = b64str[((to_uchar (in[0]) << 4)
                       + (--inlen ? to_uchar (in[1]) >> 4 : 0))
                      & 0x3f];
      if (!--outlen)
        break;

      /* Character 3: the low four bits of byte 1 followed by the top
         two bits of byte 2, or padding when there is no byte 1.  */
      *out++ =
        (inlen
         ? b64str[((to_uchar (in[1]) << 2)
                   + (--inlen ? to_uchar (in[2]) >> 6 : 0))
                  & 0x3f]
         : '=');
      if (!--outlen)
        break;

      /* Character 4: the low six bits of byte 2, or padding.  */
      *out++ = inlen ? b64str[to_uchar (in[2]) & 0x3f] : '=';
      if (!--outlen)
        break;

      /* Account for byte 2, and step to the next input group only if
         there is one, so IN never moves beyond the input.  */
      if (inlen)
        inlen--;
      if (inlen)
        in += 3;
    }

  if (outlen)
    *out = '\0';
}
296 isbase64 (char ch) |
296 isbase64 (char ch) |
297 { |
297 { |
298 return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)]; |
298 return uchar_in_range (to_uchar (ch)) && 0 <= b64[to_uchar (ch)]; |
299 } |
299 } |
300 |
300 |
301 /* Decode base64 encoded input array IN of length INLEN to output |
301 /* Initialize decode-context buffer, CTX. */ |
302 array OUT that can hold *OUTLEN bytes. Return true if decoding was |
302 void |
303 successful, i.e. if the input was valid base64 data, false |
303 base64_decode_ctx_init (struct base64_decode_context *ctx) |
304 otherwise. If *OUTLEN is too small, as many bytes as possible will |
304 { |
305 be written to OUT. On return, *OUTLEN holds the length of decoded |
305 ctx->i = 0; |
306 bytes in OUT. Note that as soon as any non-alphabet characters are |
306 } |
307 encountered, decoding is stopped and false is returned. This means |
307 |
308 that, when applicable, you must remove any line terminators that is |
308 /* If CTX->i is 0 or 4, there are four or more bytes in [*IN..IN_END), and |
309 part of the data stream before calling this function. */ |
309 none of those four is a newline, then return *IN. Otherwise, copy up to |
|
310 4 - CTX->i non-newline bytes from that range into CTX->buf, starting at |
|
311 index CTX->i and setting CTX->i to reflect the number of bytes copied, |
|
312 and return CTX->buf. In either case, advance *IN to point to the byte |
|
313 after the last one processed, and set *N_NON_NEWLINE to the number of |
|
314 verified non-newline bytes accessible through the returned pointer. */ |
|
315 static inline char * |
|
316 get_4 (struct base64_decode_context *ctx, |
|
317 char const *restrict *in, char const *restrict in_end, |
|
318 size_t *n_non_newline) |
|
319 { |
|
320 if (ctx->i == 4) |
|
321 ctx->i = 0; |
|
322 |
|
323 if (ctx->i == 0) |
|
324 { |
|
325 char const *t = *in; |
|
326 if (4 <= in_end - *in && memchr (t, '\n', 4) == NULL) |
|
327 { |
|
328 /* This is the common case: no newline. */ |
|
329 *in += 4; |
|
330 *n_non_newline = 4; |
|
331 return (char *) t; |
|
332 } |
|
333 } |
|
334 |
|
335 { |
|
336 /* Copy non-newline bytes into BUF. */ |
|
337 char const *p = *in; |
|
338 while (p < in_end) |
|
339 { |
|
340 char c = *p++; |
|
341 if (c != '\n') |
|
342 { |
|
343 ctx->buf[ctx->i++] = c; |
|
344 if (ctx->i == 4) |
|
345 break; |
|
346 } |
|
347 } |
|
348 |
|
349 *in = p; |
|
350 *n_non_newline = ctx->i; |
|
351 return ctx->buf; |
|
352 } |
|
353 } |
|
354 |
|
355 #define return_false \ |
|
356 do \ |
|
357 { \ |
|
358 *outp = out; \ |
|
359 return false; \ |
|
360 } \ |
|
361 while (false) |
|
362 |
|
363 /* Decode up to four bytes of base64-encoded data, IN, of length INLEN |
|
364 into the output buffer, *OUT, of size *OUTLEN bytes. Return true if |
|
365 decoding is successful, false otherwise. If *OUTLEN is too small, |
|
366 as many bytes as possible are written to *OUT. On return, advance |
|
367 *OUT to point to the byte after the last one written, and decrement |
|
368 *OUTLEN to reflect the number of bytes remaining in *OUT. */ |
|
369 static inline bool |
|
370 decode_4 (char const *restrict in, size_t inlen, |
|
371 char *restrict *outp, size_t *outleft) |
|
372 { |
|
373 char *out = *outp; |
|
374 if (inlen < 2) |
|
375 return false; |
|
376 |
|
377 if (!isbase64 (in[0]) || !isbase64 (in[1])) |
|
378 return false; |
|
379 |
|
380 if (*outleft) |
|
381 { |
|
382 *out++ = ((b64[to_uchar (in[0])] << 2) |
|
383 | (b64[to_uchar (in[1])] >> 4)); |
|
384 --*outleft; |
|
385 } |
|
386 |
|
387 if (inlen == 2) |
|
388 return_false; |
|
389 |
|
390 if (in[2] == '=') |
|
391 { |
|
392 if (inlen != 4) |
|
393 return_false; |
|
394 |
|
395 if (in[3] != '=') |
|
396 return_false; |
|
397 } |
|
398 else |
|
399 { |
|
400 if (!isbase64 (in[2])) |
|
401 return_false; |
|
402 |
|
403 if (*outleft) |
|
404 { |
|
405 *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0) |
|
406 | (b64[to_uchar (in[2])] >> 2)); |
|
407 --*outleft; |
|
408 } |
|
409 |
|
410 if (inlen == 3) |
|
411 return_false; |
|
412 |
|
413 if (in[3] == '=') |
|
414 { |
|
415 if (inlen != 4) |
|
416 return_false; |
|
417 } |
|
418 else |
|
419 { |
|
420 if (!isbase64 (in[3])) |
|
421 return_false; |
|
422 |
|
423 if (*outleft) |
|
424 { |
|
425 *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0) |
|
426 | b64[to_uchar (in[3])]); |
|
427 --*outleft; |
|
428 } |
|
429 } |
|
430 } |
|
431 |
|
432 *outp = out; |
|
433 return true; |
|
434 } |
|
435 |
|
436 /* Decode base64-encoded input array IN of length INLEN to output array |
|
437 OUT that can hold *OUTLEN bytes. The input data may be interspersed |
|
438 with newlines. Return true if decoding was successful, i.e. if the |
|
439 input was valid base64 data, false otherwise. If *OUTLEN is too |
|
440 small, as many bytes as possible will be written to OUT. On return, |
|
441 *OUTLEN holds the length of decoded bytes in OUT. Note that as soon |
|
442 as any non-alphabet, non-newline character is encountered, decoding |
|
443 is stopped and false is returned. If INLEN is zero, then process |
|
444 only whatever data is stored in CTX. |
|
445 |
|
446 Initially, CTX must have been initialized via base64_decode_ctx_init. |
|
447 Subsequent calls to this function must reuse whatever state is recorded |
|
448 in that buffer. It is necessary for when a quadruple of base64 input |
|
449 bytes spans two input buffers. |
|
450 |
|
451 If CTX is NULL then newlines are treated as garbage and the input |
|
452 buffer is processed as a unit. */ |
|
453 |
310 bool |
454 bool |
311 base64_decode (const char *restrict in, size_t inlen, |
455 base64_decode_ctx (struct base64_decode_context *ctx, |
312 char *restrict out, size_t *outlen) |
456 const char *restrict in, size_t inlen, |
|
457 char *restrict out, size_t *outlen) |
313 { |
458 { |
314 size_t outleft = *outlen; |
459 size_t outleft = *outlen; |
315 |
460 bool ignore_newlines = ctx != NULL; |
316 while (inlen >= 2) |
461 bool flush_ctx = false; |
317 { |
462 unsigned int ctx_i = 0; |
318 if (!isbase64 (in[0]) || !isbase64 (in[1])) |
463 |
319 break; |
464 if (ignore_newlines) |
320 |
465 { |
321 if (outleft) |
466 ctx_i = ctx->i; |
322 { |
467 flush_ctx = inlen == 0; |
323 *out++ = ((b64[to_uchar (in[0])] << 2) |
468 } |
324 | (b64[to_uchar (in[1])] >> 4)); |
469 |
325 outleft--; |
470 |
326 } |
471 while (true) |
327 |
472 { |
328 if (inlen == 2) |
473 size_t outleft_save = outleft; |
329 break; |
474 if (ctx_i == 0 && !flush_ctx) |
330 |
475 { |
331 if (in[2] == '=') |
476 while (true) |
332 { |
477 { |
333 if (inlen != 4) |
478 /* Save a copy of outleft, in case we need to re-parse this |
334 break; |
479 block of four bytes. */ |
335 |
480 outleft_save = outleft; |
336 if (in[3] != '=') |
481 if (!decode_4 (in, inlen, &out, &outleft)) |
337 break; |
482 break; |
338 |
483 |
339 } |
484 in += 4; |
340 else |
485 inlen -= 4; |
341 { |
486 } |
342 if (!isbase64 (in[2])) |
487 } |
343 break; |
488 |
344 |
489 if (inlen == 0 && !flush_ctx) |
345 if (outleft) |
490 break; |
346 { |
491 |
347 *out++ = (((b64[to_uchar (in[1])] << 4) & 0xf0) |
492 /* Handle the common case of 72-byte wrapped lines. |
348 | (b64[to_uchar (in[2])] >> 2)); |
493 This also handles any other multiple-of-4-byte wrapping. */ |
349 outleft--; |
494 if (inlen && *in == '\n' && ignore_newlines) |
350 } |
495 { |
351 |
496 ++in; |
352 if (inlen == 3) |
497 --inlen; |
353 break; |
498 continue; |
354 |
499 } |
355 if (in[3] == '=') |
500 |
356 { |
501 /* Restore OUT and OUTLEFT. */ |
357 if (inlen != 4) |
502 out -= outleft_save - outleft; |
358 break; |
503 outleft = outleft_save; |
359 } |
504 |
360 else |
505 { |
361 { |
506 char const *in_end = in + inlen; |
362 if (!isbase64 (in[3])) |
507 char const *non_nl; |
363 break; |
508 |
364 |
509 if (ignore_newlines) |
365 if (outleft) |
510 non_nl = get_4 (ctx, &in, in_end, &inlen); |
366 { |
511 else |
367 *out++ = (((b64[to_uchar (in[2])] << 6) & 0xc0) |
512 non_nl = in; /* Might have nl in this case. */ |
368 | b64[to_uchar (in[3])]); |
513 |
369 outleft--; |
514 /* If the input is empty or consists solely of newlines (0 non-newlines), |
370 } |
515 then we're done. Likewise if there are fewer than 4 bytes when not |
371 } |
516 flushing context and not treating newlines as garbage. */ |
372 } |
517 if (inlen == 0 || (inlen < 4 && !flush_ctx && ignore_newlines)) |
373 |
518 { |
374 in += 4; |
519 inlen = 0; |
375 inlen -= 4; |
520 break; |
|
521 } |
|
522 if (!decode_4 (non_nl, inlen, &out, &outleft)) |
|
523 break; |
|
524 |
|
525 inlen = in_end - in; |
|
526 } |
376 } |
527 } |
377 |
528 |
378 *outlen -= outleft; |
529 *outlen -= outleft; |
379 |
530 |
380 if (inlen != 0) |
531 return inlen == 0; |
381 return false; |
|
382 |
|
383 return true; |
|
384 } |
532 } |
385 |
533 |
386 /* Allocate an output buffer in *OUT, and decode the base64 encoded |
534 /* Allocate an output buffer in *OUT, and decode the base64 encoded |
387 data stored in IN of size INLEN to the *OUT buffer. On return, the |
535 data stored in IN of size INLEN to the *OUT buffer. On return, the |
388 size of the decoded data is stored in *OUTLEN. OUTLEN may be NULL, |
536 size of the decoded data is stored in *OUTLEN. OUTLEN may be NULL, |
393 *OUT and *OUTLEN parameters to differentiate between successful |
541 *OUT and *OUTLEN parameters to differentiate between successful |
394 decoding and memory error.) The function returns false if the |
542 decoding and memory error.) The function returns false if the |
395 input was invalid, in which case *OUT is NULL and *OUTLEN is |
543 input was invalid, in which case *OUT is NULL and *OUTLEN is |
396 undefined. */ |
544 undefined. */ |
397 bool |
545 bool |
398 base64_decode_alloc (const char *in, size_t inlen, char **out, |
546 base64_decode_alloc_ctx (struct base64_decode_context *ctx, |
399 size_t *outlen) |
547 const char *in, size_t inlen, char **out, |
400 { |
548 size_t *outlen) |
401 /* This may allocate a few bytes too much, depending on input, |
549 { |
402 but it's not worth the extra CPU time to compute the exact amount. |
550 /* This may allocate a few bytes too many, depending on input, |
403 The exact amount is 3 * inlen / 4, minus 1 if the input ends |
551 but it's not worth the extra CPU time to compute the exact size. |
404 with "=" and minus another 1 if the input ends with "==". |
552 The exact size is 3 * (inlen + (ctx ? ctx->i : 0)) / 4, minus 1 if the |
|
553 input ends with "=" and minus another 1 if the input ends with "==". |
405 Dividing before multiplying avoids the possibility of overflow. */ |
554 Dividing before multiplying avoids the possibility of overflow. */ |
406 size_t needlen = 3 * (inlen / 4) + 2; |
555 size_t needlen = 3 * (inlen / 4) + 3; |
407 |
556 |
408 *out = malloc (needlen); |
557 *out = malloc (needlen); |
409 if (!*out) |
558 if (!*out) |
410 return true; |
559 return true; |
411 |
560 |
412 if (!base64_decode (in, inlen, *out, &needlen)) |
561 if (!base64_decode_ctx (ctx, in, inlen, *out, &needlen)) |
413 { |
562 { |
414 free (*out); |
563 free (*out); |
415 *out = NULL; |
564 *out = NULL; |
416 return false; |
565 return false; |
417 } |
566 } |