]> git.saurik.com Git - apple/libdispatch.git/blob - src/transform.c
libdispatch-339.92.1.tar.gz
[apple/libdispatch.git] / src / transform.c
1 /*
2 * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 #include "internal.h"
22
23 #include <libkern/OSByteOrder.h>
24
/*
 * Alias the "host byte order" and "reversed byte order" UTF-16 format types
 * to the concrete LE/BE format types, depending on which of
 * __LITTLE_ENDIAN__ / __BIG_ENDIAN__ is defined. If neither is defined the
 * aliases are left undefined.
 */
#ifdef __LITTLE_ENDIAN__
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16LE
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16BE
#else
#ifdef __BIG_ENDIAN__
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16BE
#define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16LE
#endif
#endif
32
/*
 * One bit per data format. A single bit is stored in the `type` field of
 * each format descriptor, and the bits are OR-ed together to build the
 * descriptors' `input_mask` / `output_mask` fields.
 */
enum {
	_DISPATCH_DATA_FORMAT_NONE      = 1 << 0,
	_DISPATCH_DATA_FORMAT_UTF8      = 1 << 1,
	_DISPATCH_DATA_FORMAT_UTF16LE   = 1 << 2,
	_DISPATCH_DATA_FORMAT_UTF16BE   = 1 << 3,
	_DISPATCH_DATA_FORMAT_UTF_ANY   = 1 << 4,
	_DISPATCH_DATA_FORMAT_BASE32    = 1 << 5,
	_DISPATCH_DATA_FORMAT_BASE32HEX = 1 << 6,
	_DISPATCH_DATA_FORMAT_BASE64    = 1 << 7,
};
43
44 #pragma mark -
45 #pragma mark baseXX tables
46
// Base32 alphabet: value 0..31 -> output character.
static const unsigned char base32_encode_table[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

/*
 * Base32 reverse lookup, indexed by input byte value (16 entries per row).
 * -1 marks a byte that is not part of the alphabet, -2 marks the '='
 * padding character; the table ends at 'Z' (0x5a).
 */
static const char base32_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */ -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
};

// Number of entries in base32_decode_table; larger byte values are invalid.
static const ssize_t base32_decode_table_size =
		sizeof(base32_decode_table) / sizeof(base32_decode_table[0]);
60
// Base32hex ("extended hex") alphabet: value 0..31 -> output character.
static const unsigned char base32hex_encode_table[] =
		"0123456789ABCDEFGHIJKLMNOPQRSTUV";

/*
 * Base32hex reverse lookup, indexed by input byte value (16 entries per
 * row). -1 marks a byte that is not part of the alphabet, -2 marks the '='
 * padding character; the table ends at 'V' (0x56).
 */
static const char base32hex_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
	/* 0x50 */ 25, 26, 27, 28, 29, 30, 31
};

/*
 * Number of entries in base32hex_decode_table. This must be derived from the
 * decode table itself: sizing it from the 33-byte encode table made the
 * decoder's bounds check reject every byte >= 33, i.e. the whole alphabet.
 */
static const ssize_t base32hex_decode_table_size =
		sizeof(base32hex_decode_table) / sizeof(base32hex_decode_table[0]);
74
// Base64 alphabet: value 0..63 -> output character.
static const unsigned char base64_encode_table[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
 * Base64 reverse lookup, indexed by input byte value (16 entries per row).
 * -1 marks a byte that is not part of the alphabet, -2 marks the '='
 * padding character; the table ends at 'z' (0x7a).
 */
static const char base64_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	/* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};

// Number of entries in base64_decode_table; larger byte values are invalid.
static const ssize_t base64_decode_table_size =
		sizeof(base64_decode_table) / sizeof(base64_decode_table[0]);
92
93 #pragma mark -
94 #pragma mark dispatch_transform_buffer
95
96 typedef struct dispatch_transform_buffer_s {
97 dispatch_data_t data;
98 uint8_t *start;
99 union {
100 uint8_t *u8;
101 uint16_t *u16;
102 } ptr;
103 size_t size;
104 } dispatch_transform_buffer_s;
105
/*
 * Saturating size_t multiplication: returns a * b, or SIZE_MAX when the
 * product would not fit in a size_t.
 */
static size_t
_dispatch_transform_sizet_mul(size_t a, size_t b)
{
	// Zero never overflows, and must be ruled out before dividing by `a`.
	if (a == 0) {
		return 0;
	}
	if (b > SIZE_MAX / a) {
		return SIZE_MAX;
	}
	return a * b;
}
115
116 #define BUFFER_MALLOC_MAX (100*1024*1024)
117
118 static bool
119 _dispatch_transform_buffer_new(dispatch_transform_buffer_s *buffer,
120 size_t required, size_t size)
121 {
122 size_t remaining = buffer->size - (size_t)(buffer->ptr.u8 - buffer->start);
123 if (required == 0 || remaining < required) {
124 if (buffer->start) {
125 if (buffer->ptr.u8 > buffer->start) {
126 dispatch_data_t _new = dispatch_data_create(buffer->start,
127 (size_t)(buffer->ptr.u8 - buffer->start), NULL,
128 DISPATCH_DATA_DESTRUCTOR_FREE);
129 dispatch_data_t _concat = dispatch_data_create_concat(
130 buffer->data, _new);
131 dispatch_release(_new);
132 dispatch_release(buffer->data);
133 buffer->data = _concat;
134 } else {
135 free(buffer->start);
136 }
137 }
138 buffer->size = required + size;
139 buffer->start = NULL;
140 if (buffer->size > 0) {
141 if (buffer->size > BUFFER_MALLOC_MAX) {
142 return false;
143 }
144 buffer->start = (uint8_t*)malloc(buffer->size);
145 if (buffer->start == NULL) {
146 return false;
147 }
148 }
149 buffer->ptr.u8 = buffer->start;
150 }
151 return true;
152 }
153
154 #pragma mark -
155 #pragma mark dispatch_transform_helpers
156
157 static dispatch_data_t
158 _dispatch_data_subrange_map(dispatch_data_t data, const void **ptr,
159 size_t offset, size_t size)
160 {
161 dispatch_data_t subrange, map = NULL;
162
163 subrange = dispatch_data_create_subrange(data, offset, size);
164 if (dispatch_data_get_size(subrange) == size) {
165 map = dispatch_data_create_map(subrange, ptr, NULL);
166 }
167 dispatch_release(subrange);
168 return map;
169 }
170
171 static dispatch_data_format_type_t
172 _dispatch_transform_detect_utf(dispatch_data_t data)
173 {
174 const void *p;
175 dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p, 0, 2);
176
177 if (subrange == NULL) {
178 return NULL;
179 }
180
181 const uint16_t ch = *(const uint16_t *)p;
182 dispatch_data_format_type_t type = DISPATCH_DATA_FORMAT_TYPE_UTF8;
183
184 if (ch == 0xfeff) {
185 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST;
186 } else if (ch == 0xfffe) {
187 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_REV;
188 }
189
190 dispatch_release(subrange);
191
192 return type;
193 }
194
195 static uint16_t
196 _dispatch_transform_swap_to_host(uint16_t x, int32_t byteOrder)
197 {
198 if (byteOrder == OSLittleEndian) {
199 return OSSwapLittleToHostInt16(x);
200 }
201 return OSSwapBigToHostInt16(x);
202 }
203
204 static uint16_t
205 _dispatch_transform_swap_from_host(uint16_t x, int32_t byteOrder)
206 {
207 if (byteOrder == OSLittleEndian) {
208 return OSSwapHostToLittleInt16(x);
209 }
210 return OSSwapHostToBigInt16(x);
211 }
212
213 #pragma mark -
214 #pragma mark UTF-8
215
/*
 * Return the length in bytes (1-4) of the UTF-8 sequence introduced by the
 * lead byte `byte`, or 0 if `byte` cannot start a sequence (a continuation
 * byte 10xxxxxx, or 11111xxx).
 */
static uint8_t
_dispatch_transform_utf8_length(uint8_t byte)
{
	if (byte < 0x80) {
		return 1;	// 0xxxxxxx
	}
	if (byte < 0xc0) {
		return 0;	// 10xxxxxx: continuation byte
	}
	if (byte < 0xe0) {
		return 2;	// 110xxxxx
	}
	if (byte < 0xf0) {
		return 3;	// 1110xxxx
	}
	if (byte < 0xf8) {
		return 4;	// 11110xxx
	}
	return 0;		// 11111xxx
}
230
/*
 * Decode the UTF-8 sequence starting at `bytes` into a code point.
 *
 * The sequence length is taken from the lead byte, so `bytes` must point at
 * that many readable bytes. Continuation bytes are not validated; only
 * their low six bits are used. Returns 0 if the lead byte cannot start a
 * UTF-8 sequence.
 */
static uint32_t
_dispatch_transform_read_utf8_sequence(const uint8_t *bytes)
{
	// Payload bits carried by the lead byte, indexed by sequence length.
	static const uint8_t lead_mask[] = { 0x00, 0x7f, 0x1f, 0x0f, 0x07 };

	uint8_t seq_length = _dispatch_transform_utf8_length(*bytes);
	if (seq_length == 0) {
		// Not a UTF-8 sequence. Bail out here: decrementing a zero length
		// would wrap to 255 and read far past the caller's buffer.
		return 0;
	}

	uint32_t wch = (uint32_t)(*bytes & lead_mask[seq_length]);
	uint8_t i;
	for (i = 1; i < seq_length; i++) {
		// Each continuation byte contributes six more bits.
		wch = (wch << 6) | (uint32_t)(bytes[i] & 0x3f);
	}
	return wch;
}
272
273 #pragma mark -
274 #pragma mark UTF-16
275
276 static dispatch_data_t
277 _dispatch_transform_to_utf16(dispatch_data_t data, int32_t byteOrder)
278 {
279 __block size_t skip = 0;
280
281 __block dispatch_transform_buffer_s buffer = {
282 .data = dispatch_data_empty,
283 };
284
285 bool success = dispatch_data_apply(data, ^(
286 DISPATCH_UNUSED dispatch_data_t region,
287 size_t offset, const void *_buffer, size_t size) {
288 const uint8_t *src = _buffer;
289 size_t i;
290
291 if (offset == 0) {
292 size_t dest_size = 2 + _dispatch_transform_sizet_mul(size,
293 sizeof(uint16_t));
294 if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
295 return (bool)false;
296 }
297 // Insert BOM
298 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(0xfeff,
299 byteOrder);
300 }
301
302 // Skip is incremented if the previous block read-ahead into our block
303 if (skip >= size) {
304 skip -= size;
305 return (bool)true;
306 } else if (skip > 0) {
307 src += skip;
308 size -= skip;
309 skip = 0;
310 }
311
312 for (i = 0; i < size;) {
313 uint32_t wch = 0;
314 uint8_t byte_size = _dispatch_transform_utf8_length(*src);
315
316 if (byte_size == 0) {
317 return (bool)false;
318 } else if (byte_size + i > size) {
319 // UTF-8 byte sequence spans over into the next block(s)
320 const void *p;
321 dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p,
322 offset + i, byte_size);
323 if (subrange == NULL) {
324 return (bool)false;
325 }
326
327 wch = _dispatch_transform_read_utf8_sequence(p);
328 skip += byte_size - (size - i);
329 src += byte_size;
330 i = size;
331
332 dispatch_release(subrange);
333 } else {
334 wch = _dispatch_transform_read_utf8_sequence(src);
335 src += byte_size;
336 i += byte_size;
337 }
338
339 size_t next = _dispatch_transform_sizet_mul(size - i, sizeof(uint16_t));
340 if (wch >= 0xd800 && wch < 0xdfff) {
341 // Illegal range (surrogate pair)
342 return (bool)false;
343 } else if (wch >= 0x10000) {
344 // Surrogate pair
345 if (!_dispatch_transform_buffer_new(&buffer, 2 *
346 sizeof(uint16_t), next)) {
347 return (bool)false;
348 }
349 wch -= 0x10000;
350 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
351 ((wch >> 10) & 0x3ff) + 0xd800, byteOrder);
352 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
353 (wch & 0x3ff) + 0xdc00, byteOrder);
354 } else {
355 if (!_dispatch_transform_buffer_new(&buffer, 1 *
356 sizeof(uint16_t), next)) {
357 return (bool)false;
358 }
359 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
360 (wch & 0xffff), byteOrder);
361 }
362 }
363
364 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
365
366 return (bool)true;
367 });
368
369 if (!success) {
370 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
371 dispatch_release(buffer.data);
372 return NULL;
373 }
374
375 return buffer.data;
376 }
377
378 static dispatch_data_t
379 _dispatch_transform_from_utf16(dispatch_data_t data, int32_t byteOrder)
380 {
381 __block size_t skip = 0;
382
383 __block dispatch_transform_buffer_s buffer = {
384 .data = dispatch_data_empty,
385 };
386
387 bool success = dispatch_data_apply(data, ^(
388 DISPATCH_UNUSED dispatch_data_t region, size_t offset,
389 const void *_buffer, size_t size) {
390 const uint16_t *src = _buffer;
391
392 if (offset == 0) {
393 // Assume first buffer will be mostly single-byte UTF-8 sequences
394 size_t dest_size = _dispatch_transform_sizet_mul(size, 2) / 3;
395 if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
396 return (bool)false;
397 }
398 }
399
400 size_t i = 0, max = size / 2;
401
402 // Skip is incremented if the previous block read-ahead into our block
403 if (skip >= size) {
404 skip -= size;
405 return (bool)true;
406 } else if (skip > 0) {
407 src = (uint16_t *)(((uint8_t *)src) + skip);
408 size -= skip;
409 max = (size / 2);
410 skip = 0;
411 }
412
413 // If the buffer is an odd size, allow read ahead into the next region
414 if ((size % 2) != 0) {
415 max += 1;
416 }
417
418 for (i = 0; i < max; i++) {
419 uint32_t wch = 0;
420 uint16_t ch;
421
422 if ((i == (max - 1)) && (max > (size / 2))) {
423 // Last byte of an odd sized range
424 const void *p;
425 dispatch_data_t range = _dispatch_data_subrange_map(data, &p,
426 offset + (i * 2), 2);
427 if (range == NULL) {
428 return (bool)false;
429 }
430 ch = _dispatch_transform_swap_to_host((uint16_t)*(uint64_t*)p,
431 byteOrder);
432 dispatch_release(range);
433 skip += 1;
434 } else {
435 ch = _dispatch_transform_swap_to_host(src[i], byteOrder);
436 }
437
438 if (ch == 0xfffe && offset == 0 && i == 0) {
439 // Wrong-endian BOM at beginning of data
440 return (bool)false;
441 } else if (ch == 0xfeff && offset == 0 && i == 0) {
442 // Correct-endian BOM, skip it
443 continue;
444 }
445
446 if ((ch >= 0xd800) && (ch <= 0xdbff)) {
447 // Surrogate pair
448 wch = ((ch - 0xd800u) << 10);
449 if (++i >= max) {
450 // Surrogate byte isn't in this block
451 const void *p;
452 dispatch_data_t range = _dispatch_data_subrange_map(data,
453 &p, offset + (i * 2), 2);
454 if (range == NULL) {
455 return (bool)false;
456 }
457 ch = _dispatch_transform_swap_to_host(*(uint16_t *)p,
458 byteOrder);
459 dispatch_release(range);
460 skip += 2;
461 } else {
462 ch = _dispatch_transform_swap_to_host(src[i], byteOrder);
463 }
464 if (!((ch >= 0xdc00) && (ch <= 0xdfff))) {
465 return (bool)false;
466 }
467 wch = (wch | (ch & 0x3ff));
468 wch += 0x10000;
469 } else if ((ch >= 0xdc00) && (ch <= 0xdfff)) {
470 return (bool)false;
471 } else {
472 wch = ch;
473 }
474
475 size_t next = _dispatch_transform_sizet_mul(max - i, 2);
476 if (wch < 0x80) {
477 if (!_dispatch_transform_buffer_new(&buffer, 1, next)) {
478 return (bool)false;
479 }
480 *(buffer.ptr.u8)++ = (uint8_t)(wch & 0xff);
481 } else if (wch < 0x800) {
482 if (!_dispatch_transform_buffer_new(&buffer, 2, next)) {
483 return (bool)false;
484 }
485 *(buffer.ptr.u8)++ = (uint8_t)(0xc0 | (wch >> 6));
486 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
487 } else if (wch < 0x10000) {
488 if (!_dispatch_transform_buffer_new(&buffer, 3, next)) {
489 return (bool)false;
490 }
491 *(buffer.ptr.u8)++ = (uint8_t)(0xe0 | (wch >> 12));
492 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
493 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
494 } else if (wch < 0x200000) {
495 if (!_dispatch_transform_buffer_new(&buffer, 4, next)) {
496 return (bool)false;
497 }
498 *(buffer.ptr.u8)++ = (uint8_t)(0xf0 | (wch >> 18));
499 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 12) & 0x3f));
500 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
501 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
502 }
503 }
504
505 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
506
507 return (bool)true;
508 });
509
510 if (!success) {
511 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
512 dispatch_release(buffer.data);
513 return NULL;
514 }
515
516 return buffer.data;
517 }
518
519 static dispatch_data_t
520 _dispatch_transform_from_utf16le(dispatch_data_t data)
521 {
522 return _dispatch_transform_from_utf16(data, OSLittleEndian);
523 }
524
525 static dispatch_data_t
526 _dispatch_transform_from_utf16be(dispatch_data_t data)
527 {
528 return _dispatch_transform_from_utf16(data, OSBigEndian);
529 }
530
531 static dispatch_data_t
532 _dispatch_transform_to_utf16le(dispatch_data_t data)
533 {
534 return _dispatch_transform_to_utf16(data, OSLittleEndian);
535 }
536
537 static dispatch_data_t
538 _dispatch_transform_to_utf16be(dispatch_data_t data)
539 {
540 return _dispatch_transform_to_utf16(data, OSBigEndian);
541 }
542
543 #pragma mark -
544 #pragma mark base32
545
546 static dispatch_data_t
547 _dispatch_transform_from_base32_with_table(dispatch_data_t data,
548 const char* table, ssize_t table_size)
549 {
550 __block uint64_t x = 0, count = 0, pad = 0;
551
552 __block dispatch_data_t rv = dispatch_data_empty;
553
554 bool success = dispatch_data_apply(data, ^(
555 DISPATCH_UNUSED dispatch_data_t region,
556 DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
557 size_t i, dest_size = (size * 5) / 8;
558
559 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
560 uint8_t *ptr = dest;
561 if (dest == NULL) {
562 return (bool)false;
563 }
564
565 const uint8_t *bytes = buffer;
566
567 for (i = 0; i < size; i++) {
568 if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
569 continue;
570 }
571
572 ssize_t index = bytes[i];
573 if (index >= table_size || table[index] == -1) {
574 free(dest);
575 return (bool)false;
576 }
577 count++;
578
579 char value = table[index];
580 if (value == -2) {
581 value = 0;
582 pad++;
583 }
584
585 x <<= 5;
586 x += (uint64_t)value;
587
588 if ((count & 0x7) == 0) {
589 *ptr++ = (x >> 32) & 0xff;
590 *ptr++ = (x >> 24) & 0xff;
591 *ptr++ = (x >> 16) & 0xff;
592 *ptr++ = (x >> 8) & 0xff;
593 *ptr++ = x & 0xff;
594 }
595 }
596
597 size_t final = (size_t)(ptr - dest);
598 switch (pad) {
599 case 1:
600 final -= 1;
601 break;
602 case 3:
603 final -= 2;
604 break;
605 case 4:
606 final -= 3;
607 break;
608 case 6:
609 final -= 4;
610 break;
611 }
612
613 dispatch_data_t val = dispatch_data_create(dest, final, NULL,
614 DISPATCH_DATA_DESTRUCTOR_FREE);
615 dispatch_data_t concat = dispatch_data_create_concat(rv, val);
616
617 dispatch_release(val);
618 dispatch_release(rv);
619 rv = concat;
620
621 return (bool)true;
622 });
623
624 if (!success) {
625 dispatch_release(rv);
626 return NULL;
627 }
628
629 return rv;
630 }
631
632 static dispatch_data_t
633 _dispatch_transform_to_base32_with_table(dispatch_data_t data, const unsigned char* table)
634 {
635 size_t total = dispatch_data_get_size(data);
636 __block size_t count = 0;
637
638 if (total > SIZE_T_MAX-4 || ((total+4)/5 > SIZE_T_MAX/8)) {
639 /* We can't hold larger than size_t in a dispatch_data_t
640 * and we want to avoid an integer overflow in the next
641 * calculation.
642 */
643 return NULL;
644 }
645
646 size_t dest_size = (total + 4) / 5 * 8;
647 uint8_t *dest = (uint8_t*)malloc(dest_size);
648 if (dest == NULL) {
649 return NULL;
650 }
651
652 __block uint8_t *ptr = dest;
653
654 /*
655 0 1 2 3 4
656 8-bit bytes: xxxxxxxx yyyyyyyy zzzzzzzz xxxxxxxx yyyyyyyy
657 5-bit chunks: aaaaabbb bbcccccd ddddeeee efffffgg ggghhhhh
658 */
659
660 bool success = dispatch_data_apply(data, ^(
661 DISPATCH_UNUSED dispatch_data_t region,
662 size_t offset, const void *buffer, size_t size) {
663 const uint8_t *bytes = buffer;
664 size_t i;
665
666 for (i = 0; i < size; i++, count++) {
667 uint8_t curr = bytes[i], last = 0;
668
669 if ((count % 5) != 0) {
670 if (i == 0) {
671 const void *p;
672 dispatch_data_t subrange = _dispatch_data_subrange_map(data,
673 &p, offset - 1, 1);
674 if (subrange == NULL) {
675 return (bool)false;
676 }
677 last = *(uint8_t*)p;
678 dispatch_release(subrange);
679 } else {
680 last = bytes[i - 1];
681 }
682 }
683
684 switch (count % 5) {
685 case 0:
686 // a
687 *ptr++ = table[(curr >> 3) & 0x1fu];
688 break;
689 case 1:
690 // b + c
691 *ptr++ = table[((last << 2)|(curr >> 6)) & 0x1f];
692 *ptr++ = table[(curr >> 1) & 0x1f];
693 break;
694 case 2:
695 // d
696 *ptr++ = table[((last << 4)|(curr >> 4)) & 0x1f];
697 break;
698 case 3:
699 // e + f
700 *ptr++ = table[((last << 1)|(curr >> 7)) & 0x1f];
701 *ptr++ = table[(curr >> 2) & 0x1f];
702 break;
703 case 4:
704 // g + h
705 *ptr++ = table[((last << 3)|(curr >> 5)) & 0x1f];
706 *ptr++ = table[curr & 0x1f];
707 break;
708 }
709 }
710
711 // Last region, insert padding bytes, if needed
712 if (offset + size == total) {
713 switch (count % 5) {
714 case 0:
715 break;
716 case 1:
717 // b[4:2]
718 *ptr++ = table[(bytes[size-1] << 2) & 0x1c];
719 break;
720 case 2:
721 // d[4]
722 *ptr++ = table[(bytes[size-1] << 4) & 0x10];
723 break;
724 case 3:
725 // e[4:1]
726 *ptr++ = table[(bytes[size-1] << 1) & 0x1e];
727 break;
728 case 4:
729 // g[2:3]
730 *ptr++ = table[(bytes[size-1] << 3) & 0x18];
731 break;
732 }
733 switch (count % 5) {
734 case 0:
735 break;
736 case 1:
737 *ptr++ = '='; // c
738 *ptr++ = '='; // d
739 case 2:
740 *ptr++ = '='; // e
741 case 3:
742 *ptr++ = '='; // f
743 *ptr++ = '='; // g
744 case 4:
745 *ptr++ = '='; // h
746 break;
747 }
748 }
749
750 return (bool)true;
751 });
752
753 if (!success) {
754 free(dest);
755 return NULL;
756 }
757 return dispatch_data_create(dest, dest_size, NULL,
758 DISPATCH_DATA_DESTRUCTOR_FREE);
759 }
760
761 static dispatch_data_t
762 _dispatch_transform_from_base32(dispatch_data_t data)
763 {
764 return _dispatch_transform_from_base32_with_table(data, base32_decode_table,
765 base32_decode_table_size);
766 }
767
768 static dispatch_data_t
769 _dispatch_transform_to_base32(dispatch_data_t data)
770 {
771 return _dispatch_transform_to_base32_with_table(data, base32_encode_table);
772 }
773
774 static dispatch_data_t
775 _dispatch_transform_from_base32hex(dispatch_data_t data)
776 {
777 return _dispatch_transform_from_base32_with_table(data,
778 base32hex_decode_table, base32hex_decode_table_size);
779 }
780
781 static dispatch_data_t
782 _dispatch_transform_to_base32hex(dispatch_data_t data)
783 {
784 return _dispatch_transform_to_base32_with_table(data,
785 base32hex_encode_table);
786 }
787
788 #pragma mark -
789 #pragma mark base64
790
791 static dispatch_data_t
792 _dispatch_transform_from_base64(dispatch_data_t data)
793 {
794 __block uint64_t x = 0, count = 0;
795 __block size_t pad = 0;
796
797 __block dispatch_data_t rv = dispatch_data_empty;
798
799 bool success = dispatch_data_apply(data, ^(
800 DISPATCH_UNUSED dispatch_data_t region,
801 DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
802 size_t i, dest_size = (size * 3) / 4;
803
804 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
805 uint8_t *ptr = dest;
806 if (dest == NULL) {
807 return (bool)false;
808 }
809
810 const uint8_t *bytes = buffer;
811
812 for (i = 0; i < size; i++) {
813 if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
814 continue;
815 }
816
817 ssize_t index = bytes[i];
818 if (index >= base64_decode_table_size ||
819 base64_decode_table[index] == -1) {
820 free(dest);
821 return (bool)false;
822 }
823 count++;
824
825 char value = base64_decode_table[index];
826 if (value == -2) {
827 value = 0;
828 pad++;
829 }
830
831 x <<= 6;
832 x += (uint64_t)value;
833
834 if ((count & 0x3) == 0) {
835 *ptr++ = (x >> 16) & 0xff;
836 *ptr++ = (x >> 8) & 0xff;
837 *ptr++ = x & 0xff;
838 }
839 }
840
841 size_t final = (size_t)(ptr - dest);
842 if (pad > 0) {
843 // 2 bytes of pad means only had one char in final group
844 final -= pad;
845 }
846
847 dispatch_data_t val = dispatch_data_create(dest, final, NULL,
848 DISPATCH_DATA_DESTRUCTOR_FREE);
849 dispatch_data_t concat = dispatch_data_create_concat(rv, val);
850
851 dispatch_release(val);
852 dispatch_release(rv);
853 rv = concat;
854
855 return (bool)true;
856 });
857
858 if (!success) {
859 dispatch_release(rv);
860 return NULL;
861 }
862
863 return rv;
864 }
865
866 static dispatch_data_t
867 _dispatch_transform_to_base64(dispatch_data_t data)
868 {
869 // RFC 4648 states that we should not linebreak
870 // http://tools.ietf.org/html/rfc4648
871 size_t total = dispatch_data_get_size(data);
872 __block size_t count = 0;
873
874 if (total > SIZE_T_MAX-2 || ((total+2)/3> SIZE_T_MAX/4)) {
875 /* We can't hold larger than size_t in a dispatch_data_t
876 * and we want to avoid an integer overflow in the next
877 * calculation.
878 */
879 return NULL;
880 }
881
882 size_t dest_size = (total + 2) / 3 * 4;
883 uint8_t *dest = (uint8_t*)malloc(dest_size);
884 if (dest == NULL) {
885 return NULL;
886 }
887
888 __block uint8_t *ptr = dest;
889
890 /*
891 * 3 8-bit bytes: xxxxxxxx yyyyyyyy zzzzzzzz
892 * 4 6-bit chunks: aaaaaabb bbbbcccc ccdddddd
893 */
894
895 bool success = dispatch_data_apply(data, ^(
896 DISPATCH_UNUSED dispatch_data_t region,
897 size_t offset, const void *buffer, size_t size) {
898 const uint8_t *bytes = buffer;
899 size_t i;
900
901 for (i = 0; i < size; i++, count++) {
902 uint8_t curr = bytes[i], last = 0;
903
904 if ((count % 3) != 0) {
905 if (i == 0) {
906 const void *p;
907 dispatch_data_t subrange = _dispatch_data_subrange_map(data,
908 &p, offset - 1, 1);
909 if (subrange == NULL) {
910 return (bool)false;
911 }
912 last = *(uint8_t*)p;
913 dispatch_release(subrange);
914 } else {
915 last = bytes[i - 1];
916 }
917 }
918
919 switch (count % 3) {
920 case 0:
921 *ptr++ = base64_encode_table[(curr >> 2) & 0x3f];
922 break;
923 case 1:
924 *ptr++ = base64_encode_table[((last << 4)|(curr >> 4)) & 0x3f];
925 break;
926 case 2:
927 *ptr++ = base64_encode_table[((last << 2)|(curr >> 6)) & 0x3f];
928 *ptr++ = base64_encode_table[(curr & 0x3f)];
929 break;
930 }
931 }
932
933 // Last region, insert padding bytes, if needed
934 if (offset + size == total) {
935 switch (count % 3) {
936 case 0:
937 break;
938 case 1:
939 *ptr++ = base64_encode_table[(bytes[size-1] << 4) & 0x30];
940 *ptr++ = '=';
941 *ptr++ = '=';
942 break;
943 case 2:
944 *ptr++ = base64_encode_table[(bytes[size-1] << 2) & 0x3c];
945 *ptr++ = '=';
946 break;
947 }
948 }
949
950 return (bool)true;
951 });
952
953 if (!success) {
954 free(dest);
955 return NULL;
956 }
957 return dispatch_data_create(dest, dest_size, NULL,
958 DISPATCH_DATA_DESTRUCTOR_FREE);
959 }
960
961 #pragma mark -
962 #pragma mark dispatch_data_transform
963
964 dispatch_data_t
965 dispatch_data_create_with_transform(dispatch_data_t data,
966 dispatch_data_format_type_t input, dispatch_data_format_type_t output)
967 {
968 if (input->type == _DISPATCH_DATA_FORMAT_UTF_ANY) {
969 input = _dispatch_transform_detect_utf(data);
970 if (input == NULL) {
971 return NULL;
972 }
973 }
974
975 if ((input->type & ~output->input_mask) != 0) {
976 return NULL;
977 }
978
979 if ((output->type & ~input->output_mask) != 0) {
980 return NULL;
981 }
982
983 if (dispatch_data_get_size(data) == 0) {
984 return data;
985 }
986
987 dispatch_data_t temp1;
988 if (input->decode) {
989 temp1 = input->decode(data);
990 } else {
991 dispatch_retain(data);
992 temp1 = data;
993 }
994
995 if (!temp1) {
996 return NULL;
997 }
998
999 dispatch_data_t temp2;
1000 if (output->encode) {
1001 temp2 = output->encode(temp1);
1002 } else {
1003 dispatch_retain(temp1);
1004 temp2 = temp1;
1005 }
1006
1007 dispatch_release(temp1);
1008 return temp2;
1009 }
1010
1011 const struct dispatch_data_format_type_s _dispatch_data_format_type_none = {
1012 .type = _DISPATCH_DATA_FORMAT_NONE,
1013 .input_mask = ~0u,
1014 .output_mask = ~0u,
1015 .decode = NULL,
1016 .encode = NULL,
1017 };
1018
1019 const struct dispatch_data_format_type_s _dispatch_data_format_type_base32 = {
1020 .type = _DISPATCH_DATA_FORMAT_BASE32,
1021 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1022 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1023 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1024 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1025 .decode = _dispatch_transform_from_base32,
1026 .encode = _dispatch_transform_to_base32,
1027 };
1028
1029 const struct dispatch_data_format_type_s _dispatch_data_format_type_base32hex =
1030 {
1031 .type = _DISPATCH_DATA_FORMAT_BASE32HEX,
1032 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1033 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1034 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1035 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1036 .decode = _dispatch_transform_from_base32hex,
1037 .encode = _dispatch_transform_to_base32hex,
1038 };
1039
1040 const struct dispatch_data_format_type_s _dispatch_data_format_type_base64 = {
1041 .type = _DISPATCH_DATA_FORMAT_BASE64,
1042 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1043 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1044 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1045 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1046 .decode = _dispatch_transform_from_base64,
1047 .encode = _dispatch_transform_to_base64,
1048 };
1049
1050 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16le = {
1051 .type = _DISPATCH_DATA_FORMAT_UTF16LE,
1052 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1053 _DISPATCH_DATA_FORMAT_UTF16LE),
1054 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1055 _DISPATCH_DATA_FORMAT_UTF16LE),
1056 .decode = _dispatch_transform_from_utf16le,
1057 .encode = _dispatch_transform_to_utf16le,
1058 };
1059
1060 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16be = {
1061 .type = _DISPATCH_DATA_FORMAT_UTF16BE,
1062 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1063 _DISPATCH_DATA_FORMAT_UTF16LE),
1064 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1065 _DISPATCH_DATA_FORMAT_UTF16LE),
1066 .decode = _dispatch_transform_from_utf16be,
1067 .encode = _dispatch_transform_to_utf16be,
1068 };
1069
1070 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf8 = {
1071 .type = _DISPATCH_DATA_FORMAT_UTF8,
1072 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1073 _DISPATCH_DATA_FORMAT_UTF16LE),
1074 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1075 _DISPATCH_DATA_FORMAT_UTF16LE),
1076 .decode = NULL,
1077 .encode = NULL,
1078 };
1079
1080 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf_any = {
1081 .type = _DISPATCH_DATA_FORMAT_UTF_ANY,
1082 .input_mask = 0,
1083 .output_mask = 0,
1084 .decode = NULL,
1085 .encode = NULL,
1086 };