]> git.saurik.com Git - apple/libdispatch.git/blob - src/transform.c
libdispatch-703.1.4.tar.gz
[apple/libdispatch.git] / src / transform.c
1 /*
2 * Copyright (c) 2011-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * @APPLE_APACHE_LICENSE_HEADER_END@
19 */
20
21 #include "internal.h"
22
// Byte-order primitives. Darwin supplies the OSSwap* family directly; on Linux
// the same names are mapped onto their <endian.h> counterparts.
#if defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#elif __linux__
#include <endian.h>
#define OSLittleEndian          __LITTLE_ENDIAN
#define OSBigEndian             __BIG_ENDIAN
#define OSSwapLittleToHostInt16 le16toh
#define OSSwapBigToHostInt16    be16toh
#define OSSwapHostToLittleInt16 htole16
#define OSSwapHostToBigInt16    htobe16
#endif
34
35 #if defined(__LITTLE_ENDIAN__)
36 #define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16LE
37 #define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16BE
38 #elif defined(__BIG_ENDIAN__)
39 #define DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST DISPATCH_DATA_FORMAT_TYPE_UTF16BE
40 #define DISPATCH_DATA_FORMAT_TYPE_UTF16_REV DISPATCH_DATA_FORMAT_TYPE_UTF16LE
41 #else
42 #error Unsupported Endianness
43 #endif
44
// One bit per data format. The format descriptors at the end of this file use
// these bits for their .type field and OR them together for their masks.
enum {
	_DISPATCH_DATA_FORMAT_NONE      = 1 << 0,
	_DISPATCH_DATA_FORMAT_UTF8      = 1 << 1,
	_DISPATCH_DATA_FORMAT_UTF16LE   = 1 << 2,
	_DISPATCH_DATA_FORMAT_UTF16BE   = 1 << 3,
	_DISPATCH_DATA_FORMAT_UTF_ANY   = 1 << 4,
	_DISPATCH_DATA_FORMAT_BASE32    = 1 << 5,
	_DISPATCH_DATA_FORMAT_BASE32HEX = 1 << 6,
	_DISPATCH_DATA_FORMAT_BASE64    = 1 << 7,
};
55
56 #pragma mark -
57 #pragma mark baseXX tables
58
// Base32 alphabet: 5-bit value -> output character.
static const unsigned char base32_encode_table[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";

// Input byte -> 5-bit value, indexed by character code up to 'Z'.
// -1 marks a character outside the alphabet, -2 marks the '=' padding.
static const char base32_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */ -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
};
// Number of entries in base32_decode_table; larger byte values are invalid.
static const ssize_t base32_decode_table_size =
		sizeof(base32_decode_table) / sizeof(base32_decode_table[0]);
72
// Base32hex ("extended hex") alphabet: 5-bit value -> output character.
static const unsigned char base32hex_encode_table[] =
		"0123456789ABCDEFGHIJKLMNOPQRSTUV";

// Input byte -> 5-bit value, indexed by character code up to 'V'.
// -1 marks a character outside the alphabet, -2 marks the '=' padding.
static const char base32hex_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
	/* 0x50 */ 25, 26, 27, 28, 29, 30, 31
};
// Number of entries in base32hex_decode_table. This must be derived from the
// *decode* table: sizing it from the 33-byte encode table made the bounds
// check in the decoder reject every valid character, since '0' is already
// index 48.
static const ssize_t base32hex_decode_table_size =
		sizeof(base32hex_decode_table) / sizeof(*base32hex_decode_table);
86
// Base64 alphabet: 6-bit value -> output character.
static const unsigned char base64_encode_table[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Input byte -> 6-bit value, indexed by character code up to 'z'.
// -1 marks a character outside the alphabet, -2 marks the '=' padding.
static const char base64_decode_table[] = {
	/* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	/* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	/* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -2, -1, -1,
	/* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	/* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};

// Number of entries in base64_decode_table; larger byte values are invalid.
static const ssize_t base64_decode_table_size =
		sizeof(base64_decode_table) / sizeof(base64_decode_table[0]);
104
105 #pragma mark -
106 #pragma mark dispatch_transform_buffer
107
108 typedef struct dispatch_transform_buffer_s {
109 dispatch_data_t data;
110 uint8_t *start;
111 union {
112 uint8_t *u8;
113 uint16_t *u16;
114 } ptr;
115 size_t size;
116 } dispatch_transform_buffer_s;
117
118 #define BUFFER_MALLOC_MAX (100*1024*1024)
119
120 static bool
121 _dispatch_transform_buffer_new(dispatch_transform_buffer_s *buffer,
122 size_t required, size_t size)
123 {
124 size_t remaining = buffer->size - (size_t)(buffer->ptr.u8 - buffer->start);
125 if (required == 0 || remaining < required) {
126 if (buffer->start) {
127 if (buffer->ptr.u8 > buffer->start) {
128 dispatch_data_t _new = dispatch_data_create(buffer->start,
129 (size_t)(buffer->ptr.u8 - buffer->start), NULL,
130 DISPATCH_DATA_DESTRUCTOR_FREE);
131 dispatch_data_t _concat = dispatch_data_create_concat(
132 buffer->data, _new);
133 dispatch_release(_new);
134 dispatch_release(buffer->data);
135 buffer->data = _concat;
136 } else {
137 free(buffer->start);
138 }
139 }
140 buffer->size = required + size;
141 buffer->start = NULL;
142 if (buffer->size > 0) {
143 if (buffer->size > BUFFER_MALLOC_MAX) {
144 return false;
145 }
146 buffer->start = (uint8_t*)malloc(buffer->size);
147 if (buffer->start == NULL) {
148 return false;
149 }
150 }
151 buffer->ptr.u8 = buffer->start;
152 }
153 return true;
154 }
155
156 #pragma mark -
157 #pragma mark dispatch_transform_helpers
158
159 static dispatch_data_t
160 _dispatch_data_subrange_map(dispatch_data_t data, const void **ptr,
161 size_t offset, size_t size)
162 {
163 dispatch_data_t subrange, map = NULL;
164
165 subrange = dispatch_data_create_subrange(data, offset, size);
166 if (dispatch_data_get_size(subrange) == size) {
167 map = dispatch_data_create_map(subrange, ptr, NULL);
168 }
169 dispatch_release(subrange);
170 return map;
171 }
172
173 static dispatch_data_format_type_t
174 _dispatch_transform_detect_utf(dispatch_data_t data)
175 {
176 const void *p;
177 dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p, 0, 2);
178
179 if (subrange == NULL) {
180 return NULL;
181 }
182
183 const uint16_t ch = *(const uint16_t *)p;
184 dispatch_data_format_type_t type = DISPATCH_DATA_FORMAT_TYPE_UTF8;
185
186 if (ch == 0xfeff) {
187 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_HOST;
188 } else if (ch == 0xfffe) {
189 type = DISPATCH_DATA_FORMAT_TYPE_UTF16_REV;
190 }
191
192 dispatch_release(subrange);
193
194 return type;
195 }
196
197 static uint16_t
198 _dispatch_transform_swap_to_host(uint16_t x, int32_t byteOrder)
199 {
200 if (byteOrder == OSLittleEndian) {
201 return OSSwapLittleToHostInt16(x);
202 }
203 return OSSwapBigToHostInt16(x);
204 }
205
206 static uint16_t
207 _dispatch_transform_swap_from_host(uint16_t x, int32_t byteOrder)
208 {
209 if (byteOrder == OSLittleEndian) {
210 return OSSwapHostToLittleInt16(x);
211 }
212 return OSSwapHostToBigInt16(x);
213 }
214
215 #pragma mark -
216 #pragma mark UTF-8
217
/*
 * Length in bytes (1-4) of the UTF-8 sequence introduced by lead byte `byte`,
 * or 0 when `byte` cannot start a sequence (a continuation byte, or a lead
 * byte of the form 11111xxx).
 */
static uint8_t
_dispatch_transform_utf8_length(uint8_t byte)
{
	if ((byte & 0x80) == 0) {
		return 1;
	}
	if ((byte & 0xe0) == 0xc0) {
		return 2;
	}
	if ((byte & 0xf0) == 0xe0) {
		return 3;
	}
	if ((byte & 0xf8) == 0xf0) {
		return 4;
	}
	return 0;
}

/*
 * Decode the UTF-8 sequence starting at `bytes` into a code point.
 *
 * The caller must make sure that as many bytes as the lead byte announces are
 * readable. Continuation bytes are not validated; only their low six bits
 * are used.
 *
 * Returns 0 without reading any further when the lead byte is not a valid
 * sequence start. Previously the zero length was decremented as a uint8_t,
 * wrapped to 255 and caused a 255-byte over-read. Since 0 is also the result
 * for a NUL byte, callers that need to tell the two apart should check
 * _dispatch_transform_utf8_length() first.
 */
static uint32_t
_dispatch_transform_read_utf8_sequence(const uint8_t *bytes)
{
	const uint8_t seq_length = _dispatch_transform_utf8_length(*bytes);
	uint32_t wch;

	// Payload bits carried by the lead byte
	switch (seq_length) {
	case 4:
		wch = (*bytes & 0x7);
		break;
	case 3:
		wch = (*bytes & 0xf);
		break;
	case 2:
		wch = (*bytes & 0x1f);
		break;
	case 1:
		return (*bytes & 0x7f);
	default:
		// Not a utf-8 sequence
		return 0;
	}

	// Each continuation byte contributes six more bits
	for (uint8_t k = 1; k < seq_length; k++) {
		wch = (wch << 6) | (bytes[k] & 0x3f);
	}
	return wch;
}
274
275 #pragma mark -
276 #pragma mark UTF-16
277
278 static dispatch_data_t
279 _dispatch_transform_to_utf16(dispatch_data_t data, int32_t byteOrder)
280 {
281 __block size_t skip = 0;
282
283 __block dispatch_transform_buffer_s buffer = {
284 .data = dispatch_data_empty,
285 };
286
287 bool success = dispatch_data_apply(data, ^(
288 DISPATCH_UNUSED dispatch_data_t region,
289 size_t offset, const void *_buffer, size_t size) {
290 const uint8_t *src = _buffer;
291 size_t i, dest_size;
292
293 if (offset == 0) {
294 if (os_mul_and_add_overflow(size, sizeof(uint16_t),
295 sizeof(uint16_t), &dest_size)) {
296 return (bool)false;
297 }
298 if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
299 return (bool)false;
300 }
301 // Insert BOM
302 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(0xfeff,
303 byteOrder);
304 }
305
306 // Skip is incremented if the previous block read-ahead into our block
307 if (skip >= size) {
308 skip -= size;
309 return (bool)true;
310 } else if (skip > 0) {
311 src += skip;
312 size -= skip;
313 skip = 0;
314 }
315
316 for (i = 0; i < size;) {
317 uint32_t wch = 0;
318 uint8_t byte_size = _dispatch_transform_utf8_length(*src);
319 size_t next;
320
321 if (byte_size == 0) {
322 return (bool)false;
323 } else if (byte_size + i > size) {
324 // UTF-8 byte sequence spans over into the next block(s)
325 const void *p;
326 dispatch_data_t subrange = _dispatch_data_subrange_map(data, &p,
327 offset + i, byte_size);
328 if (subrange == NULL) {
329 return (bool)false;
330 }
331
332 wch = _dispatch_transform_read_utf8_sequence(p);
333 skip += byte_size - (size - i);
334 src += byte_size;
335 i = size;
336
337 dispatch_release(subrange);
338 } else {
339 wch = _dispatch_transform_read_utf8_sequence(src);
340 src += byte_size;
341 i += byte_size;
342 }
343
344 if (os_mul_overflow(size - i, sizeof(uint16_t), &next)) {
345 return (bool)false;
346 }
347 if (wch >= 0xd800 && wch < 0xdfff) {
348 // Illegal range (surrogate pair)
349 return (bool)false;
350 } else if (wch >= 0x10000) {
351 // Surrogate pair
352 if (!_dispatch_transform_buffer_new(&buffer, 2 *
353 sizeof(uint16_t), next)) {
354 return (bool)false;
355 }
356 wch -= 0x10000;
357 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
358 ((wch >> 10) & 0x3ff) + 0xd800, byteOrder);
359 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
360 (wch & 0x3ff) + 0xdc00, byteOrder);
361 } else {
362 if (!_dispatch_transform_buffer_new(&buffer, 1 *
363 sizeof(uint16_t), next)) {
364 return (bool)false;
365 }
366 *(buffer.ptr.u16)++ = _dispatch_transform_swap_from_host(
367 (wch & 0xffff), byteOrder);
368 }
369 }
370
371 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
372
373 return (bool)true;
374 });
375
376 if (!success) {
377 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
378 dispatch_release(buffer.data);
379 return NULL;
380 }
381
382 return buffer.data;
383 }
384
385 static dispatch_data_t
386 _dispatch_transform_from_utf16(dispatch_data_t data, int32_t byteOrder)
387 {
388 __block size_t skip = 0;
389
390 __block dispatch_transform_buffer_s buffer = {
391 .data = dispatch_data_empty,
392 };
393
394 bool success = dispatch_data_apply(data, ^(
395 DISPATCH_UNUSED dispatch_data_t region, size_t offset,
396 const void *_buffer, size_t size) {
397 const uint16_t *src = _buffer;
398
399 if (offset == 0) {
400 size_t dest_size = howmany(size, 3) * 2;
401 // Assume first buffer will be mostly single-byte UTF-8 sequences
402 if (!_dispatch_transform_buffer_new(&buffer, dest_size, 0)) {
403 return (bool)false;
404 }
405 }
406
407 size_t i = 0, max = size / 2;
408
409 // Skip is incremented if the previous block read-ahead into our block
410 if (skip >= size) {
411 skip -= size;
412 return (bool)true;
413 } else if (skip > 0) {
414 src = (uint16_t *)(((uint8_t *)src) + skip);
415 size -= skip;
416 max = (size / 2);
417 skip = 0;
418 }
419
420 // If the buffer is an odd size, allow read ahead into the next region
421 if ((size % 2) != 0) {
422 max += 1;
423 }
424
425 for (i = 0; i < max; i++) {
426 uint32_t wch = 0;
427 uint16_t ch;
428 size_t next;
429
430 if ((i == (max - 1)) && (max > (size / 2))) {
431 // Last byte of an odd sized range
432 const void *p;
433 dispatch_data_t range = _dispatch_data_subrange_map(data, &p,
434 offset + (i * 2), 2);
435 if (range == NULL) {
436 return (bool)false;
437 }
438 ch = _dispatch_transform_swap_to_host((uint16_t)*(uint64_t*)p,
439 byteOrder);
440 dispatch_release(range);
441 skip += 1;
442 } else {
443 ch = _dispatch_transform_swap_to_host(src[i], byteOrder);
444 }
445
446 if (ch == 0xfffe && offset == 0 && i == 0) {
447 // Wrong-endian BOM at beginning of data
448 return (bool)false;
449 } else if (ch == 0xfeff && offset == 0 && i == 0) {
450 // Correct-endian BOM, skip it
451 continue;
452 }
453
454 if ((ch >= 0xd800) && (ch <= 0xdbff)) {
455 // Surrogate pair
456 wch = ((ch - 0xd800u) << 10);
457 if (++i >= max) {
458 // Surrogate byte isn't in this block
459 const void *p;
460 dispatch_data_t range = _dispatch_data_subrange_map(data,
461 &p, offset + (i * 2), 2);
462 if (range == NULL) {
463 return (bool)false;
464 }
465 ch = _dispatch_transform_swap_to_host(*(uint16_t *)p,
466 byteOrder);
467 dispatch_release(range);
468 skip += 2;
469 } else {
470 ch = _dispatch_transform_swap_to_host(src[i], byteOrder);
471 }
472 if (!((ch >= 0xdc00) && (ch <= 0xdfff))) {
473 return (bool)false;
474 }
475 wch = (wch | (ch & 0x3ff));
476 wch += 0x10000;
477 } else if ((ch >= 0xdc00) && (ch <= 0xdfff)) {
478 return (bool)false;
479 } else {
480 wch = ch;
481 }
482
483 if (os_mul_overflow(max - i, 2, &next)) {
484 return (bool)false;
485 }
486 if (wch < 0x80) {
487 if (!_dispatch_transform_buffer_new(&buffer, 1, next)) {
488 return (bool)false;
489 }
490 *(buffer.ptr.u8)++ = (uint8_t)(wch & 0xff);
491 } else if (wch < 0x800) {
492 if (!_dispatch_transform_buffer_new(&buffer, 2, next)) {
493 return (bool)false;
494 }
495 *(buffer.ptr.u8)++ = (uint8_t)(0xc0 | (wch >> 6));
496 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
497 } else if (wch < 0x10000) {
498 if (!_dispatch_transform_buffer_new(&buffer, 3, next)) {
499 return (bool)false;
500 }
501 *(buffer.ptr.u8)++ = (uint8_t)(0xe0 | (wch >> 12));
502 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
503 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
504 } else if (wch < 0x200000) {
505 if (!_dispatch_transform_buffer_new(&buffer, 4, next)) {
506 return (bool)false;
507 }
508 *(buffer.ptr.u8)++ = (uint8_t)(0xf0 | (wch >> 18));
509 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 12) & 0x3f));
510 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | ((wch >> 6) & 0x3f));
511 *(buffer.ptr.u8)++ = (uint8_t)(0x80 | (wch & 0x3f));
512 }
513 }
514
515 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
516
517 return (bool)true;
518 });
519
520 if (!success) {
521 (void)_dispatch_transform_buffer_new(&buffer, 0, 0);
522 dispatch_release(buffer.data);
523 return NULL;
524 }
525
526 return buffer.data;
527 }
528
529 static dispatch_data_t
530 _dispatch_transform_from_utf16le(dispatch_data_t data)
531 {
532 return _dispatch_transform_from_utf16(data, OSLittleEndian);
533 }
534
535 static dispatch_data_t
536 _dispatch_transform_from_utf16be(dispatch_data_t data)
537 {
538 return _dispatch_transform_from_utf16(data, OSBigEndian);
539 }
540
541 static dispatch_data_t
542 _dispatch_transform_to_utf16le(dispatch_data_t data)
543 {
544 return _dispatch_transform_to_utf16(data, OSLittleEndian);
545 }
546
547 static dispatch_data_t
548 _dispatch_transform_to_utf16be(dispatch_data_t data)
549 {
550 return _dispatch_transform_to_utf16(data, OSBigEndian);
551 }
552
553 #pragma mark -
554 #pragma mark base32
555
556 static dispatch_data_t
557 _dispatch_transform_from_base32_with_table(dispatch_data_t data,
558 const char* table, ssize_t table_size)
559 {
560 __block uint64_t x = 0, count = 0, pad = 0;
561
562 __block dispatch_data_t rv = dispatch_data_empty;
563
564 bool success = dispatch_data_apply(data, ^(
565 DISPATCH_UNUSED dispatch_data_t region,
566 DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
567 size_t i, dest_size = howmany(size, 8) * 5;
568 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
569 uint8_t *ptr = dest;
570 if (dest == NULL) {
571 return (bool)false;
572 }
573
574 const uint8_t *bytes = buffer;
575
576 for (i = 0; i < size; i++) {
577 if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
578 continue;
579 }
580
581 ssize_t index = bytes[i];
582 if (index >= table_size || table[index] == -1) {
583 free(dest);
584 return (bool)false;
585 }
586 count++;
587
588 char value = table[index];
589 if (value == -2) {
590 value = 0;
591 pad++;
592 }
593
594 x <<= 5;
595 x += (uint64_t)value;
596
597 if ((count & 0x7) == 0) {
598 *ptr++ = (x >> 32) & 0xff;
599 *ptr++ = (x >> 24) & 0xff;
600 *ptr++ = (x >> 16) & 0xff;
601 *ptr++ = (x >> 8) & 0xff;
602 *ptr++ = x & 0xff;
603 }
604 }
605
606 size_t final = (size_t)(ptr - dest);
607 switch (pad) {
608 case 1:
609 final -= 1;
610 break;
611 case 3:
612 final -= 2;
613 break;
614 case 4:
615 final -= 3;
616 break;
617 case 6:
618 final -= 4;
619 break;
620 }
621
622 dispatch_data_t val = dispatch_data_create(dest, final, NULL,
623 DISPATCH_DATA_DESTRUCTOR_FREE);
624 dispatch_data_t concat = dispatch_data_create_concat(rv, val);
625
626 dispatch_release(val);
627 dispatch_release(rv);
628 rv = concat;
629
630 return (bool)true;
631 });
632
633 if (!success) {
634 dispatch_release(rv);
635 return NULL;
636 }
637
638 return rv;
639 }
640
641 static dispatch_data_t
642 _dispatch_transform_to_base32_with_table(dispatch_data_t data, const unsigned char* table)
643 {
644 size_t total = dispatch_data_get_size(data), dest_size;
645 __block size_t count = 0;
646
647 dest_size = howmany(total, 5);
648 // <rdar://problem/25676583>
649 // os_mul_overflow(dest_size, 8, &dest_size)
650 if (dest_size > SIZE_T_MAX / 8) {
651 return NULL;
652 }
653 dest_size *= 8;
654
655 uint8_t *dest = (uint8_t*)malloc(dest_size);
656 if (dest == NULL) {
657 return NULL;
658 }
659
660 __block uint8_t *ptr = dest;
661
662 /*
663 0 1 2 3 4
664 8-bit bytes: xxxxxxxx yyyyyyyy zzzzzzzz xxxxxxxx yyyyyyyy
665 5-bit chunks: aaaaabbb bbcccccd ddddeeee efffffgg ggghhhhh
666 */
667
668 bool success = dispatch_data_apply(data, ^(
669 DISPATCH_UNUSED dispatch_data_t region,
670 size_t offset, const void *buffer, size_t size) {
671 const uint8_t *bytes = buffer;
672 size_t i;
673
674 for (i = 0; i < size; i++, count++) {
675 uint8_t curr = bytes[i], last = 0;
676
677 if ((count % 5) != 0) {
678 if (i == 0) {
679 const void *p;
680 dispatch_data_t subrange = _dispatch_data_subrange_map(data,
681 &p, offset - 1, 1);
682 if (subrange == NULL) {
683 return (bool)false;
684 }
685 last = *(uint8_t*)p;
686 dispatch_release(subrange);
687 } else {
688 last = bytes[i - 1];
689 }
690 }
691
692 switch (count % 5) {
693 case 0:
694 // a
695 *ptr++ = table[(curr >> 3) & 0x1fu];
696 break;
697 case 1:
698 // b + c
699 *ptr++ = table[((last << 2)|(curr >> 6)) & 0x1f];
700 *ptr++ = table[(curr >> 1) & 0x1f];
701 break;
702 case 2:
703 // d
704 *ptr++ = table[((last << 4)|(curr >> 4)) & 0x1f];
705 break;
706 case 3:
707 // e + f
708 *ptr++ = table[((last << 1)|(curr >> 7)) & 0x1f];
709 *ptr++ = table[(curr >> 2) & 0x1f];
710 break;
711 case 4:
712 // g + h
713 *ptr++ = table[((last << 3)|(curr >> 5)) & 0x1f];
714 *ptr++ = table[curr & 0x1f];
715 break;
716 }
717 }
718
719 // Last region, insert padding bytes, if needed
720 if (offset + size == total) {
721 switch (count % 5) {
722 case 0:
723 break;
724 case 1:
725 // b[4:2]
726 *ptr++ = table[(bytes[size-1] << 2) & 0x1c];
727 break;
728 case 2:
729 // d[4]
730 *ptr++ = table[(bytes[size-1] << 4) & 0x10];
731 break;
732 case 3:
733 // e[4:1]
734 *ptr++ = table[(bytes[size-1] << 1) & 0x1e];
735 break;
736 case 4:
737 // g[2:3]
738 *ptr++ = table[(bytes[size-1] << 3) & 0x18];
739 break;
740 }
741 switch (count % 5) {
742 case 0:
743 break;
744 case 1:
745 *ptr++ = '='; // c
746 *ptr++ = '='; // d
747 case 2:
748 *ptr++ = '='; // e
749 case 3:
750 *ptr++ = '='; // f
751 *ptr++ = '='; // g
752 case 4:
753 *ptr++ = '='; // h
754 break;
755 }
756 }
757
758 return (bool)true;
759 });
760
761 if (!success) {
762 free(dest);
763 return NULL;
764 }
765 return dispatch_data_create(dest, dest_size, NULL,
766 DISPATCH_DATA_DESTRUCTOR_FREE);
767 }
768
769 static dispatch_data_t
770 _dispatch_transform_from_base32(dispatch_data_t data)
771 {
772 return _dispatch_transform_from_base32_with_table(data, base32_decode_table,
773 base32_decode_table_size);
774 }
775
776 static dispatch_data_t
777 _dispatch_transform_to_base32(dispatch_data_t data)
778 {
779 return _dispatch_transform_to_base32_with_table(data, base32_encode_table);
780 }
781
782 static dispatch_data_t
783 _dispatch_transform_from_base32hex(dispatch_data_t data)
784 {
785 return _dispatch_transform_from_base32_with_table(data,
786 base32hex_decode_table, base32hex_decode_table_size);
787 }
788
789 static dispatch_data_t
790 _dispatch_transform_to_base32hex(dispatch_data_t data)
791 {
792 return _dispatch_transform_to_base32_with_table(data,
793 base32hex_encode_table);
794 }
795
796 #pragma mark -
797 #pragma mark base64
798
799 static dispatch_data_t
800 _dispatch_transform_from_base64(dispatch_data_t data)
801 {
802 __block uint64_t x = 0, count = 0;
803 __block size_t pad = 0;
804
805 __block dispatch_data_t rv = dispatch_data_empty;
806
807 bool success = dispatch_data_apply(data, ^(
808 DISPATCH_UNUSED dispatch_data_t region,
809 DISPATCH_UNUSED size_t offset, const void *buffer, size_t size) {
810 size_t i, dest_size = howmany(size, 4) * 3;
811
812 uint8_t *dest = (uint8_t*)malloc(dest_size * sizeof(uint8_t));
813 uint8_t *ptr = dest;
814 if (dest == NULL) {
815 return (bool)false;
816 }
817
818 const uint8_t *bytes = buffer;
819
820 for (i = 0; i < size; i++) {
821 if (bytes[i] == '\n' || bytes[i] == '\t' || bytes[i] == ' ') {
822 continue;
823 }
824
825 ssize_t index = bytes[i];
826 if (index >= base64_decode_table_size ||
827 base64_decode_table[index] == -1) {
828 free(dest);
829 return (bool)false;
830 }
831 count++;
832
833 char value = base64_decode_table[index];
834 if (value == -2) {
835 value = 0;
836 pad++;
837 }
838
839 x <<= 6;
840 x += (uint64_t)value;
841
842 if ((count & 0x3) == 0) {
843 *ptr++ = (x >> 16) & 0xff;
844 *ptr++ = (x >> 8) & 0xff;
845 *ptr++ = x & 0xff;
846 }
847 }
848
849 size_t final = (size_t)(ptr - dest);
850 if (pad > 0) {
851 // 2 bytes of pad means only had one char in final group
852 final -= pad;
853 }
854
855 dispatch_data_t val = dispatch_data_create(dest, final, NULL,
856 DISPATCH_DATA_DESTRUCTOR_FREE);
857 dispatch_data_t concat = dispatch_data_create_concat(rv, val);
858
859 dispatch_release(val);
860 dispatch_release(rv);
861 rv = concat;
862
863 return (bool)true;
864 });
865
866 if (!success) {
867 dispatch_release(rv);
868 return NULL;
869 }
870
871 return rv;
872 }
873
874 static dispatch_data_t
875 _dispatch_transform_to_base64(dispatch_data_t data)
876 {
877 // RFC 4648 states that we should not linebreak
878 // http://tools.ietf.org/html/rfc4648
879 size_t total = dispatch_data_get_size(data), dest_size;
880 __block size_t count = 0;
881
882 dest_size = howmany(total, 3);
883 // <rdar://problem/25676583>
884 // os_mul_overflow(dest_size, 4, &dest_size)
885 if (dest_size > SIZE_T_MAX / 4) {
886 return NULL;
887 }
888 dest_size *= 4;
889
890 uint8_t *dest = (uint8_t*)malloc(dest_size);
891 if (dest == NULL) {
892 return NULL;
893 }
894
895 __block uint8_t *ptr = dest;
896
897 /*
898 * 3 8-bit bytes: xxxxxxxx yyyyyyyy zzzzzzzz
899 * 4 6-bit chunks: aaaaaabb bbbbcccc ccdddddd
900 */
901
902 bool success = dispatch_data_apply(data, ^(
903 DISPATCH_UNUSED dispatch_data_t region,
904 size_t offset, const void *buffer, size_t size) {
905 const uint8_t *bytes = buffer;
906 size_t i;
907
908 for (i = 0; i < size; i++, count++) {
909 uint8_t curr = bytes[i], last = 0;
910
911 if ((count % 3) != 0) {
912 if (i == 0) {
913 const void *p;
914 dispatch_data_t subrange = _dispatch_data_subrange_map(data,
915 &p, offset - 1, 1);
916 if (subrange == NULL) {
917 return (bool)false;
918 }
919 last = *(uint8_t*)p;
920 dispatch_release(subrange);
921 } else {
922 last = bytes[i - 1];
923 }
924 }
925
926 switch (count % 3) {
927 case 0:
928 *ptr++ = base64_encode_table[(curr >> 2) & 0x3f];
929 break;
930 case 1:
931 *ptr++ = base64_encode_table[((last << 4)|(curr >> 4)) & 0x3f];
932 break;
933 case 2:
934 *ptr++ = base64_encode_table[((last << 2)|(curr >> 6)) & 0x3f];
935 *ptr++ = base64_encode_table[(curr & 0x3f)];
936 break;
937 }
938 }
939
940 // Last region, insert padding bytes, if needed
941 if (offset + size == total) {
942 switch (count % 3) {
943 case 0:
944 break;
945 case 1:
946 *ptr++ = base64_encode_table[(bytes[size-1] << 4) & 0x30];
947 *ptr++ = '=';
948 *ptr++ = '=';
949 break;
950 case 2:
951 *ptr++ = base64_encode_table[(bytes[size-1] << 2) & 0x3c];
952 *ptr++ = '=';
953 break;
954 }
955 }
956
957 return (bool)true;
958 });
959
960 if (!success) {
961 free(dest);
962 return NULL;
963 }
964 return dispatch_data_create(dest, dest_size, NULL,
965 DISPATCH_DATA_DESTRUCTOR_FREE);
966 }
967
968 #pragma mark -
969 #pragma mark dispatch_data_transform
970
971 dispatch_data_t
972 dispatch_data_create_with_transform(dispatch_data_t data,
973 dispatch_data_format_type_t input, dispatch_data_format_type_t output)
974 {
975 if (input->type == _DISPATCH_DATA_FORMAT_UTF_ANY) {
976 input = _dispatch_transform_detect_utf(data);
977 if (input == NULL) {
978 return DISPATCH_BAD_INPUT;
979 }
980 }
981
982 if ((input->type & ~output->input_mask) != 0) {
983 return DISPATCH_BAD_INPUT;
984 }
985
986 if ((output->type & ~input->output_mask) != 0) {
987 return DISPATCH_BAD_INPUT;
988 }
989
990 if (dispatch_data_get_size(data) == 0) {
991 return data;
992 }
993
994 dispatch_data_t temp1;
995 if (input->decode) {
996 temp1 = input->decode(data);
997 } else {
998 dispatch_retain(data);
999 temp1 = data;
1000 }
1001
1002 if (!temp1) {
1003 return DISPATCH_BAD_INPUT;
1004 }
1005
1006 dispatch_data_t temp2;
1007 if (output->encode) {
1008 temp2 = output->encode(temp1);
1009 } else {
1010 dispatch_retain(temp1);
1011 temp2 = temp1;
1012 }
1013
1014 dispatch_release(temp1);
1015 return temp2;
1016 }
1017
1018 const struct dispatch_data_format_type_s _dispatch_data_format_type_none = {
1019 .type = _DISPATCH_DATA_FORMAT_NONE,
1020 .input_mask = ~0u,
1021 .output_mask = ~0u,
1022 .decode = NULL,
1023 .encode = NULL,
1024 };
1025
1026 const struct dispatch_data_format_type_s _dispatch_data_format_type_base32 = {
1027 .type = _DISPATCH_DATA_FORMAT_BASE32,
1028 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1029 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1030 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1031 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1032 .decode = _dispatch_transform_from_base32,
1033 .encode = _dispatch_transform_to_base32,
1034 };
1035
1036 const struct dispatch_data_format_type_s _dispatch_data_format_type_base32hex =
1037 {
1038 .type = _DISPATCH_DATA_FORMAT_BASE32HEX,
1039 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1040 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1041 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1042 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1043 .decode = _dispatch_transform_from_base32hex,
1044 .encode = _dispatch_transform_to_base32hex,
1045 };
1046
1047 const struct dispatch_data_format_type_s _dispatch_data_format_type_base64 = {
1048 .type = _DISPATCH_DATA_FORMAT_BASE64,
1049 .input_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1050 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1051 .output_mask = (_DISPATCH_DATA_FORMAT_NONE | _DISPATCH_DATA_FORMAT_BASE32 |
1052 _DISPATCH_DATA_FORMAT_BASE32HEX | _DISPATCH_DATA_FORMAT_BASE64),
1053 .decode = _dispatch_transform_from_base64,
1054 .encode = _dispatch_transform_to_base64,
1055 };
1056
1057 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16le = {
1058 .type = _DISPATCH_DATA_FORMAT_UTF16LE,
1059 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1060 _DISPATCH_DATA_FORMAT_UTF16LE),
1061 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1062 _DISPATCH_DATA_FORMAT_UTF16LE),
1063 .decode = _dispatch_transform_from_utf16le,
1064 .encode = _dispatch_transform_to_utf16le,
1065 };
1066
1067 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf16be = {
1068 .type = _DISPATCH_DATA_FORMAT_UTF16BE,
1069 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1070 _DISPATCH_DATA_FORMAT_UTF16LE),
1071 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1072 _DISPATCH_DATA_FORMAT_UTF16LE),
1073 .decode = _dispatch_transform_from_utf16be,
1074 .encode = _dispatch_transform_to_utf16be,
1075 };
1076
1077 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf8 = {
1078 .type = _DISPATCH_DATA_FORMAT_UTF8,
1079 .input_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1080 _DISPATCH_DATA_FORMAT_UTF16LE),
1081 .output_mask = (_DISPATCH_DATA_FORMAT_UTF8 | _DISPATCH_DATA_FORMAT_UTF16BE |
1082 _DISPATCH_DATA_FORMAT_UTF16LE),
1083 .decode = NULL,
1084 .encode = NULL,
1085 };
1086
1087 const struct dispatch_data_format_type_s _dispatch_data_format_type_utf_any = {
1088 .type = _DISPATCH_DATA_FORMAT_UTF_ANY,
1089 .input_mask = 0,
1090 .output_mask = 0,
1091 .decode = NULL,
1092 .encode = NULL,
1093 };