git.saurik.com Git - apple/libc.git/blob - locale/FreeBSD/utf2.c.patch
Libc-763.12.tar.gz
[apple/libc.git] / locale / FreeBSD / utf2.c.patch
1 --- utf2.c.bsdnew 2009-11-09 17:38:09.000000000 -0800
2 +++ utf2.c 2009-11-09 17:41:17.000000000 -0800
3 @@ -27,6 +27,8 @@
4 #include <sys/param.h>
5 __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.16 2007/10/15 09:51:30 ache Exp $");
6
7 +#include "xlocale_private.h"
8 +
9 #include <errno.h>
10 #include <limits.h>
11 #include <runetype.h>
12 @@ -35,62 +37,61 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/
13 #include <wchar.h>
14 #include "mblocal.h"
15
16 -extern int __mb_sb_limit;
17 +#define UTF2_MB_CUR_MAX 3
18
19 -static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
20 - size_t, mbstate_t * __restrict);
21 -static int _UTF8_mbsinit(const mbstate_t *);
22 -static size_t _UTF8_mbsnrtowcs(wchar_t * __restrict,
23 +static size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict,
24 + size_t, mbstate_t * __restrict, locale_t);
25 +static int _UTF2_mbsinit(const mbstate_t *, locale_t);
26 +static size_t _UTF2_mbsnrtowcs(wchar_t * __restrict,
27 const char ** __restrict, size_t, size_t,
28 - mbstate_t * __restrict);
29 -static size_t _UTF8_wcrtomb(char * __restrict, wchar_t,
30 - mbstate_t * __restrict);
31 -static size_t _UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
32 - size_t, size_t, mbstate_t * __restrict);
33 + mbstate_t * __restrict, locale_t);
34 +static size_t _UTF2_wcrtomb(char * __restrict, wchar_t,
35 + mbstate_t * __restrict, locale_t);
36 +static size_t _UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
37 + size_t, size_t, mbstate_t * __restrict, locale_t);
38
39 typedef struct {
40 wchar_t ch;
41 int want;
42 wchar_t lbound;
43 -} _UTF8State;
44 +} _UTF2State;
45
46 -int
47 -_UTF8_init(_RuneLocale *rl)
48 +__private_extern__ int
49 +_UTF2_init(struct __xlocale_st_runelocale *xrl)
50 {
51
52 - __mbrtowc = _UTF8_mbrtowc;
53 - __wcrtomb = _UTF8_wcrtomb;
54 - __mbsinit = _UTF8_mbsinit;
55 - __mbsnrtowcs = _UTF8_mbsnrtowcs;
56 - __wcsnrtombs = _UTF8_wcsnrtombs;
57 - _CurrentRuneLocale = rl;
58 - __mb_cur_max = 6;
59 + xrl->__mbrtowc = _UTF2_mbrtowc;
60 + xrl->__wcrtomb = _UTF2_wcrtomb;
61 + xrl->__mbsinit = _UTF2_mbsinit;
62 + xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs;
63 + xrl->__wcsnrtombs = _UTF2_wcsnrtombs;
64 + xrl->__mb_cur_max = UTF2_MB_CUR_MAX;
65 /*
66 * UCS-4 encoding used as the internal representation, so
67 * slots 0x0080-0x00FF are occuped and must be excluded
68 * from the single byte ctype by setting the limit.
69 */
70 - __mb_sb_limit = 128;
71 + xrl->__mb_sb_limit = 128;
72
73 return (0);
74 }
75
76 static int
77 -_UTF8_mbsinit(const mbstate_t *ps)
78 +_UTF2_mbsinit(const mbstate_t *ps, locale_t loc)
79 {
80
81 - return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
82 + return (ps == NULL || ((const _UTF2State *)ps)->want == 0);
83 }
84
85 static size_t
86 -_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
87 - mbstate_t * __restrict ps)
88 +_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
89 + mbstate_t * __restrict ps, locale_t loc)
90 {
91 - _UTF8State *us;
92 + _UTF2State *us;
93 int ch, i, mask, want;
94 wchar_t lbound, wch;
95
96 - us = (_UTF8State *)ps;
97 + us = (_UTF2State *)ps;
98
99 if (us->want < 0 || us->want > 6) {
100 errno = EINVAL;
101 @@ -140,21 +141,9 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc,
102 mask = 0x0f;
103 want = 3;
104 lbound = 0x800;
105 - } else if ((ch & 0xf8) == 0xf0) {
106 - mask = 0x07;
107 - want = 4;
108 - lbound = 0x10000;
109 - } else if ((ch & 0xfc) == 0xf8) {
110 - mask = 0x03;
111 - want = 5;
112 - lbound = 0x200000;
113 - } else if ((ch & 0xfe) == 0xfc) {
114 - mask = 0x01;
115 - want = 6;
116 - lbound = 0x4000000;
117 } else {
118 /*
119 - * Malformed input; input is not UTF-8.
120 + * Malformed input; input is not UTF2.
121 */
122 errno = EILSEQ;
123 return ((size_t)-1);
124 @@ -205,16 +194,16 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc,
125 }
126
127 static size_t
128 -_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
129 - size_t nms, size_t len, mbstate_t * __restrict ps)
130 +_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
131 + size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc)
132 {
133 - _UTF8State *us;
134 + _UTF2State *us;
135 const char *s;
136 size_t nchr;
137 wchar_t wc;
138 size_t nb;
139
140 - us = (_UTF8State *)ps;
141 + us = (_UTF2State *)ps;
142
143 s = *src;
144 nchr = 0;
145 @@ -236,7 +225,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
146 * excluding NUL.
147 */
148 nb = 1;
149 - else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
150 + else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) ==
151 (size_t)-1)
152 /* Invalid sequence - mbrtowc() sets errno. */
153 return ((size_t)-1);
154 @@ -266,7 +255,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
155 */
156 *dst = (wchar_t)*s;
157 nb = 1;
158 - } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
159 + } else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) ==
160 (size_t)-1) {
161 *src = s;
162 return ((size_t)-1);
163 @@ -287,13 +276,13 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
164 }
165
166 static size_t
167 -_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
168 +_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc)
169 {
170 - _UTF8State *us;
171 + _UTF2State *us;
172 unsigned char lead;
173 int i, len;
174
175 - us = (_UTF8State *)ps;
176 + us = (_UTF2State *)ps;
177
178 if (us->want != 0) {
179 errno = EINVAL;
180 @@ -325,15 +314,6 @@ _UTF8_wcrtomb(char * __restrict s, wchar
181 } else if ((wc & ~0xffff) == 0) {
182 lead = 0xe0;
183 len = 3;
184 - } else if ((wc & ~0x1fffff) == 0) {
185 - lead = 0xf0;
186 - len = 4;
187 - } else if ((wc & ~0x3ffffff) == 0) {
188 - lead = 0xf8;
189 - len = 5;
190 - } else if ((wc & ~0x7fffffff) == 0) {
191 - lead = 0xfc;
192 - len = 6;
193 } else {
194 errno = EILSEQ;
195 return ((size_t)-1);
196 @@ -355,16 +335,16 @@ _UTF8_wcrtomb(char * __restrict s, wchar
197 }
198
199 static size_t
200 -_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
201 - size_t nwc, size_t len, mbstate_t * __restrict ps)
202 +_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
203 + size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc)
204 {
205 - _UTF8State *us;
206 + _UTF2State *us;
207 char buf[MB_LEN_MAX];
208 const wchar_t *s;
209 size_t nbytes;
210 size_t nb;
211
212 - us = (_UTF8State *)ps;
213 + us = (_UTF2State *)ps;
214
215 if (us->want != 0) {
216 errno = EINVAL;
217 @@ -379,7 +359,7 @@ _UTF8_wcsnrtombs(char * __restrict dst,
218 if (0 <= *s && *s < 0x80)
219 /* Fast path for plain ASCII characters. */
220 nb = 1;
221 - else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
222 + else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) ==
223 (size_t)-1)
224 /* Invalid character - wcrtomb() sets errno. */
225 return ((size_t)-1);
226 @@ -396,9 +376,9 @@ _UTF8_wcsnrtombs(char * __restrict dst,
227 /* Fast path for plain ASCII characters. */
228 nb = 1;
229 *dst = *s;
230 - } else if (len > (size_t)MB_CUR_MAX) {
231 + } else if (len > (size_t)UTF2_MB_CUR_MAX) {
232 /* Enough space to translate in-place. */
233 - if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
234 + if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) {
235 *src = s;
236 return ((size_t)-1);
237 }
238 @@ -406,7 +386,7 @@ _UTF8_wcsnrtombs(char * __restrict dst,
239 /*
240 * May not be enough space; use temp. buffer.
241 */
242 - if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
243 + if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) {
244 *src = s;
245 return ((size_t)-1);
246 }