]> git.saurik.com Git - apple/libc.git/blame - locale/FreeBSD/utf2.c.patch
Libc-763.13.tar.gz
[apple/libc.git] / locale / FreeBSD / utf2.c.patch
CommitLineData
1f2f436a
A
1--- utf2.c.bsdnew 2009-11-09 17:38:09.000000000 -0800
2+++ utf2.c 2009-11-09 17:41:17.000000000 -0800
3@@ -27,6 +27,8 @@
3d9156a7 4 #include <sys/param.h>
1f2f436a 5 __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.16 2007/10/15 09:51:30 ache Exp $");
3d9156a7
A
6
7+#include "xlocale_private.h"
8+
9 #include <errno.h>
10 #include <limits.h>
11 #include <runetype.h>
1f2f436a 12@@ -35,62 +37,61 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/
3d9156a7
A
13 #include <wchar.h>
14 #include "mblocal.h"
15
1f2f436a 16-extern int __mb_sb_limit;
3d9156a7 17+#define UTF2_MB_CUR_MAX 3
1f2f436a
A
18
19-static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict,
20- size_t, mbstate_t * __restrict);
21-static int _UTF8_mbsinit(const mbstate_t *);
22-static size_t _UTF8_mbsnrtowcs(wchar_t * __restrict,
23+static size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict,
24+ size_t, mbstate_t * __restrict, locale_t);
3d9156a7 25+static int _UTF2_mbsinit(const mbstate_t *, locale_t);
1f2f436a
A
26+static size_t _UTF2_mbsnrtowcs(wchar_t * __restrict,
27 const char ** __restrict, size_t, size_t,
28- mbstate_t * __restrict);
29-static size_t _UTF8_wcrtomb(char * __restrict, wchar_t,
30- mbstate_t * __restrict);
31-static size_t _UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
32- size_t, size_t, mbstate_t * __restrict);
33+ mbstate_t * __restrict, locale_t);
34+static size_t _UTF2_wcrtomb(char * __restrict, wchar_t,
35+ mbstate_t * __restrict, locale_t);
3d9156a7 36+static size_t _UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
1f2f436a 37+ size_t, size_t, mbstate_t * __restrict, locale_t);
3d9156a7
A
38
39 typedef struct {
40 wchar_t ch;
41 int want;
42 wchar_t lbound;
43-} _UTF8State;
44+} _UTF2State;
45
46-int
47-_UTF8_init(_RuneLocale *rl)
48+__private_extern__ int
49+_UTF2_init(struct __xlocale_st_runelocale *xrl)
50 {
51
52- __mbrtowc = _UTF8_mbrtowc;
53- __wcrtomb = _UTF8_wcrtomb;
54- __mbsinit = _UTF8_mbsinit;
55- __mbsnrtowcs = _UTF8_mbsnrtowcs;
56- __wcsnrtombs = _UTF8_wcsnrtombs;
57- _CurrentRuneLocale = rl;
58- __mb_cur_max = 6;
59+ xrl->__mbrtowc = _UTF2_mbrtowc;
60+ xrl->__wcrtomb = _UTF2_wcrtomb;
61+ xrl->__mbsinit = _UTF2_mbsinit;
62+ xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs;
63+ xrl->__wcsnrtombs = _UTF2_wcsnrtombs;
64+ xrl->__mb_cur_max = UTF2_MB_CUR_MAX;
1f2f436a
A
65 /*
66 * UCS-4 encoding used as the internal representation, so
67 * slots 0x0080-0x00FF are occuped and must be excluded
68 * from the single byte ctype by setting the limit.
69 */
70- __mb_sb_limit = 128;
71+ xrl->__mb_sb_limit = 128;
3d9156a7
A
72
73 return (0);
74 }
75
1f2f436a 76 static int
3d9156a7 77-_UTF8_mbsinit(const mbstate_t *ps)
3d9156a7
A
78+_UTF2_mbsinit(const mbstate_t *ps, locale_t loc)
79 {
80
81- return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
82+ return (ps == NULL || ((const _UTF2State *)ps)->want == 0);
83 }
84
1f2f436a 85 static size_t
3d9156a7
A
86-_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
87- mbstate_t * __restrict ps)
3d9156a7
A
88+_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
89+ mbstate_t * __restrict ps, locale_t loc)
90 {
91- _UTF8State *us;
92+ _UTF2State *us;
93 int ch, i, mask, want;
94 wchar_t lbound, wch;
95
96- us = (_UTF8State *)ps;
97+ us = (_UTF2State *)ps;
98
1f2f436a 99 if (us->want < 0 || us->want > 6) {
3d9156a7 100 errno = EINVAL;
1f2f436a 101@@ -140,21 +141,9 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc,
3d9156a7
A
102 mask = 0x0f;
103 want = 3;
104 lbound = 0x800;
105- } else if ((ch & 0xf8) == 0xf0) {
106- mask = 0x07;
107- want = 4;
108- lbound = 0x10000;
109- } else if ((ch & 0xfc) == 0xf8) {
110- mask = 0x03;
111- want = 5;
112- lbound = 0x200000;
1f2f436a 113- } else if ((ch & 0xfe) == 0xfc) {
3d9156a7
A
114- mask = 0x01;
115- want = 6;
116- lbound = 0x4000000;
117 } else {
118 /*
119- * Malformed input; input is not UTF-8.
120+ * Malformed input; input is not UTF2.
121 */
122 errno = EILSEQ;
123 return ((size_t)-1);
1f2f436a 124@@ -205,16 +194,16 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc,
3d9156a7
A
125 }
126
1f2f436a 127 static size_t
3d9156a7
A
128-_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
129- size_t nms, size_t len, mbstate_t * __restrict ps)
3d9156a7
A
130+_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
131+ size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc)
132 {
133- _UTF8State *us;
134+ _UTF2State *us;
135 const char *s;
136 size_t nchr;
137 wchar_t wc;
138 size_t nb;
139
140- us = (_UTF8State *)ps;
141+ us = (_UTF2State *)ps;
142
143 s = *src;
144 nchr = 0;
1f2f436a 145@@ -236,7 +225,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
3d9156a7
A
146 * excluding NUL.
147 */
148 nb = 1;
149- else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
150+ else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) ==
151 (size_t)-1)
152 /* Invalid sequence - mbrtowc() sets errno. */
153 return ((size_t)-1);
1f2f436a 154@@ -266,7 +255,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
3d9156a7
A
155 */
156 *dst = (wchar_t)*s;
157 nb = 1;
158- } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
159+ } else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) ==
160 (size_t)-1) {
161 *src = s;
162 return ((size_t)-1);
1f2f436a 163@@ -287,13 +276,13 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds
3d9156a7
A
164 }
165
1f2f436a 166 static size_t
3d9156a7 167-_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
3d9156a7
A
168+_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc)
169 {
170- _UTF8State *us;
171+ _UTF2State *us;
172 unsigned char lead;
173 int i, len;
174
175- us = (_UTF8State *)ps;
176+ us = (_UTF2State *)ps;
177
178 if (us->want != 0) {
179 errno = EINVAL;
1f2f436a 180@@ -325,15 +314,6 @@ _UTF8_wcrtomb(char * __restrict s, wchar
3d9156a7
A
181 } else if ((wc & ~0xffff) == 0) {
182 lead = 0xe0;
183 len = 3;
184- } else if ((wc & ~0x1fffff) == 0) {
185- lead = 0xf0;
186- len = 4;
187- } else if ((wc & ~0x3ffffff) == 0) {
188- lead = 0xf8;
189- len = 5;
190- } else if ((wc & ~0x7fffffff) == 0) {
191- lead = 0xfc;
192- len = 6;
193 } else {
194 errno = EILSEQ;
195 return ((size_t)-1);
1f2f436a 196@@ -355,16 +335,16 @@ _UTF8_wcrtomb(char * __restrict s, wchar
3d9156a7
A
197 }
198
1f2f436a 199 static size_t
3d9156a7
A
200-_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
201- size_t nwc, size_t len, mbstate_t * __restrict ps)
3d9156a7
A
202+_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
203+ size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc)
204 {
205- _UTF8State *us;
206+ _UTF2State *us;
207 char buf[MB_LEN_MAX];
208 const wchar_t *s;
209 size_t nbytes;
210 size_t nb;
211
212- us = (_UTF8State *)ps;
213+ us = (_UTF2State *)ps;
214
215 if (us->want != 0) {
216 errno = EINVAL;
1f2f436a 217@@ -379,7 +359,7 @@ _UTF8_wcsnrtombs(char * __restrict dst,
3d9156a7
A
218 if (0 <= *s && *s < 0x80)
219 /* Fast path for plain ASCII characters. */
220 nb = 1;
221- else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
222+ else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) ==
223 (size_t)-1)
224 /* Invalid character - wcrtomb() sets errno. */
225 return ((size_t)-1);
1f2f436a 226@@ -396,9 +376,9 @@ _UTF8_wcsnrtombs(char * __restrict dst,
3d9156a7
A
227 /* Fast path for plain ASCII characters. */
228 nb = 1;
229 *dst = *s;
230- } else if (len > (size_t)MB_CUR_MAX) {
231+ } else if (len > (size_t)UTF2_MB_CUR_MAX) {
232 /* Enough space to translate in-place. */
1f2f436a
A
233- if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) {
234+ if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) {
3d9156a7
A
235 *src = s;
236 return ((size_t)-1);
237 }
1f2f436a 238@@ -406,7 +386,7 @@ _UTF8_wcsnrtombs(char * __restrict dst,
3d9156a7
A
239 /*
240 * May not be enough space; use temp. buffer.
241 */
1f2f436a
A
242- if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) {
243+ if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) {
3d9156a7
A
244 *src = s;
245 return ((size_t)-1);
246 }