]> git.saurik.com Git - apple/libc.git/blob - locale/FreeBSD/utf2.c.patch
Libc-391.2.3.tar.gz
[apple/libc.git] / locale / FreeBSD / utf2.c.patch
1 --- utf2.c.orig Fri Feb 18 15:49:55 2005
2 +++ utf2.c Fri Feb 18 15:52:07 2005
3 @@ -25,8 +25,11 @@
4 */
5
6 #include <sys/param.h>
7 +/* UTF2: UTF-8 limited to sequences of at most 3 bytes (wide chars up to 0xFFFF) */
8 __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.11 2004/07/27 06:29:48 tjr Exp $");
9
10 +#include "xlocale_private.h"
11 +
12 #include <errno.h>
13 #include <limits.h>
14 #include <runetype.h>
15 @@ -35,54 +38,55 @@
16 #include <wchar.h>
17 #include "mblocal.h"
18
19 -size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
20 - mbstate_t * __restrict);
21 -int _UTF8_mbsinit(const mbstate_t *);
22 -size_t _UTF8_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict,
23 - size_t, size_t, mbstate_t * __restrict);
24 -size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
25 -size_t _UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
26 - size_t, size_t, mbstate_t * __restrict);
27 +#define UTF2_MB_CUR_MAX 3
28 +
29 +static size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
30 + mbstate_t * __restrict, locale_t);
31 +static int _UTF2_mbsinit(const mbstate_t *, locale_t);
32 +static size_t _UTF2_mbsnrtowcs(wchar_t * __restrict, const char ** __restrict,
33 + size_t, size_t, mbstate_t * __restrict, locale_t);
34 +static size_t _UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict, locale_t);
35 +static size_t _UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
36 + size_t, size_t, mbstate_t * __restrict, locale_t);
37
38 typedef struct {
39 wchar_t ch;
40 int want;
41 wchar_t lbound;
42 -} _UTF8State;
43 +} _UTF2State;
44
45 -int
46 -_UTF8_init(_RuneLocale *rl)
47 +__private_extern__ int
48 +_UTF2_init(struct __xlocale_st_runelocale *xrl)
49 {
50
51 - __mbrtowc = _UTF8_mbrtowc;
52 - __wcrtomb = _UTF8_wcrtomb;
53 - __mbsinit = _UTF8_mbsinit;
54 - __mbsnrtowcs = _UTF8_mbsnrtowcs;
55 - __wcsnrtombs = _UTF8_wcsnrtombs;
56 - _CurrentRuneLocale = rl;
57 - __mb_cur_max = 6;
58 + xrl->__mbrtowc = _UTF2_mbrtowc;
59 + xrl->__wcrtomb = _UTF2_wcrtomb;
60 + xrl->__mbsinit = _UTF2_mbsinit;
61 + xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs;
62 + xrl->__wcsnrtombs = _UTF2_wcsnrtombs;
63 + xrl->__mb_cur_max = UTF2_MB_CUR_MAX;
64
65 return (0);
66 }
67
68 -int
69 -_UTF8_mbsinit(const mbstate_t *ps)
70 +static int
71 +_UTF2_mbsinit(const mbstate_t *ps, locale_t loc)
72 {
73
74 - return (ps == NULL || ((const _UTF8State *)ps)->want == 0);
75 + return (ps == NULL || ((const _UTF2State *)ps)->want == 0);
76 }
77
78 -size_t
79 -_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
80 - mbstate_t * __restrict ps)
81 +static size_t
82 +_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
83 + mbstate_t * __restrict ps, locale_t loc)
84 {
85 - _UTF8State *us;
86 + _UTF2State *us;
87 int ch, i, mask, want;
88 wchar_t lbound, wch;
89
90 - us = (_UTF8State *)ps;
91 + us = (_UTF2State *)ps;
92
93 - if (us->want < 0 || us->want > 6) {
94 + if (us->want < 0 || us->want > 3) {
95 errno = EINVAL;
96 return ((size_t)-1);
97 }
98 @@ -130,21 +134,9 @@
99 mask = 0x0f;
100 want = 3;
101 lbound = 0x800;
102 - } else if ((ch & 0xf8) == 0xf0) {
103 - mask = 0x07;
104 - want = 4;
105 - lbound = 0x10000;
106 - } else if ((ch & 0xfc) == 0xf8) {
107 - mask = 0x03;
108 - want = 5;
109 - lbound = 0x200000;
110 - } else if ((ch & 0xfc) == 0xfc) {
111 - mask = 0x01;
112 - want = 6;
113 - lbound = 0x4000000;
114 } else {
115 /*
116 - * Malformed input; input is not UTF-8.
117 + * Malformed input; input is not UTF2.
118 */
119 errno = EILSEQ;
120 return ((size_t)-1);
121 @@ -194,17 +186,17 @@
122 return (wch == L'\0' ? 0 : want);
123 }
124
125 -size_t
126 -_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
127 - size_t nms, size_t len, mbstate_t * __restrict ps)
128 +static size_t
129 +_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
130 + size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc)
131 {
132 - _UTF8State *us;
133 + _UTF2State *us;
134 const char *s;
135 size_t nchr;
136 wchar_t wc;
137 size_t nb;
138
139 - us = (_UTF8State *)ps;
140 + us = (_UTF2State *)ps;
141
142 s = *src;
143 nchr = 0;
144 @@ -226,7 +218,7 @@
145 * excluding NUL.
146 */
147 nb = 1;
148 - else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) ==
149 + else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) ==
150 (size_t)-1)
151 /* Invalid sequence - mbrtowc() sets errno. */
152 return ((size_t)-1);
153 @@ -256,7 +248,7 @@
154 */
155 *dst = (wchar_t)*s;
156 nb = 1;
157 - } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) ==
158 + } else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) ==
159 (size_t)-1) {
160 *src = s;
161 return ((size_t)-1);
162 @@ -276,14 +268,14 @@
163 return (nchr);
164 }
165
166 -size_t
167 -_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
168 +static size_t
169 +_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc)
170 {
171 - _UTF8State *us;
172 + _UTF2State *us;
173 unsigned char lead;
174 int i, len;
175
176 - us = (_UTF8State *)ps;
177 + us = (_UTF2State *)ps;
178
179 if (us->want != 0) {
180 errno = EINVAL;
181 @@ -315,15 +307,6 @@
182 } else if ((wc & ~0xffff) == 0) {
183 lead = 0xe0;
184 len = 3;
185 - } else if ((wc & ~0x1fffff) == 0) {
186 - lead = 0xf0;
187 - len = 4;
188 - } else if ((wc & ~0x3ffffff) == 0) {
189 - lead = 0xf8;
190 - len = 5;
191 - } else if ((wc & ~0x7fffffff) == 0) {
192 - lead = 0xfc;
193 - len = 6;
194 } else {
195 errno = EILSEQ;
196 return ((size_t)-1);
197 @@ -344,17 +327,17 @@
198 return (len);
199 }
200
201 -size_t
202 -_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
203 - size_t nwc, size_t len, mbstate_t * __restrict ps)
204 +static size_t
205 +_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
206 + size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc)
207 {
208 - _UTF8State *us;
209 + _UTF2State *us;
210 char buf[MB_LEN_MAX];
211 const wchar_t *s;
212 size_t nbytes;
213 size_t nb;
214
215 - us = (_UTF8State *)ps;
216 + us = (_UTF2State *)ps;
217
218 if (us->want != 0) {
219 errno = EINVAL;
220 @@ -369,7 +352,7 @@
221 if (0 <= *s && *s < 0x80)
222 /* Fast path for plain ASCII characters. */
223 nb = 1;
224 - else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) ==
225 + else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) ==
226 (size_t)-1)
227 /* Invalid character - wcrtomb() sets errno. */
228 return ((size_t)-1);
229 @@ -386,9 +369,9 @@
230 /* Fast path for plain ASCII characters. */
231 nb = 1;
232 *dst = *s;
233 - } else if (len > (size_t)MB_CUR_MAX) {
234 + } else if (len > (size_t)UTF2_MB_CUR_MAX) {
235 /* Enough space to translate in-place. */
236 - if ((nb = (int)_UTF8_wcrtomb(dst, *s, ps)) < 0) {
237 + if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) {
238 *src = s;
239 return ((size_t)-1);
240 }
241 @@ -396,7 +379,7 @@
242 /*
243 * May not be enough space; use temp. buffer.
244 */
245 - if ((nb = (int)_UTF8_wcrtomb(buf, *s, ps)) < 0) {
246 + if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) {
247 *src = s;
248 return ((size_t)-1);
249 }