]>
Commit | Line | Data |
---|---|---|
1f2f436a A |
1 | --- utf2.c.bsdnew 2009-11-09 17:38:09.000000000 -0800 |
2 | +++ utf2.c 2009-11-09 17:41:17.000000000 -0800 | |
3 | @@ -27,6 +27,8 @@ | |
3d9156a7 | 4 | #include <sys/param.h> |
1f2f436a | 5 | __FBSDID("$FreeBSD: src/lib/libc/locale/utf8.c,v 1.16 2007/10/15 09:51:30 ache Exp $"); |
3d9156a7 A |
6 | |
7 | +#include "xlocale_private.h" | |
8 | + | |
9 | #include <errno.h> | |
10 | #include <limits.h> | |
11 | #include <runetype.h> | |
1f2f436a | 12 | @@ -35,62 +37,61 @@ __FBSDID("$FreeBSD: src/lib/libc/locale/ |
3d9156a7 A |
13 | #include <wchar.h> |
14 | #include "mblocal.h" | |
15 | ||
1f2f436a | 16 | -extern int __mb_sb_limit; |
3d9156a7 | 17 | +#define UTF2_MB_CUR_MAX 3 |
1f2f436a A |
18 | |
19 | -static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, | |
20 | - size_t, mbstate_t * __restrict); | |
21 | -static int _UTF8_mbsinit(const mbstate_t *); | |
22 | -static size_t _UTF8_mbsnrtowcs(wchar_t * __restrict, | |
23 | +static size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, | |
24 | + size_t, mbstate_t * __restrict, locale_t); | |
3d9156a7 | 25 | +static int _UTF2_mbsinit(const mbstate_t *, locale_t); |
1f2f436a A |
26 | +static size_t _UTF2_mbsnrtowcs(wchar_t * __restrict, |
27 | const char ** __restrict, size_t, size_t, | |
28 | - mbstate_t * __restrict); | |
29 | -static size_t _UTF8_wcrtomb(char * __restrict, wchar_t, | |
30 | - mbstate_t * __restrict); | |
31 | -static size_t _UTF8_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, | |
32 | - size_t, size_t, mbstate_t * __restrict); | |
33 | + mbstate_t * __restrict, locale_t); | |
34 | +static size_t _UTF2_wcrtomb(char * __restrict, wchar_t, | |
35 | + mbstate_t * __restrict, locale_t); | |
3d9156a7 | 36 | +static size_t _UTF2_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, |
1f2f436a | 37 | + size_t, size_t, mbstate_t * __restrict, locale_t); |
3d9156a7 A |
38 | |
39 | typedef struct { | |
40 | wchar_t ch; | |
41 | int want; | |
42 | wchar_t lbound; | |
43 | -} _UTF8State; | |
44 | +} _UTF2State; | |
45 | ||
46 | -int | |
47 | -_UTF8_init(_RuneLocale *rl) | |
48 | +__private_extern__ int | |
49 | +_UTF2_init(struct __xlocale_st_runelocale *xrl) | |
50 | { | |
51 | ||
52 | - __mbrtowc = _UTF8_mbrtowc; | |
53 | - __wcrtomb = _UTF8_wcrtomb; | |
54 | - __mbsinit = _UTF8_mbsinit; | |
55 | - __mbsnrtowcs = _UTF8_mbsnrtowcs; | |
56 | - __wcsnrtombs = _UTF8_wcsnrtombs; | |
57 | - _CurrentRuneLocale = rl; | |
58 | - __mb_cur_max = 6; | |
59 | + xrl->__mbrtowc = _UTF2_mbrtowc; | |
60 | + xrl->__wcrtomb = _UTF2_wcrtomb; | |
61 | + xrl->__mbsinit = _UTF2_mbsinit; | |
62 | + xrl->__mbsnrtowcs = _UTF2_mbsnrtowcs; | |
63 | + xrl->__wcsnrtombs = _UTF2_wcsnrtombs; | |
64 | + xrl->__mb_cur_max = UTF2_MB_CUR_MAX; | |
1f2f436a A |
65 | /* |
66 | * UCS-4 encoding used as the internal representation, so | |
67 | * slots 0x0080-0x00FF are occuped and must be excluded | |
68 | * from the single byte ctype by setting the limit. | |
69 | */ | |
70 | - __mb_sb_limit = 128; | |
71 | + xrl->__mb_sb_limit = 128; | |
3d9156a7 A |
72 | |
73 | return (0); | |
74 | } | |
75 | ||
1f2f436a | 76 | static int |
3d9156a7 | 77 | -_UTF8_mbsinit(const mbstate_t *ps) |
3d9156a7 A |
78 | +_UTF2_mbsinit(const mbstate_t *ps, locale_t loc) |
79 | { | |
80 | ||
81 | - return (ps == NULL || ((const _UTF8State *)ps)->want == 0); | |
82 | + return (ps == NULL || ((const _UTF2State *)ps)->want == 0); | |
83 | } | |
84 | ||
1f2f436a | 85 | static size_t |
3d9156a7 A |
86 | -_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, |
87 | - mbstate_t * __restrict ps) | |
3d9156a7 A |
88 | +_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, |
89 | + mbstate_t * __restrict ps, locale_t loc) | |
90 | { | |
91 | - _UTF8State *us; | |
92 | + _UTF2State *us; | |
93 | int ch, i, mask, want; | |
94 | wchar_t lbound, wch; | |
95 | ||
96 | - us = (_UTF8State *)ps; | |
97 | + us = (_UTF2State *)ps; | |
98 | ||
1f2f436a | 99 | if (us->want < 0 || us->want > 6) { |
3d9156a7 | 100 | errno = EINVAL; |
1f2f436a | 101 | @@ -140,21 +141,9 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, |
3d9156a7 A |
102 | mask = 0x0f; |
103 | want = 3; | |
104 | lbound = 0x800; | |
105 | - } else if ((ch & 0xf8) == 0xf0) { | |
106 | - mask = 0x07; | |
107 | - want = 4; | |
108 | - lbound = 0x10000; | |
109 | - } else if ((ch & 0xfc) == 0xf8) { | |
110 | - mask = 0x03; | |
111 | - want = 5; | |
112 | - lbound = 0x200000; | |
1f2f436a | 113 | - } else if ((ch & 0xfe) == 0xfc) { |
3d9156a7 A |
114 | - mask = 0x01; |
115 | - want = 6; | |
116 | - lbound = 0x4000000; | |
117 | } else { | |
118 | /* | |
119 | - * Malformed input; input is not UTF-8. | |
120 | + * Malformed input; input is not UTF2 (sequences longer than 3 bytes are rejected). | |
121 | */ | |
122 | errno = EILSEQ; | |
123 | return ((size_t)-1); | |
1f2f436a | 124 | @@ -205,16 +194,16 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, |
3d9156a7 A |
125 | } |
126 | ||
1f2f436a | 127 | static size_t |
3d9156a7 A |
128 | -_UTF8_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, |
129 | - size_t nms, size_t len, mbstate_t * __restrict ps) | |
3d9156a7 A |
130 | +_UTF2_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src, |
131 | + size_t nms, size_t len, mbstate_t * __restrict ps, locale_t loc) | |
132 | { | |
133 | - _UTF8State *us; | |
134 | + _UTF2State *us; | |
135 | const char *s; | |
136 | size_t nchr; | |
137 | wchar_t wc; | |
138 | size_t nb; | |
139 | ||
140 | - us = (_UTF8State *)ps; | |
141 | + us = (_UTF2State *)ps; | |
142 | ||
143 | s = *src; | |
144 | nchr = 0; | |
1f2f436a | 145 | @@ -236,7 +225,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds |
3d9156a7 A |
146 | * excluding NUL. |
147 | */ | |
148 | nb = 1; | |
149 | - else if ((nb = _UTF8_mbrtowc(&wc, s, nms, ps)) == | |
150 | + else if ((nb = _UTF2_mbrtowc(&wc, s, nms, ps, loc)) == | |
151 | (size_t)-1) | |
152 | /* Invalid sequence - mbrtowc() sets errno. */ | |
153 | return ((size_t)-1); | |
1f2f436a | 154 | @@ -266,7 +255,7 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds |
3d9156a7 A |
155 | */ |
156 | *dst = (wchar_t)*s; | |
157 | nb = 1; | |
158 | - } else if ((nb = _UTF8_mbrtowc(dst, s, nms, ps)) == | |
159 | + } else if ((nb = _UTF2_mbrtowc(dst, s, nms, ps, loc)) == | |
160 | (size_t)-1) { | |
161 | *src = s; | |
162 | return ((size_t)-1); | |
1f2f436a | 163 | @@ -287,13 +276,13 @@ _UTF8_mbsnrtowcs(wchar_t * __restrict ds |
3d9156a7 A |
164 | } |
165 | ||
1f2f436a | 166 | static size_t |
3d9156a7 | 167 | -_UTF8_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) |
3d9156a7 A |
168 | +_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps, locale_t loc) |
169 | { | |
170 | - _UTF8State *us; | |
171 | + _UTF2State *us; | |
172 | unsigned char lead; | |
173 | int i, len; | |
174 | ||
175 | - us = (_UTF8State *)ps; | |
176 | + us = (_UTF2State *)ps; | |
177 | ||
178 | if (us->want != 0) { | |
179 | errno = EINVAL; | |
1f2f436a | 180 | @@ -325,15 +314,6 @@ _UTF8_wcrtomb(char * __restrict s, wchar |
3d9156a7 A |
181 | } else if ((wc & ~0xffff) == 0) { |
182 | lead = 0xe0; | |
183 | len = 3; | |
184 | - } else if ((wc & ~0x1fffff) == 0) { | |
185 | - lead = 0xf0; | |
186 | - len = 4; | |
187 | - } else if ((wc & ~0x3ffffff) == 0) { | |
188 | - lead = 0xf8; | |
189 | - len = 5; | |
190 | - } else if ((wc & ~0x7fffffff) == 0) { | |
191 | - lead = 0xfc; | |
192 | - len = 6; | |
193 | } else { | |
194 | errno = EILSEQ; | |
195 | return ((size_t)-1); | |
1f2f436a | 196 | @@ -355,16 +335,16 @@ _UTF8_wcrtomb(char * __restrict s, wchar |
3d9156a7 A |
197 | } |
198 | ||
1f2f436a | 199 | static size_t |
3d9156a7 A |
200 | -_UTF8_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, |
201 | - size_t nwc, size_t len, mbstate_t * __restrict ps) | |
3d9156a7 A |
202 | +_UTF2_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, |
203 | + size_t nwc, size_t len, mbstate_t * __restrict ps, locale_t loc) | |
204 | { | |
205 | - _UTF8State *us; | |
206 | + _UTF2State *us; | |
207 | char buf[MB_LEN_MAX]; | |
208 | const wchar_t *s; | |
209 | size_t nbytes; | |
210 | size_t nb; | |
211 | ||
212 | - us = (_UTF8State *)ps; | |
213 | + us = (_UTF2State *)ps; | |
214 | ||
215 | if (us->want != 0) { | |
216 | errno = EINVAL; | |
1f2f436a | 217 | @@ -379,7 +359,7 @@ _UTF8_wcsnrtombs(char * __restrict dst, |
3d9156a7 A |
218 | if (0 <= *s && *s < 0x80) |
219 | /* Fast path for plain ASCII characters. */ | |
220 | nb = 1; | |
221 | - else if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == | |
222 | + else if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == | |
223 | (size_t)-1) | |
224 | /* Invalid character - wcrtomb() sets errno. */ | |
225 | return ((size_t)-1); | |
1f2f436a | 226 | @@ -396,9 +376,9 @@ _UTF8_wcsnrtombs(char * __restrict dst, |
3d9156a7 A |
227 | /* Fast path for plain ASCII characters. */ |
228 | nb = 1; | |
229 | *dst = *s; | |
230 | - } else if (len > (size_t)MB_CUR_MAX) { | |
231 | + } else if (len > (size_t)UTF2_MB_CUR_MAX) { | |
232 | /* Enough space to translate in-place. */ | |
1f2f436a A |
233 | - if ((nb = _UTF8_wcrtomb(dst, *s, ps)) == (size_t)-1) { |
234 | + if ((nb = _UTF2_wcrtomb(dst, *s, ps, loc)) == (size_t)-1) { | |
3d9156a7 A |
235 | *src = s; |
236 | return ((size_t)-1); | |
237 | } | |
1f2f436a | 238 | @@ -406,7 +386,7 @@ _UTF8_wcsnrtombs(char * __restrict dst, |
3d9156a7 A |
239 | /* |
240 | * May not be enough space; use temp. buffer. | |
241 | */ | |
1f2f436a A |
242 | - if ((nb = _UTF8_wcrtomb(buf, *s, ps)) == (size_t)-1) { |
243 | + if ((nb = _UTF2_wcrtomb(buf, *s, ps, loc)) == (size_t)-1) { | |
3d9156a7 A |
244 | *src = s; |
245 | return ((size_t)-1); | |
246 | } |