]>
Commit | Line | Data |
---|---|---|
5b2abdfb A |
1 | .\" Copyright (c) 1993 |
2 | .\" The Regents of the University of California. All rights reserved. | |
3 | .\" | |
4 | .\" This code is derived from software contributed to Berkeley by | |
5 | .\" Paul Borman at Krystal Technologies. | |
6 | .\" | |
7 | .\" Redistribution and use in source and binary forms, with or without | |
8 | .\" modification, are permitted provided that the following conditions | |
9 | .\" are met: | |
10 | .\" 1. Redistributions of source code must retain the above copyright | |
11 | .\" notice, this list of conditions and the following disclaimer. | |
12 | .\" 2. Redistributions in binary form must reproduce the above copyright | |
13 | .\" notice, this list of conditions and the following disclaimer in the | |
14 | .\" documentation and/or other materials provided with the distribution. | |
15 | .\" 3. All advertising materials mentioning features or use of this software | |
16 | .\" must display the following acknowledgement: | |
17 | .\" This product includes software developed by the University of | |
18 | .\" California, Berkeley and its contributors. | |
19 | .\" 4. Neither the name of the University nor the names of its contributors | |
20 | .\" may be used to endorse or promote products derived from this software | |
21 | .\" without specific prior written permission. | |
22 | .\" | |
23 | .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
24 | .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
25 | .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
26 | .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
27 | .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
28 | .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
29 | .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
30 | .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
31 | .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
32 | .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
33 | .\" SUCH DAMAGE. | |
34 | .\" | |
35 | .\" @(#)euc.4 8.1 (Berkeley) 6/4/93 | |
9385eb3d | 36 | .\" $FreeBSD: src/lib/libc/locale/euc.4,v 1.9 2001/07/15 07:53:05 dd Exp $ |
5b2abdfb A |
37 | .\" |
38 | .Dd June 4, 1993 | |
39 | .Dt EUC 4 | |
40 | .Os | |
41 | .Sh NAME | |
42 | .Nm euc | |
43 | .Nd EUC encoding of runes | |
44 | .Sh SYNOPSIS | |
45 | .Nm ENCODING | |
46 | .Qq EUC | |
47 | .Pp | |
48 | .Nm VARIABLE | |
49 | .Ar len1 | |
50 | .Ar mask1 | |
51 | .Ar len2 | |
52 | .Ar mask2 | |
53 | .Ar len3 | |
54 | .Ar mask3 | |
55 | .Ar len4 | |
56 | .Ar mask4 | |
57 | .Ar mask | |
58 | .Sh DESCRIPTION | |
59 | The | |
60 | .Nm EUC | |
61 | encoding is provided for compatibility with | |
62 | .Ux | |
63 | based systems. | |
64 | See | |
65 | .Xr mklocale 1 | |
66 | for a complete description of the | |
67 | .Ev LC_CTYPE | |
68 | source file format. | |
69 | .Pp | |
70 | .Nm EUC | |
71 | implements a system of 4 multibyte codesets. | |
72 | A multibyte character in the first codeset consists of | |
73 | .Ar len1 | |
74 | bytes starting with a byte in the range of 0x00 to 0x7f. | |
75 | To allow use of ASCII, | |
76 | .Ar len1 | |
77 | is always 1. | |
78 | A multibyte character in the second codeset consists of | |
79 | .Ar len2 | |
80 | bytes starting with a byte in the range of 0x80-0xff excluding 0x8e and 0x8f. | |
81 | A multibyte character in the third codeset consists of | |
82 | .Ar len3 | |
83 | bytes starting with the byte 0x8e. | |
84 | A multibyte character in the fourth codeset consists of | |
85 | .Ar len4 | |
86 | bytes starting with the byte 0x8f. | |
87 | .Pp | |
88 | The | |
89 | .Ev rune_t | |
90 | encoding of | |
91 | .Nm EUC | |
92 | multibyte characters is dependent on the | |
93 | .Ar len | |
94 | and | |
95 | .Ar mask | |
96 | arguments. | |
97 | First, the bytes are moved into a | |
98 | .Ev rune_t | |
99 | as follows: | |
100 | .Bd -literal | |
101 | byte0 << ((\fIlen\fPN-1) * 8) | byte1 << ((\fIlen\fPN-2) * 8) | ... | byte\fIlen\fPN-1 | |
102 | .Ed | |
103 | .Pp | |
104 | The result is then ANDed with | |
105 | .Ar ~mask | |
106 | and ORed with | |
107 | .Ar maskN . | |
108 | Codesets 3 and 4 are special in that the leading byte (0x8e or 0x8f) is | |
109 | first removed and the | |
110 | .Ar lenN | |
111 | argument is reduced by 1. | |
112 | .Pp | |
113 | For example, the Japanese locale has the following | |
114 | .Ev VARIABLE | |
115 | line: | |
116 | .Bd -literal | |
117 | VARIABLE 1 0x0000 2 0x8080 2 0x0080 3 0x8000 0x8080 | |
118 | .Ed | |
119 | .Pp | |
120 | Codeset 1 consists of the values 0x0000 - 0x007f. | |
121 | .Pp | |
122 | Codeset 2 consists of the values which have the bits 0x8080 set. | |
123 | .Pp | |
124 | Codeset 3 consists of the values 0x0080 - 0x00ff. | |
125 | .Pp | |
126 | Codeset 4 consists of the values 0x8000 - 0xff7f excluding the values | |
127 | which have the 0x0080 bit set. | |
128 | .Pp | |
129 | Notice that the global | |
130 | .Ar mask | |
131 | is set to 0x8080; this implies that from those 2 bits the codeset can | |
132 | be determined. | |
133 | .Sh "EXAMPLE - Japanese Locale" | |
134 | This is a complete example of an | |
135 | .Ev LC_CTYPE | |
136 | source file for the Japanese locale: | |
137 | .Bd -literal | |
138 | /* | |
139 | * Japanese LOCALE_CTYPE definitions using EUC of JIS character sets | |
140 | */ | |
141 | ||
142 | ENCODING "EUC" | |
143 | ||
144 | /* JIS JIS JIS */ | |
145 | /* X201 X208 X201 */ | |
146 | /* 00-7f 84-fe */ | |
147 | ||
148 | VARIABLE 1 0x0000 2 0x8080 2 0x0080 3 0x8000 0x8080 | |
149 | ||
150 | /* | |
151 | * Code Set 1 | |
152 | */ | |
153 | ALPHA 'A' - 'Z' 'a' - 'z' | |
154 | CONTROL 0x00 - 0x1f 0x7f | |
155 | DIGIT '0' - '9' | |
156 | GRAPH 0x21 - 0x7e | |
157 | LOWER 'a' - 'z' | |
158 | PUNCT 0x21 - 0x2f 0x3a - 0x40 0x5b - 0x60 0x7b - 0x7e | |
159 | SPACE 0x09 - 0x0d 0x20 | |
160 | UPPER 'A' - 'Z' | |
161 | XDIGIT 'a' - 'f' 'A' - 'F' | |
162 | BLANK ' ' '\t' | |
163 | PRINT 0x20 - 0x7e | |
164 | ||
165 | MAPLOWER < 'A' - 'Z' : 'a' > < 'a' - 'z' : 'a' > | |
166 | MAPUPPER < 'A' - 'Z' : 'A' > < 'a' - 'z' : 'A' > | |
167 | TODIGIT < '0' - '9' : 0 > | |
168 | TODIGIT < 'A' - 'F' : 10 > < 'a' - 'f' : 10 > | |
169 | ||
170 | /* | |
171 | * Code Set 2 | |
172 | */ | |
173 | ||
174 | SPACE 0xa1a1 | |
175 | PHONOGRAM 0xa1bc | |
176 | SPECIAL 0xa1a2 - 0xa1fe | |
177 | PUNCT 0xa1a2 - 0xa1f8 /* A few too many in here... */ | |
178 | ||
179 | SPECIAL 0xa2a1 - 0xa2ae 0xa2ba - 0xa2c1 0xa2ca - 0xa2d0 0xa2dc - 0xa2ea | |
180 | SPECIAL 0xa2f2 - 0xa2f9 0xa2fe | |
181 | ||
182 | DIGIT 0xa3b0 - 0xa3b9 | |
183 | UPPER 0xa3c1 - 0xa3da /* Romaji */ | |
184 | LOWER 0xa3e1 - 0xa3fa /* Romaji */ | |
185 | MAPLOWER < 0xa3c1 - 0xa3da : 0xa3e1 > /* English */ | |
186 | MAPLOWER < 0xa3e1 - 0xa3fa : 0xa3e1 > /* English */ | |
187 | MAPUPPER < 0xa3c1 - 0xa3da : 0xa3c1 > | |
188 | MAPUPPER < 0xa3e1 - 0xa3fa : 0xa3c1 > | |
189 | ||
190 | XDIGIT 0xa3c1 - 0xa3c6 0xa3e1 - 0xa3e6 | |
191 | ||
192 | TODIGIT < 0xa3b0 - 0xa3b9 : 0 > | |
193 | TODIGIT < 0xa3c1 - 0xa3c6 : 10 > < 0xa3e1 - 0xa3e6 : 10 > | |
194 | ||
195 | PHONOGRAM 0xa4a1 - 0xa4f3 | |
196 | PHONOGRAM 0xa5a1 - 0xa5f6 | |
197 | ||
198 | UPPER 0xa6a1 - 0xa6b8 /* Greek */ | |
199 | LOWER 0xa6c1 - 0xa6d8 /* Greek */ | |
200 | MAPLOWER < 0xa6a1 - 0xa6b8 : 0xa6c1 > < 0xa6c1 - 0xa6d8 : 0xa6c1 > | |
201 | MAPUPPER < 0xa6a1 - 0xa6b8 : 0xa6a1 > < 0xa6c1 - 0xa6d8 : 0xa6a1 > | |
202 | ||
203 | UPPER 0xa7a1 - 0xa7c1 /* Cyrillic */ | |
204 | LOWER 0xa7d1 - 0xa7f1 /* Cyrillic */ | |
205 | MAPLOWER < 0xa7a1 - 0xa7c1 : 0xa7d1 > < 0xa7d1 - 0xa7f1 : 0xa7d1 > | |
206 | MAPUPPER < 0xa7a1 - 0xa7c1 : 0xa7a1 > < 0xa7d1 - 0xa7f1 : 0xa7a1 > | |
207 | ||
208 | SPECIAL 0xa8a1 - 0xa8c0 | |
209 | ||
210 | IDEOGRAM 0xb0a1 - 0xb0fe 0xb1a1 - 0xb1fe 0xb2a1 - 0xb2fe | |
211 | IDEOGRAM 0xb3a1 - 0xb3fe 0xb4a1 - 0xb4fe 0xb5a1 - 0xb5fe | |
212 | IDEOGRAM 0xb6a1 - 0xb6fe 0xb7a1 - 0xb7fe 0xb8a1 - 0xb8fe | |
213 | IDEOGRAM 0xb9a1 - 0xb9fe 0xbaa1 - 0xbafe 0xbba1 - 0xbbfe | |
214 | IDEOGRAM 0xbca1 - 0xbcfe 0xbda1 - 0xbdfe 0xbea1 - 0xbefe | |
215 | IDEOGRAM 0xbfa1 - 0xbffe 0xc0a1 - 0xc0fe 0xc1a1 - 0xc1fe | |
216 | IDEOGRAM 0xc2a1 - 0xc2fe 0xc3a1 - 0xc3fe 0xc4a1 - 0xc4fe | |
217 | IDEOGRAM 0xc5a1 - 0xc5fe 0xc6a1 - 0xc6fe 0xc7a1 - 0xc7fe | |
218 | IDEOGRAM 0xc8a1 - 0xc8fe 0xc9a1 - 0xc9fe 0xcaa1 - 0xcafe | |
219 | IDEOGRAM 0xcba1 - 0xcbfe 0xcca1 - 0xccfe 0xcda1 - 0xcdfe | |
220 | IDEOGRAM 0xcea1 - 0xcefe 0xcfa1 - 0xcfd3 0xd0a1 - 0xd0fe | |
221 | IDEOGRAM 0xd1a1 - 0xd1fe 0xd2a1 - 0xd2fe 0xd3a1 - 0xd3fe | |
222 | IDEOGRAM 0xd4a1 - 0xd4fe 0xd5a1 - 0xd5fe 0xd6a1 - 0xd6fe | |
223 | IDEOGRAM 0xd7a1 - 0xd7fe 0xd8a1 - 0xd8fe 0xd9a1 - 0xd9fe | |
224 | IDEOGRAM 0xdaa1 - 0xdafe 0xdba1 - 0xdbfe 0xdca1 - 0xdcfe | |
225 | IDEOGRAM 0xdda1 - 0xddfe 0xdea1 - 0xdefe 0xdfa1 - 0xdffe | |
226 | IDEOGRAM 0xe0a1 - 0xe0fe 0xe1a1 - 0xe1fe 0xe2a1 - 0xe2fe | |
227 | IDEOGRAM 0xe3a1 - 0xe3fe 0xe4a1 - 0xe4fe 0xe5a1 - 0xe5fe | |
228 | IDEOGRAM 0xe6a1 - 0xe6fe 0xe7a1 - 0xe7fe 0xe8a1 - 0xe8fe | |
229 | IDEOGRAM 0xe9a1 - 0xe9fe 0xeaa1 - 0xeafe 0xeba1 - 0xebfe | |
230 | IDEOGRAM 0xeca1 - 0xecfe 0xeda1 - 0xedfe 0xeea1 - 0xeefe | |
231 | IDEOGRAM 0xefa1 - 0xeffe 0xf0a1 - 0xf0fe 0xf1a1 - 0xf1fe | |
232 | IDEOGRAM 0xf2a1 - 0xf2fe 0xf3a1 - 0xf3fe 0xf4a1 - 0xf4a4 | |
233 | /* | |
234 | * This is for Code Set 3, half-width kana | |
235 | */ | |
236 | SPECIAL 0xa1 - 0xdf | |
237 | PHONOGRAM 0xa1 - 0xdf | |
238 | CONTROL 0x84 - 0x97 0x9b - 0x9f 0xe0 - 0xfe | |
239 | .Ed | |
240 | .Sh "SEE ALSO" | |
241 | .Xr mklocale 1 , | |
242 | .Xr setlocale 3 |