]> git.saurik.com Git - hfs.git/blob - fsck_hfs/dfalib/DecompMakeData.c
hfs-226.1.1.tar.gz
[hfs.git] / fsck_hfs / dfalib / DecompMakeData.c
1 /*
2 File: DecompMakeData.c
3
4 Contains: Tool to generate tables for use by FixDecomps (CatalogCheck.c). It takes raw data on combining classes and decomposition changes, massages it into the trie form needed by
5 the function, and emits it on stdout (which should be directed to a file DecompData.h).
6
7 Copyright: © 2002 by Apple Computer, Inc., all rights reserved.
8
9 CVS change log:
10
11 $Log: DecompMakeData.c,v $
12 Revision 1.2 2002/12/20 01:20:36 lindak
13 Merged PR-2937515-2 into ZZ100
14 Old HFS+ decompositions need to be repaired
15
16 Revision 1.1.4.1 2002/12/16 18:55:22 jcotting
17 integrated code from text group (Peter Edberg) that will correct some
18 illegal names created with obsolete Unicode 2.1.2 decomposition rules
19 Bug #: 2937515
20 Submitted by: jerry cottingham
21 Reviewed by: don brady
22
23 Revision 1.1.2.1 2002/10/25 17:15:22 jcotting
24 added code from Peter Edberg that will detect and offer replacement
25 names for file system object names with pre-Jaguar decomp errors
26 Bug #: 2937515
27 Submitted by: jerry cottingham
28 Reviewed by: don brady
29
30 Revision 1.1 2002/10/16 06:33:26 pedberg
31 Initial working version of function and related tools and tables
32
33
34 Notes:
35
36 1. To build:
37 cc DecompMakeData.c -o DecompMakeData -g
38
39 2. To use:
40 ./DecompMakeData > DecompData.h
41
42 */
43
44 #include <stddef.h>
45 #include <stdio.h>
46
47 // Internal includes
48 #include "DecompDataEnums.h" // enums for data tables
49
// One row of the raw input table: a BMP character, its current combining class, and
// (optionally) how a sequence involving that character is to be rewritten.
typedef struct UniCharClassAndRepl {
	u_int16_t	uChar;					// character this row describes; 0 only in the terminating row
	u_int16_t	combClass;				// combining class for uChar (0 in rows that carry no class)
	u_int16_t	action;					// replacement action code from the table, or 0 for none
	u_int16_t	matchAndReplacement[3];	// following chars that must match and/or the replacement chars
} UniCharClassAndRepl;
57
58 // The following is the raw data on
59 // 1. Current combining classes, derived from the Unicode 3.2.0 data file
60 // 2. Changes in decomposition sequences, derived by comparing the canonical decompositions derived from
61 // the Unicode 2.1.2 data file with the decompositions derived from the Unicode 3.2.0 data file (in both
62 // cases excluding decompositions in the ranges 2000-2FFF, F900-FAFF, etc.).
63 // These are folded into a single table so we can do one lookup of the high-order 12 bits of the shifted
64 // UniChar to determine if there is anything of interest.
65 //
66 // Note that these ignore non-BMP characters; the new decompositions and combining classes for those are
67 // not really relevant for the purpose of fixing the HFS+ filenames.
68
// Raw input for the generator. Rows are listed in ascending uChar order and the table is
// terminated by an all-zero row (the processing loop in main() stops at uChar == 0).
// A row with a non-zero combClass records the combining class of uChar; a row with a
// non-zero action additionally describes a replacement, whose data is in matchAndReplacement[].
69 static const UniCharClassAndRepl uCharClassAndRepl[] = {
70 // Columns, left to right:
71 //   uChar (cur char to match), combClass (comb class), action (replacement action),
72 //   matchAndReplacement[] (next chars that must also match, then the replacement string for cur or all)
73 { 0x00A8, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x00A8, 0x0301 } },
74 { 0x01F8, 0, kReplaceCurWithTwo, { 0x004E, 0x0300 } },
75 { 0x01F9, 0, kReplaceCurWithTwo, { 0x006E, 0x0300 } },
76 { 0x0218, 0, kReplaceCurWithTwo, { 0x0053, 0x0326 } },
77 { 0x0219, 0, kReplaceCurWithTwo, { 0x0073, 0x0326 } },
78 { 0x021A, 0, kReplaceCurWithTwo, { 0x0054, 0x0326 } },
79 { 0x021B, 0, kReplaceCurWithTwo, { 0x0074, 0x0326 } },
80 { 0x021E, 0, kReplaceCurWithTwo, { 0x0048, 0x030C } },
81 { 0x021F, 0, kReplaceCurWithTwo, { 0x0068, 0x030C } },
82 { 0x0226, 0, kReplaceCurWithTwo, { 0x0041, 0x0307 } },
83 { 0x0227, 0, kReplaceCurWithTwo, { 0x0061, 0x0307 } },
84 { 0x0228, 0, kReplaceCurWithTwo, { 0x0045, 0x0327 } },
85 { 0x0229, 0, kReplaceCurWithTwo, { 0x0065, 0x0327 } },
86 { 0x022A, 0, kReplaceCurWithThree, { 0x004F, 0x0308, 0x0304 } },
87 { 0x022B, 0, kReplaceCurWithThree, { 0x006F, 0x0308, 0x0304 } },
88 { 0x022C, 0, kReplaceCurWithThree, { 0x004F, 0x0303, 0x0304 } },
89 { 0x022D, 0, kReplaceCurWithThree, { 0x006F, 0x0303, 0x0304 } },
90 { 0x022E, 0, kReplaceCurWithTwo, { 0x004F, 0x0307 } },
91 { 0x022F, 0, kReplaceCurWithTwo, { 0x006F, 0x0307 } },
92 { 0x0230, 0, kReplaceCurWithThree, { 0x004F, 0x0307, 0x0304 } },
93 { 0x0231, 0, kReplaceCurWithThree, { 0x006F, 0x0307, 0x0304 } },
94 { 0x0232, 0, kReplaceCurWithTwo, { 0x0059, 0x0304 } },
95 { 0x0233, 0, kReplaceCurWithTwo, { 0x0079, 0x0304 } },
96 { 0x0300, 230, 0, { 0 } },
97 { 0x0301, 230, 0, { 0 } },
98 { 0x0302, 230, 0, { 0 } },
99 { 0x0303, 230, 0, { 0 } },
100 { 0x0304, 230, 0, { 0 } },
101 { 0x0305, 230, 0, { 0 } },
102 { 0x0306, 230, kIfNextOneMatchesReplaceAllWithOne, { 0x0307, 0x0310 } },
103 { 0x0307, 230, 0, { 0 } },
104 { 0x0308, 230, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0308, 0x0301 } },
105 { 0x0309, 230, 0, { 0 } },
106 { 0x030A, 230, 0, { 0 } },
107 { 0x030B, 230, 0, { 0 } },
108 { 0x030C, 230, 0, { 0 } },
109 { 0x030D, 230, 0, { 0 } },
110 { 0x030E, 230, 0, { 0 } },
111 { 0x030F, 230, 0, { 0 } },
112 { 0x0310, 230, 0, { 0 } },
113 { 0x0311, 230, 0, { 0 } },
114 { 0x0312, 230, 0, { 0 } },
115 { 0x0313, 230, 0, { 0 } },
116 { 0x0314, 230, 0, { 0 } },
117 { 0x0315, 232, 0, { 0 } },
118 { 0x0316, 220, 0, { 0 } },
119 { 0x0317, 220, 0, { 0 } },
120 { 0x0318, 220, 0, { 0 } },
121 { 0x0319, 220, 0, { 0 } },
122 { 0x031A, 232, 0, { 0 } },
123 { 0x031B, 216, 0, { 0 } },
124 { 0x031C, 220, 0, { 0 } },
125 { 0x031D, 220, 0, { 0 } },
126 { 0x031E, 220, 0, { 0 } },
127 { 0x031F, 220, 0, { 0 } },
128 { 0x0320, 220, 0, { 0 } },
129 { 0x0321, 202, 0, { 0 } },
130 { 0x0322, 202, 0, { 0 } },
131 { 0x0323, 220, 0, { 0 } },
132 { 0x0324, 220, 0, { 0 } },
133 { 0x0325, 220, 0, { 0 } },
134 { 0x0326, 220, 0, { 0 } },
135 { 0x0327, 202, 0, { 0 } },
136 { 0x0328, 202, 0, { 0 } },
137 { 0x0329, 220, 0, { 0 } },
138 { 0x032A, 220, 0, { 0 } },
139 { 0x032B, 220, 0, { 0 } },
140 { 0x032C, 220, 0, { 0 } },
141 { 0x032D, 220, 0, { 0 } },
142 { 0x032E, 220, 0, { 0 } },
143 { 0x032F, 220, 0, { 0 } },
144 { 0x0330, 220, 0, { 0 } },
145 { 0x0331, 220, 0, { 0 } },
146 { 0x0332, 220, 0, { 0 } },
147 { 0x0333, 220, 0, { 0 } },
148 { 0x0334, 1, 0, { 0 } },
149 { 0x0335, 1, 0, { 0 } },
150 { 0x0336, 1, 0, { 0 } },
151 { 0x0337, 1, 0, { 0 } },
152 { 0x0338, 1, 0, { 0 } },
153 { 0x0339, 220, 0, { 0 } },
154 { 0x033A, 220, 0, { 0 } },
155 { 0x033B, 220, 0, { 0 } },
156 { 0x033C, 220, 0, { 0 } },
157 { 0x033D, 230, 0, { 0 } },
158 { 0x033E, 230, 0, { 0 } },
159 { 0x033F, 230, 0, { 0 } },
160 { 0x0340, 230, 0, { 0 } },
161 { 0x0341, 230, 0, { 0 } },
162 { 0x0342, 230, 0, { 0 } },
163 { 0x0343, 230, 0, { 0 } },
164 { 0x0344, 230, 0, { 0 } },
165 { 0x0345, 240, 0, { 0 } },
166 { 0x0346, 230, 0, { 0 } },
167 { 0x0347, 220, 0, { 0 } },
168 { 0x0348, 220, 0, { 0 } },
169 { 0x0349, 220, 0, { 0 } },
170 { 0x034A, 230, 0, { 0 } },
171 { 0x034B, 230, 0, { 0 } },
172 { 0x034C, 230, 0, { 0 } },
173 { 0x034D, 220, 0, { 0 } },
174 { 0x034E, 220, 0, { 0 } },
175 { 0x0360, 234, 0, { 0 } },
176 { 0x0361, 234, 0, { 0 } },
177 { 0x0362, 233, 0, { 0 } },
178 { 0x0363, 230, 0, { 0 } }, // new char in Unicode 3.2
179 { 0x0364, 230, 0, { 0 } }, // new char in Unicode 3.2
180 { 0x0365, 230, 0, { 0 } }, // new char in Unicode 3.2
181 { 0x0366, 230, 0, { 0 } }, // new char in Unicode 3.2
182 { 0x0367, 230, 0, { 0 } }, // new char in Unicode 3.2
183 { 0x0368, 230, 0, { 0 } }, // new char in Unicode 3.2
184 { 0x0369, 230, 0, { 0 } }, // new char in Unicode 3.2
185 { 0x036A, 230, 0, { 0 } }, // new char in Unicode 3.2
186 { 0x036B, 230, 0, { 0 } }, // new char in Unicode 3.2
187 { 0x036C, 230, 0, { 0 } }, // new char in Unicode 3.2
188 { 0x036D, 230, 0, { 0 } }, // new char in Unicode 3.2
189 { 0x036E, 230, 0, { 0 } }, // new char in Unicode 3.2
190 { 0x036F, 230, 0, { 0 } }, // new char in Unicode 3.2
191 { 0x0391, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0391, 0x0301 } },
192 { 0x0395, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0395, 0x0301 } },
193 { 0x0397, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0397, 0x0301 } },
194 { 0x0399, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0399, 0x0301 } },
195 { 0x039F, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x039F, 0x0301 } },
196 { 0x03A5, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03A5, 0x0301 } },
197 { 0x03A9, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03A9, 0x0301 } },
198 { 0x03B1, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B1, 0x0301 } },
199 { 0x03B5, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B5, 0x0301 } },
200 { 0x03B7, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B7, 0x0301 } },
201 { 0x03B9, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B9, 0x0301 } },
202 { 0x03BF, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03BF, 0x0301 } },
203 { 0x03C5, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03C5, 0x0301 } },
204 { 0x03C9, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03C9, 0x0301 } },
205 { 0x03D2, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03D2, 0x0301 } },
206 { 0x0400, 0, kReplaceCurWithTwo, { 0x0415, 0x0300 } },
207 { 0x040D, 0, kReplaceCurWithTwo, { 0x0418, 0x0300 } },
208 { 0x0450, 0, kReplaceCurWithTwo, { 0x0435, 0x0300 } },
209 { 0x045D, 0, kReplaceCurWithTwo, { 0x0438, 0x0300 } },
210 { 0x0483, 230, 0, { 0 } },
211 { 0x0484, 230, 0, { 0 } },
212 { 0x0485, 230, 0, { 0 } },
213 { 0x0486, 230, 0, { 0 } },
214 { 0x04EC, 0, kReplaceCurWithTwo, { 0x042D, 0x0308 } },
215 { 0x04ED, 0, kReplaceCurWithTwo, { 0x044D, 0x0308 } },
216 { 0x0591, 220, 0, { 0 } },
217 { 0x0592, 230, 0, { 0 } },
218 { 0x0593, 230, 0, { 0 } },
219 { 0x0594, 230, 0, { 0 } },
220 { 0x0595, 230, 0, { 0 } },
221 { 0x0596, 220, 0, { 0 } },
222 { 0x0597, 230, 0, { 0 } },
223 { 0x0598, 230, 0, { 0 } },
224 { 0x0599, 230, 0, { 0 } },
225 { 0x059A, 222, 0, { 0 } },
226 { 0x059B, 220, 0, { 0 } },
227 { 0x059C, 230, 0, { 0 } },
228 { 0x059D, 230, 0, { 0 } },
229 { 0x059E, 230, 0, { 0 } },
230 { 0x059F, 230, 0, { 0 } },
231 { 0x05A0, 230, 0, { 0 } },
232 { 0x05A1, 230, 0, { 0 } },
233 { 0x05A3, 220, 0, { 0 } },
234 { 0x05A4, 220, 0, { 0 } },
235 { 0x05A5, 220, 0, { 0 } },
236 { 0x05A6, 220, 0, { 0 } },
237 { 0x05A7, 220, 0, { 0 } },
238 { 0x05A8, 230, 0, { 0 } },
239 { 0x05A9, 230, 0, { 0 } },
240 { 0x05AA, 220, 0, { 0 } },
241 { 0x05AB, 230, 0, { 0 } },
242 { 0x05AC, 230, 0, { 0 } },
243 { 0x05AD, 222, 0, { 0 } },
244 { 0x05AE, 228, 0, { 0 } },
245 { 0x05AF, 230, 0, { 0 } },
246 { 0x05B0, 10, 0, { 0 } },
247 { 0x05B1, 11, 0, { 0 } },
248 { 0x05B2, 12, 0, { 0 } },
249 { 0x05B3, 13, 0, { 0 } },
250 { 0x05B4, 14, 0, { 0 } },
251 { 0x05B5, 15, 0, { 0 } },
252 { 0x05B6, 16, 0, { 0 } },
253 { 0x05B7, 17, 0, { 0 } },
254 { 0x05B8, 18, 0, { 0 } },
255 { 0x05B9, 19, 0, { 0 } },
256 { 0x05BB, 20, 0, { 0 } },
257 { 0x05BC, 21, 0, { 0 } },
258 { 0x05BD, 22, 0, { 0 } },
259 { 0x05BF, 23, 0, { 0 } },
260 { 0x05C1, 24, 0, { 0 } },
261 { 0x05C2, 25, 0, { 0 } },
262 { 0x05C4, 230, 0, { 0 } },
263 { 0x0622, 0, kReplaceCurWithTwo, { 0x0627, 0x0653 } },
264 { 0x0623, 0, kReplaceCurWithTwo, { 0x0627, 0x0654 } },
265 { 0x0624, 0, kReplaceCurWithTwo, { 0x0648, 0x0654 } },
266 { 0x0625, 0, kReplaceCurWithTwo, { 0x0627, 0x0655 } },
267 { 0x0626, 0, kReplaceCurWithTwo, { 0x064A, 0x0654 } },
268 { 0x064B, 27, 0, { 0 } },
269 { 0x064C, 28, 0, { 0 } },
270 { 0x064D, 29, 0, { 0 } },
271 { 0x064E, 30, 0, { 0 } },
272 { 0x064F, 31, 0, { 0 } },
273 { 0x0650, 32, 0, { 0 } },
274 { 0x0651, 33, 0, { 0 } },
275 { 0x0652, 34, 0, { 0 } },
276 { 0x0653, 230, 0, { 0 } },
277 { 0x0654, 230, 0, { 0 } },
278 { 0x0655, 220, 0, { 0 } },
279 { 0x0670, 35, 0, { 0 } },
280 { 0x06C0, 0, kReplaceCurWithTwo, { 0x06D5, 0x0654 } },
281 { 0x06C2, 0, kReplaceCurWithTwo, { 0x06C1, 0x0654 } },
282 { 0x06D3, 0, kReplaceCurWithTwo, { 0x06D2, 0x0654 } },
283 { 0x06D6, 230, 0, { 0 } },
284 { 0x06D7, 230, 0, { 0 } },
285 { 0x06D8, 230, 0, { 0 } },
286 { 0x06D9, 230, 0, { 0 } },
287 { 0x06DA, 230, 0, { 0 } },
288 { 0x06DB, 230, 0, { 0 } },
289 { 0x06DC, 230, 0, { 0 } },
290 { 0x06DF, 230, 0, { 0 } },
291 { 0x06E0, 230, 0, { 0 } },
292 { 0x06E1, 230, 0, { 0 } },
293 { 0x06E2, 230, 0, { 0 } },
294 { 0x06E3, 220, 0, { 0 } },
295 { 0x06E4, 230, 0, { 0 } },
296 { 0x06E7, 230, 0, { 0 } },
297 { 0x06E8, 230, 0, { 0 } },
298 { 0x06EA, 220, 0, { 0 } },
299 { 0x06EB, 230, 0, { 0 } },
300 { 0x06EC, 230, 0, { 0 } },
301 { 0x06ED, 220, 0, { 0 } },
302 { 0x0711, 36, 0, { 0 } },
303 { 0x0730, 230, 0, { 0 } },
304 { 0x0731, 220, 0, { 0 } },
305 { 0x0732, 230, 0, { 0 } },
306 { 0x0733, 230, 0, { 0 } },
307 { 0x0734, 220, 0, { 0 } },
308 { 0x0735, 230, 0, { 0 } },
309 { 0x0736, 230, 0, { 0 } },
310 { 0x0737, 220, 0, { 0 } },
311 { 0x0738, 220, 0, { 0 } },
312 { 0x0739, 220, 0, { 0 } },
313 { 0x073A, 230, 0, { 0 } },
314 { 0x073B, 220, 0, { 0 } },
315 { 0x073C, 220, 0, { 0 } },
316 { 0x073D, 230, 0, { 0 } },
317 { 0x073E, 220, 0, { 0 } },
318 { 0x073F, 230, 0, { 0 } },
319 { 0x0740, 230, 0, { 0 } },
320 { 0x0741, 230, 0, { 0 } },
321 { 0x0742, 220, 0, { 0 } },
322 { 0x0743, 230, 0, { 0 } },
323 { 0x0744, 220, 0, { 0 } },
324 { 0x0745, 230, 0, { 0 } },
325 { 0x0746, 220, 0, { 0 } },
326 { 0x0747, 230, 0, { 0 } },
327 { 0x0748, 220, 0, { 0 } },
328 { 0x0749, 230, 0, { 0 } },
329 { 0x074A, 230, 0, { 0 } },
330 { 0x093C, 7, 0, { 0 } },
331 { 0x094D, 9, 0, { 0 } },
332 { 0x0951, 230, 0, { 0 } },
333 { 0x0952, 220, 0, { 0 } },
334 { 0x0953, 230, 0, { 0 } },
335 { 0x0954, 230, 0, { 0 } },
336 { 0x09AC, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x09BC, 0x09B0 } },
337 { 0x09BC, 7, 0, { 0 } },
338 { 0x09CD, 9, 0, { 0 } },
339 { 0x0A21, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0A3C, 0x0A5C } },
340 { 0x0A33, 0, kReplaceCurWithTwo, { 0x0A32, 0x0A3C } },
341 { 0x0A36, 0, kReplaceCurWithTwo, { 0x0A38, 0x0A3C } },
342 { 0x0A3C, 7, 0, { 0 } },
343 { 0x0A4D, 9, 0, { 0 } },
344 { 0x0ABC, 7, 0, { 0 } },
345 { 0x0ACD, 9, 0, { 0 } },
346 { 0x0B2F, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0B3C, 0x0B5F } },
347 { 0x0B3C, 7, 0, { 0 } },
348 { 0x0B4D, 9, 0, { 0 } },
349 { 0x0BCD, 9, 0, { 0 } },
350 { 0x0C4D, 9, 0, { 0 } },
351 { 0x0C55, 84, 0, { 0 } },
352 { 0x0C56, 91, 0, { 0 } },
353 { 0x0CCD, 9, 0, { 0 } },
354 { 0x0D4D, 9, 0, { 0 } },
355 { 0x0DCA, 9, 0, { 0 } },
356 { 0x0DDA, 0, kReplaceCurWithTwo, { 0x0DD9, 0x0DCA } },
357 { 0x0DDC, 0, kReplaceCurWithTwo, { 0x0DD9, 0x0DCF } },
358 { 0x0DDD, 0, kReplaceCurWithThree, { 0x0DD9, 0x0DCF, 0x0DCA } },
359 { 0x0DDE, 0, kReplaceCurWithTwo, { 0x0DD9, 0x0DDF } },
360 { 0x0E38, 103, 0, { 0 } },
361 { 0x0E39, 103, 0, { 0 } },
362 { 0x0E3A, 9, 0, { 0 } },
363 { 0x0E48, 107, 0, { 0 } },
364 { 0x0E49, 107, 0, { 0 } },
365 { 0x0E4A, 107, 0, { 0 } },
366 { 0x0E4B, 107, 0, { 0 } },
367 { 0x0E4D, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0E32, 0x0E33 } },
368 { 0x0EB8, 118, 0, { 0 } },
369 { 0x0EB9, 118, 0, { 0 } },
370 { 0x0EC8, 122, 0, { 0 } },
371 { 0x0EC9, 122, 0, { 0 } },
372 { 0x0ECA, 122, 0, { 0 } },
373 { 0x0ECB, 122, 0, { 0 } },
374 { 0x0ECD, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0EB2, 0x0EB3 } },
375 { 0x0F18, 220, 0, { 0 } },
376 { 0x0F19, 220, 0, { 0 } },
377 { 0x0F35, 220, 0, { 0 } },
378 { 0x0F37, 220, 0, { 0 } },
379 { 0x0F39, 216, 0, { 0 } },
380 { 0x0F71, 129, 0, { 0 } },
381 { 0x0F72, 130, 0, { 0 } },
382 { 0x0F74, 132, 0, { 0 } },
383 { 0x0F7A, 130, 0, { 0 } },
384 { 0x0F7B, 130, 0, { 0 } },
385 { 0x0F7C, 130, 0, { 0 } },
386 { 0x0F7D, 130, 0, { 0 } },
387 { 0x0F80, 130, 0, { 0 } },
388 { 0x0F82, 230, 0, { 0 } },
389 { 0x0F83, 230, 0, { 0 } },
390 { 0x0F84, 9, 0, { 0 } },
391 { 0x0F86, 230, 0, { 0 } },
392 { 0x0F87, 230, 0, { 0 } },
393 { 0x0FB2, 0, kIfNextTwoMatchReplaceAllWithOne, { 0x0F80, 0x0F71, 0x0F77 } },
394 { 0x0FB3, 0, kIfNextTwoMatchReplaceAllWithOne, { 0x0F80, 0x0F71, 0x0F79 } },
395 { 0x0FC6, 220, 0, { 0 } },
396 { 0x1026, 0, kReplaceCurWithTwo, { 0x1025, 0x102E } },
397 { 0x1037, 7, 0, { 0 } },
398 { 0x1039, 9, 0, { 0 } },
399 { 0x1714, 9, 0, { 0 } }, // new char in Unicode 3.2
400 { 0x1734, 9, 0, { 0 } }, // new char in Unicode 3.2
401 { 0x17D2, 9, 0, { 0 } },
402 { 0x18A9, 228, 0, { 0 } },
403 { 0x20D0, 230, 0, { 0 } },
404 { 0x20D1, 230, 0, { 0 } },
405 { 0x20D2, 1, 0, { 0 } },
406 { 0x20D3, 1, 0, { 0 } },
407 { 0x20D4, 230, 0, { 0 } },
408 { 0x20D5, 230, 0, { 0 } },
409 { 0x20D6, 230, 0, { 0 } },
410 { 0x20D7, 230, 0, { 0 } },
411 { 0x20D8, 1, 0, { 0 } },
412 { 0x20D9, 1, 0, { 0 } },
413 { 0x20DA, 1, 0, { 0 } },
414 { 0x20DB, 230, 0, { 0 } },
415 { 0x20DC, 230, 0, { 0 } },
416 { 0x20E1, 230, 0, { 0 } },
417 { 0x20E5, 1, 0, { 0 } }, // new char in Unicode 3.2
418 { 0x20E6, 1, 0, { 0 } }, // new char in Unicode 3.2
419 { 0x20E7, 230, 0, { 0 } }, // new char in Unicode 3.2
420 { 0x20E8, 220, 0, { 0 } }, // new char in Unicode 3.2
421 { 0x20E9, 230, 0, { 0 } }, // new char in Unicode 3.2
422 { 0x20EA, 1, 0, { 0 } }, // new char in Unicode 3.2
423 { 0x302A, 218, 0, { 0 } },
424 { 0x302B, 228, 0, { 0 } },
425 { 0x302C, 232, 0, { 0 } },
426 { 0x302D, 222, 0, { 0 } },
427 { 0x302E, 224, 0, { 0 } },
428 { 0x302F, 224, 0, { 0 } },
429 { 0x3099, 8, 0, { 0 } },
430 { 0x309A, 8, 0, { 0 } },
431 { 0xFB1D, 0, kReplaceCurWithTwo, { 0x05D9, 0x05B4 } },
432 { 0xFB1E, 26, 0, { 0 } },
433 { 0xFE20, 230, 0, { 0 } },
434 { 0xFE21, 230, 0, { 0 } },
435 { 0xFE22, 230, 0, { 0 } },
436 { 0xFE23, 230, 0, { 0 } },
437 { 0, 0, 0, { 0 } }
438 };
439
440 enum {
441 kMaxRangeCount = 108,
442 kMaxReplaceDataCount = 256,
443 kIndexValuesPerLine = 16,
444 kReplDataValuesPerLine = 8
445 };
446
447 static int8_t rangesIndex[kHiFieldEntryCount]; // if >= 0, then index into xxxRanges[]
448 static u_int8_t classRanges[kMaxRangeCount][kLoFieldEntryCount];
449 static u_int8_t replRanges[kMaxRangeCount][kLoFieldEntryCount];
450 static u_int16_t rangesKey[kMaxRangeCount]; // remembers starting Unicode for range
451 static u_int16_t replacementData[kMaxReplaceDataCount];
452
453 int main(int argc, char *argv[]) {
454 u_int32_t entryIndex, rangeIndex;
455 const UniCharClassAndRepl * classAndReplPtr;
456 int32_t rangeCount;
457 u_int32_t replDataCount;
458
459 // print header stuff
460 plog("/*\n");
461 plog("\tFile:\t\tDecompData.h\n");
462 plog("\tContains:\tData tables for use in FixDecomps (CatalogCheck.c)\n");
463 plog("\tNote:\t\tThis file is generated automatically by running DecompMakeData\n");
464 plog("*/\n");
465 plog("#include \"DecompDataEnums.h\"\n\n");
466
467 // initialize arrays
468 for (entryIndex = 0; entryIndex < kHiFieldEntryCount; entryIndex++) {
469 rangesIndex[entryIndex] = -1;
470 }
471 for (rangeIndex = 0; rangeIndex < kMaxRangeCount; rangeIndex++) {
472 for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) {
473 classRanges[rangeIndex][entryIndex] = 0;
474 replRanges[rangeIndex][entryIndex] = 0;
475 }
476 }
477 rangeCount = 0;
478 replDataCount = 0;
479 replacementData[replDataCount++] = 0; // need to start real data at index 1
480
481 // process data
482 for (classAndReplPtr = uCharClassAndRepl; classAndReplPtr->uChar != 0; classAndReplPtr++) {
483 u_int32_t matchAndReplacementCount, matchAndReplacementIndex;
484 u_int16_t shiftUChar = classAndReplPtr->uChar + kShiftUniCharOffset;
485 if (shiftUChar >= kShiftUniCharLimit) {
486 plog("Exceeded uChar range for 0x%04X\n", classAndReplPtr->uChar);
487 return 1;
488 }
489 entryIndex = shiftUChar >> kLoFieldBitSize;
490 if (rangesIndex[entryIndex] == -1) {
491 if (rangeCount >= kMaxRangeCount) {
492 plog("Exceeded max range count with 0x%04X\n", classAndReplPtr->uChar);
493 return 1;
494 }
495 rangesKey[rangeCount] = classAndReplPtr->uChar & ~kLoFieldMask;
496 rangesIndex[entryIndex] = rangeCount++;
497 }
498 entryIndex = shiftUChar & kLoFieldMask;
499
500 if (classAndReplPtr->combClass != 0)
501 classRanges[rangeCount - 1][entryIndex] = classAndReplPtr->combClass;
502
503 if (classAndReplPtr->action != 0) {
504 switch (classAndReplPtr->action) {
505 case kReplaceCurWithTwo:
506 case kIfNextOneMatchesReplaceAllWithOne:
507 matchAndReplacementCount = 2;
508 break;
509 case kReplaceCurWithThree:
510 case kIfNextOneMatchesReplaceAllWithTwo:
511 case kIfNextTwoMatchReplaceAllWithOne:
512 matchAndReplacementCount = 3;
513 break;
514 default:
515 matchAndReplacementCount = 0;
516 break;
517 }
518 if (replDataCount + matchAndReplacementCount >= kMaxReplaceDataCount) {
519 plog("Exceeded max replacement data count with 0x%04X\n", classAndReplPtr->uChar);
520 return 1;
521 }
522 replRanges[rangeCount - 1][entryIndex] = replDataCount;
523 replacementData[replDataCount++] = classAndReplPtr->action;
524 for (matchAndReplacementIndex = 0; matchAndReplacementIndex < matchAndReplacementCount; matchAndReplacementIndex++) {
525 replacementData[replDataCount++] = classAndReplPtr->matchAndReplacement[matchAndReplacementIndex];
526 }
527 }
528 }
529
530 // print filled-in index
531 plog("static const int8_t classAndReplIndex[kHiFieldEntryCount] = {\n");
532 for (entryIndex = 0; entryIndex < kHiFieldEntryCount; entryIndex++) {
533 char * formatPtr = (entryIndex + 1 < kHiFieldEntryCount)? "%2d,\t": "%2d\t";
534 if (entryIndex % kIndexValuesPerLine == 0) // beginning of line,
535 plog("\t"); // print tab
536 plog(formatPtr, rangesIndex[entryIndex]); // print values
537 if ((entryIndex + 1) % kIndexValuesPerLine == 0) // end of line, print starting UniChar value
538 plog("// uChar 0x%04X-\n", (u_int16_t)(((entryIndex + 1 - kIndexValuesPerLine) << kLoFieldBitSize) - kShiftUniCharOffset) );
539 }
540 plog("};\n\n");
541
542 // print filled in class ranges
543 plog("static const u_int8_t combClassRanges[][kLoFieldEntryCount] = {\n", kLoFieldEntryCount);
544 for (rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++) {
545 plog("\t{\t");
546 for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) {
547 char * formatPtr = (entryIndex + 1 < kLoFieldEntryCount)? "%3d,": "%3d";
548 plog(formatPtr, classRanges[rangeIndex][entryIndex]); // print values
549 }
550 plog("\t},\t// uChar 0x%04X-\n", rangesKey[rangeIndex]);
551 }
552 plog("};\n\n");
553
554 // print filled in repl ranges
555 plog("static const u_int8_t replaceRanges[][kLoFieldEntryCount] = {\n", kLoFieldEntryCount);
556 for (rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++) {
557 plog("\t{\t");
558 for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) {
559 char * formatPtr = (entryIndex + 1 < kLoFieldEntryCount)? "%3d,": "%3d";
560 plog(formatPtr, replRanges[rangeIndex][entryIndex]); // print values
561 }
562 plog("\t},\t// uChar 0x%04X-\n", rangesKey[rangeIndex]);
563 }
564 plog("};\n\n");
565
566 // print filled in replacement data
567 plog("static const u_int16_t replaceData[] = {\n");
568 for (entryIndex = 0; entryIndex < replDataCount; entryIndex++) {
569 char * formatPtr = (entryIndex + 1 < replDataCount)? "0x%04X,\t": "0x%04X\t";
570 if (entryIndex % kReplDataValuesPerLine == 0) // beginning of line,
571 plog("\t"); // print tab
572 plog(formatPtr, replacementData[entryIndex]); // print values
573 if ((entryIndex + 1) % kReplDataValuesPerLine == 0 || entryIndex + 1 == replDataCount) // end of line,
574 plog("// index %d-\n", entryIndex & ~(kReplDataValuesPerLine-1) ); // print starting index value
575 }
576 plog("};\n\n");
577
578 // print summary info
579 plog("// combClassData:\n");
580 plog("// trimmed index: kHiFieldEntryCount(= %d) bytes\n", kHiFieldEntryCount);
581 plog("// ranges: 2 * %d ranges * kLoFieldEntryCount(= %d) bytes = %d\n", rangeCount, kLoFieldEntryCount, 2*rangeCount*kLoFieldEntryCount);
582 plog("// replData: %d entries * 2 = %d\n", replDataCount, 2*replDataCount);
583 plog("// total: %d\n\n", kHiFieldEntryCount + 2*rangeCount*kLoFieldEntryCount + 2*replDataCount);
584
585 return 0;
586 }