]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | # Copyright (c) 2002, International Business Machines Corporation and |
2 | # others. All Rights Reserved. | |
3 | # | |
4 | # file: line.txt | |
5 | # | |
6 | # Line Breaking Rules for ICU rules based break iteration. | |
7 | # Implement default line breaking as defined by Unicode TR 14. | |
8 | # | |
9 | ||
10 | ||
11 | # | |
12 | # Character Classes defined by Unicode TR 14. | |
13 | # These are generated by a script from the Unicode LineBreak derived | |
14 | # properties file. | |
15 | # | |
16 | ||
17 | ############ Start of Script-Generated Definitions ####################### | |
18 | ||
19 | $LF = [ \u000A]; | |
20 | ||
21 | $IN = [ \u2024-\u2026]; | |
22 | ||
23 | $SY = [ \u002F]; | |
24 | ||
25 | $EX = [ \u0021 \u003F \u2762-\u2763 \uFE56-\uFE57 \uFF01 \uFF1F]; | |
26 | ||
27 | $BA = [ \u0009 \u007C \u00AD \u058A \u0F0B \u1361 \u1680 \u17D5 \u2000-\u2006 | |
28 | \u2008-\u200A \u2010 \u2012-\u2013 \u2027 \u205F]; | |
29 | ||
30 | $IS = [ \u002C \u002E \u003A-\u003B \u0589]; | |
31 | ||
32 | $BB = [ \u00B4 \u02C8 \u02CC \u1806]; | |
33 | ||
34 | $SA = [ \u0E01-\u0E30 \u0E32-\u0E33 \u0E40-\u0E46 \u0E81-\u0E82 \u0E84 \u0E87-\u0E88 | |
35 | \u0E8A \u0E8D \u0E94-\u0E97 \u0E99-\u0E9F \u0EA1-\u0EA3 \u0EA5 | |
36 | \u0EA7 \u0EAA-\u0EAB \u0EAD-\u0EB0 \u0EB2-\u0EB3 \u0EBD \u0EC0-\u0EC4 | |
37 | \u0EC6 \u0EDC-\u0EDD \u1000-\u1021 \u1023-\u1027 \u1029-\u102A | |
38 | \u1050-\u1055 \u1780-\u17B3]; | |
39 | ||
40 | $CB = [ \uFFFC]; | |
41 | ||
42 | $XX = [ \uE000-\uF8FF \U000F0000-\U000FFFFD \U00100000-\U0010FFFD]; | |
43 | ||
44 | $HY = [ \u002D]; | |
45 | ||
46 | $AI = [ \u00A1 \u00A7-\u00A8 \u00AA \u00B2-\u00B3 \u00B6-\u00BA \u00BC-\u00BF | |
47 | \u00C6 \u00D0 \u00D7-\u00D8 \u00DE-\u00E1 \u00E6 \u00E8-\u00EA | |
48 | \u00EC-\u00ED \u00F0 \u00F2-\u00F3 \u00F7-\u00FA \u00FC \u00FE | |
49 | \u0101 \u0111 \u0113 \u011B \u0126-\u0127 \u012B \u0131-\u0133 | |
50 | \u0138 \u013F-\u0142 \u0144 \u0148-\u014A \u014D \u0152-\u0153 | |
51 | \u0166-\u0167 \u016B \u01CE \u01D0 \u01D2 \u01D4 \u01D6 \u01D8 | |
52 | \u01DA \u01DC \u0251 \u0261 \u02C7 \u02C9-\u02CB \u02CD \u02D0 | |
53 | \u02D8-\u02DB \u02DD \u0391-\u03A1 \u03A3-\u03A9 \u03B1-\u03C1 | |
54 | \u03C3-\u03C9 \u0401 \u0410-\u044F \u0451 \u2015-\u2016 \u2020-\u2021 | |
55 | \u203B \u2074 \u207F \u2081-\u2084 \u2105 \u2113 \u2121-\u2122 | |
56 | \u212B \u2140 \u2154-\u2155 \u215B \u215E \u2160-\u216B \u2170-\u2179 | |
57 | \u2190-\u2199 \u21D2 \u21D4 \u2200 \u2202-\u2203 \u2207-\u2208 | |
58 | \u220B \u220F \u2211 \u2215 \u221A \u221D-\u2220 \u2223 \u2225 | |
59 | \u2227-\u222C \u222E \u2234-\u2237 \u223C-\u223D \u2248 \u224C | |
60 | \u2252 \u2260-\u2261 \u2264-\u2267 \u226A-\u226B \u226E-\u226F | |
61 | \u2282-\u2283 \u2286-\u2287 \u2295 \u2299 \u22A5 \u22BF \u2312 | |
62 | \u2460-\u24BF \u24D0-\u24E9 \u24EB-\u24FE \u2500-\u254B \u2550-\u2574 | |
63 | \u2580-\u258F \u2592-\u2595 \u25A0-\u25A1 \u25A3-\u25A9 \u25B2-\u25B3 | |
64 | \u25B6-\u25B7 \u25BC-\u25BD \u25C0-\u25C1 \u25C6-\u25C8 \u25CB | |
65 | \u25CE-\u25D1 \u25E2-\u25E5 \u25EF \u2605-\u2606 \u2609 \u260E-\u260F | |
66 | \u2616-\u2617 \u261C \u261E \u2640 \u2642 \u2660-\u2661 \u2663-\u2665 | |
67 | \u2667-\u266A \u266C-\u266D \u266F \uFFFD]; | |
68 | ||
69 | $ZW = [ \u200B]; | |
70 | ||
71 | $SG = [ \uD800-\uDFFF]; | |
72 | ||
73 | $AL = [ \u0023 \u0026 \u002A \u003C-\u003E \u0040-\u005A \u005E-\u007A \u007E | |
74 | \u00A6 \u00A9 \u00AC \u00AE-\u00AF \u00B5 \u00C0-\u00C5 \u00C7-\u00CF | |
75 | \u00D1-\u00D6 \u00D9-\u00DD \u00E2-\u00E5 \u00E7 \u00EB \u00EE-\u00EF | |
76 | \u00F1 \u00F4-\u00F6 \u00FB \u00FD \u00FF-\u0100 \u0102-\u0110 | |
77 | \u0112 \u0114-\u011A \u011C-\u0125 \u0128-\u012A \u012C-\u0130 | |
78 | \u0134-\u0137 \u0139-\u013E \u0143 \u0145-\u0147 \u014B-\u014C | |
79 | \u014E-\u0151 \u0154-\u0165 \u0168-\u016A \u016C-\u01CD \u01CF | |
80 | \u01D1 \u01D3 \u01D5 \u01D7 \u01D9 \u01DB \u01DD-\u0220 \u0222-\u0233 | |
81 | \u0250 \u0252-\u0260 \u0262-\u02AD \u02B0-\u02C6 \u02CE-\u02CF | |
82 | \u02D1-\u02D7 \u02DC \u02DE-\u02EE \u0374-\u0375 \u037A \u037E | |
83 | \u0384-\u038A \u038C \u038E-\u0390 \u03AA-\u03B0 \u03C2 \u03CA-\u03CE | |
84 | \u03D0-\u03F6 \u0400 \u0402-\u040F \u0450 \u0452-\u0482 \u048A-\u04CE | |
85 | \u04D0-\u04F5 \u04F8-\u04F9 \u0500-\u050F \u0531-\u0556 \u0559-\u055F | |
86 | \u0561-\u0587 \u05BE \u05C0 \u05C3 \u05D0-\u05EA \u05F0-\u05F4 | |
87 | \u060C \u061B \u061F \u0621-\u063A \u0640-\u064A \u066A-\u066F | |
88 | \u0671-\u06D5 \u06E5-\u06E6 \u06E9 \u06FA-\u06FE \u0700-\u070D | |
89 | \u0710 \u0712-\u072C \u0780-\u07A5 \u07B1 \u0905-\u0939 \u093D | |
90 | \u0950 \u0958-\u0961 \u0964-\u0965 \u0970 \u0985-\u098C \u098F-\u0990 | |
91 | \u0993-\u09A8 \u09AA-\u09B0 \u09B2 \u09B6-\u09B9 \u09DC-\u09DD | |
92 | \u09DF-\u09E1 \u09F0-\u09F1 \u09F4-\u09FA \u0A05-\u0A0A \u0A0F-\u0A10 | |
93 | \u0A13-\u0A28 \u0A2A-\u0A30 \u0A32-\u0A33 \u0A35-\u0A36 \u0A38-\u0A39 | |
94 | \u0A59-\u0A5C \u0A5E \u0A72-\u0A74 \u0A85-\u0A8B \u0A8D \u0A8F-\u0A91 | |
95 | \u0A93-\u0AA8 \u0AAA-\u0AB0 \u0AB2-\u0AB3 \u0AB5-\u0AB9 \u0ABD | |
96 | \u0AD0 \u0AE0 \u0B05-\u0B0C \u0B0F-\u0B10 \u0B13-\u0B28 \u0B2A-\u0B30 | |
97 | \u0B32-\u0B33 \u0B36-\u0B39 \u0B3D \u0B5C-\u0B5D \u0B5F-\u0B61 | |
98 | \u0B70 \u0B83 \u0B85-\u0B8A \u0B8E-\u0B90 \u0B92-\u0B95 \u0B99-\u0B9A | |
99 | \u0B9C \u0B9E-\u0B9F \u0BA3-\u0BA4 \u0BA8-\u0BAA \u0BAE-\u0BB5 | |
100 | \u0BB7-\u0BB9 \u0BF0-\u0BF2 \u0C05-\u0C0C \u0C0E-\u0C10 \u0C12-\u0C28 | |
101 | \u0C2A-\u0C33 \u0C35-\u0C39 \u0C60-\u0C61 \u0C85-\u0C8C \u0C8E-\u0C90 | |
102 | \u0C92-\u0CA8 \u0CAA-\u0CB3 \u0CB5-\u0CB9 \u0CDE \u0CE0-\u0CE1 | |
103 | \u0D05-\u0D0C \u0D0E-\u0D10 \u0D12-\u0D28 \u0D2A-\u0D39 \u0D60-\u0D61 | |
104 | \u0D85-\u0D96 \u0D9A-\u0DB1 \u0DB3-\u0DBB \u0DBD \u0DC0-\u0DC6 | |
105 | \u0DF4 \u0E4F \u0F00-\u0F0A \u0F0D-\u0F17 \u0F1A-\u0F1F \u0F2A-\u0F34 | |
106 | \u0F36 \u0F38 \u0F40-\u0F47 \u0F49-\u0F6A \u0F85 \u0F88-\u0F8B | |
107 | \u0FBE-\u0FC5 \u0FC7-\u0FCC \u0FCF \u104A-\u104F \u10A0-\u10C5 | |
108 | \u10D0-\u10F8 \u10FB \u1200-\u1206 \u1208-\u1246 \u1248 \u124A-\u124D | |
109 | \u1250-\u1256 \u1258 \u125A-\u125D \u1260-\u1286 \u1288 \u128A-\u128D | |
110 | \u1290-\u12AE \u12B0 \u12B2-\u12B5 \u12B8-\u12BE \u12C0 \u12C2-\u12C5 | |
111 | \u12C8-\u12CE \u12D0-\u12D6 \u12D8-\u12EE \u12F0-\u130E \u1310 | |
112 | \u1312-\u1315 \u1318-\u131E \u1320-\u1346 \u1348-\u135A \u1362-\u1368 | |
113 | \u1372-\u137C \u13A0-\u13F4 \u1401-\u1676 \u1681-\u169A \u16A0-\u16F0 | |
114 | \u1700-\u170C \u170E-\u1711 \u1720-\u1731 \u1735-\u1736 \u1740-\u1751 | |
115 | \u1760-\u176C \u176E-\u1770 \u17DC \u1800-\u1805 \u1807-\u180A | |
116 | \u1820-\u1877 \u1880-\u18A8 \u1E00-\u1E9B \u1EA0-\u1EF9 \u1F00-\u1F15 | |
117 | \u1F18-\u1F1D \u1F20-\u1F45 \u1F48-\u1F4D \u1F50-\u1F57 \u1F59 | |
118 | \u1F5B \u1F5D \u1F5F-\u1F7D \u1F80-\u1FB4 \u1FB6-\u1FC4 \u1FC6-\u1FD3 | |
119 | \u1FD6-\u1FDB \u1FDD-\u1FEF \u1FF2-\u1FF4 \u1FF6-\u1FFE \u2017 | |
120 | \u2022-\u2023 \u2038 \u203D-\u2043 \u2047-\u2052 \u2057 \u2061-\u2063 | |
121 | \u2070-\u2071 \u2075-\u207C \u2080 \u2085-\u208C \u2100-\u2102 | |
122 | \u2104 \u2106-\u2108 \u210A-\u2112 \u2114-\u2115 \u2117-\u2120 | |
123 | \u2123-\u2125 \u2127-\u212A \u212C-\u213A \u213D-\u213F \u2141-\u214B | |
124 | \u2153 \u2156-\u215A \u215C-\u215D \u215F \u216C-\u216F \u217A-\u2183 | |
125 | \u219A-\u21D1 \u21D3 \u21D5-\u21FF \u2201 \u2204-\u2206 \u2209-\u220A | |
126 | \u220C-\u220E \u2210 \u2214 \u2216-\u2219 \u221B-\u221C \u2221-\u2222 | |
127 | \u2224 \u2226 \u222D \u222F-\u2233 \u2238-\u223B \u223E-\u2247 | |
128 | \u2249-\u224B \u224D-\u2251 \u2253-\u225F \u2262-\u2263 \u2268-\u2269 | |
129 | \u226C-\u226D \u2270-\u2281 \u2284-\u2285 \u2288-\u2294 \u2296-\u2298 | |
130 | \u229A-\u22A4 \u22A6-\u22BE \u22C0-\u2311 \u2313-\u2328 \u232B-\u23B3 | |
131 | \u23B7-\u23CE \u2400-\u2426 \u2440-\u244A \u24C0-\u24CF \u24EA | |
132 | \u254C-\u254F \u2575-\u257F \u2590-\u2591 \u2596-\u259F \u25A2 | |
133 | \u25AA-\u25B1 \u25B4-\u25B5 \u25B8-\u25BB \u25BE-\u25BF \u25C2-\u25C5 | |
134 | \u25C9-\u25CA \u25CC-\u25CD \u25D2-\u25E1 \u25E6-\u25EE \u25F0-\u2604 | |
135 | \u2607-\u2608 \u260A-\u260D \u2610-\u2613 \u2619-\u261B \u261D | |
136 | \u261F-\u263F \u2641 \u2643-\u265F \u2662 \u2666 \u266B \u266E | |
137 | \u2670-\u267D \u2680-\u2689 \u2701-\u2704 \u2706-\u2709 \u270C-\u2727 | |
138 | \u2729-\u274B \u274D \u274F-\u2752 \u2756 \u2758-\u275A \u2761 | |
139 | \u2764-\u2767 \u2776-\u2794 \u2798-\u27AF \u27B1-\u27BE \u27D0-\u27E5 | |
140 | \u27F0-\u2982 \u2999-\u29D7 \u29DC-\u29FB \u29FE-\u2AFF \uFB00-\uFB06 | |
141 | \uFB13-\uFB17 \uFB1D \uFB1F-\uFB36 \uFB38-\uFB3C \uFB3E \uFB40-\uFB41 | |
142 | \uFB43-\uFB44 \uFB46-\uFBB1 \uFBD3-\uFD3D \uFD50-\uFD8F \uFD92-\uFDC7 | |
143 | \uFDF0-\uFDFB \uFE70-\uFE74 \uFE76-\uFEFC \uFF66 \uFF71-\uFF9D | |
144 | \uFFA0-\uFFBE \uFFC2-\uFFC7 \uFFCA-\uFFCF \uFFD2-\uFFD7 \uFFDA-\uFFDC | |
145 | \uFFE8-\uFFEE \U00010300-\U0001031E \U00010320-\U00010323 \U00010330-\U0001034A | |
146 | \U00010400-\U00010425 \U00010428-\U0001044D \U0001D000-\U0001D0F5 | |
147 | \U0001D100-\U0001D126 \U0001D12A-\U0001D164 \U0001D16A-\U0001D16C | |
148 | \U0001D183-\U0001D184 \U0001D18C-\U0001D1A9 \U0001D1AE-\U0001D1DD | |
149 | \U0001D400-\U0001D454 \U0001D456-\U0001D49C \U0001D49E-\U0001D49F | |
150 | \U0001D4A2 \U0001D4A5-\U0001D4A6 \U0001D4A9-\U0001D4AC \U0001D4AE-\U0001D4B9 | |
151 | \U0001D4BB \U0001D4BD-\U0001D4C0 \U0001D4C2-\U0001D4C3 \U0001D4C5-\U0001D505 | |
152 | \U0001D507-\U0001D50A \U0001D50D-\U0001D514 \U0001D516-\U0001D51C | |
153 | \U0001D51E-\U0001D539 \U0001D53B-\U0001D53E \U0001D540-\U0001D544 | |
154 | \U0001D546 \U0001D54A-\U0001D550 \U0001D552-\U0001D6A3 \U0001D6A8-\U0001D7C9]; | |
155 | ||
156 | $OP = [ \u0028 \u005B \u007B \u0F3A \u0F3C \u169B \u201A \u201E \u2045 \u207D | |
157 | \u208D \u2329 \u23B4 \u2768 \u276A \u276C \u276E \u2770 \u2772 | |
158 | \u2774 \u27E6 \u27E8 \u27EA \u2983 \u2985 \u2987 \u2989 \u298B | |
159 | \u298D \u298F \u2991 \u2993 \u2995 \u2997 \u29D8 \u29DA \u29FC | |
160 | \u3008 \u300A \u300C \u300E \u3010 \u3014 \u3016 \u3018 \u301A | |
161 | \u301D \uFD3E \uFE35 \uFE37 \uFE39 \uFE3B \uFE3D \uFE3F \uFE41 | |
162 | \uFE43 \uFE59 \uFE5B \uFE5D \uFF08 \uFF3B \uFF5B \uFF5F \uFF62]; | |
163 | ||
164 | $BK = [ \u000C \u2028-\u2029]; | |
165 | ||
166 | $PO = [ \u0025 \u00A2 \u00B0 \u2030-\u2037 \u20A7 \u2103 \u2109 \u2126 \uFDFC | |
167 | \uFE6A \uFF05 \uFFE0]; | |
168 | ||
169 | $NS = [ \u0E5A-\u0E5B \u17D4 \u17D6-\u17DA \u203C \u2044 \u3005 \u301C \u303B-\u303C | |
170 | \u3041 \u3043 \u3045 \u3047 \u3049 \u3063 \u3083 \u3085 \u3087 | |
171 | \u308E \u3095-\u3096 \u309B-\u309E \u30A0-\u30A1 \u30A3 \u30A5 | |
172 | \u30A7 \u30A9 \u30C3 \u30E3 \u30E5 \u30E7 \u30EE \u30F5-\u30F6 | |
173 | \u30FB \u30FD \u31F0-\u31FF \uFE54-\uFE55 \uFF1A-\uFF1B \uFF65 | |
174 | \uFF67-\uFF70 \uFF9E-\uFF9F]; | |
175 | ||
176 | $CL = [ \u0029 \u005D \u007D \u0F3B \u0F3D \u169C \u2046 \u207E \u208E \u232A | |
177 | \u23B5 \u2769 \u276B \u276D \u276F \u2771 \u2773 \u2775 \u27E7 | |
178 | \u27E9 \u27EB \u2984 \u2986 \u2988 \u298A \u298C \u298E \u2990 | |
179 | \u2992 \u2994 \u2996 \u2998 \u29D9 \u29DB \u29FD \u3001-\u3002 | |
180 | \u3009 \u300B \u300D \u300F \u3011 \u3015 \u3017 \u3019 \u301B | |
181 | \u301E-\u301F \uFD3F \uFE36 \uFE38 \uFE3A \uFE3C \uFE3E \uFE40 | |
182 | \uFE42 \uFE44 \uFE50 \uFE52 \uFE5A \uFE5C \uFE5E \uFF09 \uFF0C | |
183 | \uFF0E \uFF3D \uFF5D \uFF60-\uFF61 \uFF63-\uFF64]; | |
184 | ||
185 | $NU = [ \u0030-\u0039 \u0660-\u0669 \u06F0-\u06F9 \u0966-\u096F \u09E6-\u09EF | |
186 | \u0A66-\u0A6F \u0AE6-\u0AEF \u0B66-\u0B6F \u0BE7-\u0BEF \u0C66-\u0C6F | |
187 | \u0CE6-\u0CEF \u0D66-\u0D6F \u0E50-\u0E59 \u0ED0-\u0ED9 \u0F20-\u0F29 | |
188 | \u1040-\u1049 \u1369-\u1371 \u17E0-\u17E9 \u1810-\u1819 \U0001D7CE-\U0001D7FF]; | |
189 | ||
190 | $CM = [ \u0000-\u0008 \u000B \u000E-\u001F \u007F-\u009F \u0300-\u034F \u0360-\u036F | |
191 | \u0483-\u0486 \u0488-\u0489 \u0591-\u05A1 \u05A3-\u05B9 \u05BB-\u05BD | |
192 | \u05BF \u05C1-\u05C2 \u05C4 \u064B-\u0655 \u0670 \u06D6-\u06E4 | |
193 | \u06E7-\u06E8 \u06EA-\u06ED \u070F \u0711 \u0730-\u074A \u07A6-\u07B0 | |
194 | \u0901-\u0903 \u093C \u093E-\u094D \u0951-\u0954 \u0962-\u0963 | |
195 | \u0981-\u0983 \u09BC \u09BE-\u09C4 \u09C7-\u09C8 \u09CB-\u09CD | |
196 | \u09D7 \u09E2-\u09E3 \u0A02 \u0A3C \u0A3E-\u0A42 \u0A47-\u0A48 | |
197 | \u0A4B-\u0A4D \u0A70-\u0A71 \u0A81-\u0A83 \u0ABC \u0ABE-\u0AC5 | |
198 | \u0AC7-\u0AC9 \u0ACB-\u0ACD \u0B01-\u0B03 \u0B3C \u0B3E-\u0B43 | |
199 | \u0B47-\u0B48 \u0B4B-\u0B4D \u0B56-\u0B57 \u0B82 \u0BBE-\u0BC2 | |
200 | \u0BC6-\u0BC8 \u0BCA-\u0BCD \u0BD7 \u0C01-\u0C03 \u0C3E-\u0C44 | |
201 | \u0C46-\u0C48 \u0C4A-\u0C4D \u0C55-\u0C56 \u0C82-\u0C83 \u0CBE-\u0CC4 | |
202 | \u0CC6-\u0CC8 \u0CCA-\u0CCD \u0CD5-\u0CD6 \u0D02-\u0D03 \u0D3E-\u0D43 | |
203 | \u0D46-\u0D48 \u0D4A-\u0D4D \u0D57 \u0D82-\u0D83 \u0DCA \u0DCF-\u0DD4 | |
204 | \u0DD6 \u0DD8-\u0DDF \u0DF2-\u0DF3 \u0E31 \u0E34-\u0E3A \u0E47-\u0E4E | |
205 | \u0EB1 \u0EB4-\u0EB9 \u0EBB-\u0EBC \u0EC8-\u0ECD \u0F18-\u0F19 | |
206 | \u0F35 \u0F37 \u0F39 \u0F3E-\u0F3F \u0F71-\u0F84 \u0F86-\u0F87 | |
207 | \u0F90-\u0F97 \u0F99-\u0FBC \u0FC6 \u102C-\u1032 \u1036-\u1039 | |
208 | \u1056-\u1059 \u1160-\u11A2 \u11A8-\u11F9 \u1712-\u1714 \u1732-\u1734 | |
209 | \u1752-\u1753 \u1772-\u1773 \u17B4-\u17D3 \u180B-\u180E \u18A9 | |
210 | \u200C-\u200F \u202A-\u202E \u206A-\u206F \u20D0-\u20EA \u302A-\u302F | |
211 | \u3099-\u309A \uFB1E \uFE00-\uFE0F \uFE20-\uFE23 \uFFF9-\uFFFB | |
212 | \U0001D165-\U0001D169 \U0001D16D-\U0001D182 \U0001D185-\U0001D18B | |
213 | \U0001D1AA-\U0001D1AD \U000E0001 \U000E0020-\U000E007F]; | |
214 | ||
215 | $PR = [ \u0024 \u002B \u005C \u00A3-\u00A5 \u00B1 \u09F2-\u09F3 \u0E3F \u17DB | |
216 | \u20A0-\u20A6 \u20A8-\u20B1 \u2116 \u2212-\u2213 \uFE69 \uFF04 | |
217 | \uFFE1 \uFFE5-\uFFE6]; | |
218 | ||
219 | $B2 = [ \u2014]; | |
220 | ||
221 | $ID = [ \u1100-\u1159 \u115F \u2E80-\u2E99 \u2E9B-\u2EF3 \u2F00-\u2FD5 \u2FF0-\u2FFB | |
222 | \u3000 \u3003-\u3004 \u3006-\u3007 \u3012-\u3013 \u3020-\u3029 | |
223 | \u3030-\u303A \u303D-\u303F \u3042 \u3044 \u3046 \u3048 \u304A-\u3062 | |
224 | \u3064-\u3082 \u3084 \u3086 \u3088-\u308D \u308F-\u3094 \u309F | |
225 | \u30A2 \u30A4 \u30A6 \u30A8 \u30AA-\u30C2 \u30C4-\u30E2 \u30E4 | |
226 | \u30E6 \u30E8-\u30ED \u30EF-\u30F4 \u30F7-\u30FA \u30FC \u30FE-\u30FF | |
227 | \u3105-\u312C \u3131-\u318E \u3190-\u31B7 \u3200-\u321C \u3220-\u3243 | |
228 | \u3251-\u327B \u327F-\u32CB \u32D0-\u32FE \u3300-\u3376 \u337B-\u33DD | |
229 | \u33E0-\u33FE \u3400-\u4DB5 \u4E00-\u9FA5 \uA000-\uA48C \uA490-\uA4C6 | |
230 | \uAC00-\uD7A3 \uF900-\uFA2D \uFA30-\uFA6A \uFE30-\uFE34 \uFE45-\uFE46 | |
231 | \uFE49-\uFE4F \uFE51 \uFE58 \uFE5F-\uFE66 \uFE68 \uFE6B \uFF02-\uFF03 | |
232 | \uFF06-\uFF07 \uFF0A-\uFF0B \uFF0D \uFF0F-\uFF19 \uFF1C-\uFF1E | |
233 | \uFF20-\uFF3A \uFF3C \uFF3E-\uFF5A \uFF5C \uFF5E \uFFE2-\uFFE4 | |
234 | \U00020000-\U0002A6D6 \U0002F800-\U0002FA1D]; | |
235 | ||
236 | $SP = [ \u0020]; | |
237 | ||
238 | $QU = [ \u0022 \u0027 \u00AB \u00BB \u2018-\u2019 \u201B-\u201D \u201F \u2039-\u203A | |
239 | \u23B6 \u275B-\u275E]; | |
240 | ||
241 | $CR = [ \u000D]; | |
242 | ||
243 | $GL = [ \u00A0 \u0F0C \u2007 \u2011 \u202F \u2060 \uFEFF]; | |
244 | ||
245 | ############ End of Script-Generated Definitions ####################### | |
246 | ||
247 | ||
248 | ||
249 | # | |
250 | # Thai Dictionary related definitions and rules | |
251 | # | |
252 | ||
253 | $dictionary = [\u0e01-\u0e2e \u0e30-\u0e3a \u0e40-\u0e44 \u0e47-\u0e4e]; # Thai characters matched as runs by $ThaiRange; does not include U+0E2F and U+0E46, which have their own classes below. NOTE (original author): this rule breaks the iterator with mixed Thai and English | |
254 | $paiyannoi = [\u0e2f]; # U+0E2F THAI CHARACTER PAIYANNOI | |
255 | $maiyamok = [\u0e46]; # U+0E46 THAI CHARACTER MAIYAMOK; accepted as part of $Closings | |
256 | $thai_etc = $paiyannoi \u0e25 $paiyannoi; # paiyannoi, U+0E25 THAI CHARACTER LO LING, paiyannoi: the Thai "etc." abbreviation | |
257 | ||
258 | ||
259 | ||
260 | ||
261 | # | |
262 | # Character classes from TR 29. Needed for finding characters. | |
263 | # | |
264 | # $Extend is all combining characters, and none of the other cruft that | |
265 | # TR14 puts into $CM, which is its concept of combining marks. | |
266 | # | |
267 | $Extend = # From UNIDATA/DerivedCoreProperties.txt | |
268 | [\u0300-\u034E \u0360-\u036F \u0483-\u0486 \u0488-\u0489 \u0591-\u05A1 \u05A3-\u05B9 | |
269 | \u05BB-\u05BD \u05BF \u05C1-\u05C2 \u05C4 \u064B-\u0655 \u0670 \u06D6-\u06DC | |
270 | \u06DE \u06DF-\u06E4 \u06E7-\u06E8 \u06EA-\u06ED \u0711 \u0730-\u074A | |
271 | \u07A6-\u07B0 \u0901-\u0902 \u0903 \u093C \u093E-\u0940 \u0941-\u0948 | |
272 | \u0949-\u094C \u0951-\u0954 \u0962-\u0963 \u0981 \u0982-\u0983 \u09BC | |
273 | \u09BE-\u09C0 \u09C1-\u09C4 \u09C7-\u09C8 \u09CB-\u09CC \u09D7 \u09E2-\u09E3 | |
274 | \u0A02 \u0A3C \u0A3E-\u0A40 \u0A41-\u0A42 \u0A47-\u0A48 \u0A4B-\u0A4C | |
275 | \u0A70-\u0A71 \u0A81-\u0A82 \u0A83 \u0ABC \u0ABE-\u0AC0 \u0AC1-\u0AC5 | |
276 | \u0AC7-\u0AC8 \u0AC9 \u0ACB-\u0ACC \u0B01 \u0B02-\u0B03 \u0B3C \u0B3E | |
277 | \u0B3F \u0B40 \u0B41-\u0B43 \u0B47-\u0B48 \u0B4B-\u0B4C \u0B56 \u0B57 | |
278 | \u0B82 \u0BBE-\u0BBF \u0BC0 \u0BC1-\u0BC2 \u0BC6-\u0BC8 \u0BCA-\u0BCC \u0BD7 | |
279 | \u0C01-\u0C03 \u0C3E-\u0C40 \u0C41-\u0C44 \u0C46-\u0C48 \u0C4A-\u0C4C | |
280 | \u0C55-\u0C56 \u0C82-\u0C83 \u0CBE \u0CBF \u0CC0-\u0CC4 \u0CC6 | |
281 | \u0CC7-\u0CC8 \u0CCA-\u0CCB \u0CCC \u0CD5-\u0CD6 \u0D02-\u0D03 \u0D3E-\u0D40 | |
282 | \u0D41-\u0D43 \u0D46-\u0D48 \u0D4A-\u0D4C \u0D57 \u0D82-\u0D83 \u0DCF-\u0DD1 | |
283 | \u0DD2-\u0DD4 \u0DD6 \u0DD8-\u0DDF \u0DF2-\u0DF3 \u0E31 \u0E34-\u0E39 | |
284 | \u0E47-\u0E4E \u0EB1 \u0EB4-\u0EB9 \u0EBB-\u0EBC \u0EC8-\u0ECD \u0F18-\u0F19 | |
285 | \u0F35 \u0F37 \u0F39 \u0F3E-\u0F3F \u0F71-\u0F7E \u0F7F \u0F80-\u0F84 | |
286 | \u0F86-\u0F87 \u0F90-\u0F97 \u0F99-\u0FBC \u0FC6 \u102C \u102D-\u1030 \u1031 | |
287 | \u1032 \u1036-\u1037 \u1038 \u1056-\u1057 \u1058-\u1059 \u1712-\u1714 | |
288 | \u1732-\u1734 \u1752-\u1753 \u1772-\u1773 \u17B4-\u17B6 \u17B7-\u17BD | |
289 | \u17BE-\u17C5 \u17C6 \u17C7-\u17C8 \u17C9-\u17D1 \u17D3 \u180B-\u180D | |
290 | \u18A9 \u20D0-\u20DC \u20DD-\u20E0 \u20E1 \u20E2-\u20E4 \u20E5-\u20EA | |
291 | \u302A-\u302F \u3099-\u309A \uFB1E \uFE00-\uFE0F \uFE20-\uFE23 \uFF9E-\uFF9F | |
292 | \U0001D165-\U0001D166 \U0001D167-\U0001D169 \U0001D16D-\U0001D172 | |
293 | \U0001D17B-\U0001D182 \U0001D185-\U0001D18B \U0001D1AA-\U0001D1AD]; | |
294 | ||
295 | ||
296 | # | |
297 | # Rule LB1. By default, treat AI (characters with ambiguous East Asian width) and | |
298 | # SA (South East Asian: Thai, Lao, Khmer) as $AL (Alphabetic) | |
299 | # | |
300 | $ALPlus = $AL | $AI | [$SA - $dictionary]; # $dictionary characters are excluded here; they are matched by $ThaiRange instead | |
301 | ||
302 | # | |
303 | # Combining Marks. X $CM* behaves as if it were X. Rule LB6. | |
304 | # TODO: This is going to produce some odd results, because of the non-combining | |
305 | # chars that are included in $CM. Use $Extend instead, where possible. | |
306 | # | |
307 | $ALcm = $ALPlus $CM*; # alphabetic (per $ALPlus) followed by any number of TR14 $CM characters | |
308 | $IDcm = $ID $CM*; # like $ALcm, this one still uses the broader $CM set | |
309 | $NUcm = $NU $Extend*; # from here down, only $Extend characters are absorbed | |
310 | $HYcm = $HY $Extend*; | |
311 | $SPcm = $SP $Extend*; | |
312 | $QUcm = $QU $Extend*; | |
313 | $POcm = $PO $Extend*; | |
314 | $OPcm = $OP $Extend*; | |
315 | $BAcm = $BA $Extend*; | |
316 | $BBcm = $BB $Extend*; | |
317 | $NScm = $NS $Extend*; | |
318 | $GLcm = $GL $Extend*; | |
319 | $B2cm = $B2 $Extend*; | |
320 | $INcm = $IN $Extend*; | |
321 | ||
322 | ||
323 | # New Lines. Always break after, never break before. | |
324 | # Rule LB 3 | |
325 | # | |
326 | # Endings. NewLine or Zero Width Space, or both. Rules 4, 5 | |
327 | # Because we never break before these things, $Endings | |
328 | # appears at the end of line break rule. | |
329 | # | |
330 | $NLF = $BK | $CR | $LF | $CR $LF; # one hard line terminator; the CR LF pair is a single terminator | |
331 | $Endings = $SPcm* $ZW* $NLF?; # trailing spaces, zero-width spaces and one terminator, all optional, so this can match nothing | |
332 | $EndingsMandatory = $SPcm* $NLF | $SPcm* $ZW $NLF?; # optional spaces, then either a $NLF, or a $ZW optionally followed by a $NLF | |
333 | ||
334 | ||
335 | # | |
336 | # Openings Sequences that can precede Words, and that should not be separated from them. | |
337 | # Rules LB 9, 10 | |
338 | # | |
339 | $Openings = (($QUcm $SPcm*)? $OPcm $SPcm*)*; # zero or more opening punctuation marks, each optionally preceded by a quote; spaces may follow the quote and the opener | |
340 | ||
341 | # | |
342 | # Closings Sequences that follow words, and that should not be separated from them, | |
343 | # Rule LB 8, 11, 15 | |
344 | $Closings = ($SPcm*( ($CL ($SPcm* $NScm)? | $EX | $IS | $SY) $Extend*) | $BAcm | $HYcm | $NScm | $maiyamok)*; # zero or more of: optional spaces then one of ($CL with an optional spaces-plus-$NS tail, $EX, $IS, $SY) and any $Extend; or a $BA, $HY, $NS or maiyamok | |
345 | ||
346 | # | |
347 | # Words. Includes mixed Alpha-numerics. | |
348 | # Rules 11a, 16, 17, 19, more or less. | |
349 | # | |
350 | $NumberInterior = $IDcm | ($NUcm | $ALcm | $IS $NUcm)+; # one ideograph, or a run of digits and letters in which an $IS may precede a digit | |
351 | $Number = $PR? ($OPcm | $HYcm)? $NumberInterior $CL? $POcm?; # Fancy Number 18: optional prefix, opener or hyphen before; optional closer and postfix after | |
352 | $Word = (($IDcm | ($ALcm | $NUcm)+) ($POcm? | $INcm?)); # Alpha-numeric. 16, 17: one ideograph or a letter/digit run, with an optional $PO or $IN tail | |
353 | $Dashes = (($B2cm $SPcm*)*); # Dashes 11a: zero or more $B2, each optionally followed by spaces | |
354 | $ThaiRange = $dictionary+ | $thai_etc; # a run of $dictionary characters, or the $thai_etc sequence | |
355 | $WordLikeThing = $Number | $Word | $Dashes | $ThaiRange; # the optional core of a $Word15 | |
356 | ||
357 | ||
358 | ||
359 | ||
360 | $Word15 = ($BBcm* ($WordLikeThing)? ($BAcm | $HYcm | $NScm)*) | # Rule 15. Stuff sticks around words: $BB before, $BA / $HY / $NS after an optional $WordLikeThing. | |
361 | [^[:Cc:] $BK $CR $LF $ZW $SP $GL] $Extend* | # Fallbacks: allow characters that don't meet the | |
362 | [^$BK $CR $LF $ZW $SP $GL ]; # more elaborate definitions for WORD | |
363 | # to be glued. The first fallback also excludes [:Cc:] and absorbs trailing $Extend. | |
364 | ||
365 | $GluedWord = ($GLcm | $QUcm)? $Word15 (($GLcm | $QUcm) $Word15)*; # "Glue": a $GL or $QU before or between $Word15 units joins them into a single match. | |
366 | # Rules 13, 14 | |
367 | ||
368 | # | |
369 | # The actual rules, a combination of everything defined above. | |
370 | # | |
371 | $Openings $GluedWord $Closings $paiyannoi? $EndingsMandatory; # segment that ends with a mandatory ending ($NLF and/or $ZW) | |
372 | $Openings $GluedWord $Closings $Endings; # segment whose ending is entirely optional | |
373 | ||
374 | $Openings $GluedWord $Closings $paiyannoi / # '/' marks the break position; the text after it is look-ahead context only | |
375 | ([^\u0e25 $Extend] | \u0e25[^$paiyannoi $Extend]); # i.e. break after paiyannoi unless the following text is U+0E25 then paiyannoi (the rest of $thai_etc) | |
376 | ||
377 | ||
378 | #"$word($nbsp+$word)*$paiyannoi/([^[\u0e25$_ignore_]]|" | |
379 | # + "\u0e25[^$paiyannoi$_ignore_]);" | |
380 | ||
381 | ||
382 | # | |
383 | # Reverse Rules. | |
384 | # | |
385 | # Back up to a hard break or a space that will cause a boundary. | |
386 | # Not all spaces cause line breaks. $SpaceGlue represents a sequence | |
387 | # containing a space that may inhibit a break from occurring. | |
388 | # | |
389 | $SpaceGlue = ([$ZW $CL $IS $NS $OP] ($Extend* $SP)) | (($Extend* $SP)+ $OP); # NOTE(review): appears to be written in backwards (reverse-iteration) order, like the $LF $CR in the rule below - confirm | |
390 | $ClumpingChars = [^$SP $BK $CR $LF]; # any character that is not a space or a hard line terminator | |
391 | ||
392 | !. . $ClumpingChars* ($SpaceGlue $ClumpingChars*)* (. | $LF $CR); # the leading '!' marks this as the reverse rule; $LF $CR is presumably CR LF as seen when moving backwards | |
393 |