]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # Copyright (C) 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
2ca993e8 | 4 | # Copyright (c) 2002-2015, International Business Machines Corporation and |
b75a7d8f A |
5 | # others. All Rights Reserved. |
6 | # | |
7 | # Title Casing Break Rules | |
8 | # The three sets below are pairwise disjoint and together cover every code point. | |
9 | ||
57a6839d | 10 | |
b75a7d8f A |
11 | $CaseIgnorable = [[:Mn:][:Me:][:Cf:][:Lm:][:Sk:] \u0027 \u00AD \u2019]; |
12 | $Cased = [[:Upper_Case:][:Lower_Case:][:Lt:] - $CaseIgnorable]; | |
13 | $NotCased = [[^ $Cased] - $CaseIgnorable]; |
b75a7d8f | 14 | |
57a6839d A |
15 | !!forward; |
16 | ||
17 | # If the iterator begins on CaseIgnorable or NotCased characters, | |
18 | # advance past the whole run. This can occur at the start of text, | |
19 | # or after application of the safe-reverse rule. | |
20 | ||
21 | ($CaseIgnorable | $NotCased)*; | |
22 | ||
23 | # Normal exact forward rule: beginning at the start of a word | |
24 | # (at a cased character), advance through the word and through | |
25 | # the uncased or case-ignorable characters following the word. | |
26 | ||
27 | $Cased ($Cased | $CaseIgnorable)* ($NotCased | $CaseIgnorable)*; | |
b75a7d8f | 28 | |
b75a7d8f | 29 | |
b75a7d8f | 30 | # Reverse Rules (the normal rule mirrors the normal forward rule) |
57a6839d | 31 | !!reverse; |
b75a7d8f | 32 | |
57a6839d A |
33 | # Normal rule: applies whenever a start-of-word (a cased character) |
34 | # precedes the current iteration position. | |
374ca955 | 35 | |
57a6839d A |
36 | ($NotCased | $CaseIgnorable)* ($Cased | $CaseIgnorable)* $Cased; |
37 | ||
38 | # Short rule: effective only when moving to the start of text, with | |
39 | # no word (cased character) preceding the current iteration position. | |
40 | ||
41 | ($NotCased | $CaseIgnorable)*; | |
42 | ||
43 | !!safe_reverse; | |
44 | ||
45 | # Safe Reverse: the exact forward rule must not start in the middle | |
46 | # of a word, so the safe reverse skips over any Cased and CaseIgnorable | |
47 | # characters, leaving the iterator just before the start of a word. | |
48 | ||
49 | ($Cased | $CaseIgnorable)*; | |
50 | ||
51 | !!safe_forward; | |
52 | ||
53 | # Safe Forward: nothing needs to be done, because the exact reverse rules | |
54 | # always find valid boundaries from any starting position. | |
55 | # Still, some rule is required, so use '.', a one-character movement. | |
56 | .; |