]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/brkitr/rules/title.txt
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / data / brkitr / rules / title.txt
CommitLineData
f3c0d7a5
A
1# Copyright (C) 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
2ca993e8 4# Copyright (c) 2002-2015, International Business Machines Corporation and
b75a7d8f
A
5# others. All Rights Reserved.
6#
7# Title Casing Break Rules
8#
9
57a6839d 10
b75a7d8f
A
# Character sets used by the rules below.
# $CaseIgnorable: nonspacing and enclosing marks (Mn, Me), format characters
#   (Cf), modifier letters and symbols (Lm, Sk), plus U+0027 APOSTROPHE,
#   U+00AD SOFT HYPHEN and U+2019 RIGHT SINGLE QUOTATION MARK.
# $Cased: characters with the Upper_Case or Lower_Case property, or titlecase
#   letters (Lt), excluding anything in $CaseIgnorable.
# $NotCased: every character that is neither $Cased nor $CaseIgnorable.
11$CaseIgnorable = [[:Mn:][:Me:][:Cf:][:Lm:][:Sk:] \u0027 \u00AD \u2019];
12$Cased = [[:Upper_Case:][:Lower_Case:][:Lt:] - $CaseIgnorable];
57a6839d 13$NotCased = [[^ $Cased] - $CaseIgnorable];
b75a7d8f 14
57a6839d
A
15!!forward;
16
17# If the iterator begins on CaseIgnorable or NotCased characters, advance
18# past all of them. This can occur at the start-of-text, or after
19# application of the safe-reverse rule.
20
21($CaseIgnorable | $NotCased)*;
22
23# Normal exact forward rule: beginning at a cased character (the start
24# of a word), advance through the word's cased and case-ignorable
25# characters, then through the non-cased characters that follow it.
26
27$Cased ($Cased | $CaseIgnorable)* ($NotCased | $CaseIgnorable)*;
b75a7d8f 28
b75a7d8f 29
b75a7d8f 30# Reverse Rules
57a6839d 31!!reverse;
b75a7d8f 32
57a6839d
A
33# Normal rule: scanning backwards, skip non-cased characters, then a word,
34# ending on a cased character. Needs a cased character before the position.
374ca955 35
57a6839d
A
36($NotCased | $CaseIgnorable)* ($Cased | $CaseIgnorable)* $Cased;
37
38# Short rule: effective only when moving to the start of text, with no
39# word (cased character) preceding the current iteration position.
40
41($NotCased | $CaseIgnorable)*;
42
43!!safe_reverse;
44
45# Safe Reverse: the exact forward rule must not start in the middle
46# of a word, so the safe reverse skips over any Cased or CaseIgnorable
47# characters, leaving it at or before the start of a word.
48
49($Cased | $CaseIgnorable)*;
50
51!!safe_forward;
52
53# Safe Forward: nothing needs to be done, because the exact reverse rules
54# will always find valid boundaries from any starting position.
55# Still, some rule is needed, so use '.', a one-character movement.
56.;