]>
Commit | Line | Data |
---|---|---|
2ca993e8 | 1 | # |
f3c0d7a5 A |
2 | # Copyright (C) 2016 and later: Unicode, Inc. and others. |
3 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
4 | ||
2ca993e8 A |
5 | # Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved. |
6 | # file: sentence.txt | |
7 | ||
8 | type = sentence; # kind of boundary these rules describe; one of grapheme | word | line | sentence | |
9 | locale = en; | |
10 | ||
11 | CR = [\p{Sentence_Break = CR}]; | |
12 | LF = [\p{Sentence_Break = LF}]; | |
13 | Extend = [\p{Sentence_Break = Extend}]; | |
14 | Sep = [\p{Sentence_Break = Sep}]; | |
15 | Format = [\p{Sentence_Break = Format}]; | |
16 | Sp = [\p{Sentence_Break = Sp}]; | |
17 | Lower = [\p{Sentence_Break = Lower}]; | |
18 | Upper = [\p{Sentence_Break = Upper}]; | |
19 | OLetter = [\p{Sentence_Break = OLetter}]; | |
20 | Numeric = [\p{Sentence_Break = Numeric}]; | |
21 | ATerm = [\p{Sentence_Break = ATerm}]; | |
22 | SContinue = [\p{Sentence_Break = SContinue}]; | |
23 | STerm = [\p{Sentence_Break = STerm}]; | |
24 | Close = [\p{Sentence_Break = Close}]; | |
25 | ||
26 | ParaSep = [Sep CR LF]; | |
27 | SATerm = [STerm ATerm]; | |
28 | ExtFmt = [Extend Format]; | |
29 | ||
30 | # SB2: ÷ eot (break after the last character of the text). | |
31 | # Conventional regular expression matching for '$' as end-of-text also matches | |
32 | # at a line separator just preceding the physical end of text, which is not wanted here. | |
33 | # Instead, use a negative look-ahead asserting that no character follows. | |
34 | SB2: . ÷ (?!.); | |
35 | ||
36 | SB3: CR LF; | |
37 | SB4: ParaSep ÷; | |
38 | ||
39 | # SB5: ignore Format and Extend characters. There is no separate SB5 rule; it is expressed by the ExtFmt* terms written into the rules below. | |
40 | ||
41 | SB6: ATerm ExtFmt* Numeric; | |
42 | SB7: (Upper | Lower) ExtFmt* ATerm ExtFmt* Upper; | |
43 | SB8: ATerm ExtFmt* (Close ExtFmt*)* (Sp ExtFmt*)* ([^OLetter Upper Lower ParaSep SATerm ExtFmt] ExtFmt *)* Lower; | |
44 | SB8a: SATerm ExtFmt* (Close ExtFmt*)* (Sp ExtFmt*)* (SContinue | SATerm); | |
45 | ||
46 | SB9: SATerm ExtFmt* (Close ExtFmt*)* (Sp ExtFmt*)* (CR LF | ParaSep)? ÷; | |
47 | # The SB9 rule above also covers SB10 and SB11: trailing Sp and at most one paragraph separator are absorbed before the break. | |
48 | ||
49 | SB12: . ExtFmt* [^ExtFmt]?; | |
50 |