2 # Copyright (c) 2016, International Business Machines Corporation and others. All Rights Reserved.
# Selects which kind of boundary this rule file describes.
5 type = sentence; # one of grapheme | word | line | sentence
# Base character classes: one named set per Sentence_Break property value.
# Each set name matches the property value it selects.
8 CR = [\p{Sentence_Break = CR}];
9 LF = [\p{Sentence_Break = LF}];
10 Extend = [\p{Sentence_Break = Extend}];
11 Sep = [\p{Sentence_Break = Sep}];
12 Format = [\p{Sentence_Break = Format}];
13 Sp = [\p{Sentence_Break = Sp}];
14 Lower = [\p{Sentence_Break = Lower}];
15 Upper = [\p{Sentence_Break = Upper}];
16 OLetter = [\p{Sentence_Break = OLetter}];
17 Numeric = [\p{Sentence_Break = Numeric}];
18 ATerm = [\p{Sentence_Break = ATerm}];
19 SContinue = [\p{Sentence_Break = SContinue}];
20 STerm = [\p{Sentence_Break = STerm}];
21 Close = [\p{Sentence_Break = Close}];
# Derived classes: unions of the base sets defined above.
# ParaSep: any of Sep, CR or LF.
23 ParaSep = [Sep CR LF];
# SATerm: either terminator class, STerm or ATerm.
24 SATerm = [STerm ATerm];
# ExtFmt: Extend or Format; the rules use ExtFmt* to let these characters
# trail any element of a pattern.
25 ExtFmt = [Extend Format];
28 # Conventional regular expression matching for '$' as end-of-text also matches
29 # at a line separator just preceding the physical end of text.
30 # Instead, use a look-ahead assertion that there is no following character.
36 # SB5: ignore Format and Extend characters; this is done by the ExtFmt* terms embedded in the rules below.
# Rules SB6..SB12. Each rule has the form "name: pattern;", where the pattern is
# written over the character sets defined earlier in this file. An ExtFmt* after
# an element allows any number of Extend/Format characters to follow it.
# NOTE(review): presumably '÷' marks a break position and a pattern without '÷'
# matches a span that contains no break, with rules tried in file order;
# confirm against the rule-file format documentation.
#
# SB6: ATerm followed by Numeric.
38 SB6: ATerm ExtFmt* Numeric;
# SB7: Upper or Lower, then ATerm, then Upper.
39 SB7: (Upper | Lower) ExtFmt* ATerm ExtFmt* Upper;
# SB8: ATerm, any number of Close, any number of Sp, then any run of characters
# that are none of OLetter/Upper/Lower/ParaSep/SATerm/ExtFmt, ending in Lower.
40 SB8: ATerm ExtFmt* (Close ExtFmt*)* (Sp ExtFmt*)* ([^OLetter Upper Lower ParaSep SATerm ExtFmt] ExtFmt *)* Lower;
# SB8a: SATerm, any number of Close, any number of Sp, then SContinue or SATerm.
41 SB8a: SATerm ExtFmt* (Close ExtFmt*)* (Sp ExtFmt*)* (SContinue | SATerm);
# SB9: SATerm, any number of Close, any number of Sp, then an optional paragraph
# separator (CR LF is listed before ParaSep so the pair can match as one unit);
# '÷' is placed at the end of the pattern.
43 SB9: SATerm ExtFmt* (Close ExtFmt*)* (Sp ExtFmt*)* (CR LF | ParaSep)? ÷;
44 # Also covers SB10, SB11.
# SB12: any single character, its trailing ExtFmt characters, and optionally one
# following non-ExtFmt character.
# NOTE(review): looks like the catch-all rule for text that no earlier rule
# matched; confirm.
46 SB12: . ExtFmt* [^ExtFmt]?;