ICU-64260.0.1.tar.gz

[apple/icu.git] / icuSources / data / brkitr / rules / word_fi_sv.txt
diff --git a/icuSources/data/brkitr/rules/word_fi_sv.txt b/icuSources/data/brkitr/rules/word_fi_sv.txt

index 4cfa8e0907ec31634d9fab667ada5511f2945dbf..cfabce1ada15b29b88b6995d4b7c587cff6cbca3 100644 (file)
--- a/icuSources/data/brkitr/rules/word_fi_sv.txt
+++ b/icuSources/data/brkitr/rules/word_fi_sv.txt
@@ -8,9 +8,7 @@
  #
  # ICU Word Break Rules, fi/sv locales (these are actually the standard UAX #29 rules)
  #      See Unicode Standard Annex #29.
-#      These rules are based on UAX #29 Revision 29 for Unicode Version 9.0
-#      with additions for Emoji Sequences from https://goo.gl/cluFCn
-#      Plus additional characters introduces with Emoji 5, http://www.unicode.org/reports/tr51/proposed.html
+#      These rules are based on UAX #29 Revision 34 for Unicode Version 12.0
  #
  # Note:  Updates to word.txt will usually need to be merged into
  #        word_POSIX.txt also.
@@ -44,7 +42,7 @@ $Double_Quote       = [\p{Word_Break = Double_Quote}];
  $MidNumLet          = [\p{Word_Break = MidNumLet}];
  $MidLetter          = [\p{Word_Break = MidLetter}];
  $MidNum             = [\p{Word_Break = MidNum}];
-$Numeric            = [\p{Word_Break = Numeric}];
+$Numeric            = [[\p{Word_Break = Numeric}] [\uFF10-\uff19]];  # Patch for ICU-12079: also treat the fullwidth digits U+FF10..U+FF19 as Numeric
  $ExtendNumLet       = [\p{Word_Break = ExtendNumLet}];
  $WSegSpace          = [\p{Word_Break = WSegSpace}];
  $Extended_Pict      = [:ExtPict:];
@@ -58,7 +56,7 @@ $Hiragana           = [:Hiragana:];
  #   5.0 or later as the definition of Complex_Context was corrected to include all
  #   characters requiring dictionary break.
  
-$Control        = [\p{Grapheme_Cluster_Break = Control}]; 
+$Control        = [\p{Grapheme_Cluster_Break = Control}];
  $HangulSyllable = [\uac00-\ud7a3];
  $ComplexContext = [:LineBreak = Complex_Context:];
  $KanaKanji      = [$Han $Hiragana $Katakana];
@@ -70,7 +68,7 @@ $ALetterPlus  = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
  
  
  #
-#  Rules 4    Ignore Format and Extend characters, 
+#  Rules 4    Ignore Format and Extend characters,
  #             except when they appear at the beginning of a region of text.
  #
  # TODO: check if handling of katakana in dictionary makes rules incorrect/void
@@ -148,7 +146,7 @@ $NumericEx $NumericEx {100};
  
  $NumericEx ($ALetterEx | $Hebrew_LetterEx) {200};
  
-# rule 11 and 12 
+# rule 11 and 12
  
  $NumericEx ($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx {100};
  
@@ -180,7 +178,7 @@ $ExtendNumLetEx  $KatakanaEx     {400};    #  (13b)
  
  # special handling for CJK characters: chain for later dictionary segmentation
  $HangulSyllable $HangulSyllable {200};
-$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found 
+$KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found
  
  # Rule 999
  #     Match a single code point if no other rule applies.