X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..f3c0d7a59d99c2a94c6b8822291f0e42be3773c9:/icuSources/test/testdata/casing.txt?ds=sidebyside

diff --git a/icuSources/test/testdata/casing.txt b/icuSources/test/testdata/casing.txt
index c4c3bb14..8643355a 100644
--- a/icuSources/test/testdata/casing.txt
+++ b/icuSources/test/testdata/casing.txt
@@ -1,28 +1,71 @@
 ï»¿//*******************************************************************************
+// Copyright (C) 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 //*
-//*   Copyright (C) 2002, International Business Machines
+//*   Copyright (C) 2002-2010, International Business Machines
 //*   Corporation and others.  All Rights Reserved.
 //*
 //*******************************************************************************
 
-casing {
+casing:table(nofallback) {
     Info {
-        Description { "This is test data file for string casing" }
+        Description { "This is test data file for string casing." }
 
-        LongDescription { "each item is an array with"
-                      "input string, result string, locale ID, break iterator"
-                      "the break iterator is specified as an int, same as in UBreakIteratorType:"
-                      "0=UBRK_CHARACTER  1=UBRK_WORD  2=UBRK_LINE  3=UBRK_SENTENCE  4=UBRK_TITLE  -1=default"
-                     }
+        LongDescription {
+            "each item is an array with\n"
+            "input string, result string, locale ID[, break iterator]\n"
+            "the break iterator (only for titlecasing) is specified as an int, same as in UBreakIteratorType:\n"
+            "0=UBRK_CHARACTER  1=UBRK_WORD  2=UBRK_LINE  3=UBRK_SENTENCE  4=UBRK_TITLE  -1=default (NULL=words) -2=no breaks (.*)\n"
+            "options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I  L=U_TITLECASE_NO_LOWERCASE  A=U_TITLECASE_NO_BREAK_ADJUSTMENT\n"
+        }
     }
     TestData {
+      lowercasing {
+        Headers { "Input", "Output", "Locale" }
+        Cases {
+            { " tHe QUIcK bRoWn", " the quick brown", "" },
+            { "aBIÎ£ÃÎ£/ñ¿¿ð", "abiÏÃÏ/ñ¿¿ð­", "" },
+            { "aBIÎ£ÃÎ£/ñ¿¿ð", "abÄ±ÏÃÏ/ñ¿¿ð­", "tur" } // tur: 3-letter code for Turkish
+        }
+      }
+      uppercasing {
+        Headers { "Input", "Output", "Locale" }
+        Cases {
+            { " tHe QUIcK bRoWn", " THE QUICK BROWN", "" },
+            { "aBiÏÃÏ/ï¬ñ¿¿ð­", "ABIÎ£SSÎ£/FFIñ¿¿ð", "" },
+            { "aBiÏÃÏ/ï¬ñ¿¿ð­", "ABÄ°Î£SSÎ£/FFIñ¿¿ð", "az" } // az same casing as tr
+        }
+      }
       titlecasing {
-        Headers { "Input", "Output", "Locale", "Type" }
+        Headers { "Input", "Output", "Locale", "Type", "Options" }
+        Cases {
+            { "Ê»aMeLikA huI PÅ« Ê»Ê»Ê»iA", "Ê»Amelika Hui PÅ« Ê»Ê»Ê»Ia", "", "-1", "" }, // titlecase first _cased_ letter, j4933
+            { " tHe QUIcK bRoWn", " The Quick Brown", "", "4", "" },
+            { "ÇÇÇÇÇÇÇÇÇ", "ÇÇÇÇÇÇÇÇÇ", "", "0", "" }, // UBRK_CHARACTER
+            { "Çubav ljubav", "Çubav Ljubav", "", "-1", "" }, // Lj vs. L+j
+            { "ijssel igloo IJMUIDEN", "Ijssel Igloo Ijmuiden", "", "1", "" }, // Dutch words, root locale: initial ij gets no special treatment (Ij)
+            { "ijssel igloo IJMUIDEN", "IJssel Igloo IJmuiden", "nl", "1", "" }, // Dutch (nl) locale: initial ij is titlecased as a unit (IJ)
+            { "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'", "", "-1", "" },
+            
+            { "a Ê»CaT. A Ê»dOg! Ê»eTc.", "A Ê»Cat. A Ê»Dog! Ê»Etc.", "", "-1", "" }, // default
+            { "a Ê»CaT. A Ê»dOg! Ê»eTc.", "A Ê»cat. A Ê»dog! Ê»etc.", "", "-1", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
+            { "a Ê»CaT. A Ê»dOg! Ê»eTc.", "A Ê»CaT. A Ê»dOg! Ê»ETc.", "", "3", "L" }, // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
+            
+            { "Ê»cAt! Ê»eTc.", "Ê»Cat! Ê»etc.", "", "-2", "" }, // -2=Trivial break iterator
+            { "Ê»cAt! Ê»eTc.", "Ê»cat! Ê»etc.", "", "-2", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
+            { "Ê»cAt! Ê»eTc.", "Ê»CAt! Ê»eTc.", "", "-2", "L" }, // U_TITLECASE_NO_LOWERCASE
+            { "Ê»cAt! Ê»eTc.", "Ê»cAt! Ê»eTc.", "", "-2", "AL" }, // Both options
+
+            // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
+            // when TITLECASE_NO_LOWERCASE encounters a single-letter word
+            { "a b c", "A B C", "", "1", "L" } // U_TITLECASE_NO_LOWERCASE
+        }
+      }
+      casefolding {
+        Headers { "Input", "Output", "Options" }
         Cases {
-            { " tHe QUIcK bRoWn", " The Quick Brown", "", "4" },
-            { "ÇÇÇÇÇÇÇÇÇ", "ÇÇÇÇÇÇÇÇÇ", "", "0" }, // UBRK_CHARACTER
-            { "Çubav ljubav", "Çubav Ljubav", "", "-1" }, // Lj vs. L+j
-            { "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'", "", "-1" }
+            { "aBÄ°IÄ±ÏÃï¬ñ¿¿", "abiÌiÄ±Î²ssffiñ¿¿", "" },
+            { "aBÄ°IÄ±ÏÃï¬ñ¿¿", "abiÄ±Ä±Î²ssffiñ¿¿", "T" } // U_FOLD_CASE_EXCLUDE_SPECIAL_I
         }
       }
     }