]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/testdata/casing.txt
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / test / testdata / casing.txt
1 //*******************************************************************************
2 // Copyright (C) 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 //*
5 //* Copyright (C) 2002-2010, International Business Machines
6 //* Corporation and others. All Rights Reserved.
7 //*
8 //*******************************************************************************
9
10 casing:table(nofallback) {
11 Info {
12 Description { "This is test data file for string casing." }
13
14 LongDescription {
15 "each item is an array with\n"
16 "input string, result string, locale ID[, break iterator]\n"
17 "the break iterator (only for titlecasing) is specified as an int, same as in UBreakIteratorType:\n"
18 "0=UBRK_CHARACTER 1=UBRK_WORD 2=UBRK_LINE 3=UBRK_SENTENCE 4=UBRK_TITLE -1=default (NULL=words) -2=no breaks (.*)\n"
19 "options: T=U_FOLD_CASE_EXCLUDE_SPECIAL_I L=U_TITLECASE_NO_LOWERCASE A=U_TITLECASE_NO_BREAK_ADJUSTMENT\n"
20 }
21 }
22 TestData {
23 lowercasing {
24 Headers { "Input", "Output", "Locale" }
25 Cases {
26 { " tHe QUIcK bRoWn", " the quick brown", "" },
27 { "aBIΣßΣ/񟿿𐐅", "abiσßς/񟿿𐐭", "" },
28 { "aBIΣßΣ/񟿿𐐅", "abıσßς/񟿿𐐭", "tur" } // tur: 3-letter code for Turkish
29 }
30 }
31 uppercasing {
32 Headers { "Input", "Output", "Locale" }
33 Cases {
34 { " tHe QUIcK bRoWn", " THE QUICK BROWN", "" },
35 { "aBiσßς/ﬃ񟿿𐐭", "ABIΣSSΣ/FFI񟿿𐐅", "" },
36 { "aBiσßς/ﬃ񟿿𐐭", "ABİΣSSΣ/FFI񟿿𐐅", "az" } // az same casing as tr
37 }
38 }
39 titlecasing {
40 Headers { "Input", "Output", "Locale", "Type", "Options" }
41 Cases {
42 { "ʻaMeLikA huI Pū ʻʻʻiA", "ʻAmelika Hui Pū ʻʻʻIa", "", "-1", "" }, // titlecase first _cased_ letter, j4933
43 { " tHe QUIcK bRoWn", " The Quick Brown", "", "4", "" },
44 { "ǄǅǆǇǈǉǊǋǌ", "ǅǅǅǈǈǈǋǋǋ", "", "0", "" }, // UBRK_CHARACTER
45 { "ǉubav ljubav", "ǈubav Ljubav", "", "-1", "" }, // Lj vs. L+j
46 { "ijssel igloo IJMUIDEN", "Ijssel Igloo Ijmuiden", "", "1", "" }, // Dutch titlecasing default
47 { "ijssel igloo IJMUIDEN", "IJssel Igloo IJmuiden", "nl", "1", "" }, // Dutch titlecasing
48 { "'oH dOn'T tItLeCaSe AfTeR lEtTeR+'", "'Oh Don't Titlecase After Letter+'", "", "-1", "" },
49
50 { "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCat. A ʻDog! ʻEtc.", "", "-1", "" }, // default
51 { "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻcat. A ʻdog! ʻetc.", "", "-1", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
52 { "a ʻCaT. A ʻdOg! ʻeTc.", "A ʻCaT. A ʻdOg! ʻETc.", "", "3", "L" }, // UBRK_SENTENCE and U_TITLECASE_NO_LOWERCASE
53
54 { "ʻcAt! ʻeTc.", "ʻCat! ʻetc.", "", "-2", "" }, // -2=Trivial break iterator
55 { "ʻcAt! ʻeTc.", "ʻcat! ʻetc.", "", "-2", "A" }, // U_TITLECASE_NO_BREAK_ADJUSTMENT
56 { "ʻcAt! ʻeTc.", "ʻCAt! ʻeTc.", "", "-2", "L" }, // U_TITLECASE_NO_LOWERCASE
57 { "ʻcAt! ʻeTc.", "ʻcAt! ʻeTc.", "", "-2", "AL" }, // Both options
58
59 // Test case for ticket #7251: UCharacter.toTitleCase() throws OutOfMemoryError
60 // when TITLECASE_NO_LOWERCASE encounters a single-letter word
61 { "a b c", "A B C", "", "1", "L" } // U_TITLECASE_NO_LOWERCASE
62 }
63 }
64 casefolding {
65 Headers { "Input", "Output", "Options" }
66 Cases {
67 { "aBİIıϐßﬃ񟿿", "abi̇iıβssffi񟿿", "" },
68 { "aBİIıϐßﬃ񟿿", "abiııβssffi񟿿", "T" } // U_FOLD_CASE_EXCLUDE_SPECIAL_I
69 }
70 }
71 }
72 }