icuSources/data/unidata/PropertyValueAliases.txt

   1 # PropertyValueAliases-4.0.0.txt
   2 # Date: 2003-03-12, 23:46:08 GMT [MD]
   3 #
   4 # This file contains aliases for property values used in the UCD.
   5 # These names can be used for XML formats of UCD data, for regular-expression
   6 # property tests, and other programmatic textual descriptions of Unicode data.
   7 # For information on which properties are normative, see UCD.html.
   8 #
   9 # The names may be translated in appropriate environments, and additional
  10 # aliases may be useful.
  11 #
  12 # FORMAT
  13 #
  14 # Each line describes a property value name.
  15 # This consists of three fields, separated by semicolons.
  16 #
  17 # First Field: The first field describes the property for which that
  18 # property value name is used.
  19 # There is one special pseudo-property: "qc" stands for any quick-check property.
  20 #
  21 # Second Field: The second field is an abbreviated name.
  22 # If there is no abbreviated name available, the field is marked with "n/a".
  23 #
  24 # Third Field: The third field is a long name.
  25 #
  26 # In the case of ccc, there are 4 fields. The second field is numeric, third
  27 # is abbreviated, and fourth is long.
  28 #
  29 # With loose matching of property names, the case distinctions, whitespace,
  30 # and '_' are ignored.
  31 #
  32 # NOTE: Currently there is at most one abbreviated name and one long name for
  33 # each property value. However, in the future additional aliases may be added.
  34 # In such a case, the first line for the property value would have
  35 # the preferred alias for output.
  36 #
  37 # NOTE: The property value names are NOT unique across properties, especially
  38 # with loose matches. For example:
  39 #
  40 # AL means Arabic Letter for the Bidi_Class property, and
  41 # AL means Above_Left for the Combining_Class property, and
  42 # AL means Alphabetic for the Line_Break property.
  43 #
  44 # In addition, some property names may be the same as some property value names.
  45 # For example:
  46 #
  47 #   sc means Script property, and
  48 #   sc means the General_Category property value Currency_Symbol (Sc)
  49 #
  50 # The combination of property value and property name is, however, unique.
  51 # For more information, see UTR #18: Regular Expression Guidelines
  52 # ================================================
  53
  54
  55 bc ; AL        ; Arabic_Letter
  56 bc ; AN        ; Arabic_Number
  57 bc ; B         ; Paragraph_Separator
  58 bc ; BN        ; Boundary_Neutral
  59 bc ; CS        ; Common_Separator
  60 bc ; EN        ; European_Number
  61 bc ; ES        ; European_Separator
  62 bc ; ET        ; European_Terminator
  63 bc ; L         ; Left_To_Right
  64 bc ; LRE       ; Left_To_Right_Embedding
  65 bc ; LRO       ; Left_To_Right_Override
  66 bc ; NSM       ; Nonspacing_Mark
  67 bc ; ON        ; Other_Neutral
  68 bc ; PDF       ; Pop_Directional_Format
  69 bc ; R         ; Right_To_Left
  70 bc ; RLE       ; Right_To_Left_Embedding
  71 bc ; RLO       ; Right_To_Left_Override
  72 bc ; S         ; Segment_Separator
  73 bc ; WS        ; White_Space
  74
  75 blk; n/a       ; Aegean_Numbers
  76 blk; n/a       ; Alphabetic_Presentation_Forms
  77 blk; n/a       ; Arabic
  78 blk; n/a       ; Arabic_Presentation_Forms-A
  79 blk; n/a       ; Arabic_Presentation_Forms-B
  80 blk; n/a       ; Armenian
  81 blk; n/a       ; Arrows
  82 blk; n/a       ; Basic_Latin
  83 blk; n/a       ; Bengali
  84 blk; n/a       ; Block_Elements
  85 blk; n/a       ; Bopomofo
  86 blk; n/a       ; Bopomofo_Extended
  87 blk; n/a       ; Box_Drawing
  88 blk; n/a       ; Braille_Patterns
  89 blk; n/a       ; Buhid
  90 blk; n/a       ; Byzantine_Musical_Symbols
  91 blk; n/a       ; Cherokee
  92 blk; n/a       ; CJK_Compatibility
  93 blk; n/a       ; CJK_Compatibility_Forms
  94 blk; n/a       ; CJK_Compatibility_Ideographs
  95 blk; n/a       ; CJK_Compatibility_Ideographs_Supplement
  96 blk; n/a       ; CJK_Radicals_Supplement
  97 blk; n/a       ; CJK_Symbols_and_Punctuation
  98 blk; n/a       ; CJK_Unified_Ideographs
  99 blk; n/a       ; CJK_Unified_Ideographs_Extension_A
 100 blk; n/a       ; CJK_Unified_Ideographs_Extension_B
 101 blk; n/a       ; Combining_Diacritical_Marks
 102 blk; n/a       ; Combining_Diacritical_Marks_for_Symbols
 103 blk; n/a       ; Combining_Half_Marks
 104 blk; n/a       ; Control_Pictures
 105 blk; n/a       ; Currency_Symbols
 106 blk; n/a       ; Cypriot_Syllabary
 107 blk; n/a       ; Cyrillic
 108 blk; n/a       ; Cyrillic_Supplementary
 109 blk; n/a       ; Deseret
 110 blk; n/a       ; Devanagari
 111 blk; n/a       ; Dingbats
 112 blk; n/a       ; Enclosed_Alphanumerics
 113 blk; n/a       ; Enclosed_CJK_Letters_and_Months
 114 blk; n/a       ; Ethiopic
 115 blk; n/a       ; General_Punctuation
 116 blk; n/a       ; Geometric_Shapes
 117 blk; n/a       ; Georgian
 118 blk; n/a       ; Gothic
 119 blk; n/a       ; Greek_and_Coptic
 120 blk; n/a       ; Greek_Extended
 121 blk; n/a       ; Gujarati
 122 blk; n/a       ; Gurmukhi
 123 blk; n/a       ; Halfwidth_and_Fullwidth_Forms
 124 blk; n/a       ; Hangul_Compatibility_Jamo
 125 blk; n/a       ; Hangul_Jamo
 126 blk; n/a       ; Hangul_Syllables
 127 blk; n/a       ; Hanunoo
 128 blk; n/a       ; Hebrew
 129 blk; n/a       ; High_Private_Use_Surrogates
 130 blk; n/a       ; High_Surrogates
 131 blk; n/a       ; Hiragana
 132 blk; n/a       ; Ideographic_Description_Characters
 133 blk; n/a       ; IPA_Extensions
 134 blk; n/a       ; Kanbun
 135 blk; n/a       ; Kangxi_Radicals
 136 blk; n/a       ; Kannada
 137 blk; n/a       ; Katakana
 138 blk; n/a       ; Katakana_Phonetic_Extensions
 139 blk; n/a       ; Khmer
 140 blk; n/a       ; Khmer_Symbols
 141 blk; n/a       ; Lao
 142 blk; n/a       ; Latin_Extended_Additional
 143 blk; n/a       ; Latin_Extended-A
 144 blk; n/a       ; Latin_Extended-B
 145 blk; n/a       ; Latin-1_Supplement
 146 blk; n/a       ; Letterlike_Symbols
 147 blk; n/a       ; Limbu
 148 blk; n/a       ; Linear_B_Ideograms
 149 blk; n/a       ; Linear_B_Syllabary
 150 blk; n/a       ; Low_Surrogates
 151 blk; n/a       ; Malayalam
 152 blk; n/a       ; Mathematical_Alphanumeric_Symbols
 153 blk; n/a       ; Mathematical_Operators
 154 blk; n/a       ; Miscellaneous_Mathematical_Symbols-A
 155 blk; n/a       ; Miscellaneous_Mathematical_Symbols-B
 156 blk; n/a       ; Miscellaneous_Symbols
 157 blk; n/a       ; Miscellaneous_Symbols_and_Arrows
 158 blk; n/a       ; Miscellaneous_Technical
 159 blk; n/a       ; Mongolian
 160 blk; n/a       ; Musical_Symbols
 161 blk; n/a       ; Myanmar
 162 blk; n/a       ; No_Block
 163 blk; n/a       ; Number_Forms
 164 blk; n/a       ; Ogham
 165 blk; n/a       ; Old_Italic
 166 blk; n/a       ; Optical_Character_Recognition
 167 blk; n/a       ; Oriya
 168 blk; n/a       ; Osmanya
 169 blk; n/a       ; Phonetic_Extensions
 170 blk; n/a       ; Private_Use_Area
 171 blk; n/a       ; Runic
 172 blk; n/a       ; Shavian
 173 blk; n/a       ; Sinhala
 174 blk; n/a       ; Small_Form_Variants
 175 blk; n/a       ; Spacing_Modifier_Letters
 176 blk; n/a       ; Specials
 177 blk; n/a       ; Superscripts_and_Subscripts
 178 blk; n/a       ; Supplemental_Arrows-A
 179 blk; n/a       ; Supplemental_Arrows-B
 180 blk; n/a       ; Supplemental_Mathematical_Operators
 181 blk; n/a       ; Supplementary_Private_Use_Area-A
 182 blk; n/a       ; Supplementary_Private_Use_Area-B
 183 blk; n/a       ; Syriac
 184 blk; n/a       ; Tagalog
 185 blk; n/a       ; Tagbanwa
 186 blk; n/a       ; Tags
 187 blk; n/a       ; Tai_Le
 188 blk; n/a       ; Tai_Xuan_Jing_Symbols
 189 blk; n/a       ; Tamil
 190 blk; n/a       ; Telugu
 191 blk; n/a       ; Thaana
 192 blk; n/a       ; Thai
 193 blk; n/a       ; Tibetan
 194 blk; n/a       ; Ugaritic
 195 blk; n/a       ; Unified_Canadian_Aboriginal_Syllabics
 196 blk; n/a       ; Variation_Selectors
 197 blk; n/a       ; Variation_Selectors_Supplement
 198 blk; n/a       ; Yi_Radicals
 199 blk; n/a       ; Yi_Syllables
 200 blk; n/a       ; Yijing_Hexagram_Symbols
 201
 202 ccc;   0; NR   ; Not_Reordered
 203 ccc;   1; OV   ; Overlay
 204 ccc; 202; ATB  ; Attached_Below
 205 ccc; 216; ATAR ; Attached_Above_Right
 206 ccc; 218; BL   ; Below_Left
 207 ccc; 220; B    ; Below
 208 ccc; 222; BR   ; Below_Right
 209 ccc; 224; L    ; Left
 210 ccc; 226; R    ; Right
 211 ccc; 228; AL   ; Above_Left
 212 ccc; 230; A    ; Above
 213 ccc; 232; AR   ; Above_Right
 214 ccc; 233; DB   ; Double_Below
 215 ccc; 234; DA   ; Double_Above
 216 ccc; 240; IS   ; Iota_Subscript
 217 ccc;   7; NK   ; Nukta
 218 ccc;   8; KV   ; Kana_Voicing
 219 ccc;   9; VR   ; Virama
 220
 221 dt ; can       ; canonical
 222 dt ; com       ; compat
 223 dt ; enc       ; circle
 224 dt ; fin       ; final
 225 dt ; font      ; font
 226 dt ; fra       ; fraction
 227 dt ; init      ; initial
 228 dt ; iso       ; isolated
 229 dt ; med       ; medial
 230 dt ; n/a       ; none
 231 dt ; nar       ; narrow
 232 dt ; nb        ; noBreak
 233 dt ; sml       ; small
 234 dt ; sqr       ; square
 235 dt ; sub       ; sub
 236 dt ; sup       ; super
 237 dt ; vert      ; vertical
 238 dt ; wide      ; wide
 239
 240 ea ; A         ; Ambiguous
 241 ea ; F         ; Fullwidth
 242 ea ; H         ; Halfwidth
 243 ea ; N         ; Neutral
 244 ea ; Na        ; Narrow
 245 ea ; W         ; Wide
 246
 247 gc ; C         ; Other                            # Cc | Cf | Cn | Co | Cs
 248 gc ; Cc        ; Control
 249 gc ; Cf        ; Format
 250 gc ; Cn        ; Unassigned
 251 gc ; Co        ; Private_Use
 252 gc ; Cs        ; Surrogate
 253 gc ; L         ; Letter                           # Ll | Lm | Lo | Lt | Lu
 254 gc ; LC        ; Cased_Letter                     # Ll | Lt | Lu
 255 gc ; Ll        ; Lowercase_Letter
 256 gc ; Lm        ; Modifier_Letter
 257 gc ; Lo        ; Other_Letter
 258 gc ; Lt        ; Titlecase_Letter
 259 gc ; Lu        ; Uppercase_Letter
 260 gc ; M         ; Mark                             # Mc | Me | Mn
 261 gc ; Mc        ; Spacing_Mark
 262 gc ; Me        ; Enclosing_Mark
 263 gc ; Mn        ; Nonspacing_Mark
 264 gc ; N         ; Number                           # Nd | Nl | No
 265 gc ; Nd        ; Decimal_Number
 266 gc ; Nl        ; Letter_Number
 267 gc ; No        ; Other_Number
 268 gc ; P         ; Punctuation                      # Pc | Pd | Pe | Pf | Pi | Po | Ps
 269 gc ; Pc        ; Connector_Punctuation
 270 gc ; Pd        ; Dash_Punctuation
 271 gc ; Pe        ; Close_Punctuation
 272 gc ; Pf        ; Final_Punctuation
 273 gc ; Pi        ; Initial_Punctuation
 274 gc ; Po        ; Other_Punctuation
 275 gc ; Ps        ; Open_Punctuation
 276 gc ; S         ; Symbol                           # Sc | Sk | Sm | So
 277 gc ; Sc        ; Currency_Symbol
 278 gc ; Sk        ; Modifier_Symbol
 279 gc ; Sm        ; Math_Symbol
 280 gc ; So        ; Other_Symbol
 281 gc ; Z         ; Separator                        # Zl | Zp | Zs
 282 gc ; Zl        ; Line_Separator
 283 gc ; Zp        ; Paragraph_Separator
 284 gc ; Zs        ; Space_Separator
 285
 286 hst; L         ; Leading_Jamo
 287 hst; LV        ; LV_Syllable
 288 hst; LVT       ; LVT_Syllable
 289 hst; NA        ; Not_Applicable
 290 hst; T         ; Trailing_Jamo
 291 hst; V         ; Vowel_Jamo
 292
 293 jg ; n/a       ; AIN
 294 jg ; n/a       ; ALAPH
 295 jg ; n/a       ; ALEF
 296 jg ; n/a       ; BEH
 297 jg ; n/a       ; BETH
 298 jg ; n/a       ; DAL
 299 jg ; n/a       ; DALATH_RISH
 300 jg ; n/a       ; E
 301 jg ; n/a       ; FE
 302 jg ; n/a       ; FEH
 303 jg ; n/a       ; FINAL_SEMKATH
 304 jg ; n/a       ; GAF
 305 jg ; n/a       ; GAMAL
 306 jg ; n/a       ; HAH
 307 jg ; n/a       ; HAMZA_ON_HEH_GOAL
 308 jg ; n/a       ; HE
 309 jg ; n/a       ; HEH
 310 jg ; n/a       ; HEH_GOAL
 311 jg ; n/a       ; HETH
 312 jg ; n/a       ; KAF
 313 jg ; n/a       ; KAPH
 314 jg ; n/a       ; KHAPH
 315 jg ; n/a       ; KNOTTED_HEH
 316 jg ; n/a       ; LAM
 317 jg ; n/a       ; LAMADH
 318 jg ; n/a       ; MEEM
 319 jg ; n/a       ; MIM
 320 jg ; n/a       ; NO_JOINING_GROUP
 321 jg ; n/a       ; NOON
 322 jg ; n/a       ; NUN
 323 jg ; n/a       ; PE
 324 jg ; n/a       ; QAF
 325 jg ; n/a       ; QAPH
 326 jg ; n/a       ; REH
 327 jg ; n/a       ; REVERSED_PE
 328 jg ; n/a       ; SAD
 329 jg ; n/a       ; SADHE
 330 jg ; n/a       ; SEEN
 331 jg ; n/a       ; SEMKATH
 332 jg ; n/a       ; SHIN
 333 jg ; n/a       ; SWASH_KAF
 334 jg ; n/a       ; SYRIAC_WAW
 335 jg ; n/a       ; TAH
 336 jg ; n/a       ; TAW
 337 jg ; n/a       ; TEH_MARBUTA
 338 jg ; n/a       ; TETH
 339 jg ; n/a       ; WAW
 340 jg ; n/a       ; YEH
 341 jg ; n/a       ; YEH_BARREE
 342 jg ; n/a       ; YEH_WITH_TAIL
 343 jg ; n/a       ; YUDH
 344 jg ; n/a       ; YUDH_HE
 345 jg ; n/a       ; ZAIN
 346 jg ; n/a       ; ZHAIN
 347
 348 jt ; C         ; Join_Causing
 349 jt ; D         ; Dual_Joining
 350 jt ; L         ; Left_Joining
 351 jt ; R         ; Right_Joining
 352 jt ; T         ; Transparent
 353 jt ; U         ; Non_Joining
 354
 355 lb ; AI        ; Ambiguous
 356 lb ; AL        ; Alphabetic
 357 lb ; B2        ; Break_Both
 358 lb ; BA        ; Break_After
 359 lb ; BB        ; Break_Before
 360 lb ; BK        ; Mandatory_Break
 361 lb ; CB        ; Contingent_Break
 362 lb ; CL        ; Close_Punctuation
 363 lb ; CM        ; Combining_Mark
 364 lb ; CR        ; Carriage_Return
 365 lb ; EX        ; Exclamation
 366 lb ; GL        ; Glue
 367 lb ; HY        ; Hyphen
 368 lb ; ID        ; Ideographic
 369 lb ; IN        ; Inseperable
 370 lb ; IS        ; Infix_Numeric
 371 lb ; LF        ; Line_Feed
 372 lb ; NL        ; Next_Line
 373 lb ; NS        ; Nonstarter
 374 lb ; NU        ; Numeric
 375 lb ; OP        ; Open_Punctuation
 376 lb ; PO        ; Postfix_Numeric
 377 lb ; PR        ; Prefix_Numeric
 378 lb ; QU        ; Quotation
 379 lb ; SA        ; Complex_Context
 380 lb ; SG        ; Surrogate
 381 lb ; SP        ; Space
 382 lb ; SY        ; Break_Symbols
 383 lb ; WJ        ; Word_Joiner
 384 lb ; XX        ; Unknown
 385 lb ; ZW        ; ZWSpace
 386
 387 nt ; de        ; Decimal
 388 nt ; di        ; Digit
 389 nt ; n/a       ; None
 390 nt ; nu        ; Numeric
 391
 392 qc ; M         ; Maybe
 393 qc ; N         ; No
 394 qc ; Y         ; Yes
 395
 396 sc ; Arab      ; Arabic
 397 sc ; Armn      ; Armenian
 398 sc ; Beng      ; Bengali
 399 sc ; Bopo      ; Bopomofo
 400 sc ; Brai      ; Braille
 401 sc ; Buhd      ; Buhid
 402 sc ; Cans      ; Canadian_Aboriginal
 403 sc ; Cher      ; Cherokee
 404 sc ; Cprt      ; Cypriot
 405 sc ; Cyrl      ; Cyrillic
 406 sc ; Deva      ; Devanagari
 407 sc ; Dsrt      ; Deseret
 408 sc ; Ethi      ; Ethiopic
 409 sc ; Geor      ; Georgian
 410 sc ; Goth      ; Gothic
 411 sc ; Grek      ; Greek
 412 sc ; Gujr      ; Gujarati
 413 sc ; Guru      ; Gurmukhi
 414 sc ; Hang      ; Hangul
 415 sc ; Hani      ; Han
 416 sc ; Hano      ; Hanunoo
 417 sc ; Hebr      ; Hebrew
 418 sc ; Hira      ; Hiragana
 419 sc ; Ital      ; Old_Italic
 420 sc ; Kana      ; Katakana
 421 sc ; Khmr      ; Khmer
 422 sc ; Knda      ; Kannada
 423 sc ; Laoo      ; Lao
 424 sc ; Latn      ; Latin
 425 sc ; Limb      ; Limbu
 426 sc ; Linb      ; Linear_B
 427 sc ; Mlym      ; Malayalam
 428 sc ; Mong      ; Mongolian
 429 sc ; Mymr      ; Myanmar
 430 sc ; Ogam      ; Ogham
 431 sc ; Orya      ; Oriya
 432 sc ; Osma      ; Osmanya
 433 sc ; Qaai      ; Inherited
 434 sc ; Runr      ; Runic
 435 sc ; Shaw      ; Shavian
 436 sc ; Sinh      ; Sinhala
 437 sc ; Syrc      ; Syriac
 438 sc ; Tagb      ; Tagbanwa
 439 sc ; Tale      ; Tai_Le
 440 sc ; Taml      ; Tamil
 441 sc ; Telu      ; Telugu
 442 sc ; Tglg      ; Tagalog
 443 sc ; Thaa      ; Thaana
 444 sc ; Thai      ; Thai
 445 sc ; Tibt      ; Tibetan
 446 sc ; Ugar      ; Ugaritic
 447 sc ; Yiii      ; Yi
 448 sc ; Zyyy      ; Common