2 *******************************************************************************
3 * Copyright (C) 2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
10 * Modification History:*
11 * Date Name Description
12 * 01/15/13 Emmons Original Port from ICU4J
13 ********************************************************************************
18 * \brief C++ API: Region classes (territory containment)
21 #include "unicode/region.h"
22 #include "unicode/utypes.h"
23 #include "unicode/uobject.h"
24 #include "unicode/unistr.h"
25 #include "unicode/ures.h"
26 #include "unicode/decimfmt.h"
32 #include "region_impl.h"
34 #if !UCONFIG_NO_FORMATTING
39 static void U_CALLCONV
40 deleteRegion(void *obj
) {
41 delete (icu::Region
*)obj
;
45 * Cleanup callback func
47 static UBool U_CALLCONV
region_cleanup(void)
49 icu::Region::cleanupRegionData();
58 static UMutex gRegionDataLock
= U_MUTEX_INITIALIZER
;
59 static UBool regionDataIsLoaded
= false;
60 static UVector
* availableRegions
[URGN_LIMIT
];
62 static UHashtable
*regionAliases
;
63 static UHashtable
*regionIDMap
;
64 static UHashtable
*numericCodeMap
;
66 static const UChar UNKNOWN_REGION_ID
[] = { 0x5A, 0x5A, 0 }; /* "ZZ" */
67 static const UChar OUTLYING_OCEANIA_REGION_ID
[] = { 0x51, 0x4F, 0 }; /* "QO" */
68 static const UChar WORLD_ID
[] = { 0x30, 0x30, 0x31, 0 }; /* "001" */
70 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegionNameEnumeration
)
73 * Initializes the region data from the ICU resource bundles. The region data
74 * contains the basic relationships such as which regions are known, what the numeric
75 * codes are, any known aliases, and the territory containment data.
77 * If the region data has already loaded, then this method simply returns without doing
78 * anything meaningful.
80 void Region::loadRegionData() {
82 if (regionDataIsLoaded
) {
86 umtx_lock(&gRegionDataLock
);
88 if (regionDataIsLoaded
) { // In case another thread gets to it before we do...
89 umtx_unlock(&gRegionDataLock
);
94 UErrorCode status
= U_ZERO_ERROR
;
96 UResourceBundle
* regionCodes
= NULL
;
97 UResourceBundle
* territoryAlias
= NULL
;
98 UResourceBundle
* codeMappings
= NULL
;
99 UResourceBundle
* worldContainment
= NULL
;
100 UResourceBundle
* territoryContainment
= NULL
;
101 UResourceBundle
* groupingContainment
= NULL
;
103 DecimalFormat
*df
= new DecimalFormat(status
);
104 if (U_FAILURE(status
)) {
105 umtx_unlock(&gRegionDataLock
);
108 df
->setParseIntegerOnly(TRUE
);
110 regionIDMap
= uhash_open(uhash_hashUnicodeString
,uhash_compareUnicodeString
,NULL
,&status
);
111 uhash_setValueDeleter(regionIDMap
, deleteRegion
);
113 numericCodeMap
= uhash_open(uhash_hashLong
,uhash_compareLong
,NULL
,&status
);
115 regionAliases
= uhash_open(uhash_hashUnicodeString
,uhash_compareUnicodeString
,NULL
,&status
);
116 uhash_setKeyDeleter(regionAliases
,uprv_deleteUObject
);
118 UResourceBundle
*rb
= ures_openDirect(NULL
,"metadata",&status
);
119 regionCodes
= ures_getByKey(rb
,"regionCodes",NULL
,&status
);
120 territoryAlias
= ures_getByKey(rb
,"territoryAlias",NULL
,&status
);
122 UResourceBundle
*rb2
= ures_openDirect(NULL
,"supplementalData",&status
);
123 codeMappings
= ures_getByKey(rb2
,"codeMappings",NULL
,&status
);
125 territoryContainment
= ures_getByKey(rb2
,"territoryContainment",NULL
,&status
);
126 worldContainment
= ures_getByKey(territoryContainment
,"001",NULL
,&status
);
127 groupingContainment
= ures_getByKey(territoryContainment
,"grouping",NULL
,&status
);
129 UVector
*continents
= new UVector(uprv_deleteUObject
, uhash_compareUnicodeString
, status
);
131 while ( ures_hasNext(worldContainment
) ) {
132 UnicodeString
*continentName
= new UnicodeString(ures_getNextUnicodeString(worldContainment
,NULL
,&status
));
133 continents
->addElement(continentName
,status
);
136 UVector
*groupings
= new UVector(uprv_deleteUObject
, uhash_compareUnicodeString
, status
);
137 while ( ures_hasNext(groupingContainment
) ) {
138 UnicodeString
*groupingName
= new UnicodeString(ures_getNextUnicodeString(groupingContainment
,NULL
,&status
));
139 groupings
->addElement(groupingName
,status
);
142 while ( ures_hasNext(regionCodes
) ) {
143 UnicodeString regionID
= ures_getNextUnicodeString(regionCodes
,NULL
,&status
);
144 Region
*r
= new Region();
146 r
->idStr
.extract(0,r
->idStr
.length(),r
->id
,sizeof(r
->id
),US_INV
);
147 r
->type
= URGN_TERRITORY
; // Only temporary - figure out the real type later once the aliases are known.
149 uhash_put(regionIDMap
,(void *)&(r
->idStr
),(void *)r
,&status
);
151 UErrorCode ps
= U_ZERO_ERROR
;
152 df
->parse(r
->idStr
,result
,ps
);
153 if ( U_SUCCESS(ps
) ) {
154 r
->code
= result
.getLong(); // Convert string to number
155 uhash_iput(numericCodeMap
,r
->code
,(void *)r
,&status
);
156 r
->type
= URGN_SUBCONTINENT
;
163 // Process the territory aliases
164 while ( ures_hasNext(territoryAlias
) ) {
165 UResourceBundle
*res
= ures_getNextResource(territoryAlias
,NULL
,&status
);
166 const char *aliasFrom
= ures_getKey(res
);
167 UnicodeString
* aliasFromStr
= new UnicodeString(aliasFrom
, -1, US_INV
);
168 UnicodeString aliasTo
= ures_getUnicodeString(res
,&status
);
171 Region
*aliasToRegion
= (Region
*) uhash_get(regionIDMap
,&aliasTo
);
172 Region
*aliasFromRegion
= (Region
*)uhash_get(regionIDMap
,aliasFromStr
);
174 if ( aliasToRegion
!= NULL
&& aliasFromRegion
== NULL
) { // This is just an alias from some string to a region
175 uhash_put(regionAliases
,(void *)aliasFromStr
, (void *)aliasToRegion
,&status
);
177 if ( aliasFromRegion
== NULL
) { // Deprecated region code not in the master codes list - so need to create a deprecated region for it.
178 aliasFromRegion
= new Region();
179 aliasFromRegion
->idStr
.setTo(*aliasFromStr
);
180 aliasFromRegion
->idStr
.extract(0,aliasFromRegion
->idStr
.length(),aliasFromRegion
->id
,sizeof(aliasFromRegion
->id
),US_INV
);
181 uhash_put(regionIDMap
,(void *)&(aliasFromRegion
->idStr
),(void *)aliasFromRegion
,&status
);
183 UErrorCode ps
= U_ZERO_ERROR
;
184 df
->parse(aliasFromRegion
->idStr
,result
,ps
);
185 if ( U_SUCCESS(ps
) ) {
186 aliasFromRegion
->code
= result
.getLong(); // Convert string to number
187 uhash_iput(numericCodeMap
,aliasFromRegion
->code
,(void *)aliasFromRegion
,&status
);
189 aliasFromRegion
->code
= -1;
191 aliasFromRegion
->type
= URGN_DEPRECATED
;
193 aliasFromRegion
->type
= URGN_DEPRECATED
;
197 aliasFromRegion
->preferredValues
= new UVector(uprv_deleteUObject
, uhash_compareUnicodeString
, status
);
198 UnicodeString currentRegion
;
199 currentRegion
.remove();
200 for (int32_t i
= 0 ; i
< aliasTo
.length() ; i
++ ) {
201 if ( aliasTo
.charAt(i
) != 0x0020 ) {
202 currentRegion
.append(aliasTo
.charAt(i
));
204 if ( aliasTo
.charAt(i
) == 0x0020 || i
+1 == aliasTo
.length() ) {
205 Region
*target
= (Region
*)uhash_get(regionIDMap
,(void *)¤tRegion
);
207 UnicodeString
*preferredValue
= new UnicodeString(target
->idStr
);
208 aliasFromRegion
->preferredValues
->addElement((void *)preferredValue
,status
);
210 currentRegion
.remove();
216 // Process the code mappings - This will allow us to assign numeric codes to most of the territories.
217 while ( ures_hasNext(codeMappings
) ) {
218 UResourceBundle
*mapping
= ures_getNextResource(codeMappings
,NULL
,&status
);
219 if ( ures_getType(mapping
) == URES_ARRAY
&& ures_getSize(mapping
) == 3) {
220 UnicodeString codeMappingID
= ures_getUnicodeStringByIndex(mapping
,0,&status
);
221 UnicodeString codeMappingNumber
= ures_getUnicodeStringByIndex(mapping
,1,&status
);
222 UnicodeString codeMapping3Letter
= ures_getUnicodeStringByIndex(mapping
,2,&status
);
224 Region
*r
= (Region
*)uhash_get(regionIDMap
,(void *)&codeMappingID
);
227 UErrorCode ps
= U_ZERO_ERROR
;
228 df
->parse(codeMappingNumber
,result
,ps
);
229 if ( U_SUCCESS(ps
) ) {
230 r
->code
= result
.getLong(); // Convert string to number
231 uhash_iput(numericCodeMap
,r
->code
,(void *)r
,&status
);
233 UnicodeString
*code3
= new UnicodeString(codeMapping3Letter
);
234 uhash_put(regionAliases
,(void *)code3
, (void *)r
,&status
);
240 // Now fill in the special cases for WORLD, UNKNOWN, CONTINENTS, and GROUPINGS
242 UnicodeString
WORLD_ID_STRING(WORLD_ID
);
243 r
= (Region
*) uhash_get(regionIDMap
,(void *)&WORLD_ID_STRING
);
245 r
->type
= URGN_WORLD
;
248 UnicodeString
UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID
);
249 r
= (Region
*) uhash_get(regionIDMap
,(void *)&UNKNOWN_REGION_ID_STRING
);
251 r
->type
= URGN_UNKNOWN
;
254 for ( int32_t i
= 0 ; i
< continents
->size() ; i
++ ) {
255 r
= (Region
*) uhash_get(regionIDMap
,(void *)continents
->elementAt(i
));
257 r
->type
= URGN_CONTINENT
;
262 for ( int32_t i
= 0 ; i
< groupings
->size() ; i
++ ) {
263 r
= (Region
*) uhash_get(regionIDMap
,(void *)groupings
->elementAt(i
));
265 r
->type
= URGN_GROUPING
;
270 // Special case: The region code "QO" (Outlying Oceania) is a subcontinent code added by CLDR
271 // even though it looks like a territory code. Need to handle it here.
273 UnicodeString
OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID
);
274 r
= (Region
*) uhash_get(regionIDMap
,(void *)&OUTLYING_OCEANIA_REGION_ID_STRING
);
276 r
->type
= URGN_SUBCONTINENT
;
279 // Load territory containment info from the supplemental data.
280 while ( ures_hasNext(territoryContainment
) ) {
281 UResourceBundle
*mapping
= ures_getNextResource(territoryContainment
,NULL
,&status
);
282 const char *parent
= ures_getKey(mapping
);
283 UnicodeString parentStr
= UnicodeString(parent
, -1 , US_INV
);
284 Region
*parentRegion
= (Region
*) uhash_get(regionIDMap
,(void *)&parentStr
);
286 for ( int j
= 0 ; j
< ures_getSize(mapping
); j
++ ) {
287 UnicodeString child
= ures_getUnicodeStringByIndex(mapping
,j
,&status
);
288 Region
*childRegion
= (Region
*) uhash_get(regionIDMap
,(void *)&child
);
289 if ( parentRegion
!= NULL
&& childRegion
!= NULL
) {
291 // Add the child region to the set of regions contained by the parent
292 if (parentRegion
->containedRegions
== NULL
) {
293 parentRegion
->containedRegions
= new UVector(uprv_deleteUObject
, uhash_compareUnicodeString
, status
);
296 UnicodeString
*childStr
= new UnicodeString(status
);
297 childStr
->fastCopyFrom(childRegion
->idStr
);
298 parentRegion
->containedRegions
->addElement((void *)childStr
,status
);
300 // Set the parent region to be the containing region of the child.
301 // Regions of type GROUPING can't be set as the parent, since another region
302 // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent.
303 if ( parentRegion
->type
!= URGN_GROUPING
) {
304 childRegion
->containingRegion
= parentRegion
;
311 // Create the availableRegions lists
313 while ( const UHashElement
* element
= uhash_nextElement(regionIDMap
,&pos
)) {
314 Region
*ar
= (Region
*)element
->value
.pointer
;
315 if ( availableRegions
[ar
->type
] == NULL
) {
316 availableRegions
[ar
->type
] = new UVector(uprv_deleteUObject
, uhash_compareUnicodeString
, status
);
318 UnicodeString
*arString
= new UnicodeString(ar
->idStr
);
319 availableRegions
[ar
->type
]->addElement((void *)arString
,status
);
322 ures_close(territoryContainment
);
323 ures_close(worldContainment
);
324 ures_close(groupingContainment
);
326 ures_close(codeMappings
);
328 ures_close(territoryAlias
);
329 ures_close(regionCodes
);
334 ucln_i18n_registerCleanup(UCLN_I18N_REGION
, region_cleanup
);
336 regionDataIsLoaded
= true;
337 umtx_unlock(&gRegionDataLock
);
341 void Region::cleanupRegionData() {
343 for (int32_t i
= 0 ; i
< URGN_LIMIT
; i
++ ) {
344 if ( availableRegions
[i
] ) {
345 delete availableRegions
[i
];
350 uhash_close(regionAliases
);
353 if (numericCodeMap
) {
354 uhash_close(numericCodeMap
);
358 uhash_close(regionIDMap
);
365 containingRegion(NULL
),
366 containedRegions(NULL
),
367 preferredValues(NULL
) {
372 if (containedRegions
) {
373 delete containedRegions
;
375 if (preferredValues
) {
376 delete preferredValues
;
381 * Returns true if the two regions are equal.
384 Region::operator==(const Region
&that
) const {
385 return (idStr
== that
.idStr
);
389 * Returns true if the two regions are NOT equal; that is, if operator ==() returns false.
392 Region::operator!=(const Region
&that
) const {
393 return (idStr
!= that
.idStr
);
397 * Returns a pointer to a Region using the given region code. The region code can be either 2-letter ISO code,
398 * 3-letter ISO code, UNM.49 numeric code, or other valid Unicode Region Code as defined by the LDML specification.
399 * The identifier will be canonicalized internally using the supplemental metadata as defined in the CLDR.
400 * If the region code is NULL or not recognized, the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR )
402 const Region
* U_EXPORT2
403 Region::getInstance(const char *region_code
, UErrorCode
&status
) {
405 if ( !region_code
) {
406 status
= U_ILLEGAL_ARGUMENT_ERROR
;
412 if (regionIDMap
== NULL
) {
413 status
= U_ILLEGAL_ARGUMENT_ERROR
;
417 UnicodeString regionCodeString
= UnicodeString(region_code
, -1, US_INV
);
418 Region
*r
= (Region
*)uhash_get(regionIDMap
,(void *)®ionCodeString
);
421 r
= (Region
*)uhash_get(regionAliases
,(void *)®ionCodeString
);
424 if ( !r
) { // Unknown region code
425 status
= U_ILLEGAL_ARGUMENT_ERROR
;
429 if ( r
->type
== URGN_DEPRECATED
&& r
->preferredValues
->size() == 1) {
430 StringEnumeration
*pv
= r
->getPreferredValues();
432 const UnicodeString
*ustr
= pv
->snext(status
);
433 r
= (Region
*)uhash_get(regionIDMap
,(void *)ustr
);
442 * Returns a pointer to a Region using the given numeric region code. If the numeric region code is not recognized,
443 * the appropriate error code will be set ( U_ILLEGAL_ARGUMENT_ERROR ).
445 const Region
* U_EXPORT2
446 Region::getInstance (int32_t code
, UErrorCode
&status
) {
450 if (numericCodeMap
== NULL
) {
451 status
= U_ILLEGAL_ARGUMENT_ERROR
;
455 Region
*r
= (Region
*)uhash_iget(numericCodeMap
,code
);
457 if ( !r
) { // Just in case there's an alias that's numeric, try to find it.
458 UErrorCode fs
= U_ZERO_ERROR
;
459 UnicodeString pat
= UNICODE_STRING_SIMPLE("00#");
460 DecimalFormat
*df
= new DecimalFormat(pat
,fs
);
466 r
= (Region
*)uhash_get(regionAliases
,&id
);
470 status
= U_ILLEGAL_ARGUMENT_ERROR
;
474 if ( r
->type
== URGN_DEPRECATED
&& r
->preferredValues
->size() == 1) {
475 StringEnumeration
*pv
= r
->getPreferredValues();
477 const UnicodeString
*ustr
= pv
->snext(status
);
478 r
= (Region
*)uhash_get(regionIDMap
,(void *)ustr
);
487 * Returns an enumeration over the IDs of all known regions that match the given type.
489 StringEnumeration
* U_EXPORT2
490 Region::getAvailable(URegionType type
) {
493 UErrorCode status
= U_ZERO_ERROR
;
494 return new RegionNameEnumeration(availableRegions
[type
],status
);
500 * Returns a pointer to the region that contains this region. Returns NULL if this region is code "001" (World)
501 * or "ZZ" (Unknown region). For example, calling this method with region "IT" (Italy) returns the
502 * region "039" (Southern Europe).
505 Region::getContainingRegion() const {
507 return containingRegion
;
511 * Return a pointer to the region that geographically contains this region and matches the given type,
512 * moving multiple steps up the containment chain if necessary. Returns NULL if no containing region can be found
513 * that matches the given type. Note: The URegionTypes = "URGN_GROUPING", "URGN_DEPRECATED", or "URGN_UNKNOWN"
514 * are not appropriate for use in this API. NULL will be returned in this case. For example, calling this method
515 * with region "IT" (Italy) for type "URGN_CONTINENT" returns the region "150" ( Europe ).
518 Region::getContainingRegion(URegionType type
) const {
520 if ( containingRegion
== NULL
) {
524 if ( containingRegion
->type
== type
) {
525 return containingRegion
;
527 return containingRegion
->getContainingRegion(type
);
532 * Return an enumeration over the IDs of all the regions that are immediate children of this region in the
533 * region hierarchy. These returned regions could be either macro regions, territories, or a mixture of the two,
534 * depending on the containment data as defined in CLDR. This API may return NULL if this region doesn't have
535 * any sub-regions. For example, calling this method with region "150" (Europe) returns an enumeration containing
536 * the various sub regions of Europe - "039" (Southern Europe) - "151" (Eastern Europe) - "154" (Northern Europe)
537 * and "155" (Western Europe).
540 Region::getContainedRegions() const {
542 UErrorCode status
= U_ZERO_ERROR
;
543 return new RegionNameEnumeration(containedRegions
,status
);
547 * Returns an enumeration over the IDs of all the regions that are children of this region anywhere in the region
548 * hierarchy and match the given type. This API may return an empty enumeration if this region doesn't have any
549 * sub-regions that match the given type. For example, calling this method with region "150" (Europe) and type
550 * "URGN_TERRITORY" returns a set containing all the territories in Europe ( "FR" (France) - "IT" (Italy) - "DE" (Germany) etc. )
553 Region::getContainedRegions( URegionType type
) const {
556 UErrorCode status
= U_ZERO_ERROR
;
557 UVector
*result
= new UVector(NULL
, uhash_compareChars
, status
);
559 StringEnumeration
*cr
= getContainedRegions();
561 for ( int32_t i
= 0 ; i
< cr
->count(status
) ; i
++ ) {
562 const char *id
= cr
->next(NULL
,status
);
563 const Region
*r
= Region::getInstance(id
,status
);
564 if ( r
->getType() == type
) {
565 result
->addElement((void *)&r
->idStr
,status
);
567 StringEnumeration
*children
= r
->getContainedRegions(type
);
568 for ( int32_t j
= 0 ; j
< children
->count(status
) ; j
++ ) {
569 const char *id2
= children
->next(NULL
,status
);
570 const Region
*r2
= Region::getInstance(id2
,status
);
571 result
->addElement((void *)&r2
->idStr
,status
);
577 StringEnumeration
* resultEnumeration
= new RegionNameEnumeration(result
,status
);
579 return resultEnumeration
;
583 * Returns true if this region contains the supplied other region anywhere in the region hierarchy.
586 Region::contains(const Region
&other
) const {
589 if (!containedRegions
) {
592 if (containedRegions
->contains((void *)&other
.idStr
)) {
595 for ( int32_t i
= 0 ; i
< containedRegions
->size() ; i
++ ) {
596 UnicodeString
*crStr
= (UnicodeString
*)containedRegions
->elementAt(i
);
597 Region
*cr
= (Region
*) uhash_get(regionIDMap
,(void *)crStr
);
598 if ( cr
&& cr
->contains(other
) ) {
608 * For deprecated regions, return an enumeration over the IDs of the regions that are the preferred replacement
609 * regions for this region. Returns NULL for a non-deprecated region. For example, calling this method with region
610 * "SU" (Soviet Union) would return a list of the regions containing "RU" (Russia), "AM" (Armenia), "AZ" (Azerbaijan), etc...
613 Region::getPreferredValues() const {
615 UErrorCode status
= U_ZERO_ERROR
;
616 if ( type
== URGN_DEPRECATED
) {
617 return new RegionNameEnumeration(preferredValues
,status
);
625 * Return this region's canonical region code.
628 Region::getRegionCode() const {
633 Region::getNumericCode() const {
638 * Returns the region type of this region.
641 Region::getType() const {
645 RegionNameEnumeration::RegionNameEnumeration(UVector
*fNameList
, UErrorCode
& status
) {
647 if (fNameList
&& U_SUCCESS(status
)) {
648 fRegionNames
= new UVector(uprv_deleteUObject
, uhash_compareUnicodeString
, fNameList
->size(),status
);
649 for ( int32_t i
= 0 ; i
< fNameList
->size() ; i
++ ) {
650 UnicodeString
* this_region_name
= (UnicodeString
*)fNameList
->elementAt(i
);
651 UnicodeString
* new_region_name
= new UnicodeString(*this_region_name
);
652 fRegionNames
->addElement((void *)new_region_name
,status
);
661 RegionNameEnumeration::snext(UErrorCode
& status
) {
662 if (U_FAILURE(status
) || (fRegionNames
==NULL
)) {
665 const UnicodeString
* nextStr
= (const UnicodeString
*)fRegionNames
->elementAt(pos
);
673 RegionNameEnumeration::reset(UErrorCode
& /*status*/) {
678 RegionNameEnumeration::count(UErrorCode
& /*status*/) const {
679 return (fRegionNames
==NULL
) ? 0 : fRegionNames
->size();
682 RegionNameEnumeration::~RegionNameEnumeration() {
688 #endif /* #if !UCONFIG_NO_FORMATTING */