+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
- * Copyright (C) 2003-2006, International Business Machines
+ * Copyright (C) 2003-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: usprep.cpp
- * encoding: US-ASCII
+ * encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/usprep.h"
-#include "unicode/unorm.h"
+#include "unicode/normalizer2.h"
#include "unicode/ustring.h"
#include "unicode/uchar.h"
#include "unicode/uversion.h"
#include "cstring.h"
#include "udataswp.h"
#include "ucln_cmn.h"
-#include "unormimp.h"
#include "ubidi_props.h"
+#include "uprops.h"
+
+U_NAMESPACE_USE
U_CDECL_BEGIN
Static cache for already opened StringPrep profiles
*/
static UHashtable *SHARED_DATA_HASHTABLE = NULL;
+static icu::UInitOnce gSharedDataInitOnce;
-static UMTX usprepMutex = NULL;
+static UMutex usprepMutex = U_MUTEX_INITIALIZER;
/* format version of spp file */
-static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
+//static uint8_t formatVersion[4]={ 0, 0, 0, 0 };
/* the Unicode version of the sprep data */
static UVersionInfo dataVersion={ 0, 0, 0, 0 };
+/* Profile names, indexed by the integer value of UStringPrepProfileType (see usprep_openByType); the order must stay aligned with that enum. */
+static const char * const PROFILE_NAMES[] = {
+ "rfc3491", /* USPREP_RFC3491_NAMEPREP */
+ "rfc3530cs", /* USPREP_RFC3530_NFS4_CS_PREP */
+ "rfc3530csci", /* USPREP_RFC3530_NFS4_CS_PREP_CI */
+ "rfc3491", /* USPREP_RFC3530_NFS4_CIS_PREP (same data name as NAMEPREP) */
+ "rfc3530mixp", /* USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX */
+ "rfc3491", /* USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX (same data name as NAMEPREP) */
+ "rfc3722", /* USPREP_RFC3722_ISCSI */
+ "rfc3920node", /* USPREP_RFC3920_NODEPREP */
+ "rfc3920res", /* USPREP_RFC3920_RESOURCEPREP */
+ "rfc4011", /* USPREP_RFC4011_MIB */
+ "rfc4013", /* USPREP_RFC4013_SASLPREP */
+ "rfc4505", /* USPREP_RFC4505_TRACE */
+ "rfc4518", /* USPREP_RFC4518_LDAP */
+ "rfc4518ci", /* USPREP_RFC4518_LDAP_CI */
+};
+
static UBool U_CALLCONV
isSPrepAcceptable(void * /* context */,
const char * /* type */,
pInfo->formatVersion[2]==UTRIE_SHIFT &&
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT
) {
- uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
+ //uprv_memcpy(formatVersion, pInfo->formatVersion, 4);
uprv_memcpy(dataVersion, pInfo->dataVersion, 4);
return TRUE;
} else {
usprep_internal_flushCache(UBool noRefCount){
UStringPrepProfile *profile = NULL;
UStringPrepKey *key = NULL;
- int32_t pos = -1;
+ int32_t pos = UHASH_FIRST;
int32_t deletedNum = 0;
const UHashElement *e;
SHARED_DATA_HASHTABLE = NULL;
}
}
-
- umtx_destroy(&usprepMutex); /* Don't worry about destroying the mutex even */
- /* if the hash table still exists. The mutex */
- /* will lazily re-init itself if needed. */
+ gSharedDataInitOnce.reset();
return (SHARED_DATA_HASHTABLE == NULL);
}
U_CDECL_END
-static void
-usprep_init() {
- umtx_init(&usprepMutex);
-}
/** Initializes the cache for resources */
+/*
+ * Init-once callback (run through umtx_initOnce from initCache).
+ * Opens the shared profile hashtable using hashEntry/compareEntries and
+ * publishes it in SHARED_DATA_HASHTABLE; the global is left NULL when
+ * uhash_open reports a failure in `status`.
+ * usprep_cleanup is registered under UCLN_COMMON_USPREP whether or not
+ * the open succeeded.
+ */
+static void U_CALLCONV
+createCache(UErrorCode &status) {
+    UHashtable *cache = uhash_open(hashEntry, compareEntries, NULL, &status);
+    SHARED_DATA_HASHTABLE = U_FAILURE(status) ? NULL : cache;
+    ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
+}
+
static void
initCache(UErrorCode *status) {
- UBool makeCache = FALSE;
- umtx_lock(&usprepMutex);
- makeCache = (SHARED_DATA_HASHTABLE == NULL);
- umtx_unlock(&usprepMutex);
- if(makeCache) {
- UHashtable *newCache = uhash_open(hashEntry, compareEntries, NULL, status);
- if (U_SUCCESS(*status)) {
- umtx_lock(&usprepMutex);
- if(SHARED_DATA_HASHTABLE == NULL) {
- SHARED_DATA_HASHTABLE = newCache;
- ucln_common_registerCleanup(UCLN_COMMON_USPREP, usprep_cleanup);
- newCache = NULL;
- }
- umtx_unlock(&usprepMutex);
- if(newCache != NULL) {
- uhash_close(newCache);
- }
- }
- }
+ umtx_initOnce(gSharedDataInitOnce, &createCache, *status); /* cache construction is delegated to createCache(), gated by gSharedDataInitOnce; status is dereferenced here, so callers must pass a non-NULL pointer */
}
static UBool U_CALLCONV
/* initialize some variables */
profile->mappingData=(uint16_t *)((uint8_t *)(p+_SPREP_INDEX_TOP)+profile->indexes[_SPREP_INDEX_TRIE_SIZE]);
- unorm_getUnicodeVersion(&normUnicodeVersion, errorCode);
+ u_getUnicodeVersion(normUnicodeVersion);
normUniVer = (normUnicodeVersion[0] << 24) + (normUnicodeVersion[1] << 16) +
(normUnicodeVersion[2] << 8 ) + (normUnicodeVersion[3]);
sprepUniVer = (dataVersion[0] << 24) + (dataVersion[1] << 16) +
/* fetch the data from the cache */
umtx_lock(&usprepMutex);
profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
+ if(profile != NULL) {
+ profile->refCount++;
+ }
umtx_unlock(&usprepMutex);
- if(profile == NULL){
- UStringPrepKey* key = (UStringPrepKey*) uprv_malloc(sizeof(UStringPrepKey));
- if(key == NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
+ if(profile == NULL) {
/* else load the data and put the data in the cache */
- profile = (UStringPrepProfile*) uprv_malloc(sizeof(UStringPrepProfile));
- if(profile == NULL){
+ LocalMemory<UStringPrepProfile> newProfile;
+ if(newProfile.allocateInsteadAndReset() == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(key);
return NULL;
}
- /* initialize the data struct members */
- uprv_memset(profile->indexes,0,sizeof(profile->indexes));
- profile->mappingData = NULL;
- profile->sprepData = NULL;
- profile->refCount = 0;
-
- /* initialize the key memebers */
- key->name = (char*) uprv_malloc(uprv_strlen(name)+1);
- if(key->name == NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(key);
- uprv_free(profile);
+ /* load the data */
+ if(!loadData(newProfile.getAlias(), path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
return NULL;
}
- uprv_strcpy(key->name, name);
-
- key->path=NULL;
-
- if(path != NULL){
- key->path = (char*) uprv_malloc(uprv_strlen(path)+1);
- if(key->path == NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(key->name);
- uprv_free(key);
- uprv_free(profile);
- return NULL;
- }
- uprv_strcpy(key->path, path);
- }
-
- /* load the data */
- if(!loadData(profile, path, name, _SPREP_DATA_TYPE, status) || U_FAILURE(*status) ){
- uprv_free(key->path);
- uprv_free(key->name);
- uprv_free(key);
- uprv_free(profile);
+ /* get the options */
+ newProfile->doNFKC = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
+ newProfile->checkBiDi = (UBool)((newProfile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
+
+ LocalMemory<UStringPrepKey> key;
+ LocalMemory<char> keyName;
+ LocalMemory<char> keyPath;
+ if( key.allocateInsteadAndReset() == NULL ||
+ keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL ||
+ (path != NULL &&
+ keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL)
+ ) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ usprep_unload(newProfile.getAlias());
return NULL;
}
-
- /* get the options */
- profile->doNFKC = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_NORMALIZATION_ON) > 0);
- profile->checkBiDi = (UBool)((profile->indexes[_SPREP_OPTIONS] & _SPREP_CHECK_BIDI_ON) > 0);
- if(profile->checkBiDi) {
- profile->bdp = ubidi_getSingleton(status);
- if(U_FAILURE(*status)) {
- usprep_unload(profile);
- uprv_free(key->path);
- uprv_free(key->name);
- uprv_free(key);
- uprv_free(profile);
- return NULL;
- }
- } else {
- profile->bdp = NULL;
- }
-
umtx_lock(&usprepMutex);
- /* add the data object to the cache */
- uhash_put(SHARED_DATA_HASHTABLE, key, profile, status);
+ // If another thread already inserted the same key/value, refcount and cleanup our thread data
+ profile = (UStringPrepProfile*) (uhash_get(SHARED_DATA_HASHTABLE,&stackKey));
+ if(profile != NULL) {
+ profile->refCount++;
+ usprep_unload(newProfile.getAlias());
+ }
+ else {
+ /* initialize the key members */
+ key->name = keyName.orphan();
+ uprv_strcpy(key->name, name);
+ if(path != NULL){
+ key->path = keyPath.orphan();
+ uprv_strcpy(key->path, path);
+ }
+ profile = newProfile.orphan();
+
+ /* add the data object to the cache */
+ profile->refCount = 1;
+ uhash_put(SHARED_DATA_HASHTABLE, key.orphan(), profile, status);
+ }
umtx_unlock(&usprepMutex);
}
- umtx_lock(&usprepMutex);
- /* increment the refcount */
- profile->refCount++;
- umtx_unlock(&usprepMutex);
return profile;
}
if(status == NULL || U_FAILURE(*status)){
return NULL;
}
- /* initialize the mutex */
- usprep_init();
/* initialize the profile struct members */
return usprep_getProfile(path,name,status);
}
+/*
+ * Opens a StringPrep profile selected by enum value rather than by name.
+ * `type` is used as an index into PROFILE_NAMES and the resulting name is
+ * handed to usprep_open() with a NULL path.
+ * Returns NULL without touching anything when `status` is NULL or already
+ * holds a failure; sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL when
+ * `type` falls outside the PROFILE_NAMES table.
+ */
+U_CAPI UStringPrepProfile* U_EXPORT2
+usprep_openByType(UStringPrepProfileType type,
+                  UErrorCode* status) {
+    if(status == NULL || U_FAILURE(*status)){
+        return NULL;
+    }
+    const int32_t profileIndex = (int32_t)type;
+    const UBool inRange = (UBool)(0 <= profileIndex && profileIndex < UPRV_LENGTHOF(PROFILE_NAMES));
+    if (inRange) {
+        return usprep_open(NULL, PROFILE_NAMES[profileIndex], status);
+    }
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    return NULL;
+}
+
U_CAPI void U_EXPORT2
usprep_close(UStringPrepProfile* profile){
if(profile==NULL){
parseError->line = 0 ; // we are not using line numbers
// for pre-context
- int32_t start = (pos <=U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
+ int32_t start = (pos < U_PARSE_CONTEXT_LEN)? 0 : (pos - (U_PARSE_CONTEXT_LEN-1));
int32_t limit = pos;
u_memcpy(parseError->preContext,rules+start,limit-start);
return type;
}
-
-
+// TODO: change to writing to UnicodeString not UChar *
static int32_t
usprep_map( const UStringPrepProfile* profile,
const UChar* src, int32_t srcLength,
return u_terminateUChars(dest, destCapacity, destIndex, status);
}
-
-static int32_t
-usprep_normalize( const UChar* src, int32_t srcLength,
- UChar* dest, int32_t destCapacity,
- UErrorCode* status ){
- /*
- * Option UNORM_BEFORE_PRI_29:
- *
- * IDNA as interpreted by IETF members (see unicode mailing list 2004H1)
- * requires strict adherence to Unicode 3.2 normalization,
- * including buggy composition from before fixing Public Review Issue #29.
- * Note that this results in some valid but nonsensical text to be
- * either corrupted or rejected, depending on the text.
- * See http://www.unicode.org/review/resolved-pri.html#pri29
- * See unorm.cpp and cnormtst.c
- */
- return unorm_normalize(
- src, srcLength,
- UNORM_NFKC, UNORM_UNICODE_3_2|UNORM_BEFORE_PRI_29,
- dest, destCapacity,
- status);
-}
-
-
- /*
+/*
1) Map -- For each character in the input, check if it has a mapping
and, if so, replace it with its mapping.
character MUST be the first character of the string, and a
RandALCat character MUST be the last character of the string.
*/
-
-#define MAX_STACK_BUFFER_SIZE 300
-
-
U_CAPI int32_t U_EXPORT2
usprep_prepare( const UStringPrepProfile* profile,
const UChar* src, int32_t srcLength,
UErrorCode* status ){
// check error status
- if(status == NULL || U_FAILURE(*status)){
+ if(U_FAILURE(*status)){
return 0;
}
-
+
//check arguments
- if(profile==NULL || src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
+ if(profile==NULL ||
+ (src==NULL ? srcLength!=0 : srcLength<-1) ||
+ (dest==NULL ? destCapacity!=0 : destCapacity<0)) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
- UChar b1Stack[MAX_STACK_BUFFER_SIZE], b2Stack[MAX_STACK_BUFFER_SIZE];
- UChar *b1 = b1Stack, *b2 = b2Stack;
- int32_t b1Len, b2Len=0,
- b1Capacity = MAX_STACK_BUFFER_SIZE ,
- b2Capacity = MAX_STACK_BUFFER_SIZE;
- uint16_t result;
- int32_t b2Index = 0;
- UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
- UBool leftToRight=FALSE, rightToLeft=FALSE;
- int32_t rtlPos =-1, ltrPos =-1;
-
//get the string length
- if(srcLength == -1){
+ if(srcLength < 0){
srcLength = u_strlen(src);
}
// map
- b1Len = usprep_map(profile, src, srcLength, b1, b1Capacity, options, parseError, status);
+ UnicodeString s1;
+ UChar *b1 = s1.getBuffer(srcLength);
+ if(b1==NULL){
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return 0;
+ }
+ int32_t b1Len = usprep_map(profile, src, srcLength,
+ b1, s1.getCapacity(), options, parseError, status);
+ s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
if(*status == U_BUFFER_OVERFLOW_ERROR){
// redo processing of string
/* we do not have enough room so grow the buffer*/
- b1 = (UChar*) uprv_malloc(b1Len * U_SIZEOF_UCHAR);
+ b1 = s1.getBuffer(b1Len);
if(b1==NULL){
*status = U_MEMORY_ALLOCATION_ERROR;
- goto CLEANUP;
+ return 0;
}
*status = U_ZERO_ERROR; // reset error
-
- b1Len = usprep_map(profile, src, srcLength, b1, b1Len, options, parseError, status);
-
+ b1Len = usprep_map(profile, src, srcLength,
+ b1, s1.getCapacity(), options, parseError, status);
+ s1.releaseBuffer(U_SUCCESS(*status) ? b1Len : 0);
+ }
+ if(U_FAILURE(*status)){
+ return 0;
}
// normalize
- if(profile->doNFKC == TRUE){
- b2Len = usprep_normalize(b1,b1Len, b2,b2Capacity,status);
-
- if(*status == U_BUFFER_OVERFLOW_ERROR){
- // redo processing of string
- /* we do not have enough room so grow the buffer*/
- b2 = (UChar*) uprv_malloc(b2Len * U_SIZEOF_UCHAR);
- if(b2==NULL){
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto CLEANUP;
- }
-
- *status = U_ZERO_ERROR; // reset error
-
- b2Len = usprep_normalize(b1,b1Len, b2,b2Len,status);
-
+ UnicodeString s2;
+ if(profile->doNFKC){
+ const Normalizer2 *n2 = Normalizer2::getNFKCInstance(*status);
+ FilteredNormalizer2 fn2(*n2, *uniset_getUnicode32Instance(*status));
+ if(U_FAILURE(*status)){
+ return 0;
}
-
+ fn2.normalize(s1, s2, *status);
}else{
- b2 = b1;
- b2Len = b1Len;
+ s2.fastCopyFrom(s1);
}
-
-
if(U_FAILURE(*status)){
- goto CLEANUP;
+ return 0;
}
- UChar32 ch;
- UStringPrepType type;
- int16_t value;
- UBool isIndex;
-
// Prohibit and checkBiDi in one pass
- for(b2Index=0; b2Index<b2Len;){
-
- ch = 0;
+ const UChar *b2 = s2.getBuffer();
+ int32_t b2Len = s2.length();
+ UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
+ UBool leftToRight=FALSE, rightToLeft=FALSE;
+ int32_t rtlPos =-1, ltrPos =-1;
+ for(int32_t b2Index=0; b2Index<b2Len;){
+ UChar32 ch = 0;
U16_NEXT(b2, b2Index, b2Len, ch);
+ uint16_t result;
UTRIE_GET16(&profile->sprepTrie,ch,result);
-
- type = getValues(result, value, isIndex);
+
+ int16_t value;
+ UBool isIndex;
+ UStringPrepType type = getValues(result, value, isIndex);
if( type == USPREP_PROHIBITED ||
((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/)
){
*status = U_STRINGPREP_PROHIBITED_ERROR;
uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError);
- goto CLEANUP;
+ return 0;
}
if(profile->checkBiDi) {
- direction = ubidi_getClass(profile->bdp, ch);
+ direction = ubidi_getClass(ch);
if(firstCharDir == U_CHAR_DIRECTION_COUNT){
firstCharDir = direction;
}
rtlPos = b2Index-1;
}
}
- }
+ }
if(profile->checkBiDi == TRUE){
// satisfy 2
if( leftToRight == TRUE && rightToLeft == TRUE){
*status = U_STRINGPREP_CHECK_BIDI_ERROR;
uprv_syntaxError(b2,(rtlPos>ltrPos) ? rtlPos : ltrPos, b2Len, parseError);
- goto CLEANUP;
+ return 0;
}
//satisfy 3
return FALSE;
}
}
- if(b2Len>0 && b2Len <= destCapacity){
- uprv_memmove(dest,b2, b2Len*U_SIZEOF_UCHAR);
- }
-
-CLEANUP:
- if(b1!=b1Stack){
- uprv_free(b1);
- b1=NULL;
- }
-
- if(b2!=b1Stack && b2!=b2Stack && b2!=b1 /* b1 should not be freed twice */){
- uprv_free(b2);
- b2=NULL;
- }
- return u_terminateUChars(dest, destCapacity, b2Len, status);
+ return s2.extract(dest, destCapacity, *status);
}
/* swap the uint16_t mappingTable[] */
count=indexes[_SPREP_INDEX_MAPPING_DATA_SIZE];
ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
- offset+=count;
+ //offset+=count;
}
return headerSize+size;