[apple/icu.git] / icuSources / common / uset.cpp

/*
*******************************************************************************
*
*   Copyright (C) 2002-2006, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uset.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2002mar07
*   created by: Markus W. Scherer
*
*   The serialized structure, the array of range limits, is
*   the same as in UnicodeSet, except that the HIGH value is not stored.
*
*   There are functions to efficiently serialize a USet into an array of uint16_t
*   and functions to use such a serialized form efficiently without
*   instantiating a new USet.
*/

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/uset.h"
#include "unicode/uniset.h"
#include "cmemory.h"
#include "unicode/ustring.h"
#include "unicode/parsepos.h"

/*
 * Allocate a new UnicodeSet constructed from (start, end) and hand it
 * back through the opaque USet handle type.
 */
U_CAPI USet* U_EXPORT2
uset_open(UChar32 start, UChar32 end) {
    UnicodeSet* const created = new UnicodeSet(start, end);
    return (USet*) created;
}

/*
 * Destroy the UnicodeSet behind the opaque handle.
 */
U_CAPI void U_EXPORT2
uset_close(USet* set) {
    UnicodeSet* const doomed = (UnicodeSet*) set;
    delete doomed;
}

/*
 * Forward to UnicodeSet::set(start, end) on the wrapped set.
 * The call is explicitly class-qualified, so no virtual dispatch is involved.
 */
U_CAPI void U_EXPORT2
uset_set(USet* set,
         UChar32 start, UChar32 end) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::set(start, end);
}

/*
 * Forward to UnicodeSet::addAll(const UnicodeSet&), passing the set
 * wrapped by additionalSet as the argument.
 */
U_CAPI void U_EXPORT2
uset_addAll(USet* set, const USet *additionalSet) {
    UnicodeSet& target = *(UnicodeSet*) set;
    const UnicodeSet& other = *(const UnicodeSet*) additionalSet;
    target.UnicodeSet::addAll(other);
}

/*
 * Forward to UnicodeSet::add(c) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_add(USet* set, UChar32 c) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::add(c);
}

/*
 * Forward to the two-argument UnicodeSet::add(start, end) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_addRange(USet* set, UChar32 start, UChar32 end) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::add(start, end);
}

/*
 * Build a UnicodeString from (str, strLen) and pass it to
 * UnicodeSet::add(const UnicodeString&) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_addString(USet* set, const UChar* str, int32_t strLen) {
    // Do NOT use the aliasing constructor here, i.e.
    //     UnicodeString s(strLen==-1, str, strLen);
    // An alias would stay aliased even after copying.
    // TODO: do we need a copy ctor that unaliases

    // The (text, length) constructor handles strLen==-1 itself.
    UnicodeString copied(str, strLen);
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::add(copied);
}

/*
 * Build a UnicodeString from (str, strLen) and pass it to
 * UnicodeSet::addAll(const UnicodeString&) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {
    // The (text, length) constructor handles strLen==-1 itself.
    UnicodeString text(str, strLen);
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::addAll(text);
}

/*
 * Forward to UnicodeSet::remove(c) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_remove(USet* set, UChar32 c) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::remove(c);
}

/*
 * Forward to the two-argument UnicodeSet::remove(start, end) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_removeRange(USet* set, UChar32 start, UChar32 end) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::remove(start, end);
}

/*
 * Wrap (str, strLen) in a UnicodeString and pass it to
 * UnicodeSet::remove(const UnicodeString&) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_removeString(USet* set, const UChar* str, int32_t strLen) {
    // Three-argument constructor; the first argument is true when strLen==-1.
    UnicodeString wrapped(strLen==-1, str, strLen);
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::remove(wrapped);
}

/*
 * Forward to UnicodeSet::removeAll(const UnicodeSet&), passing the set
 * wrapped by `remove` as the argument.
 */
U_CAPI void U_EXPORT2
uset_removeAll(USet* set, const USet* remove) {
    UnicodeSet& target = *(UnicodeSet*) set;
    const UnicodeSet& other = *(const UnicodeSet*) remove;
    target.UnicodeSet::removeAll(other);
}

/*
 * Forward to UnicodeSet::retain(start, end) on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_retain(USet* set, UChar32 start, UChar32 end) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::retain(start, end);
}

/*
 * Forward to UnicodeSet::retainAll(const UnicodeSet&), passing the set
 * wrapped by `retain` as the argument.
 */
U_CAPI void U_EXPORT2
uset_retainAll(USet* set, const USet* retain) {
    UnicodeSet& target = *(UnicodeSet*) set;
    const UnicodeSet& other = *(const UnicodeSet*) retain;
    target.UnicodeSet::retainAll(other);
}

/*
 * Forward to UnicodeSet::compact() on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_compact(USet* set) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::compact();
}

/*
 * Forward to the no-argument UnicodeSet::complement() on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_complement(USet* set) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::complement();
}

/*
 * Forward to UnicodeSet::complementAll(const UnicodeSet&), passing the set
 * wrapped by `complement` as the argument.
 */
U_CAPI void U_EXPORT2
uset_complementAll(USet* set, const USet* complement) {
    UnicodeSet& target = *(UnicodeSet*) set;
    const UnicodeSet& other = *(const UnicodeSet*) complement;
    target.UnicodeSet::complementAll(other);
}

/*
 * Forward to UnicodeSet::clear() on the wrapped set.
 */
U_CAPI void U_EXPORT2
uset_clear(USet* set) {
    UnicodeSet& target = *(UnicodeSet*) set;
    target.UnicodeSet::clear();
}

/*
 * Return the result of UnicodeSet::isEmpty() on the wrapped set.
 */
U_CAPI UBool U_EXPORT2
uset_isEmpty(const USet* set) {
    const UnicodeSet& source = *(const UnicodeSet*) set;
    return source.UnicodeSet::isEmpty();
}

/*
 * Return the result of UnicodeSet::contains(c) on the wrapped set.
 */
U_CAPI UBool U_EXPORT2
uset_contains(const USet* set, UChar32 c) {
    const UnicodeSet& source = *(const UnicodeSet*) set;
    return source.UnicodeSet::contains(c);
}

/*
 * Return the result of the two-argument UnicodeSet::contains(start, end)
 * on the wrapped set.
 */
U_CAPI UBool U_EXPORT2
uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
    const UnicodeSet& source = *(const UnicodeSet*) set;
    return source.UnicodeSet::contains(start, end);
}

/*
 * Wrap (str, strLen) in a UnicodeString and return the result of
 * UnicodeSet::contains(const UnicodeString&) on the wrapped set.
 */
U_CAPI UBool U_EXPORT2
uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
    // Three-argument constructor; the first argument is true when strLen==-1.
    UnicodeString wrapped(strLen==-1, str, strLen);
    const UnicodeSet& source = *(const UnicodeSet*) set;
    return source.UnicodeSet::contains(wrapped);
}

/*
 * Return the result of UnicodeSet::containsAll(const UnicodeSet&) called
 * on the set behind set1 with the set behind set2 as argument.
 */
U_CAPI UBool U_EXPORT2
uset_containsAll(const USet* set1, const USet* set2) {
    const UnicodeSet& container = *(const UnicodeSet*) set1;
    const UnicodeSet& candidate = *(const UnicodeSet*) set2;
    return container.UnicodeSet::containsAll(candidate);
}

/*
 * Wrap (str, strLen) in a UnicodeString and return the result of
 * UnicodeSet::containsAll(const UnicodeString&) on the wrapped set.
 */
U_CAPI UBool U_EXPORT2
uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {
    // A string alias is sufficient here because nothing is added to the set.
    UnicodeString alias(strLen==-1, str, strLen);
    const UnicodeSet& source = *(const UnicodeSet*) set;
    return source.UnicodeSet::containsAll(alias);
}

/*
 * Return the result of UnicodeSet::containsNone(const UnicodeSet&) called
 * on the set behind set1 with the set behind set2 as argument.
 */
U_CAPI UBool U_EXPORT2
uset_containsNone(const USet* set1, const USet* set2) {
    const UnicodeSet& container = *(const UnicodeSet*) set1;
    const UnicodeSet& candidate = *(const UnicodeSet*) set2;
    return container.UnicodeSet::containsNone(candidate);
}

/*
 * Return the result of UnicodeSet::containsSome(const UnicodeSet&) called
 * on the set behind set1 with the set behind set2 as argument.
 */
U_CAPI UBool U_EXPORT2
uset_containsSome(const USet* set1, const USet* set2) {
    const UnicodeSet& container = *(const UnicodeSet*) set1;
    const UnicodeSet& candidate = *(const UnicodeSet*) set2;
    return container.UnicodeSet::containsSome(candidate);
}

/*
 * Compare the two wrapped sets with UnicodeSet's operator==.
 */
U_CAPI UBool U_EXPORT2
uset_equals(const USet* set1, const USet* set2) {
    const UnicodeSet& left = *(const UnicodeSet*)set1;
    const UnicodeSet& right = *(const UnicodeSet*)set2;
    return left == right;
}

/*
 * Return the result of UnicodeSet::indexOf(c) on the wrapped set.
 */
U_CAPI int32_t U_EXPORT2
uset_indexOf(const USet* set, UChar32 c) {
    // The cast drops const from the handle, exactly as the call needs a UnicodeSet*.
    UnicodeSet* const source = (UnicodeSet*) set;
    return source->UnicodeSet::indexOf(c);
}

/*
 * Return the result of UnicodeSet::charAt(index) on the wrapped set.
 */
U_CAPI UChar32 U_EXPORT2
uset_charAt(const USet* set, int32_t index) {
    // The cast drops const from the handle, exactly as the call needs a UnicodeSet*.
    UnicodeSet* const source = (UnicodeSet*) set;
    return source->UnicodeSet::charAt(index);
}

/*
 * Return the result of UnicodeSet::size() on the wrapped set.
 */
U_CAPI int32_t U_EXPORT2
uset_size(const USet* set) {
    const UnicodeSet& source = *(const UnicodeSet*) set;
    return source.UnicodeSet::size();
}

U_NAMESPACE_BEGIN
/**
 * Accessor shim: its only job is to reach the private USet support API
 * of UnicodeSet.  A friend class is used because declaring a class a
 * friend is more portable than trying to declare extern "C" functions
 * as friends.
 */
class USetAccess /* not : public UObject because all methods are static */ {
private:
    /* never instantiated: declared but not defined */
    USetAccess();

public:
    /* Kept inline in the hope that the compiler folds these away. */

    /* Forwards to UnicodeSet::getStringCount(). */
    static inline int32_t getStringCount(const UnicodeSet& source) {
        return source.getStringCount();
    }

    /* Forwards to UnicodeSet::getString(index). */
    static inline const UnicodeString* getString(const UnicodeSet& source,
                                                 int32_t index) {
        return source.getString(index);
    }
};
U_NAMESPACE_END

/*
 * Return the number of items in the set: its range count plus its
 * string count.
 */
U_CAPI int32_t U_EXPORT2
uset_getItemCount(const USet* uset) {
    const UnicodeSet* const source = (const UnicodeSet*)uset;
    const int32_t ranges = source->getRangeCount();
    const int32_t strings = USetAccess::getStringCount(*source);
    return ranges + strings;
}

/*
 * Fetch item number itemIndex of the set.  Items are numbered with all
 * ranges first (indexes 0..rangeCount-1), followed by the strings.
 *
 * - If *ec already indicates failure: returns 0 and does nothing.
 * - itemIndex < 0: sets U_ILLEGAL_ARGUMENT_ERROR, returns -1.
 * - Range item: writes the range boundaries to *start and *end, returns 0.
 * - String item: extracts the string into str/strCapacity and returns the
 *   value of UnicodeString::extract() (presumably the string length --
 *   confirm against the UnicodeString documentation).
 * - itemIndex past the last string: sets U_INDEX_OUTOFBOUNDS_ERROR,
 *   returns -1.
 */
U_CAPI int32_t U_EXPORT2
uset_getItem(const USet* uset, int32_t itemIndex,
             UChar32* start, UChar32* end,
             UChar* str, int32_t strCapacity,
             UErrorCode* ec) {
    if (U_FAILURE(*ec)) {
        return 0;
    }
    if (itemIndex < 0) {
        *ec = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

    const UnicodeSet& source = *(const UnicodeSet*)uset;
    const int32_t ranges = source.getRangeCount();
    if (itemIndex < ranges) {
        /* a code point range */
        *start = source.getRangeStart(itemIndex);
        *end = source.getRangeEnd(itemIndex);
        return 0;
    }

    /* strings are numbered after the ranges */
    const int32_t stringIndex = itemIndex - ranges;
    if (stringIndex >= USetAccess::getStringCount(source)) {
        *ec = U_INDEX_OUTOFBOUNDS_ERROR;
        return -1;
    }
    const UnicodeString* const item = USetAccess::getString(source, stringIndex);
    return item->extract(str, strCapacity, *ec);
}

//U_CAPI int32_t U_EXPORT2
//uset_getRangeCount(const USet* set) {
//    return ((const UnicodeSet*) set)->getRangeCount();
//}
//
//U_CAPI UBool U_EXPORT2
//uset_getRange(const USet* set, int32_t rangeIndex,
//              UChar32* pStart, UChar32* pEnd) {
//    if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
//        return FALSE;
//    }
//    const UnicodeSet* us = (const UnicodeSet*) set;
//    *pStart = us->getRangeStart(rangeIndex);
//    *pEnd = us->getRangeEnd(rangeIndex);
//    return TRUE;
//}

/*
 * Create a new set holding U+0009..U+000D, U+0020, U+0085,
 * U+200E..U+200F and U+2028..U+2029 (per the comment below, the
 * Pattern_White_Space characters).
 *
 * Returns NULL without doing anything if *ec already indicates failure.
 * Returns NULL and sets *ec to U_MEMORY_ALLOCATION_ERROR if the set
 * cannot be allocated.  Otherwise returns the new set as a USet handle.
 */
U_CAPI USet* U_EXPORT2
uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
    if(U_FAILURE(*ec)) {
        return NULL;
    }
    // create a set with the Pattern_White_Space characters,
    // without a pattern for fewer code dependencies
    UnicodeSet *set=new UnicodeSet(9, 0xd);
    // The allocation may yield NULL instead of throwing; do not
    // dereference the result before checking it.
    if(set==NULL) {
        *ec=U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
    return (USet *)set;
}

/*
 * Serialize a USet into 16-bit units.
 * Store BMP code points as themselves with one 16-bit unit each.
 *
 * Important: the code points in the array are in ascending order,
 * therefore all BMP code points precede all supplementary code points.
 *
 * Store each supplementary code point in 2 16-bit units,
 * simply with higher-then-lower 16-bit halves.
 *
 * Precede the entire list with the length.
 * If there are supplementary code points, then set bit 15 in the length
 * and add the bmpLength between it and the array.
 *
 * In other words:
 * - all BMP:            (length=bmpLength) BMP, .., BMP
 * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
 */
/*
 * Returns 0 if ec is NULL or *ec already indicates failure; otherwise
 * returns the result of UnicodeSet::serialize(dest, destCapacity, *ec)
 * on the wrapped set.
 */
U_CAPI int32_t U_EXPORT2
uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
    if (ec!=NULL && !U_FAILURE(*ec)) {
        const UnicodeSet& source = *(const UnicodeSet*) set;
        return source.UnicodeSet::serialize(dest, destCapacity, *ec);
    }
    return 0;
}

/*
 * Point fillSet at the serialized set stored in src[0..srcLength-1].
 *
 * The first unit is the length; if its bit 15 is set, the low 15 bits are
 * the length and a second header unit holds bmpLength.  Otherwise
 * bmpLength equals the length.
 *
 * Returns FALSE if fillSet is NULL.  Returns FALSE, with fillSet->length
 * and fillSet->bmpLength set to 0, if src is NULL, srcLength<=0, or
 * srcLength is too small for the header plus the declared length.
 * On success fillSet->array points into src (no copy is made) and TRUE
 * is returned.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
    if(fillSet==NULL) {
        return FALSE;
    }

    UBool usable = (UBool)(src!=NULL && srcLength>0);
    int32_t limitCount = 0;
    UBool hasSupplementary = FALSE;

    if(usable) {
        limitCount = *src++;
        hasSupplementary = (UBool)((limitCount&0x8000)!=0);
        if(hasSupplementary) {
            limitCount &= 0x7fff;
        }
        /* one header unit, or two when a separate bmpLength follows */
        const int32_t headerUnits = hasSupplementary ? 2 : 1;
        if(srcLength < (headerUnits+limitCount)) {
            usable = FALSE;
        }
    }

    if(!usable) {
        fillSet->length = fillSet->bmpLength = 0;
        return FALSE;
    }

    if(hasSupplementary) {
        /* there are supplementary values: bmpLength is stored explicitly */
        fillSet->bmpLength = *src++;
    } else {
        /* only BMP values */
        fillSet->bmpLength = limitCount;
    }
    fillSet->array = src;
    fillSet->length = limitCount;
    return TRUE;
}

/*
 * Fill fillSet with the serialized form of the single code point c,
 * using fillSet's own staticArray as storage.
 *
 * Does nothing if fillSet is NULL or c is outside 0..0x10ffff.
 *
 * The array holds the range limits c and c+1.  BMP limits take one unit,
 * supplementary limits two (high half first); the limit 0x110000 that
 * would follow c==0x10ffff is not stored.
 */
U_CAPI void U_EXPORT2
uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
    if(fillSet==NULL || (uint32_t)c>0x10ffff) {
        return;
    }

    fillSet->array = fillSet->staticArray;

    if(c>=0x10000) {
        /* start limit is supplementary: no BMP units at all */
        fillSet->bmpLength = 0;
        fillSet->staticArray[0] = (uint16_t)(c>>16);
        fillSet->staticArray[1] = (uint16_t)c;
        if(c==0x10ffff) {
            /* the end limit would be 0x110000, which is left out */
            fillSet->length = 2;
        } else {
            const UChar32 limit = c+1;
            fillSet->length = 4;
            fillSet->staticArray[2] = (uint16_t)(limit>>16);
            fillSet->staticArray[3] = (uint16_t)limit;
        }
    } else if(c==0xffff) {
        /* BMP start limit 0xffff, supplementary end limit 0x10000 */
        fillSet->bmpLength = 1;
        fillSet->length = 3;
        fillSet->staticArray[0] = 0xffff;
        fillSet->staticArray[1] = 1;
        fillSet->staticArray[2] = 0;
    } else {
        /* both limits fit into the BMP */
        fillSet->bmpLength = 2;
        fillSet->length = 2;
        fillSet->staticArray[0] = (uint16_t)c;
        fillSet->staticArray[1] = (uint16_t)(c+1);
    }
}

/*
 * Test whether the serialized set contains code point c.
 *
 * The array is a sorted list of range limits (start, limit, start, ...):
 * bmpLength single-unit BMP limits followed by two-unit supplementary
 * limits (high half, low half).  c is contained when the number of
 * limits that are <= c is odd.
 *
 * Returns FALSE if set is NULL or c is outside 0..0x10ffff.
 *
 * An empty BMP part or an empty supplementary part is handled up front:
 * the searches below index the first and last limit of the part, which
 * would read outside the array (and could make the binary search loop
 * forever on a negative upper bound) if the part had no entries.
 */
U_CAPI UBool U_EXPORT2
uset_serializedContains(const USerializedSet* set, UChar32 c) {
    const uint16_t* array;

    if(set==NULL || (uint32_t)c>0x10ffff) {
        return FALSE;
    }

    array=set->array;
    if(c<=0xffff) {
        /* find c in the BMP part */
        int32_t lo = 0;
        int32_t hi = set->bmpLength-1;
        if (hi < 0) {
            /* no BMP limits: c precedes every limit, so it is not contained */
            return FALSE;
        }
        if (c < array[0]) {
            hi = 0;
        } else if (c < array[hi]) {
            /* binary search; invariant: array[lo] <= c < array[hi] */
            for(;;) {
                int32_t i = (lo + hi) >> 1;
                if (i == lo) {
                    break;  // Done!
                } else if (c < array[i]) {
                    hi = i;
                } else {
                    lo = i;
                }
            }
        } else {
            /* c is at or beyond the last BMP limit */
            hi += 1;
        }
        /* hi is the number of limits <= c */
        return (UBool)(hi&1);
    } else {
        /* find c in the supplementary part */
        uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
        int32_t base = set->bmpLength;
        int32_t lo = 0;
        int32_t hi = set->length - 2 - base;
        if (hi < 0) {
            /*
             * no supplementary limits: c follows all BMP limits, so only
             * their count decides (see the return expression below)
             */
            hi = 0;
        } else if (high < array[base] || (high==array[base] && low<array[base+1])) {
            hi = 0;
        } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) {
            /* binary search over two-unit entries; lo and hi stay even */
            for (;;) {
                int32_t i = ((lo + hi) >> 1) & ~1;  // Guarantee even result
                int32_t iabs = i + base;
                if (i == lo) {
                    break;  // Done!
                } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) {
                    hi = i;
                } else {
                    lo = i;
                }
            }
        } else {
            /* c is at or beyond the last supplementary limit */
            hi += 2;
        }
        /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
        return (UBool)(((hi+(base<<1))&2)!=0);
    }
}

/*
 * Return the number of ranges in the serialized set, or 0 if set is NULL.
 *
 * Each BMP limit takes one unit and each supplementary limit two, so the
 * limit count is bmpLength + (length-bmpLength)/2.  Two limits make one
 * range; the count is rounded up, so a final start limit without a
 * stored end limit still counts as a range.
 */
U_CAPI int32_t U_EXPORT2
uset_getSerializedRangeCount(const USerializedSet* set) {
    if(set==NULL) {
        return 0;
    }

    const int32_t bmpLimits = set->bmpLength;
    const int32_t supplementaryLimits = (set->length-bmpLimits)/2;
    return (bmpLimits+supplementaryLimits+1)/2;
}

/*
 * Fetch range number rangeIndex of the serialized set.
 *
 * On success *pStart receives the first code point of the range, *pEnd
 * the last one (stored limit minus 1), and TRUE is returned.  If the
 * range has no stored end limit, 0x110000 is used as the limit, so *pEnd
 * becomes 0x10ffff.
 *
 * Returns FALSE if set, pStart or pEnd is NULL, if rangeIndex is
 * negative, or if rangeIndex is past the last range.
 */
U_CAPI UBool U_EXPORT2
uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
                        UChar32* pStart, UChar32* pEnd) {
    if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) {
        return FALSE;
    }

    const uint16_t* units = set->array;
    const int32_t bmpCount = set->bmpLength;
    const int32_t totalCount = set->length;
    UChar32 limit;

    /* address start/limit pairs */
    int32_t pos = rangeIndex*2;

    if(pos<bmpCount) {
        /* the range starts in the BMP part */
        *pStart = units[pos];
        ++pos;
        if(pos<bmpCount) {
            /* BMP limit: one unit */
            limit = units[pos];
        } else if(pos<totalCount) {
            /* the limit is the first supplementary entry: two units */
            limit = (((int32_t)units[pos])<<16)|units[pos+1];
        } else {
            /* no stored limit */
            limit = 0x110000;
        }
        *pEnd = limit-1;
        return TRUE;
    }

    /* the range starts in the supplementary part */
    pos -= bmpCount;
    pos *= 2; /* address pairs of pairs of units */
    const int32_t supplementaryCount = totalCount-bmpCount;
    if(pos>=supplementaryCount) {
        return FALSE;
    }

    units += bmpCount;
    *pStart = (((int32_t)units[pos])<<16)|units[pos+1];
    pos += 2;
    if(pos<supplementaryCount) {
        limit = (((int32_t)units[pos])<<16)|units[pos+1];
    } else {
        /* no stored limit */
        limit = 0x110000;
    }
    *pEnd = limit-1;
    return TRUE;
}

// TODO The old, internal uset.c had an efficient uset_containsOne function.
// Returned the one and only code point, or else -1 or something.
// Consider adding such a function to both C and C++ UnicodeSet/uset.
// See tools/gennorm/store.c for usage, now usetContainsOne there.

// TODO Investigate incorporating this code into UnicodeSet to improve
// efficiency.
// ---
// #define USET_GROW_DELTA 20
// 
// static U_INLINE int32_t
// findChar(const UChar32* array, int32_t length, UChar32 c) {
//     int32_t i;
// 
//     /* check the last range limit first for more efficient appending */
//     if(length>0) {
//         if(c>=array[length-1]) {
//             return length;
//         }
// 
//         /* do not check the last range limit again in the loop below */
//         --length;
//     }
// 
//     for(i=0; i<length && c>=array[i]; ++i) {}
//     return i;
// }
// 
// static UBool
// addRemove(USet* set, UChar32 c, int32_t doRemove) {
//     int32_t i, length, more;
// 
//     if(set==NULL || (uint32_t)c>0x10ffff) {
//         return FALSE;
//     }
// 
//     length=set->length;
//     i=findChar(set->array, length, c);
//     if((i&1)^doRemove) {
//         /* c is already in the set */
//         return TRUE;
//     }
// 
//     /* how many more array items do we need? */
//     if(i<length && (c+1)==set->array[i]) {
//         /* c is just before the following range, extend that in-place by one */
//         set->array[i]=c;
//         if(i>0) {
//             --i;
//             if(c==set->array[i]) {
//                 /* the previous range collapsed, remove it */
//                 set->length=length-=2;
//                 if(i<length) {
//                     uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
//                 }
//             }
//         }
//         return TRUE;
//     } else if(i>0 && c==set->array[i-1]) {
//         /* c is just after the previous range, extend that in-place by one */
//         if(++c<=0x10ffff) {
//             set->array[i-1]=c;
//             if(i<length && c==set->array[i]) {
//                 /* the following range collapsed, remove it */
//                 --i;
//                 set->length=length-=2;
//                 if(i<length) {
//                     uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
//                 }
//             }
//         } else {
//             /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
//             set->length=i-1;
//         }
//         return TRUE;
//     } else if(i==length && c==0x10ffff) {
//         /* insert one range limit c */
//         more=1;
//     } else {
//         /* insert two range limits c, c+1 */
//         more=2;
//     }
// 
//     /* insert <more> range limits */
//     if(length+more>set->capacity) {
//         /* reallocate */
//         int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
//         UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
//         if(newArray==NULL) {
//             return FALSE;
//         }
//         set->capacity=newCapacity;
//         uprv_memcpy(newArray, set->array, length*4);
// 
//         if(set->array!=set->staticBuffer) {
//             uprv_free(set->array);
//         }
//         set->array=newArray;
//     }
// 
//     if(i<length) {
//         uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
//     }
//     set->array[i]=c;
//     if(more==2) {
//         set->array[i+1]=c+1;
//     }
//     set->length+=more;
// 
//     return TRUE;
// }
// 
// U_CAPI UBool U_EXPORT2
// uset_add(USet* set, UChar32 c) {
//     return addRemove(set, c, 0);
// }
// 
// U_CAPI void U_EXPORT2
// uset_remove(USet* set, UChar32 c) {
//     addRemove(set, c, 1);
// }
Commit	Line	Data
b75a7d8f A	1	/*
	2	*******************************************************************************
	3	*
73c04bcf	4	* Copyright (C) 2002-2006, International Business Machines
b75a7d8f A	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: uset.c
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:4
	12	*
	13	* created on: 2002mar07
	14	* created by: Markus W. Scherer
	15	*
	16	* The serialized structure, the array of range limits, is
	17	* the same as in UnicodeSet, except that the HIGH value is not stored.
	18	*
	19	* There are functions to efficiently serialize a USet into an array of uint16_t
	20	* and functions to use such a serialized form efficiently without
	21	* instantiating a new USet.
	22	*/
	23
	24	#include "unicode/utypes.h"
	25	#include "unicode/uobject.h"
	26	#include "unicode/uset.h"
	27	#include "unicode/uniset.h"
	28	#include "cmemory.h"
	29	#include "unicode/ustring.h"
374ca955	30	#include "unicode/parsepos.h"
b75a7d8f A	31
	32	U_CAPI USet* U_EXPORT2
	33	uset_open(UChar32 start, UChar32 end) {
	34	return (USet*) new UnicodeSet(start, end);
	35	}
	36
b75a7d8f A	37	U_CAPI void U_EXPORT2
	38	uset_close(USet* set) {
	39	delete (UnicodeSet*) set;
	40	}
	41
374ca955 A	42	U_CAPI void U_EXPORT2
	43	uset_set(USet* set,
	44	UChar32 start, UChar32 end) {
73c04bcf	45	((UnicodeSet*) set)->UnicodeSet::set(start, end);
b75a7d8f A	46	}
	47
	48	U_CAPI void U_EXPORT2
	49	uset_addAll(USet* set, const USet *additionalSet) {
73c04bcf	50	((UnicodeSet) set)->UnicodeSet::addAll(((const UnicodeSet*)additionalSet));
b75a7d8f A	51	}
	52
	53	U_CAPI void U_EXPORT2
	54	uset_add(USet* set, UChar32 c) {
73c04bcf	55	((UnicodeSet*) set)->UnicodeSet::add(c);
b75a7d8f A	56	}
	57
	58	U_CAPI void U_EXPORT2
	59	uset_addRange(USet* set, UChar32 start, UChar32 end) {
73c04bcf	60	((UnicodeSet*) set)->UnicodeSet::add(start, end);
b75a7d8f A	61	}
	62
	63	U_CAPI void U_EXPORT2
	64	uset_addString(USet* set, const UChar* str, int32_t strLen) {
73c04bcf A	65	// WRONG! Do not alias, it will stay aliased, even after
73c04bcf A	66	// copying. TODO: do we need a copy ctor that unaliases
b75a7d8f	67	//UnicodeString s(strLen==-1, str, strLen);
73c04bcf A	68
	69	// UnicodeString handles -1 for strLen
	70	UnicodeString s(str, strLen);
	71	((UnicodeSet*) set)->UnicodeSet::add(s);
	72	}
	73
	74	U_CAPI void U_EXPORT2
	75	uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) {
	76	// UnicodeString handles -1 for strLen
b75a7d8f	77	UnicodeString s(str, strLen);
73c04bcf	78	((UnicodeSet*) set)->UnicodeSet::addAll(s);
b75a7d8f A	79	}
	80
	81	U_CAPI void U_EXPORT2
	82	uset_remove(USet* set, UChar32 c) {
73c04bcf	83	((UnicodeSet*) set)->UnicodeSet::remove(c);
b75a7d8f A	84	}
	85
	86	U_CAPI void U_EXPORT2
	87	uset_removeRange(USet* set, UChar32 start, UChar32 end) {
73c04bcf	88	((UnicodeSet*) set)->UnicodeSet::remove(start, end);
b75a7d8f A	89	}
	90
	91	U_CAPI void U_EXPORT2
	92	uset_removeString(USet* set, const UChar* str, int32_t strLen) {
	93	UnicodeString s(strLen==-1, str, strLen);
73c04bcf	94	((UnicodeSet*) set)->UnicodeSet::remove(s);
b75a7d8f A	95	}
b75a7d8f A	96
374ca955 A	97	U_CAPI void U_EXPORT2
374ca955 A	98	uset_removeAll(USet* set, const USet* remove) {
73c04bcf	99	((UnicodeSet) set)->UnicodeSet::removeAll((const UnicodeSet*)remove);
374ca955 A	100	}
	101
	102	U_CAPI void U_EXPORT2
	103	uset_retain(USet* set, UChar32 start, UChar32 end) {
73c04bcf	104	((UnicodeSet*) set)->UnicodeSet::retain(start, end);
374ca955 A	105	}
	106
	107	U_CAPI void U_EXPORT2
	108	uset_retainAll(USet* set, const USet* retain) {
73c04bcf	109	((UnicodeSet) set)->UnicodeSet::retainAll((const UnicodeSet*)retain);
374ca955 A	110	}
	111
	112	U_CAPI void U_EXPORT2
	113	uset_compact(USet* set) {
73c04bcf	114	((UnicodeSet*) set)->UnicodeSet::compact();
374ca955 A	115	}
374ca955 A	116
b75a7d8f A	117	U_CAPI void U_EXPORT2
b75a7d8f A	118	uset_complement(USet* set) {
73c04bcf	119	((UnicodeSet*) set)->UnicodeSet::complement();
b75a7d8f A	120	}
b75a7d8f A	121
374ca955 A	122	U_CAPI void U_EXPORT2
374ca955 A	123	uset_complementAll(USet* set, const USet* complement) {
73c04bcf	124	((UnicodeSet) set)->UnicodeSet::complementAll((const UnicodeSet*)complement);
374ca955 A	125	}
374ca955 A	126
b75a7d8f A	127	U_CAPI void U_EXPORT2
b75a7d8f A	128	uset_clear(USet* set) {
73c04bcf	129	((UnicodeSet*) set)->UnicodeSet::clear();
b75a7d8f A	130	}
	131
	132	U_CAPI UBool U_EXPORT2
	133	uset_isEmpty(const USet* set) {
73c04bcf	134	return ((const UnicodeSet*) set)->UnicodeSet::isEmpty();
b75a7d8f A	135	}
	136
	137	U_CAPI UBool U_EXPORT2
	138	uset_contains(const USet* set, UChar32 c) {
73c04bcf	139	return ((const UnicodeSet*) set)->UnicodeSet::contains(c);
b75a7d8f A	140	}
	141
	142	U_CAPI UBool U_EXPORT2
	143	uset_containsRange(const USet* set, UChar32 start, UChar32 end) {
73c04bcf	144	return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end);
b75a7d8f A	145	}
	146
	147	U_CAPI UBool U_EXPORT2
	148	uset_containsString(const USet* set, const UChar* str, int32_t strLen) {
	149	UnicodeString s(strLen==-1, str, strLen);
73c04bcf	150	return ((const UnicodeSet*) set)->UnicodeSet::contains(s);
b75a7d8f A	151	}
b75a7d8f A	152
374ca955 A	153	U_CAPI UBool U_EXPORT2
374ca955 A	154	uset_containsAll(const USet* set1, const USet* set2) {
73c04bcf A	155	return ((const UnicodeSet) set1)->UnicodeSet::containsAll( (const UnicodeSet*) set2);
	156	}
	157
	158	U_CAPI UBool U_EXPORT2
	159	uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) {
	160	// Create a string alias, since nothing is being added to the set.
	161	UnicodeString s(strLen==-1, str, strLen);
	162	return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s);
374ca955 A	163	}
	164
	165	U_CAPI UBool U_EXPORT2
	166	uset_containsNone(const USet* set1, const USet* set2) {
73c04bcf	167	return ((const UnicodeSet) set1)->UnicodeSet::containsNone( (const UnicodeSet*) set2);
374ca955 A	168	}
	169
	170	U_CAPI UBool U_EXPORT2
	171	uset_containsSome(const USet* set1, const USet* set2) {
73c04bcf	172	return ((const UnicodeSet) set1)->UnicodeSet::containsSome( (const UnicodeSet*) set2);
374ca955 A	173	}
	174
	175	U_CAPI UBool U_EXPORT2
	176	uset_equals(const USet* set1, const USet* set2) {
	177	return (const UnicodeSet)set1 == (const UnicodeSet)set2;
	178	}
	179
	180	U_CAPI int32_t U_EXPORT2
	181	uset_indexOf(const USet* set, UChar32 c) {
73c04bcf	182	return ((UnicodeSet*) set)->UnicodeSet::indexOf(c);
374ca955 A	183	}
	184
	185	U_CAPI UChar32 U_EXPORT2
	186	uset_charAt(const USet* set, int32_t index) {
73c04bcf	187	return ((UnicodeSet*) set)->UnicodeSet::charAt(index);
374ca955 A	188	}
374ca955 A	189
b75a7d8f A	190	U_CAPI int32_t U_EXPORT2
b75a7d8f A	191	uset_size(const USet* set) {
73c04bcf	192	return ((const UnicodeSet*) set)->UnicodeSet::size();
b75a7d8f A	193	}
	194
	195	U_NAMESPACE_BEGIN
	196	/**
	197	* This class only exists to provide access to the UnicodeSet private
	198	* USet support API. Declaring a class a friend is more portable than
	199	* trying to declare extern "C" functions as friends.
	200	*/
	201	class USetAccess /* not : public UObject because all methods are static */ {
	202	public:
	203	/* Try to have the compiler inline these*/
	204	inline static int32_t getStringCount(const UnicodeSet& set) {
	205	return set.getStringCount();
	206	}
	207	inline static const UnicodeString* getString(const UnicodeSet& set,
	208	int32_t i) {
	209	return set.getString(i);
	210	}
	211	private:
	212	/* do not instantiate*/
	213	USetAccess();
	214	};
	215	U_NAMESPACE_END
	216
	217	U_CAPI int32_t U_EXPORT2
	218	uset_getItemCount(const USet* uset) {
	219	const UnicodeSet& set = (const UnicodeSet)uset;
	220	return set.getRangeCount() + USetAccess::getStringCount(set);
	221	}
	222
	223	U_CAPI int32_t U_EXPORT2
	224	uset_getItem(const USet* uset, int32_t itemIndex,
	225	UChar32* start, UChar32* end,
	226	UChar* str, int32_t strCapacity,
	227	UErrorCode* ec) {
	228	if (U_FAILURE(*ec)) return 0;
	229	const UnicodeSet& set = (const UnicodeSet)uset;
	230	int32_t rangeCount;
	231
	232	if (itemIndex < 0) {
	233	*ec = U_ILLEGAL_ARGUMENT_ERROR;
	234	return -1;
	235	} else if (itemIndex < (rangeCount = set.getRangeCount())) {
	236	*start = set.getRangeStart(itemIndex);
	237	*end = set.getRangeEnd(itemIndex);
	238	return 0;
	239	} else {
	240	itemIndex -= rangeCount;
	241	if (itemIndex < USetAccess::getStringCount(set)) {
	242	const UnicodeString* s = USetAccess::getString(set, itemIndex);
	243	return s->extract(str, strCapacity, *ec);
	244	} else {
	245	*ec = U_INDEX_OUTOFBOUNDS_ERROR;
	246	return -1;
	247	}
	248	}
	249	}
	250
	251	//U_CAPI int32_t U_EXPORT2
	252	//uset_getRangeCount(const USet* set) {
	253	// return ((const UnicodeSet*) set)->getRangeCount();
	254	//}
	255	//
	256	//U_CAPI UBool U_EXPORT2
257	//uset_getRange(const USet* set, int32_t rangeIndex,
258	// UChar32* pStart, UChar32* pEnd) {
259	// if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) {
260	// return FALSE;
261	// }
262	// const UnicodeSet* us = (const UnicodeSet*) set;
263	// *pStart = us->getRangeStart(rangeIndex);
264	// *pEnd = us->getRangeEnd(rangeIndex);
265	// return TRUE;
266	//}
267
73c04bcf A	268	U_CAPI USet* U_EXPORT2
	269	uprv_openRuleWhiteSpaceSet(UErrorCode* ec) {
	270	if(U_FAILURE(*ec)) {
	271	return NULL;
	272	}
	273	// create a set with the Pattern_White_Space characters,
	274	// without a pattern for fewer code dependencies
	275	UnicodeSet *set=new UnicodeSet(9, 0xd);
	276	set->UnicodeSet::add(0x20).add(0x85).add(0x200e, 0x200f).add(0x2028, 0x2029);
	277	return (USet *)set;
	278	}
	279
b75a7d8f A	280	/*
	281	* Serialize a USet into 16-bit units.
	282	* Store BMP code points as themselves with one 16-bit unit each.
	283	*
	284	* Important: the code points in the array are in ascending order,
	285	* therefore all BMP code points precede all supplementary code points.
	286	*
	287	* Store each supplementary code point in 2 16-bit units,
	288	* simply with higher-then-lower 16-bit halfs.
	289	*
	290	* Precede the entire list with the length.
	291	* If there are supplementary code points, then set bit 15 in the length
	292	* and add the bmpLength between it and the array.
	293	*
	294	* In other words:
	295	* - all BMP: (length=bmpLength) BMP, .., BMP
	296	* - some supplementary: (length\|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, ..
	297	*/
	298	U_CAPI int32_t U_EXPORT2
	299	uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) {
	300	if (ec==NULL \|\| U_FAILURE(*ec)) {
	301	return 0;
	302	}
	303
73c04bcf	304	return ((const UnicodeSet) set)->UnicodeSet::serialize(dest, destCapacity, ec);
b75a7d8f A	305	}
	306
	307	U_CAPI UBool U_EXPORT2
	308	uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) {
	309	int32_t length;
	310
	311	if(fillSet==NULL) {
	312	return FALSE;
	313	}
	314	if(src==NULL \|\| srcLength<=0) {
	315	fillSet->length=fillSet->bmpLength=0;
	316	return FALSE;
	317	}
	318
	319	length=*src++;
	320	if(length&0x8000) {
	321	/* there are supplementary values */
	322	length&=0x7fff;
	323	if(srcLength<(2+length)) {
	324	fillSet->length=fillSet->bmpLength=0;
	325	return FALSE;
	326	}
	327	fillSet->bmpLength=*src++;
	328	} else {
	329	/* only BMP values */
	330	if(srcLength<(1+length)) {
	331	fillSet->length=fillSet->bmpLength=0;
	332	return FALSE;
	333	}
	334	fillSet->bmpLength=length;
	335	}
	336	fillSet->array=src;
	337	fillSet->length=length;
	338	return TRUE;
	339	}
	340
	341	U_CAPI void U_EXPORT2
	342	uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) {
	343	if(fillSet==NULL \|\| (uint32_t)c>0x10ffff) {
	344	return;
	345	}
	346
	347	fillSet->array=fillSet->staticArray;
	348	if(c<0xffff) {
	349	fillSet->bmpLength=fillSet->length=2;
	350	fillSet->staticArray[0]=(uint16_t)c;
	351	fillSet->staticArray[1]=(uint16_t)c+1;
	352	} else if(c==0xffff) {
	353	fillSet->bmpLength=1;
	354	fillSet->length=3;
	355	fillSet->staticArray[0]=0xffff;
	356	fillSet->staticArray[1]=1;
	357	fillSet->staticArray[2]=0;
	358	} else if(c<0x10ffff) {
	359	fillSet->bmpLength=0;
	360	fillSet->length=4;
	361	fillSet->staticArray[0]=(uint16_t)(c>>16);
	362	fillSet->staticArray[1]=(uint16_t)c;
	363	++c;
	364	fillSet->staticArray[2]=(uint16_t)(c>>16);
	365	fillSet->staticArray[3]=(uint16_t)c;
	366	} else /* c==0x10ffff */ {
	367	fillSet->bmpLength=0;
	368	fillSet->length=2;
369	fillSet->staticArray[0]=0x10;
370	fillSet->staticArray[1]=0xffff;
371	}
372	}
373
374	U_CAPI UBool U_EXPORT2
375	uset_serializedContains(const USerializedSet* set, UChar32 c) {
376	const uint16_t* array;
377
378	if(set==NULL \|\| (uint32_t)c>0x10ffff) {
379	return FALSE;
380	}
381
382	array=set->array;
383	if(c<=0xffff) {
384	/* find c in the BMP part */
73c04bcf A	385	int32_t lo = 0;
	386	int32_t hi = set->bmpLength-1;
	387	if (c < array[0]) {
	388	hi = 0;
	389	} else if (c < array[hi]) {
	390	for(;;) {
	391	int32_t i = (lo + hi) >> 1;
	392	if (i == lo) {
	393	break; // Done!
	394	} else if (c < array[i]) {
	395	hi = i;
	396	} else {
	397	lo = i;
	398	}
	399	}
	400	} else {
	401	hi += 1;
	402	}
	403	return (UBool)(hi&1);
b75a7d8f A	404	} else {
b75a7d8f A	405	/* find c in the supplementary part */
b75a7d8f	406	uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c;
73c04bcf A	407	int32_t base = set->bmpLength;
	408	int32_t lo = 0;
	409	int32_t hi = set->length - 2 - base;
	410	if (high < array[base] \|\| (high==array[base] && low<array[base+1])) {
	411	hi = 0;
	412	} else if (high < array[base+hi] \|\| (high==array[base+hi] && low<array[base+hi+1])) {
	413	for (;;) {
	414	int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result
	415	int32_t iabs = i + base;
	416	if (i == lo) {
	417	break; // Done!
	418	} else if (high < array[iabs] \|\| (high==array[iabs] && low<array[iabs+1])) {
	419	hi = i;
	420	} else {
	421	lo = i;
	422	}
	423	}
	424	} else {
	425	hi += 2;
	426	}
b75a7d8f	427	/* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */
73c04bcf	428	return (UBool)(((hi+(base<<1))&2)!=0);
b75a7d8f A	429	}
	430	}
	431
	432	U_CAPI int32_t U_EXPORT2
	433	uset_getSerializedRangeCount(const USerializedSet* set) {
	434	if(set==NULL) {
	435	return 0;
	436	}
	437
	438	return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2;
	439	}
	440
	441	U_CAPI UBool U_EXPORT2
	442	uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
	443	UChar32* pStart, UChar32* pEnd) {
	444	const uint16_t* array;
	445	int32_t bmpLength, length;
	446
	447	if(set==NULL \|\| rangeIndex<0 \|\| pStart==NULL \|\| pEnd==NULL) {
	448	return FALSE;
	449	}
	450
	451	array=set->array;
	452	length=set->length;
	453	bmpLength=set->bmpLength;
	454
	455	rangeIndex=2; / address start/limit pairs */
	456	if(rangeIndex<bmpLength) {
	457	*pStart=array[rangeIndex++];
	458	if(rangeIndex<bmpLength) {
	459	*pEnd=array[rangeIndex];
	460	} else if(rangeIndex<length) {
	461	*pEnd=(((int32_t)array[rangeIndex])<<16)\|array[rangeIndex+1];
	462	} else {
	463	*pEnd=0x110000;
	464	}
	465	--*pEnd;
	466	return TRUE;
	467	} else {
	468	rangeIndex-=bmpLength;
	469	rangeIndex=2; / address pairs of pairs of units */
	470	length-=bmpLength;
	471	if(rangeIndex<length) {
	472	array+=bmpLength;
	473	*pStart=(((int32_t)array[rangeIndex])<<16)\|array[rangeIndex+1];
	474	rangeIndex+=2;
	475	if(rangeIndex<length) {
	476	*pEnd=(((int32_t)array[rangeIndex])<<16)\|array[rangeIndex+1];
	477	} else {
	478	*pEnd=0x110000;
	479	}
	480	--*pEnd;
	481	return TRUE;
	482	} else {
	483	return FALSE;
	484	}
	485	}
	486	}
	487
	488	// TODO The old, internal uset.c had an efficient uset_containsOne function.
	489	// Returned the one and only code point, or else -1 or something.
	490	// Consider adding such a function to both C and C++ UnicodeSet/uset.
	491	// See tools/gennorm/store.c for usage, now usetContainsOne there.
	492
493	// TODO Investigate incorporating this code into UnicodeSet to improve
494	// efficiency.
495	// ---
496	// #define USET_GROW_DELTA 20
497	//
498	// static U_INLINE int32_t
499	// findChar(const UChar32* array, int32_t length, UChar32 c) {
500	// int32_t i;
501	//
502	// /* check the last range limit first for more efficient appending */
503	// if(length>0) {
504	// if(c>=array[length-1]) {
505	// return length;
506	// }
507	//
508	// /* do not check the last range limit again in the loop below */
509	// --length;
510	// }
511	//
512	// for(i=0; i<length && c>=array[i]; ++i) {}
513	// return i;
514	// }
515	//
516	// static UBool
517	// addRemove(USet* set, UChar32 c, int32_t doRemove) {
518	// int32_t i, length, more;
519	//
520	// if(set==NULL \|\| (uint32_t)c>0x10ffff) {
521	// return FALSE;
522	// }
523	//
524	// length=set->length;
525	// i=findChar(set->array, length, c);
526	// if((i&1)^doRemove) {
527	// /* c is already in the set */
528	// return TRUE;
529	// }
530	//
531	// /* how many more array items do we need? */
532	// if(i<length && (c+1)==set->array[i]) {
533	// /* c is just before the following range, extend that in-place by one */
534	// set->array[i]=c;
535	// if(i>0) {
536	// --i;
537	// if(c==set->array[i]) {
538	// /* the previous range collapsed, remove it */
539	// set->length=length-=2;
540	// if(i<length) {
541	// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
542	// }
543	// }
544	// }
545	// return TRUE;
546	// } else if(i>0 && c==set->array[i-1]) {
547	// /* c is just after the previous range, extend that in-place by one */
548	// if(++c<=0x10ffff) {
549	// set->array[i-1]=c;
550	// if(i<length && c==set->array[i]) {
551	// /* the following range collapsed, remove it */
552	// --i;
553	// set->length=length-=2;
554	// if(i<length) {
555	// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4);
556	// }
557	// }
558	// } else {
559	// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */
560	// set->length=i-1;
561	// }
562	// return TRUE;
563	// } else if(i==length && c==0x10ffff) {
564	// /* insert one range limit c */
565	// more=1;
566	// } else {
567	// /* insert two range limits c, c+1 */
568	// more=2;
569	// }
570	//
571	// /* insert <more> range limits */
572	// if(length+more>set->capacity) {
573	// /* reallocate */
574	// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA;
575	// UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4);
576	// if(newArray==NULL) {
577	// return FALSE;
578	// }
579	// set->capacity=newCapacity;
580	// uprv_memcpy(newArray, set->array, length*4);
581	//
582	// if(set->array!=set->staticBuffer) {
583	// uprv_free(set->array);
584	// }
585	// set->array=newArray;
586	// }
587	//
588	// if(i<length) {
589	// uprv_memmove(set->array+i+more, set->array+i, (length-i)*4);
590	// }
591	// set->array[i]=c;
592	// if(more==2) {
593	// set->array[i+1]=c+1;
594	// }
595	// set->length+=more;
596	//
597	// return TRUE;
598	// }
599	//
600	// U_CAPI UBool U_EXPORT2
601	// uset_add(USet* set, UChar32 c) {
602	// return addRemove(set, c, 0);
603	// }
604	//
605	// U_CAPI void U_EXPORT2
606	// uset_remove(USet* set, UChar32 c) {
607	// addRemove(set, c, 1);
608	// }