git.saurik.com Git - apple/icu.git/blame_incremental

... / ...

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	*******************************************************************************
	5	* Copyright (C) 1996-2012, International Business Machines Corporation and
	6	* others. All Rights Reserved.
	7	*******************************************************************************
	8	*/
	9	//===============================================================================
	10	//
	11	// File sortkey.cpp
	12	//
	13	//
	14	//
	15	// Created by: Helena Shih
	16	//
	17	// Modification History:
	18	//
	19	// Date Name Description
	20	//
	21	// 6/20/97 helena Java class name change.
	22	// 6/23/97 helena Added comments to make code more readable.
	23	// 6/26/98 erm Canged to use byte arrays instead of UnicodeString
	24	// 7/31/98 erm hashCode: minimum inc should be 2 not 1,
	25	// Cleaned up operator=
	26	// 07/12/99 helena HPUX 11 CC port.
	27	// 03/06/01 synwee Modified compareTo, to handle the result of
	28	// 2 string similar in contents, but one is longer
	29	// than the other
	30	//===============================================================================
	31
	32	#include "unicode/utypes.h"
	33
	34	#if !UCONFIG_NO_COLLATION
	35
	36	#include "unicode/sortkey.h"
	37	#include "cmemory.h"
	38	#include "uelement.h"
	39	#include "ustr_imp.h"
	40
	41	U_NAMESPACE_BEGIN
	42
	43	// A hash code of kInvalidHashCode indicates that the hash code needs
	44	// to be computed. A hash code of kEmptyHashCode is used for empty keys
	45	// and for any key whose computed hash code is kInvalidHashCode.
	46	static const int32_t kInvalidHashCode = 0;
	47	static const int32_t kEmptyHashCode = 1;
	48	// The "bogus hash code" replaces a separate fBogus flag.
	49	static const int32_t kBogusHashCode = 2;
	50
	51	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationKey)
	52
	53	CollationKey::CollationKey()
	54	: UObject(), fFlagAndLength(0),
	55	fHashCode(kEmptyHashCode)
	56	{
	57	}
	58
	59	// Create a collation key from a bit array.
	60	CollationKey::CollationKey(const uint8_t* newValues, int32_t count)
	61	: UObject(), fFlagAndLength(count),
	62	fHashCode(kInvalidHashCode)
	63	{
	64	if (count < 0 \|\| (newValues == NULL && count != 0) \|\|
	65	(count > getCapacity() && reallocate(count, 0) == NULL)) {
	66	setToBogus();
	67	return;
	68	}
	69
	70	if (count > 0) {
	71	uprv_memcpy(getBytes(), newValues, count);
	72	}
	73	}
	74
	75	CollationKey::CollationKey(const CollationKey& other)
	76	: UObject(other), fFlagAndLength(other.getLength()),
	77	fHashCode(other.fHashCode)
	78	{
	79	if (other.isBogus())
	80	{
	81	setToBogus();
	82	return;
	83	}
	84
	85	int32_t length = fFlagAndLength;
	86	if (length > getCapacity() && reallocate(length, 0) == NULL) {
	87	setToBogus();
	88	return;
	89	}
	90
	91	if (length > 0) {
	92	uprv_memcpy(getBytes(), other.getBytes(), length);
	93	}
	94	}
	95
	96	CollationKey::~CollationKey()
	97	{
	98	if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
	99	}
	100
	101	uint8_t *CollationKey::reallocate(int32_t newCapacity, int32_t length) {
	102	uint8_t newBytes = static_cast<uint8_t >(uprv_malloc(newCapacity));
	103	if(newBytes == NULL) { return NULL; }
	104	if(length > 0) {
	105	uprv_memcpy(newBytes, getBytes(), length);
	106	}
	107	if(fFlagAndLength < 0) { uprv_free(fUnion.fFields.fBytes); }
	108	fUnion.fFields.fBytes = newBytes;
	109	fUnion.fFields.fCapacity = newCapacity;
	110	fFlagAndLength \|= 0x80000000;
	111	return newBytes;
	112	}
	113
	114	void CollationKey::setLength(int32_t newLength) {
	115	// U_ASSERT(newLength >= 0 && newLength <= getCapacity());
	116	fFlagAndLength = (fFlagAndLength & 0x80000000) \| newLength;
	117	fHashCode = kInvalidHashCode;
	118	}
	119
	120	// set the key to an empty state
	121	CollationKey&
	122	CollationKey::reset()
	123	{
	124	fFlagAndLength &= 0x80000000;
	125	fHashCode = kEmptyHashCode;
	126
	127	return *this;
	128	}
	129
	130	// set the key to a "bogus" or invalid state
	131	CollationKey&
	132	CollationKey::setToBogus()
	133	{
	134	fFlagAndLength &= 0x80000000;
	135	fHashCode = kBogusHashCode;
	136
	137	return *this;
	138	}
	139
	140	UBool
	141	CollationKey::operator==(const CollationKey& source) const
	142	{
	143	return getLength() == source.getLength() &&
	144	(this == &source \|\|
	145	uprv_memcmp(getBytes(), source.getBytes(), getLength()) == 0);
	146	}
	147
	148	const CollationKey&
	149	CollationKey::operator=(const CollationKey& other)
	150	{
	151	if (this != &other)
	152	{
	153	if (other.isBogus())
	154	{
	155	return setToBogus();
	156	}
	157
	158	int32_t length = other.getLength();
	159	if (length > getCapacity() && reallocate(length, 0) == NULL) {
	160	return setToBogus();
	161	}
	162	if (length > 0) {
	163	uprv_memcpy(getBytes(), other.getBytes(), length);
	164	}
	165	fFlagAndLength = (fFlagAndLength & 0x80000000) \| length;
	166	fHashCode = other.fHashCode;
	167	}
	168
	169	return *this;
	170	}
	171
	172	// Bitwise comparison for the collation keys.
	173	Collator::EComparisonResult
	174	CollationKey::compareTo(const CollationKey& target) const
	175	{
	176	UErrorCode errorCode = U_ZERO_ERROR;
	177	return static_cast<Collator::EComparisonResult>(compareTo(target, errorCode));
	178	}
	179
	180	// Bitwise comparison for the collation keys.
	181	UCollationResult
	182	CollationKey::compareTo(const CollationKey& target, UErrorCode &status) const
	183	{
	184	if(U_SUCCESS(status)) {
	185	const uint8_t *src = getBytes();
	186	const uint8_t *tgt = target.getBytes();
	187
	188	// are we comparing the same string
	189	if (src == tgt)
	190	return UCOL_EQUAL;
	191
	192	UCollationResult result;
	193
	194	// are we comparing different lengths?
	195	int32_t minLength = getLength();
	196	int32_t targetLength = target.getLength();
	197	if (minLength < targetLength) {
	198	result = UCOL_LESS;
	199	} else if (minLength == targetLength) {
	200	result = UCOL_EQUAL;
	201	} else {
	202	minLength = targetLength;
	203	result = UCOL_GREATER;
	204	}
	205
	206	if (minLength > 0) {
	207	int diff = uprv_memcmp(src, tgt, minLength);
	208	if (diff > 0) {
	209	return UCOL_GREATER;
	210	}
	211	else
	212	if (diff < 0) {
	213	return UCOL_LESS;
	214	}
	215	}
	216
	217	return result;
	218	} else {
	219	return UCOL_EQUAL;
	220	}
	221	}
	222
	223	#ifdef U_USE_COLLATION_KEY_DEPRECATES
	224	// Create a copy of the byte array.
	225	uint8_t*
	226	CollationKey::toByteArray(int32_t& count) const
	227	{
	228	uint8_t result = (uint8_t) uprv_malloc( sizeof(uint8_t) * fCount );
	229
	230	if (result == NULL)
	231	{
	232	count = 0;
	233	}
	234	else
	235	{
	236	count = fCount;
	237	if (count > 0) {
	238	uprv_memcpy(result, fBytes, fCount);
	239	}
	240	}
	241
	242	return result;
	243	}
	244	#endif
	245
	246	static int32_t
	247	computeHashCode(const uint8_t *key, int32_t length) {
	248	const char s = reinterpret_cast<const char >(key);
	249	int32_t hash;
	250	if (s == NULL \|\| length == 0) {
	251	hash = kEmptyHashCode;
	252	} else {
	253	hash = ustr_hashCharsN(s, length);
	254	if (hash == kInvalidHashCode \|\| hash == kBogusHashCode) {
	255	hash = kEmptyHashCode;
	256	}
	257	}
	258	return hash;
	259	}
	260
	261	int32_t
	262	CollationKey::hashCode() const
	263	{
	264	// (Cribbed from UnicodeString)
	265	// We cache the hashCode; when it becomes invalid, due to any change to the
	266	// string, we note this by setting it to kInvalidHashCode. [LIU]
	267
	268	// Note: This method is semantically const, but physically non-const.
	269
	270	if (fHashCode == kInvalidHashCode)
	271	{
	272	fHashCode = computeHashCode(getBytes(), getLength());
	273	}
	274
	275	return fHashCode;
	276	}
	277
	278	U_NAMESPACE_END
	279
	280	U_CAPI int32_t U_EXPORT2
	281	ucol_keyHashCode(const uint8_t *key,
	282	int32_t length)
	283	{
	284	return icu::computeHashCode(key, length);
	285	}
	286
	287	#endif /* #if !UCONFIG_NO_COLLATION */