+unsigned calculateStringHashAndLengthFromUTF8(const char* data, const char* dataEnd, unsigned& dataLength, unsigned& utf16Length)
+{
+ if (!data)
+ return 0;
+
+ StringHasher stringHasher;
+ dataLength = 0;
+ utf16Length = 0;
+
+ while (data < dataEnd || (!dataEnd && *data)) {
+ if (isASCII(*data)) {
+ stringHasher.addCharacter(*data++);
+ dataLength++;
+ utf16Length++;
+ continue;
+ }
+
+ int utf8SequenceLength = inlineUTF8SequenceLengthNonASCII(*data);
+ dataLength += utf8SequenceLength;
+
+ if (!dataEnd) {
+ for (int i = 1; i < utf8SequenceLength; ++i) {
+ if (!data[i])
+ return 0;
+ }
+ } else if (dataEnd - data < utf8SequenceLength)
+ return 0;
+
+ if (!isLegalUTF8(reinterpret_cast<const unsigned char*>(data), utf8SequenceLength))
+ return 0;
+
+ UChar32 character = readUTF8Sequence(data, utf8SequenceLength);
+ ASSERT(!isASCII(character));
+
+ if (U_IS_BMP(character)) {
+ // UTF-16 surrogate values are illegal in UTF-32
+ if (U_IS_SURROGATE(character))
+ return 0;
+ stringHasher.addCharacter(static_cast<UChar>(character)); // normal case
+ utf16Length++;
+ } else if (U_IS_SUPPLEMENTARY(character)) {
+ stringHasher.addCharacters(static_cast<UChar>(U16_LEAD(character)),
+ static_cast<UChar>(U16_TRAIL(character)));
+ utf16Length += 2;
+ } else
+ return 0;
+ }
+
+ return stringHasher.hash();