git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/dictionarydata.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / common / dictionarydata.cpp
index f174f9d450843da4c94047e24171ed5a2a21fe8d..6e2dbee5b618be3d292a2a0a5bc8677aeadf75ba 100644 (file)
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 *******************************************************************************
-* Copyright (C) 2014, International Business Machines
+* Copyright (C) 2014-2016, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * dictionarydata.h
@@ -40,22 +42,31 @@ int32_t UCharsDictionaryMatcher::getType() const {
     return DictionaryData::TRIE_TYPE_UCHARS;
 }
 
-int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
+int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
+                            int32_t *prefix) const {
+
     UCharsTrie uct(characters);
-    UChar32 c = utext_next32(text);
-    if (c < 0) {
-        return 0;
-    }
-    UStringTrieResult result = uct.first(c);
-    int32_t numChars = 1;
-    count = 0;
-    for (;;) {
+    int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+    int32_t wordCount = 0;
+    int32_t codePointsMatched = 0;
+
+    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
+        UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
+        int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+        codePointsMatched += 1;
         if (USTRINGTRIE_HAS_VALUE(result)) {
-            if (count < limit) {
+            if (wordCount < limit) {
                 if (values != NULL) {
-                    values[count] = uct.getValue();
+                    values[wordCount] = uct.getValue();
+                }
+                if (lengths != NULL) {
+                    lengths[wordCount] = lengthMatched;
+                }
+                if (cpLengths != NULL) {
+                    cpLengths[wordCount] = codePointsMatched;
                 }
-                lengths[count++] = numChars;
+                ++wordCount;
             }
             if (result == USTRINGTRIE_FINAL_VALUE) {
                 break;
@@ -64,20 +75,15 @@ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
         else if (result == USTRINGTRIE_NO_MATCH) {
             break;
         }
-
-        // TODO: why do we have a text limit if the UText knows its length?
-        if (numChars >= maxLength) {
+        if (lengthMatched >= maxLength) {
             break;
         }
+    }
 
-        c = utext_next32(text);
-        if (c < 0) {
-            break;
-        }
-        ++numChars;
-        result = uct.next(c);
+    if (prefix != NULL) {
+        *prefix = codePointsMatched;
     }
-    return numChars;
+    return wordCount;
 }
 
 BytesDictionaryMatcher::~BytesDictionaryMatcher() {
@@ -104,22 +110,30 @@ int32_t BytesDictionaryMatcher::getType() const {
     return DictionaryData::TRIE_TYPE_BYTES;
 }
 
-int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count, int32_t limit, int32_t *values) const {
+int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+                            int32_t *lengths, int32_t *cpLengths, int32_t *values,
+                            int32_t *prefix) const {
     BytesTrie bt(characters);
-    UChar32 c = utext_next32(text);
-    if (c < 0) {
-        return 0;
-    }
-    UStringTrieResult result = bt.first(transform(c));
-    int32_t numChars = 1;
-    count = 0;
-    for (;;) {
+    int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+    int32_t wordCount = 0;
+    int32_t codePointsMatched = 0;
+
+    for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) {
+        UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
+        int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+        codePointsMatched += 1;
         if (USTRINGTRIE_HAS_VALUE(result)) {
-            if (count < limit) {
+            if (wordCount < limit) {
                 if (values != NULL) {
-                    values[count] = bt.getValue();
+                    values[wordCount] = bt.getValue();
+                }
+                if (lengths != NULL) {
+                    lengths[wordCount] = lengthMatched;
+                }
+                if (cpLengths != NULL) {
+                    cpLengths[wordCount] = codePointsMatched;
                 }
-                lengths[count++] = numChars;
+                ++wordCount;
             }
             if (result == USTRINGTRIE_FINAL_VALUE) {
                 break;
@@ -128,20 +142,15 @@ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t
         else if (result == USTRINGTRIE_NO_MATCH) {
             break;
         }
-
-        // TODO: why do we have a text limit if the UText knows its length?
-        if (numChars >= maxLength) {
+        if (lengthMatched >= maxLength) {
             break;
         }
+    }
 
-        c = utext_next32(text);
-        if (c < 0) {
-            break;
-        }
-        ++numChars;
-        result = bt.next(transform(c));
+    if (prefix != NULL) {
+        *prefix = codePointsMatched;
     }
-    return numChars;
+    return wordCount;
 }