+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
-* Copyright (C) 2012, International Business Machines
+* Copyright (C) 2014, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* dictionarydata.h
class U_COMMON_API DictionaryData : public UMemory {
public:
- static const int32_t TRIE_TYPE_BYTES = 0;
- static const int32_t TRIE_TYPE_UCHARS = 1;
- static const int32_t TRIE_TYPE_MASK = 7;
- static const int32_t TRIE_HAS_VALUES = 8;
+ static const int32_t TRIE_TYPE_BYTES; // = 0;
+ static const int32_t TRIE_TYPE_UCHARS; // = 1;
+ static const int32_t TRIE_TYPE_MASK; // = 7;
+ static const int32_t TRIE_HAS_VALUES; // = 8;
- static const int32_t TRANSFORM_NONE = 0;
- static const int32_t TRANSFORM_TYPE_OFFSET = 0x1000000;
- static const int32_t TRANSFORM_TYPE_MASK = 0x7f000000;
- static const int32_t TRANSFORM_OFFSET_MASK = 0x1fffff;
+ static const int32_t TRANSFORM_NONE; // = 0;
+ static const int32_t TRANSFORM_TYPE_OFFSET; // = 0x1000000;
+ static const int32_t TRANSFORM_TYPE_MASK; // = 0x7f000000;
+ static const int32_t TRANSFORM_OFFSET_MASK; // = 0x1fffff;
enum {
// Byte offsets from the start of the data, after the generic header.
*/
class U_COMMON_API DictionaryMatcher : public UMemory {
public:
+ DictionaryMatcher() {}
virtual ~DictionaryMatcher();
// this should emulate CompactTrieDictionary::matches()
- virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count,
- int32_t limit, int32_t *values = NULL) const = 0;
+ /**
+ * Looks for dictionary words that match the text, starting at the current
+ * position of the UText.
+ *
+ * @param text The text in which to look for matching words. Matching begins
+ * at the current position of the UText.
+ * @param maxLength The max length of match to consider. Units are the native indexing
+ * units of the UText.
+ * @param limit Capacity of output arrays, which is also the maximum number of
+ * matching words to be found.
+ * @param lengths Output array, filled with the lengths of the matches, in order,
+ * from shortest to longest. Lengths are in native indexing units
+ * of the UText. May be NULL.
+ * @param cpLengths Output array, filled with the lengths of the matches, in order,
+ * from shortest to longest. Lengths are the number of Unicode code points.
+ * May be NULL.
+ * @param values Output array, filled with the values associated with the words found.
+ * May be NULL.
+ * @param prefix Output parameter, the code point length of the prefix match, even if that
+ * prefix didn't lead to a complete word. Will always be >= the cpLength
+ * of the longest complete word matched. May be NULL.
+ * @return Number of matching words found.
+ */
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+ int32_t *prefix) const = 0;
+
/** @return DictionaryData::TRIE_TYPE_XYZ */
virtual int32_t getType() const = 0;
};
// Keeps the given UChar data pointer and UDataMemory handle in the
// `characters` and `file` members; the constructor body itself is empty.
// The UDataMemory * will be closed on this object's destruction.
UCharsDictionaryMatcher(const UChar *c, UDataMemory *f) : characters(c), file(f) { }
virtual ~UCharsDictionaryMatcher();
- virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count,
- int32_t limit, int32_t *values = NULL) const;
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+ int32_t *prefix) const; // parameters as documented on DictionaryMatcher::matches()
virtual int32_t getType() const;
private:
const UChar *characters;
// Keeps the byte data pointer, the transform constant and the UDataMemory
// handle in the `characters`, `transformConstant` and `file` members; the
// constructor body itself is empty.
BytesDictionaryMatcher(const char *c, int32_t t, UDataMemory *f)
: characters(c), transformConstant(t), file(f) { }
virtual ~BytesDictionaryMatcher();
- virtual int32_t matches(UText *text, int32_t maxLength, int32_t *lengths, int32_t &count,
- int32_t limit, int32_t *values = NULL) const;
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+ int32_t *prefix) const; // parameters as documented on DictionaryMatcher::matches()
virtual int32_t getType() const;
private:
UChar32 transform(UChar32 c) const;