2 **********************************************************************
3 * Copyright (C) 2001-2011 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 08/13/2001 synwee Creation.
7 **********************************************************************
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "unicode/normalizer2.h"
17 #include "unicode/ucol.h"
18 #include "unicode/ucoleitr.h"
19 #include "unicode/ubrk.h"
// Element count of the fixed-size buffers declared in this header
// (CEBuffer, PCEBuffer, canonicalPrefixAccents, canonicalSuffixAccents).
21 #define INITIAL_ARRAY_SIZE_ 256
// Element count of the shift and backShift tables declared in this header.
22 #define MAX_TABLE_SIZE_ 257
25 // required since collation element iterator does not have a getText API
27 int32_t textLength
; // exact length
29 UBool isCanonicalMatch
;
30 int16_t elementComparisonType
;
31 UBreakIterator
*internalBreakIter
; //internal character breakiterator
32 UBreakIterator
*breakIter
;
33 // value USEARCH_DONE is the default value
34 // if we are not at the start of the text or the end of the text,
35 // depending on the iteration direction and matchedIndex is USEARCH_DONE
36 // it means that we can't find any more matches in that particular direction
38 int32_t matchedLength
;
39 UBool isForwardSearching
;
45 int32_t textLength
; // exact length
46 // length required for backwards ce comparison
49 int32_t CEBuffer
[INITIAL_ARRAY_SIZE_
];
52 int64_t PCEBuffer
[INITIAL_ARRAY_SIZE_
];
53 UBool hasPrefixAccents
;
54 UBool hasSuffixAccents
;
55 int16_t defaultShiftSize
;
56 int16_t shift
[MAX_TABLE_SIZE_
];
57 int16_t backShift
[MAX_TABLE_SIZE_
];
// Aggregate of everything a string search carries around: a pointer to the
// USearch data, the UPattern (held by value), the collator and Normalizer2
// pointers, two collation element iterators, the comparison strength, and two
// fixed-size UChar buffers of INITIAL_ARRAY_SIZE_ elements each.
60 struct UStringSearch
{
61 struct USearch
*search
;
62 struct UPattern pattern
;
63 const UCollator
*collator
;
// NOTE(review): the member name suggests an NFD normalizer instance; confirm
// against the code that initializes it.
64 const icu::Normalizer2
*nfd
;
65 // positions within the collation element iterator are used to determine
66 // if we are at the start of the text.
67 UCollationElements
*textIter
;
68 // utility collation element, used throughout program for temporary
70 UCollationElements
*utilIter
;
72 UCollationStrength strength
;
// NOTE(review): presumably scratch storage for accents found before/after a
// canonical match; the code that fills these buffers is not visible here.
76 UChar canonicalPrefixAccents
[INITIAL_ARRAY_SIZE_
];
77 UChar canonicalSuffixAccents
[INITIAL_ARRAY_SIZE_
];
81 * Exact matches without checking for the ends for extra accents.
82 * The match after the position within the collation element iterator is to be
84 * After a match is found the offset in the collation element iterator will be
85 * shifted to the start of the match.
86 * Implementation note:
87 * For tertiary we can't use the collator->tertiaryMask, that is a
88 * preprocessed mask that takes into account case options. since we are only
89 * concerned with exact matches, we don't need that.
90 * Alternate handling - since only the 16 most significant digits are used,
91 * we can safely do a compare without masking if the ce is a variable, we mask
92 * and get only the primary values no shifting to quaternary is required since
93 * all primary values less than variabletop will need to be masked off anyway.
94 * If the end character is composite and the pattern ce does not match the text
95 * ce, we skip it until we find a match in the end composite character or when
96 * it has passed the character. This is so that we can match pattern "a" with
98 * @param strsrch string search data
99 * @param status error status if any
100 * @return TRUE if an exact match is found, FALSE otherwise
// Declaration only; the definition is not part of this header.
// Looks for the next exact match using the given search state. The UBool
// result says whether a match was found; any error is reported via *status.
103 UBool
usearch_handleNextExact(UStringSearch
*strsrch
, UErrorCode
*status
);
107 * According to the definition, matches found here will include the whole span
108 * of beginning and ending accents if it overlaps that region.
109 * @param strsrch string search data
110 * @param status error status if any
111 * @return TRUE if a canonical match is found, FALSE otherwise
// Declaration only; the definition is not part of this header.
// Canonical-match counterpart of the "next" search: the UBool result says
// whether a canonical match was found; any error is reported via *status.
114 UBool
usearch_handleNextCanonical(UStringSearch
*strsrch
, UErrorCode
*status
);
117 * Gets the previous match.
118 * Comments follow from handleNextExact
119 * @param strsrch string search data
120 * @param status error status if any
121 * @return TRUE if an exact match is found, FALSE otherwise.
// Declaration only; the definition is not part of this header.
// Looks for the previous exact match using the given search state. The UBool
// result says whether a match was found; any error is reported via *status.
124 UBool
usearch_handlePreviousExact(UStringSearch
*strsrch
, UErrorCode
*status
);
128 * According to the definition, matches found here will include the whole span
129 * of beginning and ending accents if it overlaps that region.
130 * @param strsrch string search data
131 * @param status error status if any
132 * @return TRUE if a canonical match is found, FALSE otherwise
135 UBool
usearch_handlePreviousCanonical(UStringSearch
*strsrch
,
138 #endif /* #if !UCONFIG_NO_COLLATION */