2 **********************************************************************
3 * Copyright (C) 2001-2008 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 08/13/2001 synwee Creation.
7 **********************************************************************
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "unicode/ucol.h"
17 #include "unicode/ucoleitr.h"
18 #include "unicode/ubrk.h"
/* Element count of the fixed-size arrays declared in this file:
 * CEBuffer, PCEBuffer, canonicalPrefixAccents and canonicalSuffixAccents. */
20 #define INITIAL_ARRAY_SIZE_ 256
/* Element count of the shift and backShift tables declared in this file. */
21 #define MAX_TABLE_SIZE_ 257
24 // required since collation element iterator does not have a getText API
26 int32_t textLength
; // exact length
28 UBool isCanonicalMatch
;
29 UBreakIterator
*internalBreakIter
; //internal character breakiterator
30 UBreakIterator
*breakIter
;
31 // value USEARCH_DONE is the default value
32 // if we are not at the start or the end of the text (depending on the
33 // iteration direction) and matchedIndex is USEARCH_DONE, it means that
34 // we can't find any more matches in that particular direction
36 int32_t matchedLength
;
37 UBool isForwardSearching
;
43 int32_t textLength
; // exact length
44 // length required for backwards ce comparison
47 int32_t CEBuffer
[INITIAL_ARRAY_SIZE_
];
50 int64_t PCEBuffer
[INITIAL_ARRAY_SIZE_
];
51 UBool hasPrefixAccents
;
52 UBool hasSuffixAccents
;
53 int16_t defaultShiftSize
;
54 int16_t shift
[MAX_TABLE_SIZE_
];
55 int16_t backShift
[MAX_TABLE_SIZE_
];
58 struct UStringSearch
{
59 struct USearch
*search
;
60 struct UPattern pattern
;
61 const UCollator
*collator
;
62 // positions within the collation element iterator are used to determine
63 // if we are at the start of the text.
64 UCollationElements
*textIter
;
65 // utility collation element, used throughout program for temporary
67 UCollationElements
*utilIter
;
69 UCollationStrength strength
;
73 UChar canonicalPrefixAccents
[INITIAL_ARRAY_SIZE_
];
74 UChar canonicalSuffixAccents
[INITIAL_ARRAY_SIZE_
];
78 * Exact matches without checking for the ends for extra accents.
79 * The match after the position within the collation element iterator is to be
81 * After a match is found the offset in the collation element iterator will be
82 * shifted to the start of the match.
83 * Implementation note:
84 * For tertiary we can't use the collator->tertiaryMask, that is a
85 * preprocessed mask that takes into account case options. since we are only
86 * concerned with exact matches, we don't need that.
87 * Alternate handling - since only the 16 most significant bits are used,
88 * we can safely do a compare without masking. If the ce is a variable, we mask
89 * and get only the primary values; no shifting to quaternary is required since
90 * all primary values less than variabletop will need to be masked off anyway.
91 * If the end character is composite and the pattern ce does not match the text
92 * ce, we skip it until we find a match in the end composite character or when
93 * it has passed the character. This is so that we can match pattern "a" with
95 * @param strsrch string search data
96 * @param status error status if any
97 * @return TRUE if an exact match is found, FALSE otherwise
/**
 * Handler that searches for the next exact match, without checking the ends
 * of the match for extra accents. Per the description in this header, after
 * a match is found the offset in the collation element iterator is shifted
 * to the start of the match.
 * @param strsrch string search data
 * @param status error status if any
 * @return TRUE if an exact match is found, FALSE otherwise
 */
100 UBool
usearch_handleNextExact(UStringSearch
*strsrch
, UErrorCode
*status
);
104 * According to the definition, matches found here will include the whole span
105 * of beginning and ending accents if it overlaps that region.
106 * @param strsrch string search data
107 * @param status error status if any
108 * @return TRUE if a canonical match is found, FALSE otherwise
/**
 * Handler that searches for the next canonical match. Per the description in
 * this header, a match found here includes the whole span of beginning and
 * ending accents if it overlaps that region.
 * @param strsrch string search data
 * @param status error status if any
 * @return TRUE if a canonical match is found, FALSE otherwise
 */
111 UBool
usearch_handleNextCanonical(UStringSearch
*strsrch
, UErrorCode
*status
);
114 * Gets the previous match.
115 * Comments follow from handleNextExact
116 * @param strsrch string search data
117 * @param status error status if any
118 * @return TRUE if an exact match is found, FALSE otherwise.
/**
 * Handler that gets the previous exact match. The header states that the
 * comments written for handleNextExact apply to this function as well.
 * @param strsrch string search data
 * @param status error status if any
 * @return TRUE if an exact match is found, FALSE otherwise
 */
121 UBool
usearch_handlePreviousExact(UStringSearch
*strsrch
, UErrorCode
*status
);
125 * According to the definition, matches found here will include the whole span
126 * of beginning and ending accents if it overlaps that region.
127 * @param strsrch string search data
128 * @param status error status if any
129 * @return TRUE if a canonical match is found, FALSE otherwise
132 UBool
usearch_handlePreviousCanonical(UStringSearch
*strsrch
,
135 #endif /* #if !UCONFIG_NO_COLLATION */