2 **********************************************************************
3 * Copyright (C) 2001-2004 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 08/13/2001 synwee Creation.
7 **********************************************************************
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_COLLATION
16 #include "unicode/ucol.h"
17 #include "unicode/ucoleitr.h"
18 #include "unicode/ubrk.h"
// element count of the fixed-size arrays declared in this header that are
// sized with it: CEBuffer, canonicalPrefixAccents and canonicalSuffixAccents
#define INITIAL_ARRAY_SIZE_ 256
// element count of the shift and backShift tables declared in this header
#define MAX_TABLE_SIZE_ 257
24 // required since collation element iterator does not have a getText API
26 int32_t textLength
; // exact length
28 UBool isCanonicalMatch
;
29 UBreakIterator
*breakIter
;
// USEARCH_DONE is the default value.
// If we are not at the start of the text or the end of the text,
// depending on the iteration direction, and matchedIndex is USEARCH_DONE,
// it means that we cannot find any more matches in that particular direction
35 int32_t matchedLength
;
36 UBool isForwardSearching
;
42 int32_t textLength
; // exact length
43 // length required for backwards ce comparison
46 int32_t CEBuffer
[INITIAL_ARRAY_SIZE_
];
47 UBool hasPrefixAccents
;
48 UBool hasSuffixAccents
;
49 int16_t defaultShiftSize
;
50 int16_t shift
[MAX_TABLE_SIZE_
];
51 int16_t backShift
[MAX_TABLE_SIZE_
];
54 struct UStringSearch
{
55 struct USearch
*search
;
56 struct UPattern pattern
;
57 const UCollator
*collator
;
// positions within the collation element iterator are used to determine
// if we are at the start of the text.
60 UCollationElements
*textIter
;
// utility collation element iterator, used throughout the program for
// temporary iteration
63 UCollationElements
*utilIter
;
65 UCollationStrength strength
;
69 UChar canonicalPrefixAccents
[INITIAL_ARRAY_SIZE_
];
70 UChar canonicalSuffixAccents
[INITIAL_ARRAY_SIZE_
];
74 * Exact matches without checking for the ends for extra accents.
75 * The match after the position within the collation element iterator is to be
77 * After a match is found the offset in the collation element iterator will be
78 * shifted to the start of the match.
79 * Implementation note:
80 * For tertiary we can't use the collator->tertiaryMask, that is a
81 * preprocessed mask that takes into account case options. since we are only
82 * concerned with exact matches, we don't need that.
83 * Alternate handling - since only the 16 most significant digits is only used,
84 * we can safely do a compare without masking if the ce is a variable, we mask
85 * and get only the primary values no shifting to quartenary is required since
86 * all primary values less than variabletop will need to be masked off anyway.
87 * If the end character is composite and the pattern ce does not match the text
88 * ce, we skip it until we find a match in the end composite character or when
89 * it has passed the character. This is so that we can match pattern "a" with
91 * @param strsrch string search data
92 * @param status error status if any
93 * @return TRUE if an exact match is found, FALSE otherwise
96 UBool
usearch_handleNextExact(UStringSearch
*strsrch
, UErrorCode
*status
);
100 * According to the definition, matches found here will include the whole span
101 * of beginning and ending accents if it overlaps that region.
102 * @param strsrch string search data
103 * @param status error status if any
104 * @return TRUE if a canonical match is found, FALSE otherwise
107 UBool
usearch_handleNextCanonical(UStringSearch
*strsrch
, UErrorCode
*status
);
110 * Gets the previous match.
111 * Comments follows from handleNextExact
112 * @param strsrch string search data
113 * @param status error status if any
114 * @return True if a exact math is found, FALSE otherwise.
117 UBool
usearch_handlePreviousExact(UStringSearch
*strsrch
, UErrorCode
*status
);
121 * According to the definition, matches found here will include the whole span
122 * of beginning and ending accents if it overlaps that region.
123 * @param strsrch string search data
124 * @param status error status if any
125 * @return TRUE if a canonical match is found, FALSE otherwise
128 UBool
usearch_handlePreviousCanonical(UStringSearch
*strsrch
,
131 #endif /* #if !UCONFIG_NO_COLLATION */