]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/usrchimp.h
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / i18n / usrchimp.h
1 /*
2 **********************************************************************
3 * Copyright (C) 2001-2004 IBM and others. All rights reserved.
4 **********************************************************************
5 * Date Name Description
6 * 08/13/2001 synwee Creation.
7 **********************************************************************
8 */
9 #ifndef USRCHIMP_H
10 #define USRCHIMP_H
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION
15
16 #include "unicode/ucol.h"
17 #include "unicode/ucoleitr.h"
18 #include "unicode/ubrk.h"
19
20 #define INITIAL_ARRAY_SIZE_ 256 // element count of the fixed buffers: UPattern.CEBuffer and UStringSearch.canonicalPrefixAccents / canonicalSuffixAccents
21 #define MAX_TABLE_SIZE_ 257 // element count of the UPattern.shift and UPattern.backShift tables
22
23 struct USearch { // search-side state: the text, option flags, break iterator and the most recent match
24 // text is kept here because the collation element iterator does not have a getText API
25 const UChar *text; // presumably the target text being searched -- confirm against usearch.cpp
26 int32_t textLength; // exact length of text
27 UBool isOverlap; // NOTE(review): looks like the "overlapping matches" option -- confirm
28 UBool isCanonicalMatch; // NOTE(review): looks like the "canonical match" option -- confirm
29 UBreakIterator *breakIter; // NOTE(review): ownership is not visible from this header
30 // USEARCH_DONE is the default value of matchedIndex.
31 // If we are not at the start of the text or the end of the text,
32 // depending on the iteration direction, and matchedIndex is USEARCH_DONE,
33 // it means that we cannot find any more matches in that particular direction.
34 int32_t matchedIndex;
35 int32_t matchedLength; // presumably the length of the match at matchedIndex -- confirm
36 UBool isForwardSearching; // presumably TRUE while iterating forwards -- confirm
37 UBool reset; // NOTE(review): meaning is not documented in this header; see usearch.cpp
38 };
39
40 struct UPattern { // pattern-side data: the text, its CE array, accent flags and shift tables
41 const UChar *text; // presumably the pattern string -- confirm against usearch.cpp
42 int32_t textLength; // exact length of text
43 // length required for backwards ce comparison
44 int32_t CELength;
45 int32_t *CE; // NOTE(review): presumably points at CEBuffer or at a larger separately allocated array -- confirm
46 int32_t CEBuffer[INITIAL_ARRAY_SIZE_]; // fixed inline buffer of INITIAL_ARRAY_SIZE_ entries
47 UBool hasPrefixAccents;
48 UBool hasSuffixAccents;
49 int16_t defaultShiftSize;
50 int16_t shift[MAX_TABLE_SIZE_]; // MAX_TABLE_SIZE_ entries; NOTE(review): presumably the forward-search shift table -- confirm
51 int16_t backShift[MAX_TABLE_SIZE_]; // MAX_TABLE_SIZE_ entries; NOTE(review): presumably the backward-search shift table -- confirm
52 };
53
54 struct UStringSearch { // ties together the search state, the pattern, the collator and two CE iterators
55 struct USearch *search; // held by pointer
56 struct UPattern pattern; // embedded by value
57 const UCollator *collator;
58 // positions within the collation element iterator are used to determine
59 // if we are at the start of the text.
60 UCollationElements *textIter;
61 // utility collation element iterator, used throughout the program for
62 // temporary iteration.
63 UCollationElements *utilIter;
64 UBool ownCollator; // NOTE(review): presumably TRUE when this object is responsible for closing collator -- confirm
65 UCollationStrength strength;
66 uint32_t ceMask;
67 uint32_t variableTop;
68 UBool toShift;
69 UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_]; // fixed buffer of INITIAL_ARRAY_SIZE_ UChars
70 UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_]; // fixed buffer of INITIAL_ARRAY_SIZE_ UChars
71 };
72
73 /**
74 * Exact matches without checking the ends for extra accents.
75 * The match after the position within the collation element iterator is to be
76 * found.
77 * After a match is found the offset in the collation element iterator will be
78 * shifted to the start of the match.
79 * Implementation note:
80 * For tertiary we can't use the collator->tertiaryMask; that is a
81 * preprocessed mask that takes into account case options. Since we are only
82 * concerned with exact matches, we don't need that.
83 * Alternate handling - since only the 16 most significant digits are used,
84 * we can safely do a compare without masking. If the ce is a variable, we mask
85 * and get only the primary values; no shifting to quaternary is required since
86 * all primary values less than variabletop will need to be masked off anyway.
87 * If the end character is composite and the pattern ce does not match the text
88 * ce, we skip it until we find a match in the end composite character or when
89 * it has passed the character. This is so that we can match pattern "a" with
90 * the text "\u00e6".
91 * @param strsrch string search data
92 * @param status error status if any
93 * @return TRUE if an exact match is found, FALSE otherwise
94 */
95 U_CFUNC
96 UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
97
98 /**
99 * Canonical matches; the "next" counterpart of usearch_handlePreviousCanonical.
100 * According to the definition, matches found here will include the whole span
101 * of beginning and ending accents if it overlaps that region.
102 * @param strsrch string search data
103 * @param status error status if any
104 * @return TRUE if a canonical match is found, FALSE otherwise
105 */
106 U_CFUNC
107 UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
108
109 /**
110 * Gets the previous match.
111 * Comments follow from usearch_handleNextExact.
112 * @param strsrch string search data
113 * @param status error status if any
114 * @return TRUE if an exact match is found, FALSE otherwise.
115 */
116 U_CFUNC
117 UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
118
119 /**
120 * Canonical matches; the "previous" counterpart of usearch_handleNextCanonical.
121 * According to the definition, matches found here will include the whole span
122 * of beginning and ending accents if it overlaps that region.
123 * @param strsrch string search data
124 * @param status error status if any
125 * @return TRUE if a canonical match is found, FALSE otherwise
126 */
127 U_CFUNC
128 UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
129 UErrorCode *status);
130
131 #endif /* #if !UCONFIG_NO_COLLATION */
132
133 #endif