]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/usrchimp.h
ICU-400.40.tar.gz
[apple/icu.git] / icuSources / i18n / usrchimp.h
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
46f4442e 3* Copyright (C) 2001-2008 IBM and others. All rights reserved.
b75a7d8f
A
4**********************************************************************
5* Date Name Description
6* 08/13/2001 synwee Creation.
7**********************************************************************
8*/
9#ifndef USRCHIMP_H
10#define USRCHIMP_H
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_COLLATION
15
16#include "unicode/ucol.h"
17#include "unicode/ucoleitr.h"
18#include "unicode/ubrk.h"
19
20#define INITIAL_ARRAY_SIZE_ 256
21#define MAX_TABLE_SIZE_ 257
22
23struct USearch {
24 // text is kept here since the collation element iterator does not have a getText API
25 const UChar *text;
26 int32_t textLength; // exact length of text
27 UBool isOverlap;
28 UBool isCanonicalMatch;
46f4442e 29 UBreakIterator *internalBreakIter; // internal character break iterator
b75a7d8f
A
30 UBreakIterator *breakIter;
31 // USEARCH_DONE is the default value of matchedIndex.
32 // If matchedIndex is USEARCH_DONE while we are not at the start of the text
33 // or the end of the text (whichever applies to the iteration direction),
46f4442e
A
34 // it means that we can't find any more matches in that particular direction.
35 int32_t matchedIndex;
b75a7d8f
A
36 int32_t matchedLength;
37 UBool isForwardSearching;
38 UBool reset;
39};
40
41struct UPattern {
42 const UChar *text;
43 int32_t textLength; // exact length of text
44 // CE length; required for backwards collation element (CE) comparison
45 int32_t CELength;
374ca955
A
46 int32_t *CE;
47 int32_t CEBuffer[INITIAL_ARRAY_SIZE_]; // NOTE(review): presumably the initial storage behind CE - confirm in the implementation
46f4442e
A
48 int32_t PCELength;
49 int64_t *PCE;
50 int64_t PCEBuffer[INITIAL_ARRAY_SIZE_]; // NOTE(review): presumably the initial storage behind PCE - confirm in the implementation
b75a7d8f
A
51 UBool hasPrefixAccents;
52 UBool hasSuffixAccents;
53 int16_t defaultShiftSize;
54 int16_t shift[MAX_TABLE_SIZE_];
55 int16_t backShift[MAX_TABLE_SIZE_];
56};
57
58struct UStringSearch {
59 struct USearch *search;
60 struct UPattern pattern;
61 const UCollator *collator;
62 // the position within the collation element iterator is used to determine
63 // if we are at the start of the text.
64 UCollationElements *textIter;
65 // utility collation element iterator, used throughout the program for
66 // temporary iteration.
67 UCollationElements *utilIter;
68 UBool ownCollator;
69 UCollationStrength strength;
70 uint32_t ceMask;
71 uint32_t variableTop;
72 UBool toShift;
73 UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
74 UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
75};
76
77/**
78* Exact matches without checking for the ends for extra accents.
79* The match after the position within the collation element iterator is to be
80* found.
81* After a match is found the offset in the collation element iterator will be
82* shifted to the start of the match.
83* Implementation note:
84* For tertiary we can't use the collator->tertiaryMask, that is a
85* preprocessed mask that takes into account case options. Since we are only
86* concerned with exact matches, we don't need that.
87* Alternate handling - since only the 16 most significant bits are used,
88* we can safely do a compare without masking. If the ce is a variable, we mask
89* and get only the primary values; no shifting to quaternary is required since
90* all primary values less than variabletop will need to be masked off anyway.
91* If the end character is composite and the pattern ce does not match the text
92* ce, we skip it until we find a match in the end composite character or when
93* it has passed the character. This is so that we can match pattern "a" with
94* the text "\u00e6".
95* @param strsrch string search data
96* @param status error status if any
97* @return TRUE if an exact match is found, FALSE otherwise
98*/
99U_CFUNC
100UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
101
102/**
103* Canonical matches, looking for the next match.
104* According to the definition, matches found here will include the whole span
105* of beginning and ending accents if it overlaps that region.
106* @param strsrch string search data
107* @param status error status if any
108* @return TRUE if a canonical match is found, FALSE otherwise
109*/
110U_CFUNC
111UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
112
113/**
114* Gets the previous exact match.
115* Comments follow from usearch_handleNextExact.
116* @param strsrch string search data
117* @param status error status if any
118* @return TRUE if an exact match is found, FALSE otherwise.
119*/
120U_CFUNC
121UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
122
123/**
124* Canonical matches, looking for the previous match.
125* According to the definition, matches found here will include the whole span
126* of beginning and ending accents if it overlaps that region.
127* @param strsrch string search data
128* @param status error status if any
129* @return TRUE if a canonical match is found, FALSE otherwise
130*/
131U_CFUNC
132UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
133 UErrorCode *status);
134
135#endif /* #if !UCONFIG_NO_COLLATION */
136
137#endif