[apple/icu.git] / icuSources / i18n / usrchimp.h

/*
**********************************************************************
*   Copyright (C) 2001-2004 IBM and others. All rights reserved.
**********************************************************************
*   Date        Name        Description
*  08/13/2001   synwee      Creation.
**********************************************************************
*/
#ifndef USRCHIMP_H
#define USRCHIMP_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/ucol.h"
#include "unicode/ucoleitr.h"
#include "unicode/ubrk.h"

// Element count of the fixed-size buffers declared in this header:
// CEBuffer in struct UPattern, and canonicalPrefixAccents /
// canonicalSuffixAccents in struct UStringSearch.
#define INITIAL_ARRAY_SIZE_       256
// Element count of the shift and backShift tables in struct UPattern.
#define MAX_TABLE_SIZE_           257

/**
* State kept for the text side of a string search. struct UStringSearch
* refers to one of these through its `search` pointer.
*/
struct USearch {
    // text buffer; kept here since the collation element iterator does not
    // have a getText API
    const UChar              *text;
          int32_t             textLength; // exact length
          // NOTE(review): presumably whether successive matches may overlap
          // each other -- confirm against the implementation
          UBool               isOverlap;
          // NOTE(review): presumably selects canonical rather than exact
          // matching (compare the usearch_handle*Canonical and
          // usearch_handle*Exact declarations in this header) -- confirm
          UBool               isCanonicalMatch;
          // break iterator associated with the search; how it is used is not
          // visible in this header
          UBreakIterator     *breakIter;
    // value USEARCH_DONE is the default value.
    // if we are not at the start of the text or the end of the text,
    // depending on the iteration direction, and matchedIndex is USEARCH_DONE,
    // it means that we cannot find any more matches in that particular
    // direction
          int32_t         matchedIndex; 
          int32_t             matchedLength;
          // NOTE(review): name suggests TRUE while iterating forwards through
          // the text -- confirm
          UBool               isForwardSearching;
          // NOTE(review): semantics of this flag are not visible in this
          // header -- see the implementation
          UBool               reset;
};

/**
* Data kept for the pattern side of a string search. struct UStringSearch
* embeds one of these by value as its `pattern` member.
*/
struct UPattern {
    const UChar              *text;
          int32_t             textLength; // exact length
          // length required for backwards ce comparison
          int32_t             CELength; 
          // NOTE(review): presumably points either at CEBuffer below or at
          // separately allocated storage when more than INITIAL_ARRAY_SIZE_
          // entries are needed -- confirm against the implementation
          int32_t            *CE;
          // fixed-size storage holding INITIAL_ARRAY_SIZE_ entries
          int32_t             CEBuffer[INITIAL_ARRAY_SIZE_];
          // NOTE(review): names suggest these record whether the pattern
          // begins / ends with accents -- confirm
          UBool               hasPrefixAccents;
          UBool               hasSuffixAccents;
          // NOTE(review): presumably the shift applied when no table entry
          // applies -- confirm
          int16_t             defaultShiftSize;
          // tables of MAX_TABLE_SIZE_ entries each.
          // NOTE(review): presumably shift distances for forward (shift) and
          // backward (backShift) searching -- confirm
          int16_t             shift[MAX_TABLE_SIZE_];
          int16_t             backShift[MAX_TABLE_SIZE_];
};

/**
* Top-level string search object passed to the usearch_handle* functions
* declared in this header. It points to the text-side state (search), embeds
* the pattern data (pattern) and holds the collator and the collation element
* iterators used while matching.
*/
struct UStringSearch {
    struct USearch            *search;
    struct UPattern            pattern;
    const  UCollator          *collator;
    // positions within the collation element iterator are used to determine
    // if we are at the start of the text.
           UCollationElements *textIter;
    // utility collation element iterator, used throughout the program for
    // temporary iteration.
           UCollationElements *utilIter;
    // NOTE(review): presumably TRUE when this object owns `collator` and is
    // responsible for releasing it -- confirm
           UBool               ownCollator;
           UCollationStrength  strength;
    // NOTE(review): presumably the mask applied to collation elements before
    // comparison -- confirm
           uint32_t            ceMask;
    // NOTE(review): presumably the variable top taken from the collator; the
    // usearch_handleNextExact comment in this header mentions masking
    // primary values below it -- confirm
           uint32_t            variableTop;
    // NOTE(review): presumably TRUE when variable collation elements are to
    // be shifted (alternate handling) -- confirm
           UBool               toShift;
    // fixed-size buffers of INITIAL_ARRAY_SIZE_ UChars each
           UChar               canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
           UChar               canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
};

/**
* Exact matches without checking the ends for extra accents.
* The match after the position within the collation element iterator is to be
* found.
* After a match is found, the offset in the collation element iterator will be
* shifted to the start of the match.
* Implementation note:
* For tertiary we can't use the collator->tertiaryMask; that is a
* preprocessed mask that takes into account case options. Since we are only
* concerned with exact matches, we don't need that.
* Alternate handling - since only the 16 most significant digits are used,
* we can safely do a compare without masking if the ce is a variable, we mask
* and get only the primary values; no shifting to quaternary is required since
* all primary values less than variabletop will need to be masked off anyway.
* If the end character is composite and the pattern ce does not match the text
* ce, we skip it until we find a match in the end composite character or when
* it has passed the character. This is so that we can match pattern "a" with
* the text "\u00e6".
* @param strsrch string search data
* @param status error status if any
* @return TRUE if an exact match is found, FALSE otherwise
*/
U_CFUNC
UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);

/**
* Canonical matches; the "next" counterpart of
* usearch_handlePreviousCanonical.
* According to the definition, matches found here will include the whole span
* of beginning and ending accents if it overlaps that region.
* @param strsrch string search data
* @param status error status if any
* @return TRUE if a canonical match is found, FALSE otherwise
*/
U_CFUNC
UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);

/**
* Gets the previous match.
* Comments follow from usearch_handleNextExact.
* @param strsrch string search data
* @param status error status if any
* @return TRUE if an exact match is found, FALSE otherwise.
*/
U_CFUNC
UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);

/**
* Canonical matches; the "previous" counterpart of
* usearch_handleNextCanonical.
* According to the definition, matches found here will include the whole span
* of beginning and ending accents if it overlaps that region.
* @param strsrch string search data
* @param status error status if any
* @return TRUE if a canonical match is found, FALSE otherwise
*/
U_CFUNC
UBool usearch_handlePreviousCanonical(UStringSearch *strsrch, 
                                      UErrorCode    *status);

#endif /* #if !UCONFIG_NO_COLLATION */

#endif
Commit	Line	Data
b75a7d8f A	1	/*
b75a7d8f A	2	**********************************************************************
374ca955	3	* Copyright (C) 2001-2004 IBM and others. All rights reserved.
b75a7d8f A	4	**********************************************************************
	5	* Date Name Description
	6	* 08/13/2001 synwee Creation.
	7	**********************************************************************
	8	*/
	9	#ifndef USRCHIMP_H
	10	#define USRCHIMP_H
	11
	12	#include "unicode/utypes.h"
	13
	14	#if !UCONFIG_NO_COLLATION
	15
	16	#include "unicode/ucol.h"
	17	#include "unicode/ucoleitr.h"
	18	#include "unicode/ubrk.h"
	19
	20	#define INITIAL_ARRAY_SIZE_ 256
	21	#define MAX_TABLE_SIZE_ 257
	22
	23	struct USearch {
	24	// required since collation element iterator does not have a getText API
	25	const UChar *text;
	26	int32_t textLength; // exact length
	27	UBool isOverlap;
	28	UBool isCanonicalMatch;
	29	UBreakIterator *breakIter;
	30	// value USEARCH_DONE is the default value
	31	// if we are not at the start of the text or the end of the text,
	32	// depending on the iteration direction and matchedIndex is USEARCH_DONE
	33	// it means that we cannot find any more matches in that particular direction
	34	int32_t matchedIndex;
	35	int32_t matchedLength;
	36	UBool isForwardSearching;
	37	UBool reset;
	38	};
	39
	40	struct UPattern {
	41	const UChar *text;
	42	int32_t textLength; // exact length
	43	// length required for backwards ce comparison
	44	int32_t CELength;
374ca955 A	45	int32_t *CE;
374ca955 A	46	int32_t CEBuffer[INITIAL_ARRAY_SIZE_];
b75a7d8f A	47	UBool hasPrefixAccents;
	48	UBool hasSuffixAccents;
	49	int16_t defaultShiftSize;
	50	int16_t shift[MAX_TABLE_SIZE_];
	51	int16_t backShift[MAX_TABLE_SIZE_];
	52	};
	53
	54	struct UStringSearch {
	55	struct USearch *search;
	56	struct UPattern pattern;
	57	const UCollator *collator;
	58	// positions within the collation element iterator is used to determine
	59	// if we are at the start of the text.
	60	UCollationElements *textIter;
	61	// utility collation element, used throughout program for temporary
	62	// iteration.
	63	UCollationElements *utilIter;
	64	UBool ownCollator;
	65	UCollationStrength strength;
	66	uint32_t ceMask;
	67	uint32_t variableTop;
	68	UBool toShift;
	69	UChar canonicalPrefixAccents[INITIAL_ARRAY_SIZE_];
	70	UChar canonicalSuffixAccents[INITIAL_ARRAY_SIZE_];
	71	};
	72
	73	/**
	74	* Exact matches without checking for the ends for extra accents.
	75	* The match after the position within the collation element iterator is to be
	76	* found.
	77	* After a match is found the offset in the collation element iterator will be
	78	* shifted to the start of the match.
	79	* Implementation note:
	80	* For tertiary we can't use the collator->tertiaryMask, that is a
	81	* preprocessed mask that takes into account case options. since we are only
	82	* concerned with exact matches, we don't need that.
	83	* Alternate handling - since only the 16 most significant digits is only used,
	84	* we can safely do a compare without masking if the ce is a variable, we mask
	85	* and get only the primary values no shifting to quartenary is required since
	86	* all primary values less than variabletop will need to be masked off anyway.
	87	* If the end character is composite and the pattern ce does not match the text
	88	* ce, we skip it until we find a match in the end composite character or when
	89	* it has passed the character. This is so that we can match pattern "a" with
	90	* the text "\u00e6"
	91	* @param strsrch string search data
	92	* @param status error status if any
	93	* @return TRUE if an exact match is found, FALSE otherwise
	94	*/
	95	U_CFUNC
	96	UBool usearch_handleNextExact(UStringSearch *strsrch, UErrorCode *status);
	97
	98	/**
	99	* Canonical matches.
	100	* According to the definition, matches found here will include the whole span
	101	* of beginning and ending accents if it overlaps that region.
	102	* @param strsrch string search data
	103	* @param status error status if any
	104	* @return TRUE if a canonical match is found, FALSE otherwise
	105	*/
	106	U_CFUNC
	107	UBool usearch_handleNextCanonical(UStringSearch *strsrch, UErrorCode *status);
	108
	109	/**
	110	* Gets the previous match.
111	* Comments follows from handleNextExact
112	* @param strsrch string search data
113	* @param status error status if any
114	* @return TRUE if an exact match is found, FALSE otherwise.
115	*/
116	U_CFUNC
117	UBool usearch_handlePreviousExact(UStringSearch *strsrch, UErrorCode *status);
118
119	/**
120	* Canonical matches.
121	* According to the definition, matches found here will include the whole span
122	* of beginning and ending accents if it overlaps that region.
123	* @param strsrch string search data
124	* @param status error status if any
125	* @return TRUE if a canonical match is found, FALSE otherwise
126	*/
127	U_CFUNC
128	UBool usearch_handlePreviousCanonical(UStringSearch *strsrch,
129	UErrorCode *status);
130
131	#endif /* #if !UCONFIG_NO_COLLATION */
132
133	#endif