/*
******************************************************************************
*
*   Copyright (C) 2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  unisetspan.h
*   tab size:   8 (not used)
*
*   created on: 2007mar01
*   created by: Markus W. Scherer
*/
17 #ifndef __UNISETSPAN_H__
18 #define __UNISETSPAN_H__
20 #include "unicode/utypes.h"
21 #include "unicode/uniset.h"
/*
 * Implement span() etc. for a set with strings.
 * Avoid recursion because of its exponential complexity.
 * Instead, try multiple paths at once and track them with an IndexList.
 */
30 class UnicodeSetStringSpan
: public UMemory
{
33 * Which span() variant will be used?
34 * The object is either built for one variant and used once,
35 * or built for all and may be used many times.
47 FWD_UTF16_CONTAINED
= FWD
| UTF16
| CONTAINED
,
48 FWD_UTF16_NOT_CONTAINED
= FWD
| UTF16
| NOT_CONTAINED
,
49 FWD_UTF8_CONTAINED
= FWD
| UTF8
| CONTAINED
,
50 FWD_UTF8_NOT_CONTAINED
= FWD
| UTF8
| NOT_CONTAINED
,
51 BACK_UTF16_CONTAINED
= BACK
| UTF16
| CONTAINED
,
52 BACK_UTF16_NOT_CONTAINED
= BACK
| UTF16
| NOT_CONTAINED
,
53 BACK_UTF8_CONTAINED
= BACK
| UTF8
| CONTAINED
,
54 BACK_UTF8_NOT_CONTAINED
= BACK
| UTF8
| NOT_CONTAINED
57 UnicodeSetStringSpan(const UnicodeSet
&set
, const UVector
&setStrings
, uint32_t which
);
59 // Copy constructor. Assumes which==ALL for a frozen set.
60 UnicodeSetStringSpan(const UnicodeSetStringSpan
&otherStringSpan
, const UVector
&newParentSetStrings
);
62 ~UnicodeSetStringSpan();
65 * Do the strings need to be checked in span() etc.?
66 * @return TRUE if strings need to be checked (call span() here),
67 * FALSE if not (use a BMPSet for best performance).
69 inline UBool
needsStringSpanUTF16();
70 inline UBool
needsStringSpanUTF8();
72 // For fast UnicodeSet::contains(c).
73 inline UBool
contains(UChar32 c
) const;
75 int32_t span(const UChar
*s
, int32_t length
, USetSpanCondition spanCondition
) const;
77 int32_t spanBack(const UChar
*s
, int32_t length
, USetSpanCondition spanCondition
) const;
79 int32_t spanUTF8(const uint8_t *s
, int32_t length
, USetSpanCondition spanCondition
) const;
81 int32_t spanBackUTF8(const uint8_t *s
, int32_t length
, USetSpanCondition spanCondition
) const;
// Special spanLength byte values.
// The spanLength is >=0xfe.
// All code points in the string are contained in the parent set.
92 // Add a starting or ending string character to the spanNotSet
93 // so that a character span ends before any string.
94 void addToSpanNotSet(UChar32 c
);
96 int32_t spanNot(const UChar
*s
, int32_t length
) const;
97 int32_t spanNotBack(const UChar
*s
, int32_t length
) const;
98 int32_t spanNotUTF8(const uint8_t *s
, int32_t length
) const;
99 int32_t spanNotBackUTF8(const uint8_t *s
, int32_t length
) const;
101 // Set for span(). Same as parent but without strings.
104 // Set for span(not contained).
105 // Same as spanSet, plus characters that start or end strings.
106 UnicodeSet
*pSpanNotSet
;
108 // The strings of the parent set.
109 const UVector
&strings
;
111 // Pointer to the UTF-8 string lengths.
112 // Also pointer to further allocated storage for meta data and
113 // UTF-8 string contents as necessary.
114 int32_t *utf8Lengths
;
116 // Pointer to the part of the (utf8Lengths) memory block that stores
117 // the lengths of span(), spanBack() etc. for each string.
118 uint8_t *spanLengths
;
120 // Pointer to the part of the (utf8Lengths) memory block that stores
121 // the UTF-8 versions of the parent set's strings.
124 // Number of bytes for all UTF-8 versions of strings together.
127 // Maximum lengths of relevant strings.
131 // Set up for all variants of span()?
134 // Memory for small numbers and lengths of strings.
135 // For example, for 8 strings:
136 // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters
137 // = 112 bytes = int32_t[28].
138 int32_t staticLengths
[32];
141 UBool
UnicodeSetStringSpan::needsStringSpanUTF16() {
142 return (UBool
)(maxLength16
!=0);
145 UBool
UnicodeSetStringSpan::needsStringSpanUTF8() {
146 return (UBool
)(maxLength8
!=0);
149 UBool
UnicodeSetStringSpan::contains(UChar32 c
) const {
150 return spanSet
.contains(c
);