// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*   Copyright (C) 2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
******************************************************************************
*   file name:  unisetspan.h
*   tab size:   8 (not used)
*   created on: 2007mar01
*   created by: Markus W. Scherer
*/
19 #ifndef __UNISETSPAN_H__
20 #define __UNISETSPAN_H__
22 #include "unicode/utypes.h"
23 #include "unicode/uniset.h"
/*
 * Implement span() etc. for a set with strings.
 * Avoid recursion because of its exponential complexity.
 * Instead, try multiple paths at once and track them with an IndexList.
 */
32 class UnicodeSetStringSpan
: public UMemory
{
/*
 * Which span() variant will be used?
 * The object is either built for one variant and used once,
 * or built for all and may be used many times.
 */
49 FWD_UTF16_CONTAINED
= FWD
| UTF16
| CONTAINED
,
50 FWD_UTF16_NOT_CONTAINED
= FWD
| UTF16
| NOT_CONTAINED
,
51 FWD_UTF8_CONTAINED
= FWD
| UTF8
| CONTAINED
,
52 FWD_UTF8_NOT_CONTAINED
= FWD
| UTF8
| NOT_CONTAINED
,
53 BACK_UTF16_CONTAINED
= BACK
| UTF16
| CONTAINED
,
54 BACK_UTF16_NOT_CONTAINED
= BACK
| UTF16
| NOT_CONTAINED
,
55 BACK_UTF8_CONTAINED
= BACK
| UTF8
| CONTAINED
,
56 BACK_UTF8_NOT_CONTAINED
= BACK
| UTF8
| NOT_CONTAINED
59 UnicodeSetStringSpan(const UnicodeSet
&set
, const UVector
&setStrings
, uint32_t which
);
61 // Copy constructor. Assumes which==ALL for a frozen set.
62 UnicodeSetStringSpan(const UnicodeSetStringSpan
&otherStringSpan
, const UVector
&newParentSetStrings
);
64 ~UnicodeSetStringSpan();
/*
 * Do the strings need to be checked in span() etc.?
 * @return TRUE if strings need to be checked (call span() here),
 *         FALSE if not (use a BMPSet for best performance).
 */
71 inline UBool
needsStringSpanUTF16();
72 inline UBool
needsStringSpanUTF8();
74 // For fast UnicodeSet::contains(c).
75 inline UBool
contains(UChar32 c
) const;
77 int32_t span(const UChar
*s
, int32_t length
, USetSpanCondition spanCondition
) const;
79 int32_t spanBack(const UChar
*s
, int32_t length
, USetSpanCondition spanCondition
) const;
81 int32_t spanUTF8(const uint8_t *s
, int32_t length
, USetSpanCondition spanCondition
) const;
83 int32_t spanBackUTF8(const uint8_t *s
, int32_t length
, USetSpanCondition spanCondition
) const;
86 // Special spanLength byte values.
88 // The spanLength is >=0xfe.
90 // All code points in the string are contained in the parent set.
94 // Add a starting or ending string character to the spanNotSet
95 // so that a character span ends before any string.
96 void addToSpanNotSet(UChar32 c
);
98 int32_t spanNot(const UChar
*s
, int32_t length
) const;
99 int32_t spanNotBack(const UChar
*s
, int32_t length
) const;
100 int32_t spanNotUTF8(const uint8_t *s
, int32_t length
) const;
101 int32_t spanNotBackUTF8(const uint8_t *s
, int32_t length
) const;
103 // Set for span(). Same as parent but without strings.
106 // Set for span(not contained).
107 // Same as spanSet, plus characters that start or end strings.
108 UnicodeSet
*pSpanNotSet
;
110 // The strings of the parent set.
111 const UVector
&strings
;
113 // Pointer to the UTF-8 string lengths.
114 // Also pointer to further allocated storage for meta data and
115 // UTF-8 string contents as necessary.
116 int32_t *utf8Lengths
;
118 // Pointer to the part of the (utf8Lengths) memory block that stores
119 // the lengths of span(), spanBack() etc. for each string.
120 uint8_t *spanLengths
;
122 // Pointer to the part of the (utf8Lengths) memory block that stores
123 // the UTF-8 versions of the parent set's strings.
126 // Number of bytes for all UTF-8 versions of strings together.
129 // Maximum lengths of relevant strings.
133 // Set up for all variants of span()?
136 // Memory for small numbers and lengths of strings.
137 // For example, for 8 strings:
138 // 8 UTF-8 lengths, 8*4 bytes span lengths, 8*2 3-byte UTF-8 characters
139 // = 112 bytes = int32_t[28].
140 int32_t staticLengths
[32];
143 UBool
UnicodeSetStringSpan::needsStringSpanUTF16() {
144 return (UBool
)(maxLength16
!=0);
147 UBool
UnicodeSetStringSpan::needsStringSpanUTF8() {
148 return (UBool
)(maxLength8
!=0);
151 UBool
UnicodeSetStringSpan::contains(UChar32 c
) const {
152 return spanSet
.contains(c
);