]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/brktrans.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (C) 2008-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 05/11/2008 Andy Heninger Port from Java
10 **********************************************************************
15 #include "unicode/utypes.h"
17 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
19 #include "unicode/brkiter.h"
20 #include "unicode/localpointer.h"
21 #include "unicode/uchar.h"
22 #include "unicode/unifilt.h"
23 #include "unicode/uniset.h"
35 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator
)
37 static const UChar SPACE
= 32; // ' '
41 * Constructs a transliterator with the default delimiters '{' and
// Primary constructor.
//
// Passes the fixed ID "Any-BreakInternal" and the caller-supplied filter to
// the Transliterator base class, starts with both cache members (cachedBI and
// cachedBoundaries) set to NULL, and initializes the insertion string
// fInsertion from SPACE.
//
// adoptedFilter - filter handed to the base class constructor (the name
//                 suggests ownership is transferred; confirm against the
//                 Transliterator documentation).
44 BreakTransliterator::BreakTransliterator(UnicodeFilter
* adoptedFilter
) :
45 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter
),
46 cachedBI(NULL
), cachedBoundaries(NULL
), fInsertion(SPACE
) {
// Destructor. Only the opening of the definition is visible in this excerpt.
// NOTE(review): cachedBI and cachedBoundaries are used elsewhere in this file
// through isNull() and std::move, so they look like owning smart pointers
// that would release themselves -- confirm against the class declaration.
53 BreakTransliterator::~BreakTransliterator() {
// Copy constructor.
//
// Copy-constructs the Transliterator base from `o` and copies o.fInsertion,
// but does not copy the cached break iterator or boundary vector: cachedBI
// and cachedBoundaries both start out NULL in the new object.
//
// o - the transliterator to copy from.
59 BreakTransliterator::BreakTransliterator(const BreakTransliterator
& o
) :
60 Transliterator(o
), cachedBI(NULL
), cachedBoundaries(NULL
), fInsertion(o
.fInsertion
) {
67 BreakTransliterator
* BreakTransliterator::clone() const {
68 return new BreakTransliterator(*this);
72 * Implements {@link Transliterator#handleTransliterate}.
// Inserts fInsertion into `text` at break-iterator boundaries that lie
// between two letter/mark characters, then adjusts `offsets` for the text
// that was added.
//
// text          - the Replaceable that is edited in place.
// offsets       - position record. offsets.start seeds the boundary scan and
//                 offsets.limit bounds it; contextLimit, limit and start are
//                 all rewritten before returning.
// isIncremental - selects how offsets.start is set on exit (see the fix-up
//                 near the end).
74 void BreakTransliterator::handleTransliterate(Replaceable
& text
, UTransPosition
& offsets
,
75 UBool isIncremental
) const {
// Error status shared by every ICU call made below.
77 UErrorCode status
= U_ZERO_ERROR
;
// Working break iterator and boundary stack for this call.
78 LocalPointer
<BreakIterator
> bi
;
79 LocalPointer
<UVector32
> boundaries
;
// Take the cached iterator and vector out of the object, leaving the cache
// slots empty while this call uses them. The method is const, hence the
// const_cast.
// NOTE(review): any locking around this cache access is not visible in this
// excerpt -- confirm against the full file.
83 BreakTransliterator
*nonConstThis
= const_cast<BreakTransliterator
*>(this);
84 boundaries
= std::move(nonConstThis
->cachedBoundaries
);
85 bi
= std::move(nonConstThis
->cachedBI
);
// Create an English word-instance break iterator.
88 bi
.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status
));
// No boundary vector came out of the cache: allocate one.
90 if (boundaries
.isNull()) {
91 boundaries
.adoptInstead(new UVector32(status
));
// Guard: a missing iterator, a missing vector, or a failed status.
94 if (bi
.isNull() || boundaries
.isNull() || U_FAILURE(status
)) {
// Start from an empty boundary stack and take the text as a UnicodeString
// for the character lookups below.
98 boundaries
->removeAllElements();
99 UnicodeString sText
= replaceableAsString(text
);
// Position the iterator at the boundary preceding offsets.start; the next()
// calls in the loop walk forward from there.
101 bi
->preceding(offsets
.start
);
103 // To make things much easier, we will stack the boundaries, and then insert at the end.
104 // Generally, we won't need too many, since we will be filtered.
// Visit boundaries until the iterator is exhausted (UBRK_DONE) or a boundary
// reaches offsets.limit.
107 for(boundary
= bi
->next(); boundary
!= UBRK_DONE
&& boundary
< offsets
.limit
; boundary
= bi
->next()) {
// Index 0 has no preceding character to test (boundary-1 is read below).
108 if (boundary
== 0) continue;
109 // HACK: Check to see that the preceding item was a letter
111 UChar32 cp
= sText
.char32At(boundary
-1);
112 int type
= u_charType(cp
);
113 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
// Skip unless the character before the boundary is in general category
// group L or M (tested via U_GC_L_MASK | U_GC_M_MASK).
114 if ((U_MASK(type
) & (U_GC_L_MASK
| U_GC_M_MASK
)) == 0) continue;
// Same test for the character at the boundary itself (the one after it).
116 cp
= sText
.char32At(boundary
);
117 type
= u_charType(cp
);
118 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
119 if ((U_MASK(type
) & (U_GC_L_MASK
| U_GC_M_MASK
)) == 0) continue;
// Both neighbours qualify: remember this boundary for insertion later.
121 boundaries
->addElement(boundary
, status
);
122 // printf("Boundary at %d\n", boundary);
// Most recently recorded boundary; stays 0 when none was recorded.
126 int lastBoundary
= 0;
128 if (boundaries
->size() != 0) { // if we found something, adjust
// delta: total length of all text about to be inserted.
129 delta
= boundaries
->size() * fInsertion
.length();
130 lastBoundary
= boundaries
->lastElementi();
132 // we do this from the end backwards, so that we don't have to keep updating.
134 while (boundaries
->size() > 0) {
135 boundary
= boundaries
->popi();
// Replacing the empty range [boundary, boundary) is a pure insertion.
136 text
.handleReplaceBetween(boundary
, boundary
, fInsertion
);
140 // Now fix up the return values
141 offsets
.contextLimit
+= delta
;
142 offsets
.limit
+= delta
;
// Incremental mode: start becomes lastBoundary + delta.
// Otherwise start is set to the (already adjusted) limit.
143 offsets
.start
= isIncremental
? lastBoundary
+ delta
: offsets
.limit
;
145 // Return break iterator & boundaries vector to the cache.
// Each object is stored only if its cache slot is currently empty.
148 BreakTransliterator
*nonConstThis
= const_cast<BreakTransliterator
*>(this);
149 if (nonConstThis
->cachedBI
.isNull()) {
150 nonConstThis
->cachedBI
= std::move(bi
);
152 if (nonConstThis
->cachedBoundaries
.isNull()) {
153 nonConstThis
->cachedBoundaries
= std::move(boundaries
);
157 // TODO: do something with U_FAILURE(status);
158 // (need to look at transliterators overall, not just here.)
// Accessor for the insertion string; returns a const UnicodeString reference.
// NOTE(review): the body is not visible in this excerpt -- presumably it
// returns fInsertion; confirm against the full file.
164 const UnicodeString
&BreakTransliterator::getInsertion() const {
171 void BreakTransliterator::setInsertion(const UnicodeString
&insertion
) {
172 this->fInsertion
= insertion
;
176 // replaceableAsString: hack to let break iterators work
177 // on the replaceable text from transliterators.
178 // In practice, the only real Replaceable type that we
179 // will be seeing is UnicodeString, so this function
180 // will normally be efficient.
//
// r - the Replaceable whose contents are wanted as a UnicodeString.
// Returns a UnicodeString by value.
182 UnicodeString
BreakTransliterator::replaceableAsString(Replaceable
&r
) {
// Probe whether r is actually a UnicodeString; rs is null when it is not.
// (The code that uses rs is not visible in this excerpt.)
184 UnicodeString
*rs
= dynamic_cast<UnicodeString
*>(&r
);
// General path: extract the whole range [0, r.length()) into s.
188 r
.extractBetween(0, r
.length(), s
);
195 #endif /* #if !UCONFIG_NO_TRANSLITERATION */