]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/brktrans.cpp
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2008-2015, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   05/11/2008  Andy Heninger  Port from Java
**********************************************************************
*/
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
17 #include "unicode/brkiter.h"
18 #include "unicode/localpointer.h"
19 #include "unicode/uchar.h"
20 #include "unicode/unifilt.h"
21 #include "unicode/uniset.h"
33 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator
)
35 static const UChar SPACE
= 32; // ' '
39 * Constructs a transliterator with the default delimiters '{' and
42 BreakTransliterator::BreakTransliterator(UnicodeFilter
* adoptedFilter
) :
43 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter
),
44 cachedBI(NULL
), cachedBoundaries(NULL
), fInsertion(SPACE
) {
51 BreakTransliterator::~BreakTransliterator() {
57 BreakTransliterator::BreakTransliterator(const BreakTransliterator
& o
) :
58 Transliterator(o
), cachedBI(NULL
), cachedBoundaries(NULL
), fInsertion(o
.fInsertion
) {
65 Transliterator
* BreakTransliterator::clone(void) const {
66 return new BreakTransliterator(*this);
70 * Implements {@link Transliterator#handleTransliterate}.
72 void BreakTransliterator::handleTransliterate(Replaceable
& text
, UTransPosition
& offsets
,
73 UBool isIncremental
) const {
75 UErrorCode status
= U_ZERO_ERROR
;
76 LocalPointer
<BreakIterator
> bi
;
77 LocalPointer
<UVector32
> boundaries
;
81 BreakTransliterator
*nonConstThis
= const_cast<BreakTransliterator
*>(this);
82 boundaries
.moveFrom(nonConstThis
->cachedBoundaries
);
83 bi
.moveFrom(nonConstThis
->cachedBI
);
86 bi
.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status
));
88 if (boundaries
.isNull()) {
89 boundaries
.adoptInstead(new UVector32(status
));
92 if (bi
.isNull() || boundaries
.isNull() || U_FAILURE(status
)) {
96 boundaries
->removeAllElements();
97 UnicodeString sText
= replaceableAsString(text
);
99 bi
->preceding(offsets
.start
);
101 // To make things much easier, we will stack the boundaries, and then insert at the end.
102 // generally, we won't need too many, since we will be filtered.
105 for(boundary
= bi
->next(); boundary
!= UBRK_DONE
&& boundary
< offsets
.limit
; boundary
= bi
->next()) {
106 if (boundary
== 0) continue;
107 // HACK: Check to see that preceeding item was a letter
109 UChar32 cp
= sText
.char32At(boundary
-1);
110 int type
= u_charType(cp
);
111 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
112 if ((U_MASK(type
) & (U_GC_L_MASK
| U_GC_M_MASK
)) == 0) continue;
114 cp
= sText
.char32At(boundary
);
115 type
= u_charType(cp
);
116 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
117 if ((U_MASK(type
) & (U_GC_L_MASK
| U_GC_M_MASK
)) == 0) continue;
119 boundaries
->addElement(boundary
, status
);
120 // printf("Boundary at %d\n", boundary);
124 int lastBoundary
= 0;
126 if (boundaries
->size() != 0) { // if we found something, adjust
127 delta
= boundaries
->size() * fInsertion
.length();
128 lastBoundary
= boundaries
->lastElementi();
130 // we do this from the end backwards, so that we don't have to keep updating.
132 while (boundaries
->size() > 0) {
133 boundary
= boundaries
->popi();
134 text
.handleReplaceBetween(boundary
, boundary
, fInsertion
);
138 // Now fix up the return values
139 offsets
.contextLimit
+= delta
;
140 offsets
.limit
+= delta
;
141 offsets
.start
= isIncremental
? lastBoundary
+ delta
: offsets
.limit
;
143 // Return break iterator & boundaries vector to the cache.
146 BreakTransliterator
*nonConstThis
= const_cast<BreakTransliterator
*>(this);
147 if (nonConstThis
->cachedBI
.isNull()) {
148 nonConstThis
->cachedBI
.moveFrom(bi
);
150 if (nonConstThis
->cachedBoundaries
.isNull()) {
151 nonConstThis
->cachedBoundaries
.moveFrom(boundaries
);
155 // TODO: do something with U_FAILURE(status);
156 // (need to look at transliterators overall, not just here.)
162 const UnicodeString
&BreakTransliterator::getInsertion() const {
169 void BreakTransliterator::setInsertion(const UnicodeString
&insertion
) {
170 this->fInsertion
= insertion
;
174 // replaceableAsString Hack to let break iterators work
175 // on the replaceable text from transliterators.
176 // In practice, the only real Replaceable type that we
177 // will be seeing is UnicodeString, so this function
178 // will normally be efficient.
180 UnicodeString
BreakTransliterator::replaceableAsString(Replaceable
&r
) {
182 UnicodeString
*rs
= dynamic_cast<UnicodeString
*>(&r
);
186 r
.extractBetween(0, r
.length(), s
);
193 #endif /* #if !UCONFIG_NO_TRANSLITERATION */