]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/brktrans.cpp
2 **********************************************************************
3 * Copyright (C) 2008-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 05/11/2008 Andy Heninger Port from Java
8 **********************************************************************
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
15 #include "unicode/brkiter.h"
16 #include "unicode/localpointer.h"
17 #include "unicode/uchar.h"
18 #include "unicode/unifilt.h"
19 #include "unicode/uniset.h"
31 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator
)
33 static const UChar SPACE
= 32; // ' '
37 * Constructs a transliterator with the default delimiters '{' and
40 BreakTransliterator::BreakTransliterator(UnicodeFilter
* adoptedFilter
) :
41 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter
),
42 cachedBI(NULL
), cachedBoundaries(NULL
), fInsertion(SPACE
) {
49 BreakTransliterator::~BreakTransliterator() {
55 BreakTransliterator::BreakTransliterator(const BreakTransliterator
& o
) :
56 Transliterator(o
), cachedBI(NULL
), cachedBoundaries(NULL
), fInsertion(o
.fInsertion
) {
63 Transliterator
* BreakTransliterator::clone(void) const {
64 return new BreakTransliterator(*this);
68 * Implements {@link Transliterator#handleTransliterate}.
70 void BreakTransliterator::handleTransliterate(Replaceable
& text
, UTransPosition
& offsets
,
71 UBool isIncremental
) const {
73 UErrorCode status
= U_ZERO_ERROR
;
74 LocalPointer
<BreakIterator
> bi
;
75 LocalPointer
<UVector32
> boundaries
;
79 BreakTransliterator
*nonConstThis
= const_cast<BreakTransliterator
*>(this);
80 boundaries
.moveFrom(nonConstThis
->cachedBoundaries
);
81 bi
.moveFrom(nonConstThis
->cachedBI
);
84 bi
.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status
));
86 if (boundaries
.isNull()) {
87 boundaries
.adoptInstead(new UVector32(status
));
90 if (bi
.isNull() || boundaries
.isNull() || U_FAILURE(status
)) {
94 boundaries
->removeAllElements();
95 UnicodeString sText
= replaceableAsString(text
);
97 bi
->preceding(offsets
.start
);
99 // To make things much easier, we stack the boundaries first and then insert at the end.
100 // Generally, we won't need too many, since the input will be filtered.
103 for(boundary
= bi
->next(); boundary
!= UBRK_DONE
&& boundary
< offsets
.limit
; boundary
= bi
->next()) {
104 if (boundary
== 0) continue;
105 // HACK: Check to see that the preceding item was a letter or a mark
107 UChar32 cp
= sText
.char32At(boundary
-1);
108 int type
= u_charType(cp
);
109 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
110 if ((U_MASK(type
) & (U_GC_L_MASK
| U_GC_M_MASK
)) == 0) continue;
112 cp
= sText
.char32At(boundary
);
113 type
= u_charType(cp
);
114 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
115 if ((U_MASK(type
) & (U_GC_L_MASK
| U_GC_M_MASK
)) == 0) continue;
117 boundaries
->addElement(boundary
, status
);
118 // printf("Boundary at %d\n", boundary);
122 int lastBoundary
= 0;
124 if (boundaries
->size() != 0) { // if we found something, adjust
125 delta
= boundaries
->size() * fInsertion
.length();
126 lastBoundary
= boundaries
->lastElementi();
128 // We insert from the end backwards, so that the remaining (earlier) boundary offsets don't need updating.
130 while (boundaries
->size() > 0) {
131 boundary
= boundaries
->popi();
132 text
.handleReplaceBetween(boundary
, boundary
, fInsertion
);
136 // Now fix up the return values
137 offsets
.contextLimit
+= delta
;
138 offsets
.limit
+= delta
;
139 offsets
.start
= isIncremental
? lastBoundary
+ delta
: offsets
.limit
;
141 // Return break iterator & boundaries vector to the cache.
144 BreakTransliterator
*nonConstThis
= const_cast<BreakTransliterator
*>(this);
145 if (nonConstThis
->cachedBI
.isNull()) {
146 nonConstThis
->cachedBI
.moveFrom(bi
);
148 if (nonConstThis
->cachedBoundaries
.isNull()) {
149 nonConstThis
->cachedBoundaries
.moveFrom(boundaries
);
153 // TODO: do something with U_FAILURE(status);
154 // (need to look at transliterators overall, not just here.)
160 const UnicodeString
&BreakTransliterator::getInsertion() const {
167 void BreakTransliterator::setInsertion(const UnicodeString
&insertion
) {
168 this->fInsertion
= insertion
;
172 // replaceableAsString: a hack to let break iterators work
173 // on the replaceable text from transliterators.
174 // In practice, the only real Replaceable type that we
175 // will be seeing is UnicodeString, so this function
176 // will normally be efficient.
178 UnicodeString
BreakTransliterator::replaceableAsString(Replaceable
&r
) {
180 UnicodeString
*rs
= dynamic_cast<UnicodeString
*>(&r
);
184 r
.extractBetween(0, r
.length(), s
);
191 #endif /* #if !UCONFIG_NO_TRANSLITERATION */