]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/brktrans.cpp
ICU-400.40.tar.gz
[apple/icu.git] / icuSources / i18n / brktrans.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2008, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 05/11/2008 Andy Heninger Port from Java
8 **********************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
14
15 #include "unicode/unifilt.h"
16 #include "unicode/uchar.h"
17 #include "unicode/uniset.h"
18 #include "unicode/brkiter.h"
19 #include "brktrans.h"
20 #include "unicode/uchar.h"
21 #include "cmemory.h"
22 #include "uprops.h"
23 #include "uinvchar.h"
24 #include "util.h"
25 #include "uvectr32.h"
26
27 U_NAMESPACE_BEGIN
28
29 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
30
31 static const UChar SPACE = 32; // ' '
32
33
34 /**
35 * Constructs a transliterator with the default delimiters '{' and
36 * '}'.
37 */
38 BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
39 Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
40 fInsertion(SPACE) {
41 bi = NULL;
42 UErrorCode status = U_ZERO_ERROR;
43 boundaries = new UVector32(status);
44 }
45
46
47 /**
48 * Destructor.
49 */
50 BreakTransliterator::~BreakTransliterator() {
51 delete bi;
52 bi = NULL;
53 delete boundaries;
54 boundaries = NULL;
55 }
56
57 /**
58 * Copy constructor.
59 */
60 BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
61 Transliterator(o) {
62 bi = NULL;
63 if (o.bi != NULL) {
64 bi = o.bi->clone();
65 }
66 fInsertion = o.fInsertion;
67 UErrorCode status = U_ZERO_ERROR;
68 boundaries = new UVector32(status);
69 }
70
71
72 /**
73 * Transliterator API.
74 */
75 Transliterator* BreakTransliterator::clone(void) const {
76 return new BreakTransliterator(*this);
77 }
78
79 /**
80 * Implements {@link Transliterator#handleTransliterate}.
81 */
82 void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
83 UBool isIncremental ) const {
84
85 UErrorCode status = U_ZERO_ERROR;
86 boundaries->removeAllElements();
87 BreakTransliterator *nonConstThis = (BreakTransliterator *)this;
88 nonConstThis->getBreakIterator(); // Lazy-create it if necessary
89 UnicodeString sText = replaceableAsString(text);
90 bi->setText(sText);
91 bi->preceding(offsets.start);
92
93 // To make things much easier, we will stack the boundaries, and then insert at the end.
94 // generally, we won't need too many, since we will be filtered.
95
96 int32_t boundary;
97 for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
98 if (boundary == 0) continue;
99 // HACK: Check to see that preceeding item was a letter
100
101 UChar32 cp = sText.char32At(boundary-1);
102 int type = u_charType(cp);
103 //System.out.println(Integer.toString(cp,16) + " (before): " + type);
104 if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
105
106 cp = sText.char32At(boundary);
107 type = u_charType(cp);
108 //System.out.println(Integer.toString(cp,16) + " (after): " + type);
109 if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
110
111 boundaries->addElement(boundary, status);
112 //System.out.println(boundary);
113 }
114
115 int delta = 0;
116 int lastBoundary = 0;
117
118 if (boundaries->size() != 0) { // if we found something, adjust
119 delta = boundaries->size() * fInsertion.length();
120 lastBoundary = boundaries->lastElementi();
121
122 // we do this from the end backwards, so that we don't have to keep updating.
123
124 while (boundaries->size() > 0) {
125 boundary = boundaries->popi();
126 text.handleReplaceBetween(boundary, boundary, fInsertion);
127 }
128 }
129
130 // Now fix up the return values
131 offsets.contextLimit += delta;
132 offsets.limit += delta;
133 offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
134
135 // TODO: do something with U_FAILURE(status);
136 // (need to look at transliterators overall, not just here.)
137 }
138
139 //
140 // getInsertion()
141 //
142 const UnicodeString &BreakTransliterator::getInsertion() const {
143 return fInsertion;
144 }
145
146 //
147 // setInsertion()
148 //
149 void BreakTransliterator::setInsertsion(const UnicodeString &insertion) {
150 this->fInsertion = insertion;
151 }
152
153 //
154 // getBreakIterator Lazily create the break iterator if it does
155 // not already exist. Copied from Java, probably
156 // better to just create it in the constructor.
157 //
158 BreakIterator *BreakTransliterator::getBreakIterator() {
159 UErrorCode status = U_ZERO_ERROR;
160 if (bi == NULL) {
161 // Note: Thai breaking behavior is universal, it is not
162 // tied to the Thai locale.
163 bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
164 }
165 return bi;
166 }
167
168 //
169 // replaceableAsString Hack to let break iterators work
170 // on the replaceable text from transliterators.
171 // In practice, the only real Replaceable type that we
172 // will be seeing is UnicodeString, so this function
173 // will normally be efficient.
174 //
175 UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
176 if (r.getDynamicClassID() == UnicodeString::getStaticClassID()) {
177 return (UnicodeString &) r;
178 }
179 UnicodeString s;
180 r.extractBetween(0, r.length(), s);
181 return s;
182 }
183
184 U_NAMESPACE_END
185
#endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */