2 *******************************************************************************
3 * Copyright (C) 2003-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 08/22/2003 ram Creation.
13 *******************************************************************************
16 // Safer use of UnicodeString.
// Defined as `explicit` here, before the ICU headers are included, unless the
// build has already defined it.
// NOTE(review): presumably this makes UnicodeString's constructors from
// character types explicit -- confirm against unicode/unistr.h.
17 #ifndef UNISTR_FROM_CHAR_EXPLICIT
18 # define UNISTR_FROM_CHAR_EXPLICIT explicit
21 // Less important, but still a good idea.
// Same arrangement for the string-pointer constructors: defined as `explicit`
// unless already set by the build.
22 #ifndef UNISTR_FROM_STRING_EXPLICIT
23 # define UNISTR_FROM_STRING_EXPLICIT explicit
26 #include "unicode/regex.h"
27 #include "unicode/unistr.h"
28 #include "unicode/parseerr.h"
35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when regular expressions are not available */
// Size of every UnicodeString array in this file that receives the pieces
// produced by RegexPattern::split().
37 #define MAX_SPLIT_STRINGS 20
// Regular-expression sources, one slot per value below UPC_LIMIT; looked up
// elsewhere in this file as patternStrings[option] with a
// UParseCommentsOption value.
39 const char *patternStrings
[UPC_LIMIT
]={
// removeText: applies the regular expression patString to the srcLen UChars
// at source, replaces every match with replaceText, and extracts the result
// back into source.
//   source      - text to rewrite; also the buffer that receives the result
//   srcLen      - number of UChars used to build the input string; also the
//                 size argument given to extract() when writing back
//   patString   - regular expression pattern
//   options     - flags handed to the RegexMatcher constructor
//   replaceText - replacement passed to replaceAll()
//   status      - in/out ICU error code; tested for NULL before it is read
// Returns the value of UnicodeString::extract() for the rewritten text.
// NOTE(review): the result is written back with srcLen as the size argument,
// so this presumably relies on the replacement never making the text longer
// than the input -- confirm against callers.
45 removeText(UChar
*source
, int32_t srcLen
,
46 UnicodeString patString
,uint32_t options
,
47 UnicodeString replaceText
, UErrorCode
*status
){
// Guard on a NULL status pointer or an error already recorded in it.
49 if(status
== NULL
|| U_FAILURE(*status
)){
// Build a UnicodeString from the caller's buffer to serve as matcher input.
53 UnicodeString
src(source
, srcLen
);
// Constructing the matcher compiles patString; failures are reported in *status.
55 RegexMatcher
myMatcher(patString
, src
, options
, *status
);
56 if(U_FAILURE(*status
)){
// Replace every match of the pattern in src with replaceText.
62 dest
= myMatcher
.replaceAll(replaceText
,*status
);
// Write the rewritten text back over the caller's buffer.
65 return dest
.extract(source
, srcLen
, *status
);
// trim: strips leading and trailing whitespace from src in place using three
// successive removeText() passes, each replacing its match with an empty
// string. srcLen is updated from the return value of every pass.
//   src    - buffer edited in place
//   srcLen - current length of the text in src
//   status - in/out ICU error code, forwarded to removeText()
// NOTE(review): the first pattern ends with a literal space after the
// character class ("^[ \r\n]+ "), so it only matches when the leading run is
// followed by one more space -- confirm this is intended; the second pass
// ("^\s+") removes leading whitespace regardless.
69 trim(UChar
*src
, int32_t srcLen
, UErrorCode
*status
){
70 srcLen
= removeText(src
, srcLen
, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status
); // remove leading new lines
71 srcLen
= removeText(src
, srcLen
, UnicodeString("^\\s+"), 0, UnicodeString(), status
); // remove leading spaces
72 srcLen
= removeText(src
, srcLen
, UnicodeString("\\s+$"), 0, UnicodeString(), status
); // remove trailing spaces
// removeCmtText: cleans up comment text in place. It trims the text, removes
// the " * " style prefix matched at line starts, and finally replaces every
// run of spaces, carriage returns and line feeds with a single space.
//   source - buffer edited in place
//   srcLen - current length of the text in source
//   status - in/out ICU error code, forwarded to trim() and removeText()
// Returns the value of the final removeText() call.
77 removeCmtText(UChar
* source
, int32_t srcLen
, UErrorCode
* status
){
78 srcLen
= trim(source
, srcLen
, status
);
79 UnicodeString
patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the beginning of the line
// UREGEX_MULTILINE is passed so that the prefix is handled per line.
80 srcLen
= removeText(source
, srcLen
, patString
, UREGEX_MULTILINE
, UnicodeString(), status
);
81 return removeText(source
, srcLen
, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status
);// collapse each run of spaces and line breaks into a single space
// getText: splits the source text on "@" and tests each piece against
// patternString; for the first piece on which lookingAt() succeeds, capture
// group 1 is extracted into *dest.
//   source        - input text
//   srcLen        - number of UChars in source
//   dest          - pointer to the output buffer pointer; *dest receives the text
//   destCapacity  - size argument given to extract() for *dest
//   patternString - regular expression with at least one capture group
// status is dereferenced throughout as the in/out ICU error code.
// Returns the value of extract() for the first piece that matches.
// NOTE(review): `pattern` is obtained from RegexPattern::compile() and no
// delete of it is visible in this function -- confirm it is released.
// NOTE(review): the loop runs over all MAX_SPLIT_STRINGS slots and does not
// use the count returned by split(), so unused (presumably empty) slots are
// tested as well -- confirm this is intended.
85 getText(const UChar
* source
, int32_t srcLen
,
86 UChar
** dest
, int32_t destCapacity
,
87 UnicodeString patternString
,
// Guard on a NULL status pointer or an error already recorded in it.
90 if(status
== NULL
|| U_FAILURE(*status
)){
94 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
// The delimiter used to cut the text into pieces is the literal "@".
95 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), 0, *status
);
96 UnicodeString
src (source
,srcLen
);
98 if (U_FAILURE(*status
)) {
101 pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
// One matcher is created up front and re-pointed at each piece via reset().
103 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
104 if (U_FAILURE(*status
)) {
107 for(int32_t i
=0; i
<MAX_SPLIT_STRINGS
; i
++){
108 matcher
.reset(stringArray
[i
]);
109 if(matcher
.lookingAt(*status
)){
// Capture group 1 is the text that is handed back to the caller.
110 UnicodeString out
= matcher
.group(1, *status
);
112 return out
.extract(*dest
, destCapacity
,*status
);
// Code unit U+0040 ('@'); cast to UChar and searched for with indexOf() in
// getDescription().
119 #define AT_SIGN 0x0040
// getDescription: splits the source text on "@" and, when the first piece
// contains no AT_SIGN character, extracts that piece into *dest and trims it
// in place.
//   source       - input text
//   srcLen       - number of UChars in source
//   dest         - pointer to the output buffer pointer; *dest receives the text
//   destCapacity - size argument given to extract() for *dest
// status is dereferenced throughout as the in/out ICU error code.
// On that path the return value is the result of trim().
// NOTE(review): `pattern` is obtained from RegexPattern::compile() and no
// delete of it is visible in this function -- confirm it is released.
122 getDescription( const UChar
* source
, int32_t srcLen
,
123 UChar
** dest
, int32_t destCapacity
,
// Guard on a NULL status pointer or an error already recorded in it.
125 if(status
== NULL
|| U_FAILURE(*status
)){
129 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
130 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE
, *status
);
131 UnicodeString
src(source
, srcLen
);
133 if (U_FAILURE(*status
)) {
136 pattern
->split(src
, stringArray
,MAX_SPLIT_STRINGS
, *status
);
// Only the first piece produced by the split is considered.
138 if(stringArray
[0].indexOf((UChar
)AT_SIGN
)==-1){
139 int32_t destLen
= stringArray
[0].extract(*dest
, destCapacity
, *status
);
140 return trim(*dest
, destLen
, status
);
// getCount: splits the source text on "@" and tests each of the retLen pieces
// against the pattern selected by patternStrings[option].
//   source - input text
//   srcLen - number of UChars in source
//   option - selects the entry of patternStrings to match with
//   status - in/out ICU error code; tested for NULL before it is read
// Side effect: when option is UPC_TRANSLATE and count is greater than 1, a
// message is written to stderr and the process is ended with
// exit(U_UNSUPPORTED_ERROR).
// NOTE(review): presumably `count` holds the number of pieces that matched
// and is the return value -- confirm.
// NOTE(review): option indexes patternStrings without a visible range check;
// this assumes option < UPC_LIMIT -- confirm against callers.
// NOTE(review): `pattern` is obtained from RegexPattern::compile() and no
// delete of it is visible in this function -- confirm it is released.
146 getCount(const UChar
* source
, int32_t srcLen
,
147 UParseCommentsOption option
, UErrorCode
*status
){
// Guard on a NULL status pointer or an error already recorded in it.
149 if(status
== NULL
|| U_FAILURE(*status
)){
153 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
154 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE
, *status
);
155 UnicodeString
src (source
, srcLen
);
158 if (U_FAILURE(*status
)) {
// retLen is the value returned by split(); it bounds the loop below.
161 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
163 UnicodeString
patternString(patternStrings
[option
]);
164 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
165 if (U_FAILURE(*status
)) {
169 for(int32_t i
=0; i
<retLen
; i
++){
170 matcher
.reset(stringArray
[i
]);
171 if(matcher
.lookingAt(*status
)){
// More than one @translate tag is treated as fatal: report and terminate.
175 if(option
== UPC_TRANSLATE
&& count
> 1){
176 fprintf(stderr
, "Multiple @translate tags cannot be supported.\n");
177 exit(U_UNSUPPORTED_ERROR
);
// getAt: splits the source text on "@", tests each of the retLen pieces
// against the pattern selected by patternStrings[option], and for a matching
// piece extracts capture group 1 into *dest.
//   source       - input text
//   srcLen       - number of UChars in source
//   dest         - pointer to the output buffer pointer; *dest receives the text
//   destCapacity - size argument given to extract() for *dest
//   option       - selects the entry of patternStrings to match with
// status is dereferenced throughout as the in/out ICU error code.
// Returns the value of extract() on the path that copies a match out.
// NOTE(review): presumably a further argument selects which matching piece is
// returned -- confirm against the declaration.
// NOTE(review): option indexes patternStrings without a visible range check;
// this assumes option < UPC_LIMIT -- confirm against callers.
// NOTE(review): `pattern` is obtained from RegexPattern::compile() and no
// delete of it is visible in this function -- confirm it is released.
183 getAt(const UChar
* source
, int32_t srcLen
,
184 UChar
** dest
, int32_t destCapacity
,
186 UParseCommentsOption option
,
// Guard on a NULL status pointer or an error already recorded in it.
189 if(status
== NULL
|| U_FAILURE(*status
)){
193 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
194 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE
, *status
);
195 UnicodeString
src (source
, srcLen
);
198 if (U_FAILURE(*status
)) {
// retLen is the value returned by split(); it bounds the loop below.
201 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
203 UnicodeString
patternString(patternStrings
[option
]);
204 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
205 if (U_FAILURE(*status
)) {
209 for(int32_t i
=0; i
<retLen
; i
++){
210 matcher
.reset(stringArray
[i
]);
211 if(matcher
.lookingAt(*status
)){
// Capture group 1 is the text that is handed back to the caller.
213 UnicodeString out
= matcher
.group(1, *status
);
214 return out
.extract(*dest
, destCapacity
,*status
);
// getTranslate: uses getText() with the pattern "^translate\s*?(.*)" to copy
// the text captured after a "translate" tag into *dest, then trims that
// buffer in place.
//   source       - input text
//   srcLen       - number of UChars in source
//   dest         - pointer to the output buffer pointer; *dest receives the text
//   destCapacity - capacity forwarded to getText()
// status is forwarded to getText() and trim().
// Returns the value of trim() applied to *dest.
225 getTranslate( const UChar
* source
, int32_t srcLen
,
226 UChar
** dest
, int32_t destCapacity
,
228 UnicodeString
notePatternString("^translate\\s*?(.*)");
230 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
231 return trim(*dest
, destLen
, status
);
// getNote: uses getText() with the pattern "^note\s*?(.*)" to copy the text
// captured after a "note" tag into *dest, then trims that buffer in place.
//   source       - input text
//   srcLen       - number of UChars in source
//   dest         - pointer to the output buffer pointer; *dest receives the text
//   destCapacity - capacity forwarded to getText()
// status is forwarded to getText() and trim().
// Returns the value of trim() applied to *dest.
235 getNote(const UChar
* source
, int32_t srcLen
,
236 UChar
** dest
, int32_t destCapacity
,
239 UnicodeString
notePatternString("^note\\s*?(.*)");
240 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
241 return trim(*dest
, destLen
, status
);
245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */