2 *******************************************************************************
3 * Copyright (C) 2003-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
9 * Modification History:
11 * Date Name Description
12 * 08/22/2003 ram Creation.
13 *******************************************************************************
16 #include "unicode/regex.h"
17 #include "unicode/unistr.h"
18 #include "unicode/parseerr.h"
25 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when RegularExpressions not available */
/* Size of the UnicodeString arrays that receive the pieces produced by
 * RegexPattern::split() in the functions below. */
27 #define MAX_SPLIT_STRINGS 20
/* Table of regular-expression strings with UPC_LIMIT entries; getCount() and
 * getAt() index it with a UParseCommentsOption value to build their matcher. */
29 const char *patternStrings
[UPC_LIMIT
]={
/*
 * Replaces every match of the regular expression patString within the first
 * srcLen UChars of source by replaceText, then copies the result back into
 * source.
 *
 * source      - buffer holding the text; it also receives the result
 * srcLen      - number of UChars read from source; also passed to extract()
 *               as the capacity for the copy back into source
 * patString   - regular expression handed to the RegexMatcher
 * options     - regex flags handed to the RegexMatcher
 * replaceText - replacement handed to RegexMatcher::replaceAll()
 * status      - ICU error code; tested for NULL/failure before any other work
 *
 * Returns the value reported by UnicodeString::extract() for the copy back.
 * NOTE(review): the write-back capacity is srcLen, so a replacement that makes
 * the text longer would presumably not fit -- confirm callers only ever shrink.
 */
35 removeText(UChar
*source
, int32_t srcLen
,
36 UnicodeString patString
,uint32_t options
,
37 UnicodeString replaceText
, UErrorCode
*status
){
// Guard for a missing or already-failed status.
39 if(status
== NULL
|| U_FAILURE(*status
)){
// Wrap the caller's buffer contents in a UnicodeString for the matcher.
43 UnicodeString
src(source
, srcLen
);
45 RegexMatcher
myMatcher(patString
, src
, options
, *status
);
// Constructing the matcher compiles patString and can set *status.
46 if(U_FAILURE(*status
)){
52 dest
= myMatcher
.replaceAll(replaceText
,*status
);
// Copy the substituted text over the original buffer.
55 return dest
.extract(source
, srcLen
, *status
);
/*
 * Strips leading and trailing whitespace from the first srcLen UChars of src
 * through three successive removeText() calls. Each call rewrites src in place
 * and its return value is stored back into srcLen for the next call.
 *
 * src    - buffer to trim in place
 * srcLen - number of UChars in src to consider
 * status - ICU error code forwarded to removeText()
 */
59 trim(UChar
*src
, int32_t srcLen
, UErrorCode
*status
){
// NOTE(review): this pattern ends in a literal space, so it only matches a
// leading run of spaces/CR/LF whose last character is a space -- confirm that
// is intended. The "^\s+" step below strips leading whitespace regardless.
60 srcLen
= removeText(src
, srcLen
, "^[ \\r\\n]+ ", 0, "", status
); // remove leading new lines
61 srcLen
= removeText(src
, srcLen
, "^\\s+", 0, "", status
); // remove leading spaces
62 srcLen
= removeText(src
, srcLen
, "\\s+$", 0, "", status
); // remove trailing spaces
/*
 * Cleans up comment text held in source, in place, in three steps:
 *   1. trim() the buffer;
 *   2. delete matches of "^\s*?\*\s*?" with UREGEX_MULTILINE set, i.e. the
 *      "*" decoration at the start of each line;
 *   3. replace every run of spaces/CR/LF by a single space.
 *
 * source - buffer holding the comment text; rewritten in place
 * srcLen - number of UChars in source to consider
 * status - ICU error code forwarded to trim() and removeText()
 *
 * Returns the result of the final removeText() call.
 */
67 removeCmtText(UChar
* source
, int32_t srcLen
, UErrorCode
* status
){
68 srcLen
= trim(source
, srcLen
, status
);
// NOTE(review): the trailing "\s*?" is reluctant and is the last element of
// the pattern, so it presumably matches nothing and whitespace after the "*"
// is left in place -- confirm this is the desired result.
69 UnicodeString patString
= "^\\s*?\\*\\s*?"; // remove pattern like " * " at the beginning of the line
70 srcLen
= removeText(source
, srcLen
, patString
, UREGEX_MULTILINE
, "", status
);
71 return removeText(source
, srcLen
, "[ \\r\\n]+", 0, " ", status
);// collapse each run of spaces/new lines into a single space
/*
 * Splits the source text on "@" and, for the first piece that patternString
 * matches at its start (RegexMatcher::lookingAt), copies capture group 1 of
 * that match into *dest.
 *
 * source        - text to search
 * srcLen        - number of UChars in source
 * dest          - address of the output buffer; *dest receives the text
 * destCapacity  - capacity passed to UnicodeString::extract()
 * patternString - regular expression applied to each piece, compiled with
 *                 UREGEX_DOTALL
 *
 * Returns the value reported by UnicodeString::extract() for the matching
 * piece.
 */
75 getText(const UChar
* source
, int32_t srcLen
,
76 UChar
** dest
, int32_t destCapacity
,
77 UnicodeString patternString
,
// Guard for a missing or already-failed status.
80 if(status
== NULL
|| U_FAILURE(*status
)){
84 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
// NOTE(review): RegexPattern::compile() returns a heap object owned by the
// caller; no release of `pattern` appears in the statements here -- confirm it
// is deleted on every exit path.
85 RegexPattern
*pattern
= RegexPattern::compile("@", 0, *status
);
86 UnicodeString
src (source
,srcLen
);
88 if (U_FAILURE(*status
)) {
// The number of pieces returned by split() is not kept here.
91 pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
93 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
94 if (U_FAILURE(*status
)) {
// NOTE(review): iterates over the whole array rather than only the pieces
// split() filled in; unused slots are presumably empty strings -- confirm.
97 for(int32_t i
=0; i
<MAX_SPLIT_STRINGS
; i
++){
98 matcher
.reset(stringArray
[i
]);
// lookingAt() requires the match to begin at the start of the piece.
99 if(matcher
.lookingAt(*status
)){
100 UnicodeString out
= matcher
.group(1, *status
);
102 return out
.extract(*dest
, destCapacity
,*status
);
/* U+0040 COMMERCIAL AT ('@'); getDescription() searches for it with indexOf(). */
109 #define AT_SIGN 0x0040
/*
 * Splits the source text on "@" and, when the first piece contains no AT_SIGN
 * character, copies that piece into *dest and trims it.
 *
 * source       - text to search
 * srcLen       - number of UChars in source
 * dest         - address of the output buffer; *dest receives the text
 * destCapacity - capacity passed to UnicodeString::extract()
 *
 * In that case the return value is the result of trim() on the copied text.
 */
112 getDescription( const UChar
* source
, int32_t srcLen
,
113 UChar
** dest
, int32_t destCapacity
,
// Guard for a missing or already-failed status.
115 if(status
== NULL
|| U_FAILURE(*status
)){
119 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
// NOTE(review): RegexPattern::compile() returns a heap object owned by the
// caller; no release of `pattern` appears in the statements here -- confirm it
// is deleted on every exit path.
120 RegexPattern
*pattern
= RegexPattern::compile("@", UREGEX_MULTILINE
, *status
);
121 UnicodeString
src(source
, srcLen
);
123 if (U_FAILURE(*status
)) {
126 pattern
->split(src
, stringArray
,MAX_SPLIT_STRINGS
, *status
);
// Only the first piece of the split is examined.
128 if(stringArray
[0].indexOf((UChar
)AT_SIGN
)==-1){
129 int32_t destLen
= stringArray
[0].extract(*dest
, destCapacity
, *status
);
130 return trim(*dest
, destLen
, status
);
/*
 * Splits the source text on "@" and tests each resulting piece against the
 * pattern patternStrings[option] (compiled with UREGEX_DOTALL) using
 * RegexMatcher::lookingAt().
 *
 * source - text to search
 * srcLen - number of UChars in source
 * option - index into patternStrings selecting the pattern to apply
 * status - ICU error code; tested for NULL/failure before any other work
 *
 * When option is UPC_TRANSLATE and count is greater than 1, an error message
 * is written to stderr and the process exits with U_UNSUPPORTED_ERROR.
 */
136 getCount(const UChar
* source
, int32_t srcLen
,
137 UParseCommentsOption option
, UErrorCode
*status
){
// Guard for a missing or already-failed status.
139 if(status
== NULL
|| U_FAILURE(*status
)){
143 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
// NOTE(review): RegexPattern::compile() returns a heap object owned by the
// caller; no release of `pattern` appears in the statements here -- confirm it
// is deleted on every exit path.
144 RegexPattern
*pattern
= RegexPattern::compile("@", UREGEX_MULTILINE
, *status
);
145 UnicodeString
src (source
, srcLen
);
148 if (U_FAILURE(*status
)) {
// retLen bounds the loop below, so only the pieces split() reported are scanned.
151 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
153 RegexMatcher
matcher(patternStrings
[option
], UREGEX_DOTALL
, *status
);
154 if (U_FAILURE(*status
)) {
158 for(int32_t i
=0; i
<retLen
; i
++){
159 matcher
.reset(stringArray
[i
]);
// lookingAt() requires the match to begin at the start of the piece.
160 if(matcher
.lookingAt(*status
)){
// More than one translate tag is treated as fatal: report and terminate.
164 if(option
== UPC_TRANSLATE
&& count
> 1){
165 fprintf(stderr
, "Multiple @translate tags cannot be supported.\n");
166 exit(U_UNSUPPORTED_ERROR
);
/*
 * Splits the source text on "@", tests each resulting piece against the
 * pattern patternStrings[option] (compiled with UREGEX_DOTALL) using
 * RegexMatcher::lookingAt(), and copies capture group 1 of a matching piece
 * into *dest.
 *
 * source       - text to search
 * srcLen       - number of UChars in source
 * dest         - address of the output buffer; *dest receives the text
 * destCapacity - capacity passed to UnicodeString::extract()
 * option       - index into patternStrings selecting the pattern to apply
 *
 * Returns the value reported by UnicodeString::extract() for the copied piece.
 */
172 getAt(const UChar
* source
, int32_t srcLen
,
173 UChar
** dest
, int32_t destCapacity
,
175 UParseCommentsOption option
,
// Guard for a missing or already-failed status.
178 if(status
== NULL
|| U_FAILURE(*status
)){
182 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
// NOTE(review): RegexPattern::compile() returns a heap object owned by the
// caller; no release of `pattern` appears in the statements here -- confirm it
// is deleted on every exit path.
183 RegexPattern
*pattern
= RegexPattern::compile("@", UREGEX_MULTILINE
, *status
);
184 UnicodeString
src (source
, srcLen
);
187 if (U_FAILURE(*status
)) {
// retLen bounds the loop below, so only the pieces split() reported are scanned.
190 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
192 RegexMatcher
matcher(patternStrings
[option
], UREGEX_DOTALL
, *status
);
193 if (U_FAILURE(*status
)) {
197 for(int32_t i
=0; i
<retLen
; i
++){
198 matcher
.reset(stringArray
[i
]);
// lookingAt() requires the match to begin at the start of the piece.
199 if(matcher
.lookingAt(*status
)){
201 UnicodeString out
= matcher
.group(1, *status
);
202 return out
.extract(*dest
, destCapacity
,*status
);
/*
 * Fetches the text captured by "^translate\s*?(.*)" from the source by
 * delegating to getText(), then trims the text written to *dest.
 *
 * source       - text to search
 * srcLen       - number of UChars in source
 * dest         - address of the output buffer; *dest receives the text
 * destCapacity - capacity forwarded to getText()
 *
 * Returns the result of trim() on the extracted text.
 */
213 getTranslate( const UChar
* source
, int32_t srcLen
,
214 UChar
** dest
, int32_t destCapacity
,
// Group 1 is everything following the "translate" keyword.
216 UnicodeString notePatternString
= "^translate\\s*?(.*)";
218 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
219 return trim(*dest
, destLen
, status
);
/*
 * Fetches the text captured by "^note\s*?(.*)" from the source by delegating
 * to getText(), then trims the text written to *dest.
 *
 * source       - text to search
 * srcLen       - number of UChars in source
 * dest         - address of the output buffer; *dest receives the text
 * destCapacity - capacity forwarded to getText()
 *
 * Returns the result of trim() on the extracted text.
 */
223 getNote(const UChar
* source
, int32_t srcLen
,
224 UChar
** dest
, int32_t destCapacity
,
// Group 1 is everything following the "note" keyword.
227 UnicodeString notePatternString
= "^note\\s*?(.*)";
228 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
229 return trim(*dest
, destLen
, status
);
233 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */