2 *******************************************************************************
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 08/22/2003 ram Creation.
15 *******************************************************************************
17 #include "unicode/regex.h"
18 #include "unicode/unistr.h"
19 #include "unicode/parseerr.h"
24 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
// Size of the UnicodeString arrays that the functions below pass to
// RegexPattern::split() as the destination for the pieces of a comment.
26 #define MAX_SPLIT_STRINGS 20
// Regular-expression source strings, indexed by UParseCommentsOption values
// (see the patternStrings[option] lookups further down in this file).
// NOTE(review): only the first entry is visible in this listing; the remaining
// entries and the closing of the initializer fall on lines that are absent here.
28 const char *patternStrings
[UPC_LIMIT
]={
29 "^translate\\s*?(.*)",
// removeText: regex search-and-replace over a UChar buffer, with the result
// written back into that same buffer.
//   source, srcLen - text to process; also the destination buffer and the size
//                    argument handed to the final extract() call.
//   patString      - pattern given to the RegexMatcher constructor.
//   options        - flags given to the RegexMatcher constructor.
//   replaceText    - replacement given to replaceAll().
//   status         - in/out error code; tested for NULL / failure on entry.
// Returns the result of dest.extract(source, srcLen, *status); callers in this
// file store that value as the new length of the text.
// NOTE(review): this listing has gaps (the embedded numbers skip 30-33, 39-41,
// 46-50); the return type, the bodies of the two early-exit branches and the
// declaration of `dest` are not visible and should be checked in the full file.
34 removeText(UChar
*source
, int32_t srcLen
,
35 UnicodeString patString
,uint32_t options
,
36 UnicodeString replaceText
, UErrorCode
*status
){
// Entry guard: taken when status is NULL or already holds a failure code.
38 if(status
== NULL
|| U_FAILURE(*status
)){
// Wrap the caller's buffer so it can serve as the matcher's input text.
42 UnicodeString
src(source
, srcLen
);
44 RegexMatcher
myMatcher(patString
, src
, options
, *status
);
// Taken when constructing the matcher reported a failure through status.
45 if(U_FAILURE(*status
)){
// Replace every match of patString in src with replaceText.
51 dest
= myMatcher
.replaceAll(replaceText
,*status
);
// Copy the replaced text over the caller's buffer and hand back extract()'s result.
54 return dest
.extract(source
, srcLen
, *status
);
// trim: runs three removeText() passes over src, each replacing its matches
// with the empty string; the length returned by one pass is the srcLen of the
// next. No regex option flags are set (options == 0) in any pass.
//   src, srcLen - buffer to edit in place and its current length.
//   status      - in/out error code, forwarded unchanged to removeText().
// NOTE(review): the return statement and closing brace are not visible in this
// listing (embedded numbers stop at 61); presumably the final srcLen is
// returned - confirm in the full file.
58 trim(UChar
*src
, int32_t srcLen
, UErrorCode
*status
){
59 srcLen
= removeText(src
, srcLen
, "^[ \\r\\n]+ ", 0, "", status
); // remove leading spaces / new lines. NOTE(review): the pattern ends with a literal space, so the run must be followed by one more space to match - confirm this is intended
60 srcLen
= removeText(src
, srcLen
, "^\\s+", 0, "", status
); // remove leading whitespace
61 srcLen
= removeText(src
, srcLen
, "\\s+$", 0, "", status
); // remove trailing whitespace
// removeCmtText: cleans up comment text in place in three steps:
//   1. trim() the buffer;
//   2. with UREGEX_MULTILINE set, delete matches of "^\s*?\*\s*?" (the " * "
//      decoration at the start of a line);
//   3. replace every run of spaces / CR / LF with a single space.
//   source, srcLen - buffer to edit in place and its current length.
//   status         - in/out error code, forwarded to trim() and removeText().
// Returns the value produced by the final removeText() call.
// NOTE(review): the return type and closing brace are not visible in this listing.
66 removeCmtText(UChar
* source
, int32_t srcLen
, UErrorCode
* status
){
67 srcLen
= trim(source
, srcLen
, status
);
68 UnicodeString patString
= "^\\s*?\\*\\s*?"; // remove pattern like " * " at the beginning of the line
69 srcLen
= removeText(source
, srcLen
, patString
, UREGEX_MULTILINE
, "", status
);
70 return removeText(source
, srcLen
, "[ \\r\\n]+", 0, " ", status
);// collapse each run of spaces / new lines into one space
// getText: splits the source text on "@" and, for the first piece that
// patternString matches at its start (lookingAt), extracts capture group 1
// into *dest.
//   source, srcLen      - input text and its length.
//   dest, destCapacity  - *dest is the output buffer given to extract();
//                         destCapacity is the size argument given with it.
//   patternString       - regex applied to each piece, compiled with UREGEX_DOTALL.
//   status              - in/out error code (its declaration falls on a line
//                         that is absent from this listing).
// Returns the result of out.extract(*dest, destCapacity, *status) for the first
// matching piece.
// NOTE(review): this listing has gaps (embedded numbers skip 77-78, 80-82, 86,
// 88-89, 91, 94-95, 100, 102-107); the return type, the early-exit bodies and
// the no-match return path are not visible.
// NOTE(review): no release of the object returned by RegexPattern::compile()
// is visible here - confirm it is freed on every path in the full file.
// NOTE(review): the loop bound is MAX_SPLIT_STRINGS, not the value returned by
// split() (which is discarded here), unlike getCount()/getAt() which iterate
// up to retLen - confirm this difference is intended.
74 getText(const UChar
* source
, int32_t srcLen
,
75 UChar
** dest
, int32_t destCapacity
,
76 UnicodeString patternString
,
// Entry guard: taken when status is NULL or already holds a failure code.
79 if(status
== NULL
|| U_FAILURE(*status
)){
// Destination slots for the pieces produced by split().
83 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
84 RegexPattern
*pattern
= RegexPattern::compile("@", 0, *status
);
85 UnicodeString
src (source
,srcLen
);
// First status test after compile(); pattern is only dereferenced past this check.
87 if (U_FAILURE(*status
)) {
90 pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
92 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
93 if (U_FAILURE(*status
)) {
// Try each array slot in order; the first one the pattern matches at its start wins.
96 for(int32_t i
=0; i
<MAX_SPLIT_STRINGS
; i
++){
97 matcher
.reset(stringArray
[i
]);
98 if(matcher
.lookingAt(*status
)){
99 UnicodeString out
= matcher
.group(1, *status
);
101 return out
.extract(*dest
, destCapacity
,*status
);
// UTF-16 code unit for '@' (U+0040), searched for in getDescription() below.
108 #define AT_SIGN 0x0040
// getDescription: splits the source text on "@" and, when the first piece
// contains no '@' code unit, copies that piece into *dest and returns the
// result of trim() on it.
//   source, srcLen      - input text and its length.
//   dest, destCapacity  - *dest is the output buffer given to extract();
//                         destCapacity is the size argument given with it.
//   status              - in/out error code (its declaration falls on a line
//                         that is absent from this listing).
// NOTE(review): this listing has gaps (embedded numbers skip 113, 115-117, 121,
// 123-124, 126, 130-134); the return type, the early-exit bodies and the path
// taken when the first piece does contain '@' are not visible.
// NOTE(review): no release of the object returned by RegexPattern::compile()
// is visible here - confirm it is freed on every path in the full file.
111 getDescription( const UChar
* source
, int32_t srcLen
,
112 UChar
** dest
, int32_t destCapacity
,
// Entry guard: taken when status is NULL or already holds a failure code.
114 if(status
== NULL
|| U_FAILURE(*status
)){
118 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
119 RegexPattern
*pattern
= RegexPattern::compile("@", UREGEX_MULTILINE
, *status
);
120 UnicodeString
src(source
, srcLen
);
// First status test after compile(); pattern is only dereferenced past this check.
122 if (U_FAILURE(*status
)) {
125 pattern
->split(src
, stringArray
,MAX_SPLIT_STRINGS
, *status
);
// Only the first piece (index 0) is considered as the description text.
127 if(stringArray
[0].indexOf((UChar
)AT_SIGN
)==-1){
128 int32_t destLen
= stringArray
[0].extract(*dest
, destCapacity
, *status
);
// Strip leading/trailing whitespace from the extracted text in place.
129 return trim(*dest
, destLen
, status
);
// getCount: splits the source text on "@" and runs the regex selected by
// `option` (patternStrings[option], compiled with UREGEX_DOTALL) against the
// start of each piece returned by split().
//   source, srcLen - input text and its length.
//   option         - index into patternStrings; compared against UPC_TRANSLATE.
//   status         - in/out error code; tested for NULL / failure on entry.
// When option is UPC_TRANSLATE and `count` exceeds 1, a message is written to
// stderr and the process is terminated via exit().
// NOTE(review): this listing has gaps (embedded numbers skip 137, 139-141,
// 145-146, 148-149, 151, 154-156, 160-162, 166-170); the declaration and
// update of `count`, the return type and the return statement are not visible.
// Presumably `count` is the number of pieces for which lookingAt() succeeded -
// confirm in the full file.
// NOTE(review): exit() is called with a UErrorCode value (U_UNSUPPORTED_ERROR)
// as the process exit status - confirm callers/scripts expect that.
// NOTE(review): no release of the object returned by RegexPattern::compile()
// is visible here - confirm it is freed on every path in the full file.
135 getCount(const UChar
* source
, int32_t srcLen
,
136 UParseCommentsOption option
, UErrorCode
*status
){
// Entry guard: taken when status is NULL or already holds a failure code.
138 if(status
== NULL
|| U_FAILURE(*status
)){
142 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
143 RegexPattern
*pattern
= RegexPattern::compile("@", UREGEX_MULTILINE
, *status
);
144 UnicodeString
src (source
, srcLen
);
// First status test after compile(); pattern is only dereferenced past this check.
147 if (U_FAILURE(*status
)) {
// retLen is split()'s return value and bounds the loop below.
150 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
152 RegexMatcher
matcher(patternStrings
[option
], UREGEX_DOTALL
, *status
);
153 if (U_FAILURE(*status
)) {
157 for(int32_t i
=0; i
<retLen
; i
++){
158 matcher
.reset(stringArray
[i
]);
159 if(matcher
.lookingAt(*status
)){
// More than one @translate tag is treated as fatal: report and terminate.
163 if(option
== UPC_TRANSLATE
&& count
> 1){
164 fprintf(stderr
, "Multiple @translate tags cannot be supported.\n");
165 exit(U_UNSUPPORTED_ERROR
);
// getAt: splits the source text on "@" and runs the regex selected by `option`
// (patternStrings[option], compiled with UREGEX_DOTALL) against the start of
// each piece returned by split(); for a matching piece, capture group 1 is
// extracted into *dest.
//   source, srcLen      - input text and its length.
//   dest, destCapacity  - *dest is the output buffer given to extract();
//                         destCapacity is the size argument given with it.
//   option              - index into patternStrings.
//   status              - in/out error code (its declaration falls on a line
//                         that is absent from this listing).
// Returns the result of out.extract(*dest, destCapacity, *status).
// NOTE(review): this listing has gaps (embedded numbers skip 173, 175-176,
// 178-180, 184-185, 187-188, 190, 193-195, 199, 202-211); further parameters,
// any condition guarding the extract (line 199), the return type, the
// early-exit bodies and the no-match return path are not visible.
// NOTE(review): no release of the object returned by RegexPattern::compile()
// is visible here - confirm it is freed on every path in the full file.
171 getAt(const UChar
* source
, int32_t srcLen
,
172 UChar
** dest
, int32_t destCapacity
,
174 UParseCommentsOption option
,
// Entry guard: taken when status is NULL or already holds a failure code.
177 if(status
== NULL
|| U_FAILURE(*status
)){
181 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
182 RegexPattern
*pattern
= RegexPattern::compile("@", UREGEX_MULTILINE
, *status
);
183 UnicodeString
src (source
, srcLen
);
// First status test after compile(); pattern is only dereferenced past this check.
186 if (U_FAILURE(*status
)) {
// retLen is split()'s return value and bounds the loop below.
189 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
191 RegexMatcher
matcher(patternStrings
[option
], UREGEX_DOTALL
, *status
);
192 if (U_FAILURE(*status
)) {
196 for(int32_t i
=0; i
<retLen
; i
++){
197 matcher
.reset(stringArray
[i
]);
198 if(matcher
.lookingAt(*status
)){
200 UnicodeString out
= matcher
.group(1, *status
);
201 return out
.extract(*dest
, destCapacity
,*status
);
// getTranslate: uses getText() with the pattern "^translate\s*?(.*)" to pull
// the text following a translate tag into *dest, then returns the result of
// trim() applied to that buffer.
//   source, srcLen      - input text and its length, forwarded to getText().
//   dest, destCapacity  - output buffer pointer and size, forwarded to getText().
//   status              - in/out error code (its declaration falls on a line
//                         that is absent from this listing).
// NOTE(review): the return type and closing brace are not visible here
// (embedded numbers skip 214, 216, 219-221).
// NOTE(review): the local is named notePatternString although it holds the
// translate pattern - the name looks copied from getNote().
212 getTranslate( const UChar
* source
, int32_t srcLen
,
213 UChar
** dest
, int32_t destCapacity
,
215 UnicodeString notePatternString
= "^translate\\s*?(.*)";
217 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
// Strip leading/trailing whitespace from the extracted text in place.
218 return trim(*dest
, destLen
, status
);
// getNote: uses getText() with the pattern "^note\s*?(.*)" to pull the text
// following a note tag into *dest, then returns the result of trim() applied
// to that buffer.
//   source, srcLen      - input text and its length, forwarded to getText().
//   dest, destCapacity  - output buffer pointer and size, forwarded to getText().
//   status              - in/out error code (its declaration falls on a line
//                         that is absent from this listing).
// NOTE(review): the return type and closing brace are not visible here
// (embedded numbers skip 224-225, 229-231).
222 getNote(const UChar
* source
, int32_t srcLen
,
223 UChar
** dest
, int32_t destCapacity
,
226 UnicodeString notePatternString
= "^note\\s*?(.*)";
227 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
// Strip leading/trailing whitespace from the extracted text in place.
228 return trim(*dest
, destLen
, status
);
232 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */