1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2003-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 08/22/2003 ram Creation.
15 *******************************************************************************
18 // Safer use of UnicodeString.
19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
20 # define UNISTR_FROM_CHAR_EXPLICIT explicit
23 // Less important, but still a good idea.
24 #ifndef UNISTR_FROM_STRING_EXPLICIT
25 # define UNISTR_FROM_STRING_EXPLICIT explicit
28 #include "unicode/regex.h"
29 #include "unicode/unistr.h"
30 #include "unicode/parseerr.h"
37 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when regular expressions are not available */
// Maximum number of pieces a comment is split into on '@'. It sizes the
// stringArray buffers below and is the capacity passed to RegexPattern::split.
39 #define MAX_SPLIT_STRINGS 20
// Table of UPC_LIMIT pattern strings. getCount and getAt index it with a
// UParseCommentsOption value to pick the regular expression to apply.
41 const char *patternStrings
[UPC_LIMIT
]={
// Replaces every match of the regular expression patString within the first
// srcLen UChars of source by replaceText and copies the result back into
// source.
//
// source      - buffer holding the text; it is overwritten with the result
// srcLen      - number of UChars read from source; also the capacity given
//               to extract() when the result is written back
// patString   - regular expression to search for
// options     - flags forwarded to the RegexMatcher constructor
// replaceText - replacement handed to RegexMatcher::replaceAll
// status      - in/out ICU error code; checked for NULL before use
//
// Returns the value of UnicodeString::extract(), i.e. the length of the
// rewritten text.
// NOTE(review): the write-back capacity is srcLen, so a replacement that makes
// the text longer would not fit -- confirm every caller only shrinks the text.
47 removeText(UChar
*source
, int32_t srcLen
,
48 UnicodeString patString
,uint32_t options
,
49 UnicodeString replaceText
, UErrorCode
*status
){
// Guard against a NULL status pointer or an error carried in by the caller.
51 if(status
== NULL
|| U_FAILURE(*status
)){
// Build a UnicodeString from the first srcLen UChars of source.
55 UnicodeString
src(source
, srcLen
);
// The matcher is constructed over src; construction errors land in *status.
57 RegexMatcher
myMatcher(patString
, src
, options
, *status
);
58 if(U_FAILURE(*status
)){
// Substitute all matches at once.
64 dest
= myMatcher
.replaceAll(replaceText
,*status
);
// Write the rewritten text back into the caller's buffer.
67 return dest
.extract(source
, srcLen
, *status
);
// Strips leading and trailing whitespace from src in place by chaining three
// removeText() passes, each with an empty replacement and options 0.
// srcLen is refreshed after every pass with the length removeText() reports.
// NOTE(review): the first pattern ends in a literal space, so it only matches
// a leading run of blanks/newlines that ends with a space; the "^\s+" pass
// that follows appears to cover leading whitespace on its own -- confirm the
// intent of the first pass.
71 trim(UChar
*src
, int32_t srcLen
, UErrorCode
*status
){
72 srcLen
= removeText(src
, srcLen
, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status
); // remove leading newlines
73 srcLen
= removeText(src
, srcLen
, UnicodeString("^\\s+"), 0, UnicodeString(), status
); // remove leading whitespace
74 srcLen
= removeText(src
, srcLen
, UnicodeString("\\s+$"), 0, UnicodeString(), status
); // remove trailing whitespace
// Cleans up the text of a comment in place: trims it, removes the " * "
// decoration at the start of each line, then replaces every run of spaces,
// CRs and LFs with a single space.
//
// source - buffer holding the comment text; rewritten in place
// srcLen - number of UChars in source
// status - ICU error code forwarded to trim() and removeText()
//
// Returns the result of the final removeText() call.
79 removeCmtText(UChar
* source
, int32_t srcLen
, UErrorCode
* status
){
80 srcLen
= trim(source
, srcLen
, status
);
81 UnicodeString
patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the beginning of the line
// UREGEX_MULTILINE makes ^ match at the start of every line, not only at the
// start of the text.
82 srcLen
= removeText(source
, srcLen
, patString
, UREGEX_MULTILINE
, UnicodeString(), status
);
83 return removeText(source
, srcLen
, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status
);// replace each run of spaces/newlines with one space
// Splits the text on '@' and, for a piece that patternString matches at its
// start, copies capture group 1 of that match into *dest.
//
// source, srcLen     - text to search and its length in UChars
// dest, destCapacity - *dest is the output buffer, destCapacity its size
// patternString      - regular expression tried against each piece; the code
//                      reads its capture group 1
// status             - ICU error code, checked for NULL/failure on entry
//
// Returns the result of UnicodeString::extract() for the matching piece.
// NOTE(review): RegexPattern::compile hands back a pattern object and no
// delete of `pattern` is visible in this function -- check for a leak.
87 getText(const UChar
* source
, int32_t srcLen
,
88 UChar
** dest
, int32_t destCapacity
,
89 UnicodeString patternString
,
// Guard against a NULL status pointer or an error carried in by the caller.
92 if(status
== NULL
|| U_FAILURE(*status
)){
96 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
// '@' is the delimiter the text is split on.
97 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), 0, *status
);
98 UnicodeString
src (source
,srcLen
);
100 if (U_FAILURE(*status
)) {
103 pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
// UREGEX_DOTALL lets '.' match line terminators, so a "(.*)" group can span
// several lines.
105 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
106 if (U_FAILURE(*status
)) {
// NOTE(review): the loop visits all MAX_SPLIT_STRINGS slots rather than the
// count returned by split(); unfilled slots are presumably empty strings.
109 for(int32_t i
=0; i
<MAX_SPLIT_STRINGS
; i
++){
110 matcher
.reset(stringArray
[i
]);
// lookingAt() requires the match to begin at the start of the piece.
111 if(matcher
.lookingAt(*status
)){
112 UnicodeString out
= matcher
.group(1, *status
);
114 return out
.extract(*dest
, destCapacity
,*status
);
// U+0040 COMMERCIAL AT ('@'); getDescription searches the first piece for it.
121 #define AT_SIGN 0x0040
// Copies the text that precedes the first '@' into *dest and trims it.
//
// source, srcLen     - text to search and its length in UChars
// dest, destCapacity - *dest is the output buffer, destCapacity its size
// status             - ICU error code, checked for NULL/failure on entry
//
// When the first piece contains no '@', returns the length reported by trim()
// for the extracted piece.
// NOTE(review): RegexPattern::compile hands back a pattern object and no
// delete of `pattern` is visible in this function -- check for a leak.
124 getDescription( const UChar
* source
, int32_t srcLen
,
125 UChar
** dest
, int32_t destCapacity
,
// Guard against a NULL status pointer or an error carried in by the caller.
127 if(status
== NULL
|| U_FAILURE(*status
)){
131 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
132 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE
, *status
);
133 UnicodeString
src(source
, srcLen
);
135 if (U_FAILURE(*status
)) {
138 pattern
->split(src
, stringArray
,MAX_SPLIT_STRINGS
, *status
);
// Only the first piece is used, and only when it holds no '@' itself.
140 if(stringArray
[0].indexOf((UChar
)AT_SIGN
)==-1){
141 int32_t destLen
= stringArray
[0].extract(*dest
, destCapacity
, *status
);
142 return trim(*dest
, destLen
, status
);
// Splits the text on '@' and tests each resulting piece against the pattern
// selected by option from patternStrings.
//
// source, srcLen - text to search and its length in UChars
// option         - index into patternStrings choosing the pattern to apply
// status         - ICU error code, checked for NULL/failure on entry
//
// NOTE(review): count is presumably the number of pieces that matched and the
// value returned -- confirm against the full function.
// NOTE(review): RegexPattern::compile hands back a pattern object and no
// delete of `pattern` is visible in this function -- check for a leak.
148 getCount(const UChar
* source
, int32_t srcLen
,
149 UParseCommentsOption option
, UErrorCode
*status
){
// Guard against a NULL status pointer or an error carried in by the caller.
151 if(status
== NULL
|| U_FAILURE(*status
)){
155 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
156 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE
, *status
);
157 UnicodeString
src (source
, srcLen
);
160 if (U_FAILURE(*status
)) {
// retLen is the value split() returns; it bounds the loop below.
163 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
165 UnicodeString
patternString(patternStrings
[option
]);
166 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
167 if (U_FAILURE(*status
)) {
171 for(int32_t i
=0; i
<retLen
; i
++){
172 matcher
.reset(stringArray
[i
]);
// lookingAt() requires the match to begin at the start of the piece.
173 if(matcher
.lookingAt(*status
)){
// More than one match for UPC_TRANSLATE is fatal: a message is printed to
// stderr and the process exits with U_UNSUPPORTED_ERROR as its exit code.
177 if(option
== UPC_TRANSLATE
&& count
> 1){
178 fprintf(stderr
, "Multiple @translate tags cannot be supported.\n");
179 exit(U_UNSUPPORTED_ERROR
);
// Splits the text on '@', tests each piece against the pattern selected by
// option from patternStrings, and copies capture group 1 of a matching piece
// into *dest.
//
// source, srcLen     - text to search and its length in UChars
// dest, destCapacity - *dest is the output buffer, destCapacity its size
// option             - index into patternStrings choosing the pattern
// status             - ICU error code, checked for NULL/failure on entry
//
// Returns the result of UnicodeString::extract() for the extracted group.
// NOTE(review): RegexPattern::compile hands back a pattern object and no
// delete of `pattern` is visible in this function -- check for a leak.
185 getAt(const UChar
* source
, int32_t srcLen
,
186 UChar
** dest
, int32_t destCapacity
,
188 UParseCommentsOption option
,
// Guard against a NULL status pointer or an error carried in by the caller.
191 if(status
== NULL
|| U_FAILURE(*status
)){
195 UnicodeString stringArray
[MAX_SPLIT_STRINGS
];
196 RegexPattern
*pattern
= RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE
, *status
);
197 UnicodeString
src (source
, srcLen
);
200 if (U_FAILURE(*status
)) {
// retLen is the value split() returns; it bounds the loop below.
203 int32_t retLen
= pattern
->split(src
, stringArray
, MAX_SPLIT_STRINGS
, *status
);
205 UnicodeString
patternString(patternStrings
[option
]);
206 RegexMatcher
matcher(patternString
, UREGEX_DOTALL
, *status
);
207 if (U_FAILURE(*status
)) {
211 for(int32_t i
=0; i
<retLen
; i
++){
212 matcher
.reset(stringArray
[i
]);
// lookingAt() requires the match to begin at the start of the piece.
213 if(matcher
.lookingAt(*status
)){
215 UnicodeString out
= matcher
.group(1, *status
);
216 return out
.extract(*dest
, destCapacity
,*status
);
// Extracts the text of a "translate" tag: getText() is run with a pattern that
// matches a piece starting with "translate" and captures what follows, then
// the captured text in *dest is trimmed.
//
// source, srcLen     - text to search and its length in UChars
// dest, destCapacity - *dest is the output buffer, destCapacity its size
//
// Returns the length reported by trim().
227 getTranslate( const UChar
* source
, int32_t srcLen
,
228 UChar
** dest
, int32_t destCapacity
,
// Group 1 is what follows the keyword; the reluctant \s*? leaves any leading
// whitespace to the group, and trim() removes it afterwards.
230 UnicodeString
notePatternString("^translate\\s*?(.*)");
232 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
233 return trim(*dest
, destLen
, status
);
// Extracts the text of a "note" tag: getText() is run with a pattern that
// matches a piece starting with "note" and captures what follows, then the
// captured text in *dest is trimmed.
//
// source, srcLen     - text to search and its length in UChars
// dest, destCapacity - *dest is the output buffer, destCapacity its size
//
// Returns the length reported by trim().
237 getNote(const UChar
* source
, int32_t srcLen
,
238 UChar
** dest
, int32_t destCapacity
,
// Group 1 is what follows the keyword; the reluctant \s*? leaves any leading
// whitespace to the group, and trim() removes it afterwards.
241 UnicodeString
notePatternString("^note\\s*?(.*)");
242 int32_t destLen
= getText(source
, srcLen
, dest
, destCapacity
, notePatternString
, status
);
243 return trim(*dest
, destLen
, status
);
247 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */