]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2003-2014, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * | |
7 | * File prscmnts.cpp | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 08/22/2003 ram Creation. | |
13 | ******************************************************************************* | |
14 | */ | |
15 | ||
16 | // Safer use of UnicodeString. | |
17 | #ifndef UNISTR_FROM_CHAR_EXPLICIT | |
18 | # define UNISTR_FROM_CHAR_EXPLICIT explicit | |
19 | #endif | |
20 | ||
21 | // Less important, but still a good idea. | |
22 | #ifndef UNISTR_FROM_STRING_EXPLICIT | |
23 | # define UNISTR_FROM_STRING_EXPLICIT explicit | |
24 | #endif | |
25 | ||
26 | #include "unicode/regex.h" | |
27 | #include "unicode/unistr.h" | |
28 | #include "unicode/parseerr.h" | |
29 | #include "prscmnts.h" | |
30 | #include <stdio.h> | |
31 | #include <stdlib.h> | |
32 | ||
33 | U_NAMESPACE_USE | |
34 | ||
35 | #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ | |
36 | ||
37 | #define MAX_SPLIT_STRINGS 20 | |
38 | ||
39 | const char *patternStrings[UPC_LIMIT]={ | |
40 | "^translate\\s*(.*)", | |
41 | "^note\\s*(.*)" | |
42 | }; | |
43 | ||
44 | U_CFUNC int32_t | |
45 | removeText(UChar *source, int32_t srcLen, | |
46 | UnicodeString patString,uint32_t options, | |
47 | UnicodeString replaceText, UErrorCode *status){ | |
48 | ||
49 | if(status == NULL || U_FAILURE(*status)){ | |
50 | return 0; | |
51 | } | |
52 | ||
53 | UnicodeString src(source, srcLen); | |
54 | ||
55 | RegexMatcher myMatcher(patString, src, options, *status); | |
56 | if(U_FAILURE(*status)){ | |
57 | return 0; | |
58 | } | |
59 | UnicodeString dest; | |
60 | ||
61 | ||
62 | dest = myMatcher.replaceAll(replaceText,*status); | |
63 | ||
64 | ||
65 | return dest.extract(source, srcLen, *status); | |
66 | ||
67 | } | |
68 | U_CFUNC int32_t | |
69 | trim(UChar *src, int32_t srcLen, UErrorCode *status){ | |
70 | srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines | |
71 | srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces | |
72 | srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes | |
73 | return srcLen; | |
74 | } | |
75 | ||
76 | U_CFUNC int32_t | |
77 | removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ | |
78 | srcLen = trim(source, srcLen, status); | |
79 | UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line | |
80 | srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); | |
81 | return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; | |
82 | } | |
83 | ||
84 | U_CFUNC int32_t | |
85 | getText(const UChar* source, int32_t srcLen, | |
86 | UChar** dest, int32_t destCapacity, | |
87 | UnicodeString patternString, | |
88 | UErrorCode* status){ | |
89 | ||
90 | if(status == NULL || U_FAILURE(*status)){ | |
91 | return 0; | |
92 | } | |
93 | ||
94 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
95 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); | |
96 | UnicodeString src (source,srcLen); | |
97 | ||
98 | if (U_FAILURE(*status)) { | |
99 | return 0; | |
100 | } | |
101 | pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
102 | ||
103 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
104 | if (U_FAILURE(*status)) { | |
105 | return 0; | |
106 | } | |
107 | for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ | |
108 | matcher.reset(stringArray[i]); | |
109 | if(matcher.lookingAt(*status)){ | |
110 | UnicodeString out = matcher.group(1, *status); | |
111 | ||
112 | return out.extract(*dest, destCapacity,*status); | |
113 | } | |
114 | } | |
115 | return 0; | |
116 | } | |
117 | ||
118 | ||
119 | #define AT_SIGN 0x0040 | |
120 | ||
121 | U_CFUNC int32_t | |
122 | getDescription( const UChar* source, int32_t srcLen, | |
123 | UChar** dest, int32_t destCapacity, | |
124 | UErrorCode* status){ | |
125 | if(status == NULL || U_FAILURE(*status)){ | |
126 | return 0; | |
127 | } | |
128 | ||
129 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
130 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | |
131 | UnicodeString src(source, srcLen); | |
132 | ||
133 | if (U_FAILURE(*status)) { | |
134 | return 0; | |
135 | } | |
136 | pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); | |
137 | ||
138 | if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ | |
139 | int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); | |
140 | return trim(*dest, destLen, status); | |
141 | } | |
142 | return 0; | |
143 | } | |
144 | ||
145 | U_CFUNC int32_t | |
146 | getCount(const UChar* source, int32_t srcLen, | |
147 | UParseCommentsOption option, UErrorCode *status){ | |
148 | ||
149 | if(status == NULL || U_FAILURE(*status)){ | |
150 | return 0; | |
151 | } | |
152 | ||
153 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
154 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | |
155 | UnicodeString src (source, srcLen); | |
156 | ||
157 | ||
158 | if (U_FAILURE(*status)) { | |
159 | return 0; | |
160 | } | |
161 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
162 | ||
163 | UnicodeString patternString(patternStrings[option]); | |
164 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
165 | if (U_FAILURE(*status)) { | |
166 | return 0; | |
167 | } | |
168 | int32_t count = 0; | |
169 | for(int32_t i=0; i<retLen; i++){ | |
170 | matcher.reset(stringArray[i]); | |
171 | if(matcher.lookingAt(*status)){ | |
172 | count++; | |
173 | } | |
174 | } | |
175 | if(option == UPC_TRANSLATE && count > 1){ | |
176 | fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); | |
177 | exit(U_UNSUPPORTED_ERROR); | |
178 | } | |
179 | return count; | |
180 | } | |
181 | ||
182 | U_CFUNC int32_t | |
183 | getAt(const UChar* source, int32_t srcLen, | |
184 | UChar** dest, int32_t destCapacity, | |
185 | int32_t index, | |
186 | UParseCommentsOption option, | |
187 | UErrorCode* status){ | |
188 | ||
189 | if(status == NULL || U_FAILURE(*status)){ | |
190 | return 0; | |
191 | } | |
192 | ||
193 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
194 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | |
195 | UnicodeString src (source, srcLen); | |
196 | ||
197 | ||
198 | if (U_FAILURE(*status)) { | |
199 | return 0; | |
200 | } | |
201 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
202 | ||
203 | UnicodeString patternString(patternStrings[option]); | |
204 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
205 | if (U_FAILURE(*status)) { | |
206 | return 0; | |
207 | } | |
208 | int32_t count = 0; | |
209 | for(int32_t i=0; i<retLen; i++){ | |
210 | matcher.reset(stringArray[i]); | |
211 | if(matcher.lookingAt(*status)){ | |
212 | if(count == index){ | |
213 | UnicodeString out = matcher.group(1, *status); | |
214 | return out.extract(*dest, destCapacity,*status); | |
215 | } | |
216 | count++; | |
217 | ||
218 | } | |
219 | } | |
220 | return 0; | |
221 | ||
222 | } | |
223 | ||
224 | U_CFUNC int32_t | |
225 | getTranslate( const UChar* source, int32_t srcLen, | |
226 | UChar** dest, int32_t destCapacity, | |
227 | UErrorCode* status){ | |
228 | UnicodeString notePatternString("^translate\\s*?(.*)"); | |
229 | ||
230 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
231 | return trim(*dest, destLen, status); | |
232 | } | |
233 | ||
234 | U_CFUNC int32_t | |
235 | getNote(const UChar* source, int32_t srcLen, | |
236 | UChar** dest, int32_t destCapacity, | |
237 | UErrorCode* status){ | |
238 | ||
239 | UnicodeString notePatternString("^note\\s*?(.*)"); | |
240 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
241 | return trim(*dest, destLen, status); | |
242 | ||
243 | } | |
244 | ||
245 | #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ | |
246 |