]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * Copyright (C) 2003-2014, International Business Machines | |
6 | * Corporation and others. All Rights Reserved. | |
7 | ******************************************************************************* | |
8 | * | |
9 | * File prscmnts.cpp | |
10 | * | |
11 | * Modification History: | |
12 | * | |
13 | * Date Name Description | |
14 | * 08/22/2003 ram Creation. | |
15 | ******************************************************************************* | |
16 | */ | |
17 | ||
18 | // Safer use of UnicodeString. | |
19 | #ifndef UNISTR_FROM_CHAR_EXPLICIT | |
20 | # define UNISTR_FROM_CHAR_EXPLICIT explicit | |
21 | #endif | |
22 | ||
23 | // Less important, but still a good idea. | |
24 | #ifndef UNISTR_FROM_STRING_EXPLICIT | |
25 | # define UNISTR_FROM_STRING_EXPLICIT explicit | |
26 | #endif | |
27 | ||
28 | #include "unicode/regex.h" | |
29 | #include "unicode/unistr.h" | |
30 | #include "unicode/parseerr.h" | |
31 | #include "prscmnts.h" | |
32 | #include <stdio.h> | |
33 | #include <stdlib.h> | |
34 | ||
35 | U_NAMESPACE_USE | |
36 | ||
37 | #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ | |
38 | ||
39 | #define MAX_SPLIT_STRINGS 20 | |
40 | ||
41 | const char *patternStrings[UPC_LIMIT]={ | |
42 | "^translate\\s*(.*)", | |
43 | "^note\\s*(.*)" | |
44 | }; | |
45 | ||
46 | U_CFUNC int32_t | |
47 | removeText(UChar *source, int32_t srcLen, | |
48 | UnicodeString patString,uint32_t options, | |
49 | UnicodeString replaceText, UErrorCode *status){ | |
50 | ||
51 | if(status == NULL || U_FAILURE(*status)){ | |
52 | return 0; | |
53 | } | |
54 | ||
55 | UnicodeString src(source, srcLen); | |
56 | ||
57 | RegexMatcher myMatcher(patString, src, options, *status); | |
58 | if(U_FAILURE(*status)){ | |
59 | return 0; | |
60 | } | |
61 | UnicodeString dest; | |
62 | ||
63 | ||
64 | dest = myMatcher.replaceAll(replaceText,*status); | |
65 | ||
66 | ||
67 | return dest.extract(source, srcLen, *status); | |
68 | ||
69 | } | |
70 | U_CFUNC int32_t | |
71 | trim(UChar *src, int32_t srcLen, UErrorCode *status){ | |
72 | srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines | |
73 | srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces | |
74 | srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes | |
75 | return srcLen; | |
76 | } | |
77 | ||
78 | U_CFUNC int32_t | |
79 | removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ | |
80 | srcLen = trim(source, srcLen, status); | |
81 | UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line | |
82 | srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status); | |
83 | return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines; | |
84 | } | |
85 | ||
86 | U_CFUNC int32_t | |
87 | getText(const UChar* source, int32_t srcLen, | |
88 | UChar** dest, int32_t destCapacity, | |
89 | UnicodeString patternString, | |
90 | UErrorCode* status){ | |
91 | ||
92 | if(status == NULL || U_FAILURE(*status)){ | |
93 | return 0; | |
94 | } | |
95 | ||
96 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
97 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status); | |
98 | UnicodeString src (source,srcLen); | |
99 | ||
100 | if (U_FAILURE(*status)) { | |
101 | return 0; | |
102 | } | |
103 | pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
104 | ||
105 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
106 | if (U_FAILURE(*status)) { | |
107 | return 0; | |
108 | } | |
109 | for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ | |
110 | matcher.reset(stringArray[i]); | |
111 | if(matcher.lookingAt(*status)){ | |
112 | UnicodeString out = matcher.group(1, *status); | |
113 | ||
114 | return out.extract(*dest, destCapacity,*status); | |
115 | } | |
116 | } | |
117 | return 0; | |
118 | } | |
119 | ||
120 | ||
121 | #define AT_SIGN 0x0040 | |
122 | ||
123 | U_CFUNC int32_t | |
124 | getDescription( const UChar* source, int32_t srcLen, | |
125 | UChar** dest, int32_t destCapacity, | |
126 | UErrorCode* status){ | |
127 | if(status == NULL || U_FAILURE(*status)){ | |
128 | return 0; | |
129 | } | |
130 | ||
131 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
132 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | |
133 | UnicodeString src(source, srcLen); | |
134 | ||
135 | if (U_FAILURE(*status)) { | |
136 | return 0; | |
137 | } | |
138 | pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); | |
139 | ||
140 | if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ | |
141 | int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); | |
142 | return trim(*dest, destLen, status); | |
143 | } | |
144 | return 0; | |
145 | } | |
146 | ||
147 | U_CFUNC int32_t | |
148 | getCount(const UChar* source, int32_t srcLen, | |
149 | UParseCommentsOption option, UErrorCode *status){ | |
150 | ||
151 | if(status == NULL || U_FAILURE(*status)){ | |
152 | return 0; | |
153 | } | |
154 | ||
155 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
156 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | |
157 | UnicodeString src (source, srcLen); | |
158 | ||
159 | ||
160 | if (U_FAILURE(*status)) { | |
161 | return 0; | |
162 | } | |
163 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
164 | ||
165 | UnicodeString patternString(patternStrings[option]); | |
166 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
167 | if (U_FAILURE(*status)) { | |
168 | return 0; | |
169 | } | |
170 | int32_t count = 0; | |
171 | for(int32_t i=0; i<retLen; i++){ | |
172 | matcher.reset(stringArray[i]); | |
173 | if(matcher.lookingAt(*status)){ | |
174 | count++; | |
175 | } | |
176 | } | |
177 | if(option == UPC_TRANSLATE && count > 1){ | |
178 | fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); | |
179 | exit(U_UNSUPPORTED_ERROR); | |
180 | } | |
181 | return count; | |
182 | } | |
183 | ||
184 | U_CFUNC int32_t | |
185 | getAt(const UChar* source, int32_t srcLen, | |
186 | UChar** dest, int32_t destCapacity, | |
187 | int32_t index, | |
188 | UParseCommentsOption option, | |
189 | UErrorCode* status){ | |
190 | ||
191 | if(status == NULL || U_FAILURE(*status)){ | |
192 | return 0; | |
193 | } | |
194 | ||
195 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
196 | RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status); | |
197 | UnicodeString src (source, srcLen); | |
198 | ||
199 | ||
200 | if (U_FAILURE(*status)) { | |
201 | return 0; | |
202 | } | |
203 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
204 | ||
205 | UnicodeString patternString(patternStrings[option]); | |
206 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
207 | if (U_FAILURE(*status)) { | |
208 | return 0; | |
209 | } | |
210 | int32_t count = 0; | |
211 | for(int32_t i=0; i<retLen; i++){ | |
212 | matcher.reset(stringArray[i]); | |
213 | if(matcher.lookingAt(*status)){ | |
214 | if(count == index){ | |
215 | UnicodeString out = matcher.group(1, *status); | |
216 | return out.extract(*dest, destCapacity,*status); | |
217 | } | |
218 | count++; | |
219 | ||
220 | } | |
221 | } | |
222 | return 0; | |
223 | ||
224 | } | |
225 | ||
226 | U_CFUNC int32_t | |
227 | getTranslate( const UChar* source, int32_t srcLen, | |
228 | UChar** dest, int32_t destCapacity, | |
229 | UErrorCode* status){ | |
230 | UnicodeString notePatternString("^translate\\s*?(.*)"); | |
231 | ||
232 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
233 | return trim(*dest, destLen, status); | |
234 | } | |
235 | ||
236 | U_CFUNC int32_t | |
237 | getNote(const UChar* source, int32_t srcLen, | |
238 | UChar** dest, int32_t destCapacity, | |
239 | UErrorCode* status){ | |
240 | ||
241 | UnicodeString notePatternString("^note\\s*?(.*)"); | |
242 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
243 | return trim(*dest, destLen, status); | |
244 | ||
245 | } | |
246 | ||
247 | #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ | |
248 |