]>
Commit | Line | Data |
---|---|---|
374ca955 | 1 | /* |
46f4442e A |
2 | ******************************************************************************* |
3 | * Copyright (C) 2003-2007, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * | |
7 | * File prscmnts.cpp | |
8 | * | |
9 | * Modification History: | |
10 | * | |
11 | * Date Name Description | |
12 | * 08/22/2003 ram Creation. | |
13 | ******************************************************************************* | |
14 | */ | |
15 | ||
374ca955 A |
16 | #include "unicode/regex.h" |
17 | #include "unicode/unistr.h" | |
18 | #include "unicode/parseerr.h" | |
19 | #include "prscmnts.h" | |
20 | #include <stdio.h> | |
21 | #include <stdlib.h> | |
22 | ||
46f4442e A |
23 | U_NAMESPACE_USE |
24 | ||
374ca955 A |
25 | #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ |
26 | ||
27 | #define MAX_SPLIT_STRINGS 20 | |
28 | ||
29 | const char *patternStrings[UPC_LIMIT]={ | |
46f4442e A |
30 | "^translate\\s*(.*)", |
31 | "^note\\s*(.*)" | |
374ca955 A |
32 | }; |
33 | ||
34 | U_CFUNC int32_t | |
35 | removeText(UChar *source, int32_t srcLen, | |
36 | UnicodeString patString,uint32_t options, | |
37 | UnicodeString replaceText, UErrorCode *status){ | |
38 | ||
39 | if(status == NULL || U_FAILURE(*status)){ | |
40 | return 0; | |
41 | } | |
42 | ||
43 | UnicodeString src(source, srcLen); | |
44 | ||
45 | RegexMatcher myMatcher(patString, src, options, *status); | |
46 | if(U_FAILURE(*status)){ | |
47 | return 0; | |
48 | } | |
49 | UnicodeString dest; | |
50 | ||
51 | ||
52 | dest = myMatcher.replaceAll(replaceText,*status); | |
53 | ||
54 | ||
55 | return dest.extract(source, srcLen, *status); | |
56 | ||
57 | } | |
58 | U_CFUNC int32_t | |
59 | trim(UChar *src, int32_t srcLen, UErrorCode *status){ | |
60 | srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines | |
61 | srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces | |
62 | srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes | |
63 | return srcLen; | |
64 | } | |
65 | ||
66 | U_CFUNC int32_t | |
67 | removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ | |
68 | srcLen = trim(source, srcLen, status); | |
69 | UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line | |
70 | srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status); | |
71 | return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines; | |
72 | } | |
73 | ||
74 | U_CFUNC int32_t | |
75 | getText(const UChar* source, int32_t srcLen, | |
76 | UChar** dest, int32_t destCapacity, | |
77 | UnicodeString patternString, | |
78 | UErrorCode* status){ | |
79 | ||
80 | if(status == NULL || U_FAILURE(*status)){ | |
81 | return 0; | |
82 | } | |
83 | ||
84 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
85 | RegexPattern *pattern = RegexPattern::compile("@", 0, *status); | |
86 | UnicodeString src (source,srcLen); | |
87 | ||
88 | if (U_FAILURE(*status)) { | |
89 | return 0; | |
90 | } | |
91 | pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
92 | ||
93 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
94 | if (U_FAILURE(*status)) { | |
95 | return 0; | |
96 | } | |
97 | for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ | |
98 | matcher.reset(stringArray[i]); | |
99 | if(matcher.lookingAt(*status)){ | |
100 | UnicodeString out = matcher.group(1, *status); | |
101 | ||
102 | return out.extract(*dest, destCapacity,*status); | |
103 | } | |
104 | } | |
105 | return 0; | |
106 | } | |
107 | ||
108 | ||
109 | #define AT_SIGN 0x0040 | |
110 | ||
111 | U_CFUNC int32_t | |
112 | getDescription( const UChar* source, int32_t srcLen, | |
113 | UChar** dest, int32_t destCapacity, | |
114 | UErrorCode* status){ | |
115 | if(status == NULL || U_FAILURE(*status)){ | |
116 | return 0; | |
117 | } | |
118 | ||
119 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
120 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | |
121 | UnicodeString src(source, srcLen); | |
122 | ||
123 | if (U_FAILURE(*status)) { | |
124 | return 0; | |
125 | } | |
126 | pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); | |
127 | ||
128 | if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ | |
129 | int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); | |
130 | return trim(*dest, destLen, status); | |
131 | } | |
132 | return 0; | |
133 | } | |
134 | ||
135 | U_CFUNC int32_t | |
136 | getCount(const UChar* source, int32_t srcLen, | |
137 | UParseCommentsOption option, UErrorCode *status){ | |
138 | ||
139 | if(status == NULL || U_FAILURE(*status)){ | |
140 | return 0; | |
141 | } | |
142 | ||
143 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
144 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | |
145 | UnicodeString src (source, srcLen); | |
146 | ||
147 | ||
148 | if (U_FAILURE(*status)) { | |
149 | return 0; | |
150 | } | |
151 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
152 | ||
153 | RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | |
154 | if (U_FAILURE(*status)) { | |
155 | return 0; | |
156 | } | |
157 | int32_t count = 0; | |
158 | for(int32_t i=0; i<retLen; i++){ | |
159 | matcher.reset(stringArray[i]); | |
160 | if(matcher.lookingAt(*status)){ | |
161 | count++; | |
162 | } | |
163 | } | |
164 | if(option == UPC_TRANSLATE && count > 1){ | |
165 | fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); | |
166 | exit(U_UNSUPPORTED_ERROR); | |
167 | } | |
168 | return count; | |
169 | } | |
170 | ||
171 | U_CFUNC int32_t | |
172 | getAt(const UChar* source, int32_t srcLen, | |
173 | UChar** dest, int32_t destCapacity, | |
174 | int32_t index, | |
175 | UParseCommentsOption option, | |
176 | UErrorCode* status){ | |
177 | ||
178 | if(status == NULL || U_FAILURE(*status)){ | |
179 | return 0; | |
180 | } | |
181 | ||
182 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
183 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | |
184 | UnicodeString src (source, srcLen); | |
185 | ||
186 | ||
187 | if (U_FAILURE(*status)) { | |
188 | return 0; | |
189 | } | |
190 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
191 | ||
192 | RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | |
193 | if (U_FAILURE(*status)) { | |
194 | return 0; | |
195 | } | |
196 | int32_t count = 0; | |
197 | for(int32_t i=0; i<retLen; i++){ | |
198 | matcher.reset(stringArray[i]); | |
199 | if(matcher.lookingAt(*status)){ | |
200 | if(count == index){ | |
201 | UnicodeString out = matcher.group(1, *status); | |
202 | return out.extract(*dest, destCapacity,*status); | |
203 | } | |
204 | count++; | |
205 | ||
206 | } | |
207 | } | |
208 | return 0; | |
209 | ||
210 | } | |
211 | ||
212 | U_CFUNC int32_t | |
213 | getTranslate( const UChar* source, int32_t srcLen, | |
214 | UChar** dest, int32_t destCapacity, | |
215 | UErrorCode* status){ | |
216 | UnicodeString notePatternString = "^translate\\s*?(.*)"; | |
217 | ||
218 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
219 | return trim(*dest, destLen, status); | |
220 | } | |
221 | ||
222 | U_CFUNC int32_t | |
223 | getNote(const UChar* source, int32_t srcLen, | |
224 | UChar** dest, int32_t destCapacity, | |
225 | UErrorCode* status){ | |
226 | ||
227 | UnicodeString notePatternString = "^note\\s*?(.*)"; | |
228 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
229 | return trim(*dest, destLen, status); | |
230 | ||
231 | } | |
232 | ||
233 | #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ | |
234 |