]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2003, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * | |
9 | * File prscmnts.cpp | |
10 | * | |
11 | * Modification History: | |
12 | * | |
13 | * Date Name Description | |
14 | * 08/22/2003 ram Creation. | |
15 | ******************************************************************************* | |
16 | */ | |
17 | #include "unicode/regex.h" | |
18 | #include "unicode/unistr.h" | |
19 | #include "unicode/parseerr.h" | |
20 | #include "prscmnts.h" | |
21 | #include <stdio.h> | |
22 | #include <stdlib.h> | |
23 | ||
24 | #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */ | |
25 | ||
26 | #define MAX_SPLIT_STRINGS 20 | |
27 | ||
28 | const char *patternStrings[UPC_LIMIT]={ | |
29 | "^translate\\s*?(.*)", | |
30 | "^note\\s*?(.*)" | |
31 | }; | |
32 | ||
33 | U_CFUNC int32_t | |
34 | removeText(UChar *source, int32_t srcLen, | |
35 | UnicodeString patString,uint32_t options, | |
36 | UnicodeString replaceText, UErrorCode *status){ | |
37 | ||
38 | if(status == NULL || U_FAILURE(*status)){ | |
39 | return 0; | |
40 | } | |
41 | ||
42 | UnicodeString src(source, srcLen); | |
43 | ||
44 | RegexMatcher myMatcher(patString, src, options, *status); | |
45 | if(U_FAILURE(*status)){ | |
46 | return 0; | |
47 | } | |
48 | UnicodeString dest; | |
49 | ||
50 | ||
51 | dest = myMatcher.replaceAll(replaceText,*status); | |
52 | ||
53 | ||
54 | return dest.extract(source, srcLen, *status); | |
55 | ||
56 | } | |
57 | U_CFUNC int32_t | |
58 | trim(UChar *src, int32_t srcLen, UErrorCode *status){ | |
59 | srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines | |
60 | srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces | |
61 | srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes | |
62 | return srcLen; | |
63 | } | |
64 | ||
65 | U_CFUNC int32_t | |
66 | removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){ | |
67 | srcLen = trim(source, srcLen, status); | |
68 | UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line | |
69 | srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status); | |
70 | return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines; | |
71 | } | |
72 | ||
73 | U_CFUNC int32_t | |
74 | getText(const UChar* source, int32_t srcLen, | |
75 | UChar** dest, int32_t destCapacity, | |
76 | UnicodeString patternString, | |
77 | UErrorCode* status){ | |
78 | ||
79 | if(status == NULL || U_FAILURE(*status)){ | |
80 | return 0; | |
81 | } | |
82 | ||
83 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
84 | RegexPattern *pattern = RegexPattern::compile("@", 0, *status); | |
85 | UnicodeString src (source,srcLen); | |
86 | ||
87 | if (U_FAILURE(*status)) { | |
88 | return 0; | |
89 | } | |
90 | pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
91 | ||
92 | RegexMatcher matcher(patternString, UREGEX_DOTALL, *status); | |
93 | if (U_FAILURE(*status)) { | |
94 | return 0; | |
95 | } | |
96 | for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){ | |
97 | matcher.reset(stringArray[i]); | |
98 | if(matcher.lookingAt(*status)){ | |
99 | UnicodeString out = matcher.group(1, *status); | |
100 | ||
101 | return out.extract(*dest, destCapacity,*status); | |
102 | } | |
103 | } | |
104 | return 0; | |
105 | } | |
106 | ||
107 | ||
108 | #define AT_SIGN 0x0040 | |
109 | ||
110 | U_CFUNC int32_t | |
111 | getDescription( const UChar* source, int32_t srcLen, | |
112 | UChar** dest, int32_t destCapacity, | |
113 | UErrorCode* status){ | |
114 | if(status == NULL || U_FAILURE(*status)){ | |
115 | return 0; | |
116 | } | |
117 | ||
118 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
119 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | |
120 | UnicodeString src(source, srcLen); | |
121 | ||
122 | if (U_FAILURE(*status)) { | |
123 | return 0; | |
124 | } | |
125 | pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status); | |
126 | ||
127 | if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){ | |
128 | int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status); | |
129 | return trim(*dest, destLen, status); | |
130 | } | |
131 | return 0; | |
132 | } | |
133 | ||
134 | U_CFUNC int32_t | |
135 | getCount(const UChar* source, int32_t srcLen, | |
136 | UParseCommentsOption option, UErrorCode *status){ | |
137 | ||
138 | if(status == NULL || U_FAILURE(*status)){ | |
139 | return 0; | |
140 | } | |
141 | ||
142 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
143 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | |
144 | UnicodeString src (source, srcLen); | |
145 | ||
146 | ||
147 | if (U_FAILURE(*status)) { | |
148 | return 0; | |
149 | } | |
150 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
151 | ||
152 | RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | |
153 | if (U_FAILURE(*status)) { | |
154 | return 0; | |
155 | } | |
156 | int32_t count = 0; | |
157 | for(int32_t i=0; i<retLen; i++){ | |
158 | matcher.reset(stringArray[i]); | |
159 | if(matcher.lookingAt(*status)){ | |
160 | count++; | |
161 | } | |
162 | } | |
163 | if(option == UPC_TRANSLATE && count > 1){ | |
164 | fprintf(stderr, "Multiple @translate tags cannot be supported.\n"); | |
165 | exit(U_UNSUPPORTED_ERROR); | |
166 | } | |
167 | return count; | |
168 | } | |
169 | ||
170 | U_CFUNC int32_t | |
171 | getAt(const UChar* source, int32_t srcLen, | |
172 | UChar** dest, int32_t destCapacity, | |
173 | int32_t index, | |
174 | UParseCommentsOption option, | |
175 | UErrorCode* status){ | |
176 | ||
177 | if(status == NULL || U_FAILURE(*status)){ | |
178 | return 0; | |
179 | } | |
180 | ||
181 | UnicodeString stringArray[MAX_SPLIT_STRINGS]; | |
182 | RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status); | |
183 | UnicodeString src (source, srcLen); | |
184 | ||
185 | ||
186 | if (U_FAILURE(*status)) { | |
187 | return 0; | |
188 | } | |
189 | int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status); | |
190 | ||
191 | RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status); | |
192 | if (U_FAILURE(*status)) { | |
193 | return 0; | |
194 | } | |
195 | int32_t count = 0; | |
196 | for(int32_t i=0; i<retLen; i++){ | |
197 | matcher.reset(stringArray[i]); | |
198 | if(matcher.lookingAt(*status)){ | |
199 | if(count == index){ | |
200 | UnicodeString out = matcher.group(1, *status); | |
201 | return out.extract(*dest, destCapacity,*status); | |
202 | } | |
203 | count++; | |
204 | ||
205 | } | |
206 | } | |
207 | return 0; | |
208 | ||
209 | } | |
210 | ||
211 | U_CFUNC int32_t | |
212 | getTranslate( const UChar* source, int32_t srcLen, | |
213 | UChar** dest, int32_t destCapacity, | |
214 | UErrorCode* status){ | |
215 | UnicodeString notePatternString = "^translate\\s*?(.*)"; | |
216 | ||
217 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
218 | return trim(*dest, destLen, status); | |
219 | } | |
220 | ||
221 | U_CFUNC int32_t | |
222 | getNote(const UChar* source, int32_t srcLen, | |
223 | UChar** dest, int32_t destCapacity, | |
224 | UErrorCode* status){ | |
225 | ||
226 | UnicodeString notePatternString = "^note\\s*?(.*)"; | |
227 | int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status); | |
228 | return trim(*dest, destLen, status); | |
229 | ||
230 | } | |
231 | ||
232 | #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */ | |
233 |