]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/prscmnts.cpp
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / prscmnts.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File prscmnts.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 08/22/2003 ram Creation.
15 *******************************************************************************
16 */
17 #include "unicode/regex.h"
18 #include "unicode/unistr.h"
19 #include "unicode/parseerr.h"
20 #include "prscmnts.h"
21 #include <stdio.h>
22 #include <stdlib.h>
23
24 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when regular expressions are not available */
25
26 #define MAX_SPLIT_STRINGS 20
27
28 const char *patternStrings[UPC_LIMIT]={
29 "^translate\\s*?(.*)",
30 "^note\\s*?(.*)"
31 };
32
33 U_CFUNC int32_t
34 removeText(UChar *source, int32_t srcLen,
35 UnicodeString patString,uint32_t options,
36 UnicodeString replaceText, UErrorCode *status){
37
38 if(status == NULL || U_FAILURE(*status)){
39 return 0;
40 }
41
42 UnicodeString src(source, srcLen);
43
44 RegexMatcher myMatcher(patString, src, options, *status);
45 if(U_FAILURE(*status)){
46 return 0;
47 }
48 UnicodeString dest;
49
50
51 dest = myMatcher.replaceAll(replaceText,*status);
52
53
54 return dest.extract(source, srcLen, *status);
55
56 }
57 U_CFUNC int32_t
58 trim(UChar *src, int32_t srcLen, UErrorCode *status){
59 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
60 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
61 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
62 return srcLen;
63 }
64
65 U_CFUNC int32_t
66 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
67 srcLen = trim(source, srcLen, status);
68 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line
69 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
70 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
71 }
72
73 U_CFUNC int32_t
74 getText(const UChar* source, int32_t srcLen,
75 UChar** dest, int32_t destCapacity,
76 UnicodeString patternString,
77 UErrorCode* status){
78
79 if(status == NULL || U_FAILURE(*status)){
80 return 0;
81 }
82
83 UnicodeString stringArray[MAX_SPLIT_STRINGS];
84 RegexPattern *pattern = RegexPattern::compile("@", 0, *status);
85 UnicodeString src (source,srcLen);
86
87 if (U_FAILURE(*status)) {
88 return 0;
89 }
90 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
91
92 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
93 if (U_FAILURE(*status)) {
94 return 0;
95 }
96 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
97 matcher.reset(stringArray[i]);
98 if(matcher.lookingAt(*status)){
99 UnicodeString out = matcher.group(1, *status);
100
101 return out.extract(*dest, destCapacity,*status);
102 }
103 }
104 return 0;
105 }
106
107
108 #define AT_SIGN 0x0040
109
110 U_CFUNC int32_t
111 getDescription( const UChar* source, int32_t srcLen,
112 UChar** dest, int32_t destCapacity,
113 UErrorCode* status){
114 if(status == NULL || U_FAILURE(*status)){
115 return 0;
116 }
117
118 UnicodeString stringArray[MAX_SPLIT_STRINGS];
119 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
120 UnicodeString src(source, srcLen);
121
122 if (U_FAILURE(*status)) {
123 return 0;
124 }
125 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
126
127 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
128 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
129 return trim(*dest, destLen, status);
130 }
131 return 0;
132 }
133
134 U_CFUNC int32_t
135 getCount(const UChar* source, int32_t srcLen,
136 UParseCommentsOption option, UErrorCode *status){
137
138 if(status == NULL || U_FAILURE(*status)){
139 return 0;
140 }
141
142 UnicodeString stringArray[MAX_SPLIT_STRINGS];
143 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
144 UnicodeString src (source, srcLen);
145
146
147 if (U_FAILURE(*status)) {
148 return 0;
149 }
150 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
151
152 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
153 if (U_FAILURE(*status)) {
154 return 0;
155 }
156 int32_t count = 0;
157 for(int32_t i=0; i<retLen; i++){
158 matcher.reset(stringArray[i]);
159 if(matcher.lookingAt(*status)){
160 count++;
161 }
162 }
163 if(option == UPC_TRANSLATE && count > 1){
164 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
165 exit(U_UNSUPPORTED_ERROR);
166 }
167 return count;
168 }
169
170 U_CFUNC int32_t
171 getAt(const UChar* source, int32_t srcLen,
172 UChar** dest, int32_t destCapacity,
173 int32_t index,
174 UParseCommentsOption option,
175 UErrorCode* status){
176
177 if(status == NULL || U_FAILURE(*status)){
178 return 0;
179 }
180
181 UnicodeString stringArray[MAX_SPLIT_STRINGS];
182 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
183 UnicodeString src (source, srcLen);
184
185
186 if (U_FAILURE(*status)) {
187 return 0;
188 }
189 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
190
191 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
192 if (U_FAILURE(*status)) {
193 return 0;
194 }
195 int32_t count = 0;
196 for(int32_t i=0; i<retLen; i++){
197 matcher.reset(stringArray[i]);
198 if(matcher.lookingAt(*status)){
199 if(count == index){
200 UnicodeString out = matcher.group(1, *status);
201 return out.extract(*dest, destCapacity,*status);
202 }
203 count++;
204
205 }
206 }
207 return 0;
208
209 }
210
211 U_CFUNC int32_t
212 getTranslate( const UChar* source, int32_t srcLen,
213 UChar** dest, int32_t destCapacity,
214 UErrorCode* status){
215 UnicodeString notePatternString = "^translate\\s*?(.*)";
216
217 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
218 return trim(*dest, destLen, status);
219 }
220
221 U_CFUNC int32_t
222 getNote(const UChar* source, int32_t srcLen,
223 UChar** dest, int32_t destCapacity,
224 UErrorCode* status){
225
226 UnicodeString notePatternString = "^note\\s*?(.*)";
227 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
228 return trim(*dest, destLen, status);
229
230 }
231
232 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
233