]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/prscmnts.cpp
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / tools / genrb / prscmnts.cpp
CommitLineData
374ca955 1/*
46f4442e
A
2 *******************************************************************************
3 * Copyright (C) 2003-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File prscmnts.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 08/22/2003 ram Creation.
13 *******************************************************************************
14 */
15
374ca955
A
16#include "unicode/regex.h"
17#include "unicode/unistr.h"
18#include "unicode/parseerr.h"
19#include "prscmnts.h"
20#include <stdio.h>
21#include <stdlib.h>
22
46f4442e
A
23U_NAMESPACE_USE
24
374ca955
A
25#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
26
27#define MAX_SPLIT_STRINGS 20
28
29const char *patternStrings[UPC_LIMIT]={
46f4442e
A
30 "^translate\\s*(.*)",
31 "^note\\s*(.*)"
374ca955
A
32};
33
34U_CFUNC int32_t
35removeText(UChar *source, int32_t srcLen,
36 UnicodeString patString,uint32_t options,
37 UnicodeString replaceText, UErrorCode *status){
38
39 if(status == NULL || U_FAILURE(*status)){
40 return 0;
41 }
42
43 UnicodeString src(source, srcLen);
44
45 RegexMatcher myMatcher(patString, src, options, *status);
46 if(U_FAILURE(*status)){
47 return 0;
48 }
49 UnicodeString dest;
50
51
52 dest = myMatcher.replaceAll(replaceText,*status);
53
54
55 return dest.extract(source, srcLen, *status);
56
57}
58U_CFUNC int32_t
59trim(UChar *src, int32_t srcLen, UErrorCode *status){
60 srcLen = removeText(src, srcLen, "^[ \\r\\n]+ ", 0, "", status); // remove leading new lines
61 srcLen = removeText(src, srcLen, "^\\s+", 0, "", status); // remove leading spaces
62 srcLen = removeText(src, srcLen, "\\s+$", 0, "", status); // remvoe trailing spcaes
63 return srcLen;
64}
65
66U_CFUNC int32_t
67removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
68 srcLen = trim(source, srcLen, status);
69 UnicodeString patString = "^\\s*?\\*\\s*?"; // remove pattern like " * " at the begining of the line
70 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, "", status);
71 return removeText(source, srcLen, "[ \\r\\n]+", 0, " ", status);// remove new lines;
72}
73
74U_CFUNC int32_t
75getText(const UChar* source, int32_t srcLen,
76 UChar** dest, int32_t destCapacity,
77 UnicodeString patternString,
78 UErrorCode* status){
79
80 if(status == NULL || U_FAILURE(*status)){
81 return 0;
82 }
83
84 UnicodeString stringArray[MAX_SPLIT_STRINGS];
85 RegexPattern *pattern = RegexPattern::compile("@", 0, *status);
86 UnicodeString src (source,srcLen);
87
88 if (U_FAILURE(*status)) {
89 return 0;
90 }
91 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
92
93 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
94 if (U_FAILURE(*status)) {
95 return 0;
96 }
97 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
98 matcher.reset(stringArray[i]);
99 if(matcher.lookingAt(*status)){
100 UnicodeString out = matcher.group(1, *status);
101
102 return out.extract(*dest, destCapacity,*status);
103 }
104 }
105 return 0;
106}
107
108
109#define AT_SIGN 0x0040
110
111U_CFUNC int32_t
112getDescription( const UChar* source, int32_t srcLen,
113 UChar** dest, int32_t destCapacity,
114 UErrorCode* status){
115 if(status == NULL || U_FAILURE(*status)){
116 return 0;
117 }
118
119 UnicodeString stringArray[MAX_SPLIT_STRINGS];
120 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
121 UnicodeString src(source, srcLen);
122
123 if (U_FAILURE(*status)) {
124 return 0;
125 }
126 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
127
128 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
129 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
130 return trim(*dest, destLen, status);
131 }
132 return 0;
133}
134
135U_CFUNC int32_t
136getCount(const UChar* source, int32_t srcLen,
137 UParseCommentsOption option, UErrorCode *status){
138
139 if(status == NULL || U_FAILURE(*status)){
140 return 0;
141 }
142
143 UnicodeString stringArray[MAX_SPLIT_STRINGS];
144 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
145 UnicodeString src (source, srcLen);
146
147
148 if (U_FAILURE(*status)) {
149 return 0;
150 }
151 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
152
153 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
154 if (U_FAILURE(*status)) {
155 return 0;
156 }
157 int32_t count = 0;
158 for(int32_t i=0; i<retLen; i++){
159 matcher.reset(stringArray[i]);
160 if(matcher.lookingAt(*status)){
161 count++;
162 }
163 }
164 if(option == UPC_TRANSLATE && count > 1){
165 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
166 exit(U_UNSUPPORTED_ERROR);
167 }
168 return count;
169}
170
171U_CFUNC int32_t
172getAt(const UChar* source, int32_t srcLen,
173 UChar** dest, int32_t destCapacity,
174 int32_t index,
175 UParseCommentsOption option,
176 UErrorCode* status){
177
178 if(status == NULL || U_FAILURE(*status)){
179 return 0;
180 }
181
182 UnicodeString stringArray[MAX_SPLIT_STRINGS];
183 RegexPattern *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
184 UnicodeString src (source, srcLen);
185
186
187 if (U_FAILURE(*status)) {
188 return 0;
189 }
190 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
191
192 RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
193 if (U_FAILURE(*status)) {
194 return 0;
195 }
196 int32_t count = 0;
197 for(int32_t i=0; i<retLen; i++){
198 matcher.reset(stringArray[i]);
199 if(matcher.lookingAt(*status)){
200 if(count == index){
201 UnicodeString out = matcher.group(1, *status);
202 return out.extract(*dest, destCapacity,*status);
203 }
204 count++;
205
206 }
207 }
208 return 0;
209
210}
211
212U_CFUNC int32_t
213getTranslate( const UChar* source, int32_t srcLen,
214 UChar** dest, int32_t destCapacity,
215 UErrorCode* status){
216 UnicodeString notePatternString = "^translate\\s*?(.*)";
217
218 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
219 return trim(*dest, destLen, status);
220}
221
222U_CFUNC int32_t
223getNote(const UChar* source, int32_t srcLen,
224 UChar** dest, int32_t destCapacity,
225 UErrorCode* status){
226
227 UnicodeString notePatternString = "^note\\s*?(.*)";
228 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
229 return trim(*dest, destLen, status);
230
231}
232
233#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
234