]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/prscmnts.cpp
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / prscmnts.cpp
CommitLineData
374ca955 1/*
46f4442e 2 *******************************************************************************
57a6839d 3 * Copyright (C) 2003-2014, International Business Machines
46f4442e
A
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File prscmnts.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 08/22/2003 ram Creation.
13 *******************************************************************************
14 */
15
57a6839d
A
16// Safer use of UnicodeString.
17#ifndef UNISTR_FROM_CHAR_EXPLICIT
18# define UNISTR_FROM_CHAR_EXPLICIT explicit
19#endif
20
21// Less important, but still a good idea.
22#ifndef UNISTR_FROM_STRING_EXPLICIT
23# define UNISTR_FROM_STRING_EXPLICIT explicit
24#endif
25
374ca955
A
26#include "unicode/regex.h"
27#include "unicode/unistr.h"
28#include "unicode/parseerr.h"
29#include "prscmnts.h"
30#include <stdio.h>
31#include <stdlib.h>
32
46f4442e
A
33U_NAMESPACE_USE
34
374ca955
A
35#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
36
37#define MAX_SPLIT_STRINGS 20
38
39const char *patternStrings[UPC_LIMIT]={
46f4442e
A
40 "^translate\\s*(.*)",
41 "^note\\s*(.*)"
374ca955
A
42};
43
44U_CFUNC int32_t
45removeText(UChar *source, int32_t srcLen,
46 UnicodeString patString,uint32_t options,
47 UnicodeString replaceText, UErrorCode *status){
48
49 if(status == NULL || U_FAILURE(*status)){
50 return 0;
51 }
52
53 UnicodeString src(source, srcLen);
54
55 RegexMatcher myMatcher(patString, src, options, *status);
56 if(U_FAILURE(*status)){
57 return 0;
58 }
59 UnicodeString dest;
60
61
62 dest = myMatcher.replaceAll(replaceText,*status);
63
64
65 return dest.extract(source, srcLen, *status);
66
67}
68U_CFUNC int32_t
69trim(UChar *src, int32_t srcLen, UErrorCode *status){
57a6839d
A
70 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
71 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
72 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
374ca955
A
73 return srcLen;
74}
75
76U_CFUNC int32_t
77removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
78 srcLen = trim(source, srcLen, status);
57a6839d
A
79 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line
80 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
81 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
374ca955
A
82}
83
84U_CFUNC int32_t
85getText(const UChar* source, int32_t srcLen,
86 UChar** dest, int32_t destCapacity,
87 UnicodeString patternString,
88 UErrorCode* status){
89
90 if(status == NULL || U_FAILURE(*status)){
91 return 0;
92 }
93
94 UnicodeString stringArray[MAX_SPLIT_STRINGS];
57a6839d 95 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
374ca955
A
96 UnicodeString src (source,srcLen);
97
98 if (U_FAILURE(*status)) {
99 return 0;
100 }
101 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
102
103 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
104 if (U_FAILURE(*status)) {
105 return 0;
106 }
107 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
108 matcher.reset(stringArray[i]);
109 if(matcher.lookingAt(*status)){
110 UnicodeString out = matcher.group(1, *status);
111
112 return out.extract(*dest, destCapacity,*status);
113 }
114 }
115 return 0;
116}
117
118
119#define AT_SIGN 0x0040
120
121U_CFUNC int32_t
122getDescription( const UChar* source, int32_t srcLen,
123 UChar** dest, int32_t destCapacity,
124 UErrorCode* status){
125 if(status == NULL || U_FAILURE(*status)){
126 return 0;
127 }
128
129 UnicodeString stringArray[MAX_SPLIT_STRINGS];
57a6839d 130 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
374ca955
A
131 UnicodeString src(source, srcLen);
132
133 if (U_FAILURE(*status)) {
134 return 0;
135 }
136 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
137
138 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
139 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
140 return trim(*dest, destLen, status);
141 }
142 return 0;
143}
144
145U_CFUNC int32_t
146getCount(const UChar* source, int32_t srcLen,
147 UParseCommentsOption option, UErrorCode *status){
148
149 if(status == NULL || U_FAILURE(*status)){
150 return 0;
151 }
152
153 UnicodeString stringArray[MAX_SPLIT_STRINGS];
57a6839d 154 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
374ca955
A
155 UnicodeString src (source, srcLen);
156
157
158 if (U_FAILURE(*status)) {
159 return 0;
160 }
161 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
162
57a6839d
A
163 UnicodeString patternString(patternStrings[option]);
164 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
374ca955
A
165 if (U_FAILURE(*status)) {
166 return 0;
167 }
168 int32_t count = 0;
169 for(int32_t i=0; i<retLen; i++){
170 matcher.reset(stringArray[i]);
171 if(matcher.lookingAt(*status)){
172 count++;
173 }
174 }
175 if(option == UPC_TRANSLATE && count > 1){
176 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
177 exit(U_UNSUPPORTED_ERROR);
178 }
179 return count;
180}
181
182U_CFUNC int32_t
183getAt(const UChar* source, int32_t srcLen,
184 UChar** dest, int32_t destCapacity,
185 int32_t index,
186 UParseCommentsOption option,
187 UErrorCode* status){
188
189 if(status == NULL || U_FAILURE(*status)){
190 return 0;
191 }
192
193 UnicodeString stringArray[MAX_SPLIT_STRINGS];
57a6839d 194 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
374ca955
A
195 UnicodeString src (source, srcLen);
196
197
198 if (U_FAILURE(*status)) {
199 return 0;
200 }
201 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
202
57a6839d
A
203 UnicodeString patternString(patternStrings[option]);
204 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
374ca955
A
205 if (U_FAILURE(*status)) {
206 return 0;
207 }
208 int32_t count = 0;
209 for(int32_t i=0; i<retLen; i++){
210 matcher.reset(stringArray[i]);
211 if(matcher.lookingAt(*status)){
212 if(count == index){
213 UnicodeString out = matcher.group(1, *status);
214 return out.extract(*dest, destCapacity,*status);
215 }
216 count++;
217
218 }
219 }
220 return 0;
221
222}
223
224U_CFUNC int32_t
225getTranslate( const UChar* source, int32_t srcLen,
226 UChar** dest, int32_t destCapacity,
227 UErrorCode* status){
57a6839d 228 UnicodeString notePatternString("^translate\\s*?(.*)");
374ca955
A
229
230 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
231 return trim(*dest, destLen, status);
232}
233
234U_CFUNC int32_t
235getNote(const UChar* source, int32_t srcLen,
236 UChar** dest, int32_t destCapacity,
237 UErrorCode* status){
238
57a6839d 239 UnicodeString notePatternString("^note\\s*?(.*)");
374ca955
A
240 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
241 return trim(*dest, destLen, status);
242
243}
244
245#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
246