]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/tools/genrb/prscmnts.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / tools / genrb / prscmnts.cpp
... / ...
CommitLineData
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 *******************************************************************************
5 * Copyright (C) 2003-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File prscmnts.cpp
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 08/22/2003 ram Creation.
15 *******************************************************************************
16 */
17
18// Safer use of UnicodeString.
19#ifndef UNISTR_FROM_CHAR_EXPLICIT
20# define UNISTR_FROM_CHAR_EXPLICIT explicit
21#endif
22
23// Less important, but still a good idea.
24#ifndef UNISTR_FROM_STRING_EXPLICIT
25# define UNISTR_FROM_STRING_EXPLICIT explicit
26#endif
27
28#include "unicode/regex.h"
29#include "unicode/unistr.h"
30#include "unicode/parseerr.h"
31#include "prscmnts.h"
32#include <stdio.h>
33#include <stdlib.h>
34
35U_NAMESPACE_USE
36
37#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
38
39#define MAX_SPLIT_STRINGS 20
40
41const char *patternStrings[UPC_LIMIT]={
42 "^translate\\s*(.*)",
43 "^note\\s*(.*)"
44};
45
46U_CFUNC int32_t
47removeText(UChar *source, int32_t srcLen,
48 UnicodeString patString,uint32_t options,
49 UnicodeString replaceText, UErrorCode *status){
50
51 if(status == NULL || U_FAILURE(*status)){
52 return 0;
53 }
54
55 UnicodeString src(source, srcLen);
56
57 RegexMatcher myMatcher(patString, src, options, *status);
58 if(U_FAILURE(*status)){
59 return 0;
60 }
61 UnicodeString dest;
62
63
64 dest = myMatcher.replaceAll(replaceText,*status);
65
66
67 return dest.extract(source, srcLen, *status);
68
69}
70U_CFUNC int32_t
71trim(UChar *src, int32_t srcLen, UErrorCode *status){
72 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
73 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
74 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
75 return srcLen;
76}
77
78U_CFUNC int32_t
79removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
80 srcLen = trim(source, srcLen, status);
81 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the begining of the line
82 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
83 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
84}
85
86U_CFUNC int32_t
87getText(const UChar* source, int32_t srcLen,
88 UChar** dest, int32_t destCapacity,
89 UnicodeString patternString,
90 UErrorCode* status){
91
92 if(status == NULL || U_FAILURE(*status)){
93 return 0;
94 }
95
96 UnicodeString stringArray[MAX_SPLIT_STRINGS];
97 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
98 UnicodeString src (source,srcLen);
99
100 if (U_FAILURE(*status)) {
101 return 0;
102 }
103 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
104
105 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
106 if (U_FAILURE(*status)) {
107 return 0;
108 }
109 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
110 matcher.reset(stringArray[i]);
111 if(matcher.lookingAt(*status)){
112 UnicodeString out = matcher.group(1, *status);
113
114 return out.extract(*dest, destCapacity,*status);
115 }
116 }
117 return 0;
118}
119
120
121#define AT_SIGN 0x0040
122
123U_CFUNC int32_t
124getDescription( const UChar* source, int32_t srcLen,
125 UChar** dest, int32_t destCapacity,
126 UErrorCode* status){
127 if(status == NULL || U_FAILURE(*status)){
128 return 0;
129 }
130
131 UnicodeString stringArray[MAX_SPLIT_STRINGS];
132 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
133 UnicodeString src(source, srcLen);
134
135 if (U_FAILURE(*status)) {
136 return 0;
137 }
138 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
139
140 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
141 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
142 return trim(*dest, destLen, status);
143 }
144 return 0;
145}
146
147U_CFUNC int32_t
148getCount(const UChar* source, int32_t srcLen,
149 UParseCommentsOption option, UErrorCode *status){
150
151 if(status == NULL || U_FAILURE(*status)){
152 return 0;
153 }
154
155 UnicodeString stringArray[MAX_SPLIT_STRINGS];
156 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
157 UnicodeString src (source, srcLen);
158
159
160 if (U_FAILURE(*status)) {
161 return 0;
162 }
163 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
164
165 UnicodeString patternString(patternStrings[option]);
166 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
167 if (U_FAILURE(*status)) {
168 return 0;
169 }
170 int32_t count = 0;
171 for(int32_t i=0; i<retLen; i++){
172 matcher.reset(stringArray[i]);
173 if(matcher.lookingAt(*status)){
174 count++;
175 }
176 }
177 if(option == UPC_TRANSLATE && count > 1){
178 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
179 exit(U_UNSUPPORTED_ERROR);
180 }
181 return count;
182}
183
184U_CFUNC int32_t
185getAt(const UChar* source, int32_t srcLen,
186 UChar** dest, int32_t destCapacity,
187 int32_t index,
188 UParseCommentsOption option,
189 UErrorCode* status){
190
191 if(status == NULL || U_FAILURE(*status)){
192 return 0;
193 }
194
195 UnicodeString stringArray[MAX_SPLIT_STRINGS];
196 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
197 UnicodeString src (source, srcLen);
198
199
200 if (U_FAILURE(*status)) {
201 return 0;
202 }
203 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
204
205 UnicodeString patternString(patternStrings[option]);
206 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
207 if (U_FAILURE(*status)) {
208 return 0;
209 }
210 int32_t count = 0;
211 for(int32_t i=0; i<retLen; i++){
212 matcher.reset(stringArray[i]);
213 if(matcher.lookingAt(*status)){
214 if(count == index){
215 UnicodeString out = matcher.group(1, *status);
216 return out.extract(*dest, destCapacity,*status);
217 }
218 count++;
219
220 }
221 }
222 return 0;
223
224}
225
226U_CFUNC int32_t
227getTranslate( const UChar* source, int32_t srcLen,
228 UChar** dest, int32_t destCapacity,
229 UErrorCode* status){
230 UnicodeString notePatternString("^translate\\s*?(.*)");
231
232 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
233 return trim(*dest, destLen, status);
234}
235
236U_CFUNC int32_t
237getNote(const UChar* source, int32_t srcLen,
238 UChar** dest, int32_t destCapacity,
239 UErrorCode* status){
240
241 UnicodeString notePatternString("^note\\s*?(.*)");
242 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
243 return trim(*dest, destLen, status);
244
245}
246
247#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
248