icuSources/tools/genrb/prscmnts.cpp

   1 /*
   2  *******************************************************************************
   3  *   Copyright (C) 2003-2014, International Business Machines
   4  *   Corporation and others.  All Rights Reserved.
   5  *******************************************************************************
   6  *
   7  * File prscmnts.cpp
   8  *
   9  * Modification History:
  10  *
  11  *   Date          Name        Description
  12  *   08/22/2003    ram         Creation.
  13  *******************************************************************************
  14  */
  15
  16 // Safer use of UnicodeString.
  17 #ifndef UNISTR_FROM_CHAR_EXPLICIT
  18 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
  19 #endif
  20
  21 // Less important, but still a good idea.
  22 #ifndef UNISTR_FROM_STRING_EXPLICIT
  23 #   define UNISTR_FROM_STRING_EXPLICIT explicit
  24 #endif
  25
  26 #include "unicode/regex.h"
  27 #include "unicode/unistr.h"
  28 #include "unicode/parseerr.h"
  29 #include "prscmnts.h"
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32
  33 U_NAMESPACE_USE
  34
  35 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
  36
  37 #define MAX_SPLIT_STRINGS 20
  38
  39 const char *patternStrings[UPC_LIMIT]={
  40     "^translate\\s*(.*)",
  41     "^note\\s*(.*)"
  42 };
  43
  44 U_CFUNC int32_t
  45 removeText(UChar *source, int32_t srcLen,
  46            UnicodeString patString,uint32_t options,
  47            UnicodeString replaceText, UErrorCode *status){
  48
  49     if(status == NULL || U_FAILURE(*status)){
  50         return 0;
  51     }
  52
  53     UnicodeString src(source, srcLen);
  54
  55     RegexMatcher    myMatcher(patString, src, options, *status);
  56     if(U_FAILURE(*status)){
  57         return 0;
  58     }
  59     UnicodeString dest;
  60
  61
  62     dest = myMatcher.replaceAll(replaceText,*status);
  63
  64
  65     return dest.extract(source, srcLen, *status);
  66
  67 }
  68 U_CFUNC int32_t
  69 trim(UChar *src, int32_t srcLen, UErrorCode *status){
  70      srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading new lines
  71      srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading spaces
  72      srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remvoe trailing spcaes
  73      return srcLen;
  74 }
  75
  76 U_CFUNC int32_t
  77 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
  78     srcLen = trim(source, srcLen, status);
  79     UnicodeString patString("^\\s*?\\*\\s*?");  // remove pattern like " * " at the begining of the line
  80     srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
  81     return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// remove new lines;
  82 }
  83
  84 U_CFUNC int32_t
  85 getText(const UChar* source, int32_t srcLen,
  86         UChar** dest, int32_t destCapacity,
  87         UnicodeString patternString,
  88         UErrorCode* status){
  89
  90     if(status == NULL || U_FAILURE(*status)){
  91         return 0;
  92     }
  93
  94     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
  95     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
  96     UnicodeString src (source,srcLen);
  97
  98     if (U_FAILURE(*status)) {
  99         return 0;
 100     }
 101     pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
 102
 103     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
 104     if (U_FAILURE(*status)) {
 105         return 0;
 106     }
 107     for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
 108         matcher.reset(stringArray[i]);
 109         if(matcher.lookingAt(*status)){
 110             UnicodeString out = matcher.group(1, *status);
 111
 112             return out.extract(*dest, destCapacity,*status);
 113         }
 114     }
 115     return 0;
 116 }
 117
 118
 119 #define AT_SIGN  0x0040
 120
 121 U_CFUNC int32_t
 122 getDescription( const UChar* source, int32_t srcLen,
 123                 UChar** dest, int32_t destCapacity,
 124                 UErrorCode* status){
 125     if(status == NULL || U_FAILURE(*status)){
 126         return 0;
 127     }
 128
 129     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
 130     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
 131     UnicodeString src(source, srcLen);
 132
 133     if (U_FAILURE(*status)) {
 134         return 0;
 135     }
 136     pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
 137
 138     if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
 139         int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
 140         return trim(*dest, destLen, status);
 141     }
 142     return 0;
 143 }
 144
 145 U_CFUNC int32_t
 146 getCount(const UChar* source, int32_t srcLen,
 147          UParseCommentsOption option, UErrorCode *status){
 148
 149     if(status == NULL || U_FAILURE(*status)){
 150         return 0;
 151     }
 152
 153     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
 154     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
 155     UnicodeString src (source, srcLen);
 156
 157
 158     if (U_FAILURE(*status)) {
 159         return 0;
 160     }
 161     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
 162
 163     UnicodeString patternString(patternStrings[option]);
 164     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
 165     if (U_FAILURE(*status)) {
 166         return 0;
 167     }
 168     int32_t count = 0;
 169     for(int32_t i=0; i<retLen; i++){
 170         matcher.reset(stringArray[i]);
 171         if(matcher.lookingAt(*status)){
 172             count++;
 173         }
 174     }
 175     if(option == UPC_TRANSLATE && count > 1){
 176         fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
 177         exit(U_UNSUPPORTED_ERROR);
 178     }
 179     return count;
 180 }
 181
 182 U_CFUNC int32_t
 183 getAt(const UChar* source, int32_t srcLen,
 184         UChar** dest, int32_t destCapacity,
 185         int32_t index,
 186         UParseCommentsOption option,
 187         UErrorCode* status){
 188
 189     if(status == NULL || U_FAILURE(*status)){
 190         return 0;
 191     }
 192
 193     UnicodeString     stringArray[MAX_SPLIT_STRINGS];
 194     RegexPattern      *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
 195     UnicodeString src (source, srcLen);
 196
 197
 198     if (U_FAILURE(*status)) {
 199         return 0;
 200     }
 201     int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
 202
 203     UnicodeString patternString(patternStrings[option]);
 204     RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
 205     if (U_FAILURE(*status)) {
 206         return 0;
 207     }
 208     int32_t count = 0;
 209     for(int32_t i=0; i<retLen; i++){
 210         matcher.reset(stringArray[i]);
 211         if(matcher.lookingAt(*status)){
 212             if(count == index){
 213                 UnicodeString out = matcher.group(1, *status);
 214                 return out.extract(*dest, destCapacity,*status);
 215             }
 216             count++;
 217
 218         }
 219     }
 220     return 0;
 221
 222 }
 223
 224 U_CFUNC int32_t
 225 getTranslate( const UChar* source, int32_t srcLen,
 226               UChar** dest, int32_t destCapacity,
 227               UErrorCode* status){
 228     UnicodeString     notePatternString("^translate\\s*?(.*)");
 229
 230     int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
 231     return trim(*dest, destLen, status);
 232 }
 233
 234 U_CFUNC int32_t
 235 getNote(const UChar* source, int32_t srcLen,
 236         UChar** dest, int32_t destCapacity,
 237         UErrorCode* status){
 238
 239     UnicodeString     notePatternString("^note\\s*?(.*)");
 240     int32_t destLen =  getText(source, srcLen, dest, destCapacity, notePatternString, status);
 241     return trim(*dest, destLen, status);
 242
 243 }
 244
 245 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
 246