ICU-6.2.4.tar.gz

[apple/icu.git] / icuSources / tools / genrb / prscmnts.cpp
diff --git a/icuSources/tools/genrb/prscmnts.cpp b/icuSources/tools/genrb/prscmnts.cpp

new file mode 100644 (file)

index 0000000..75a8b0e
--- /dev/null
+++ b/icuSources/tools/genrb/prscmnts.cpp
@@ -0,0 +1,233 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*
+* File prscmnts.cpp
+*
+* Modification History:
+*
+*   Date          Name        Description
+*   08/22/2003    ram         Creation.
+*******************************************************************************
+*/
+#include "unicode/regex.h"
+#include "unicode/unistr.h"
+#include "unicode/parseerr.h"
+#include "prscmnts.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when regular expressions are not available */
+
+#define MAX_SPLIT_STRINGS 20 /* capacity of the UnicodeString arrays that receive the pieces of a comment split on "@" */
+
+const char *patternStrings[UPC_LIMIT]={ /* regular expressions indexed by a UParseCommentsOption value (see getCount and getAt) */
+    "^translate\\s*?(.*)", /* piece that starts with "translate"; group 1 captures what follows */
+    "^note\\s*?(.*)" /* piece that starts with "note"; group 1 captures what follows */
+};
+
+U_CFUNC int32_t 
+removeText(UChar *source, int32_t srcLen, 
+           UnicodeString patString,uint32_t options,  
+           UnicodeString replaceText, UErrorCode *status){
+
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString src(source, srcLen);
+
+    RegexMatcher    myMatcher(patString, src, options, *status);
+    if(U_FAILURE(*status)){
+        return 0;
+    }
+    UnicodeString dest;
+
+
+    dest = myMatcher.replaceAll(replaceText,*status);
+    
+    
+    return dest.extract(source, srcLen, *status);
+
+}
+/*
+ * Strips leading and trailing white space from src, in place.
+ *
+ * @param src     buffer holding the text; modified in place through removeText
+ * @param srcLen  number of UChars in src
+ * @param status  in/out error code, forwarded to removeText
+ * @return the length returned by the last removeText call
+ */
+U_CFUNC int32_t
+trim(UChar *src, int32_t srcLen, UErrorCode *status){
+    // applied in this order; every match is replaced by the empty string
+    static const char* const trimPatterns[] = {
+        "^[ \\r\\n]+ ",  // leading new lines
+        "^\\s+",         // leading spaces
+        "\\s+$"          // trailing spaces
+    };
+    const int32_t patternCount = (int32_t)(sizeof(trimPatterns)/sizeof(trimPatterns[0]));
+
+    int32_t len = srcLen;
+    for(int32_t i=0; i<patternCount; i++){
+        len = removeText(src, len, trimPatterns[i], 0, "", status);
+    }
+    return len;
+}
+
+/*
+ * Cleans up the text of a comment in place: trims it, removes the " * "
+ * decoration at the beginning of each line, and replaces every run of blanks
+ * and line breaks with a single space.
+ *
+ * @param source  buffer holding the comment text; modified in place
+ * @param srcLen  number of UChars in source
+ * @param status  in/out error code, forwarded to trim and removeText
+ * @return the length returned by the final removeText call
+ */
+U_CFUNC int32_t 
+removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
+    int32_t len = trim(source, srcLen, status);
+
+    // drop the " * " decoration; UREGEX_MULTILINE lets ^ match at every line start
+    len = removeText(source, len, "^\\s*?\\*\\s*?", UREGEX_MULTILINE, "", status);
+
+    // collapse runs of blanks and line breaks into one space
+    const UnicodeString singleSpace(" ");
+    return removeText(source, len, "[ \\r\\n]+", 0, singleSpace, status);
+}
+
+U_CFUNC int32_t 
+getText(const UChar* source, int32_t srcLen,
+        UChar** dest, int32_t destCapacity,
+        UnicodeString patternString, 
+        UErrorCode* status){
+    
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern      *pattern = RegexPattern::compile("@", 0, *status);
+    UnicodeString src (source,srcLen);
+    
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
+    
+    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
+        matcher.reset(stringArray[i]);
+        if(matcher.lookingAt(*status)){
+            UnicodeString out = matcher.group(1, *status);
+
+            return out.extract(*dest, destCapacity,*status);
+        }
+    }
+    return 0;
+}
+
+
+#define AT_SIGN  0x0040
+
+U_CFUNC int32_t
+getDescription( const UChar* source, int32_t srcLen,
+                UChar** dest, int32_t destCapacity,
+                UErrorCode* status){
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
+    UnicodeString src(source, srcLen);
+    
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
+
+    if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
+        int32_t destLen =  stringArray[0].extract(*dest, destCapacity, *status);
+        return trim(*dest, destLen, status);
+    }
+    return 0;
+}
+
+U_CFUNC int32_t
+getCount(const UChar* source, int32_t srcLen, 
+         UParseCommentsOption option, UErrorCode *status){
+    
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
+    UnicodeString src (source, srcLen);
+
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
+    
+    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    } 
+    int32_t count = 0;
+    for(int32_t i=0; i<retLen; i++){
+        matcher.reset(stringArray[i]);
+        if(matcher.lookingAt(*status)){
+            count++;
+        }
+    }
+    if(option == UPC_TRANSLATE && count > 1){
+        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
+        exit(U_UNSUPPORTED_ERROR);
+    }
+    return count;
+}
+
+U_CFUNC int32_t 
+getAt(const UChar* source, int32_t srcLen,
+        UChar** dest, int32_t destCapacity,
+        int32_t index,
+        UParseCommentsOption option,
+        UErrorCode* status){
+
+    if(status == NULL || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString     stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern      *pattern = RegexPattern::compile("@", UREGEX_MULTILINE, *status);
+    UnicodeString src (source, srcLen);
+
+
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
+    
+    RegexMatcher matcher(patternStrings[option], UREGEX_DOTALL, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    } 
+    int32_t count = 0;
+    for(int32_t i=0; i<retLen; i++){
+        matcher.reset(stringArray[i]);
+        if(matcher.lookingAt(*status)){
+            if(count == index){
+                UnicodeString out = matcher.group(1, *status);
+                return out.extract(*dest, destCapacity,*status);
+            }
+            count++;
+            
+        }
+    }
+    return 0;
+
+}
+
+/*
+ * Copies into *dest the trimmed text of the first "@"-separated piece of the
+ * comment that starts with "translate".
+ *
+ * @param source        comment text
+ * @param srcLen        number of UChars in source
+ * @param dest          *dest is the buffer that receives the text
+ * @param destCapacity  capacity of *dest in UChars
+ * @param status        in/out error code, forwarded to getText and trim
+ * @return the length returned by trim for the extracted text
+ */
+U_CFUNC int32_t
+getTranslate( const UChar* source, int32_t srcLen,
+              UChar** dest, int32_t destCapacity,
+              UErrorCode* status){
+    const UnicodeString translatePattern("^translate\\s*?(.*)");
+
+    // pull out the raw text first, then strip the surrounding white space
+    const int32_t rawLen = getText(source, srcLen, dest, destCapacity, translatePattern, status);
+    return trim(*dest, rawLen, status);
+}
+
+/*
+ * Copies into *dest the trimmed text of the first "@"-separated piece of the
+ * comment that starts with "note".
+ *
+ * @param source        comment text
+ * @param srcLen        number of UChars in source
+ * @param dest          *dest is the buffer that receives the text
+ * @param destCapacity  capacity of *dest in UChars
+ * @param status        in/out error code, forwarded to getText and trim
+ * @return the length returned by trim for the extracted text
+ */
+U_CFUNC int32_t 
+getNote(const UChar* source, int32_t srcLen,
+        UChar** dest, int32_t destCapacity,
+        UErrorCode* status){
+
+    const UnicodeString notePattern("^note\\s*?(.*)");
+
+    // pull out the raw text first, then strip the surrounding white space
+    const int32_t rawLen = getText(source, srcLen, dest, destCapacity, notePattern, status);
+    return trim(*dest, rawLen, status);
+
+}
+
+#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
+