ICU-8.11.1.tar.gz

[apple/icu.git] / icuSources / tools / toolutil / uparse.c
diff --git a/icuSources/tools/toolutil/uparse.c b/icuSources/tools/toolutil/uparse.c

index 9f31e74aa22cdb0e2eec09a96ed1e4dbd890ba2e..f9f84721c277c6536c97838214d9ed18b358ce16 100644 (file)
--- a/icuSources/tools/toolutil/uparse.c
+++ b/icuSources/tools/toolutil/uparse.c
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2000-2003, International Business Machines
+*   Copyright (C) 2000-2006, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -36,6 +36,29 @@ u_skipWhitespace(const char *s) {
      return s;
  }
  
+/*
+ * If the string starts with # @missing: then return the pointer to the
+ * following non-whitespace character.
+ * Otherwise return the original pointer.
+ * Unicode 5.0 adds such lines in some data files to document
+ * default property values.
+ * Poor man's regex for variable amounts of white space.
+ */
+static const char *
+getMissingLimit(const char *s) {  /* returns the position after a leading "# @missing:", or s itself if absent */
+    const char *s0=s;  /* keep the original pointer for the no-match return */
+    if(
+        *(s=u_skipWhitespace(s))=='#' &&  /* expect '#'; && short-circuits, so s advances only while each token matches */
+        *(s=u_skipWhitespace(s+1))=='@' &&  /* then '@', after u_skipWhitespace() */
+        0==strncmp((s=u_skipWhitespace(s+1)), "missing", 7) &&  /* then the 7 characters of "missing" */
+        *(s=u_skipWhitespace(s+7))==':'  /* then ':' */
+    ) {
+        return u_skipWhitespace(s+1);  /* all tokens matched: result of u_skipWhitespace() on the text after ':' */
+    } else {
+        return s0;  /* no match: hand back the unmodified input pointer */
+    }
+}
+
  U_CAPI void U_EXPORT2
  u_parseDelimitedFile(const char *filename, char delimiter,
                       char *fields[][2], int32_t fieldCount,
@@ -74,16 +97,28 @@ u_parseDelimitedFile(const char *filename, char delimiter,
              line[--length]=0;
          }
  
+        /*
+         * detect a line with # @missing:
+         * start parsing after that, or else from the beginning of the line
+         * set the default warning for @missing lines
+         */
+        start=(char *)getMissingLimit(line);
+        if(start==line) {
+            *pErrorCode=U_ZERO_ERROR;
+        } else {
+            *pErrorCode=U_USING_DEFAULT_WARNING;
+        }
+
          /* skip this line if it is empty or a comment */
-        if(line[0]==0 || line[0]=='#') {
+        if(*start==0 || *start=='#') {
              continue;
          }
  
          /* remove in-line comments */
-        limit=uprv_strchr(line, '#');
+        limit=uprv_strchr(start, '#');
          if(limit!=NULL) {
              /* get white space before the pound sign */
-            while(limit>line && (*(limit-1)==' ' || *(limit-1)=='\t')) {
+            while(limit>start && (*(limit-1)==' ' || *(limit-1)=='\t')) {
                  --limit;
              }
  
@@ -92,12 +127,11 @@ u_parseDelimitedFile(const char *filename, char delimiter,
          }
  
          /* skip lines with only whitespace */
-        if(u_skipWhitespace(line)[0]==0) {
+        if(u_skipWhitespace(start)[0]==0) {
              continue;
          }
  
          /* for each field, call the corresponding field function */
-        start=line;
          for(i=0; i<fieldCount; ++i) {
              /* set the limit pointer of this field */
              limit=start;
@@ -156,6 +190,7 @@ u_parseCodePoints(const char *s,
      }
      if(s==NULL || destCapacity<0 || (destCapacity>0 && dest==NULL)) {
          *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
      }
  
      count=0;
@@ -261,6 +296,7 @@ u_parseCodePointRange(const char *s,
      }
      if(s==NULL || pStart==NULL || pEnd==NULL) {
          *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
      }
  
      s=u_skipWhitespace(s);
@@ -313,79 +349,13 @@ u_parseCodePointRange(const char *s,
      }
  }
  
-
-U_CAPI const UChar * U_EXPORT2
-u_strSkipWhiteSpace(const UChar *s, int32_t length) {
-    int32_t i = 0, toReturn = 0;
-    UChar32 c = 0;
-    if(s == NULL) {
-        return NULL;
-    }
-    if(length == 0) {
-        return s;
-    }
-    if(length > 0) {
-        for(;;) {
-            if(i >= length) {
-                break;
-            }
-            toReturn = i;
-            U16_NEXT(s, i, length, c);
-            if(!(c == 0x20 || u_isUWhiteSpace(c))) {
-                break;
-            }
-        }
-    } else {
-        for(;;) {
-            toReturn = i;
-            U16_NEXT(s, i, length, c);
-            if(!(c == 0x20 || u_isUWhiteSpace(c)) || c == 0) {
-                break;
-            }
-        }
-    }
-    return s+toReturn;
-}
-
-
-U_CAPI const UChar * U_EXPORT2
-u_strTrailingWhiteSpaceStart(const UChar *s, int32_t length) {
-    int32_t i = 0, toReturn = 0;
-    UChar32 c = 0;
-    
-    if(s == NULL) {
-        return NULL;
-    }
-    if(length == 0) {
-        return s;
-    }
-    
-    if(length < 0) {
-        length = u_strlen(s);
-    }
-    
-    i = length;
-    for(;;) {
-        toReturn = i;
-        if(i <= 0) {
-            break;
-        }
-        U16_PREV(s, 0, i, c);
-        if(!(c == 0x20 || u_isUWhiteSpace(c))) {
-            break;
-        }
-    }
-    
-    return s+toReturn;
-}
-
  U_CAPI int32_t U_EXPORT2
  u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) {
      const char *read = source;
      int32_t i = 0;
      unsigned int value = 0;
      if(sLen == -1) {
-        sLen = strlen(source);
+        sLen = (int32_t)strlen(source);
      }
      
      while(read < source+sLen) {