ICU-8.11.4.tar.gz

author Apple <opensource@apple.com>

Mon, 23 Feb 2009 03:45:05 +0000 (03:45 +0000)

committer Apple <opensource@apple.com>

Mon, 23 Feb 2009 03:45:05 +0000 (03:45 +0000)
author Apple <opensource@apple.com>
Mon, 23 Feb 2009 03:45:05 +0000 (03:45 +0000)
committer Apple <opensource@apple.com>
Mon, 23 Feb 2009 03:45:05 +0000 (03:45 +0000)
diff --git a/icuSources/common/ucnv2022.c b/icuSources/common/ucnv2022.c

index cd83c69c88648255561d5ca5d6dcc2aff5cb7aa2..67c8da9f588b5a4830c5706cab50ffaf7c1d8010 100644 (file)
--- a/icuSources/common/ucnv2022.c
+++ b/icuSources/common/ucnv2022.c
@@ -735,6 +735,7 @@ changeState_2022(UConverter* _this,
      UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo);
      uint32_t key = myData2022->key;
      int32_t offset = 0;
+    int8_t initialToULength = _this->toULength;
      char c;
  
      value = VALID_NON_TERMINAL_2022;
@@ -787,7 +788,6 @@ DONE:
          return;
      } else if (value == INVALID_2022 ) {
          *err = U_ILLEGAL_ESCAPE_SEQUENCE;
-        return;
      } else /* value == VALID_TERMINAL_2022 */ {
          switch(var){
  #ifdef U_ENABLE_GENERIC_ISO_2022
@@ -918,6 +918,35 @@ DONE:
      }
      if(U_SUCCESS(*err)) {
          _this->toULength = 0;
+    } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) {
+        if(_this->toULength>1) {
+            /*
+             * Ticket 5691: consistent illegal sequences:
+             * - We include at least the first byte (ESC) in the illegal sequence.
+             * - If any of the non-initial bytes could be the start of a character,
+             *   we stop the illegal sequence before the first one of those.
+             *   In escape sequences, all following bytes are "printable", that is,
+             *   unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS),
+             *   they are valid single/lead bytes.
+             *   For simplicity, we always only report the initial ESC byte as the
+             *   illegal sequence and back out all other bytes we looked at.
+             */
+            /* Back out some bytes. */
+            int8_t backOutDistance=_this->toULength-1;
+            int8_t bytesFromThisBuffer=_this->toULength-initialToULength;
+            if(backOutDistance<=bytesFromThisBuffer) {
+                /* same as initialToULength<=1 */
+                *source-=backOutDistance;
+            } else {
+                /* Back out bytes from the previous buffer: Need to replay them. */
+                _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
+                /* same as -(initialToULength-1) */
+                /* preToULength is negative! */
+                uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength);
+                *source-=bytesFromThisBuffer;
+            }
+            _this->toULength=1;
+        }
      }
  }
  
@@ -1200,7 +1229,7 @@ toUnicodeCallback(UConverter *cnv,
      }
      else{
          cnv->toUBytes[0] =(char) sourceChar;
-        cnv->toULength = 2;
+        cnv->toULength = 1;
      }
  
      if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){
@@ -1689,6 +1718,7 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
          mySourceChar = args->converter->toUBytes[0];
          args->converter->toULength = 0;
          cs = (StateEnum)pToU2022State->cs[pToU2022State->g];
+        targetUniChar = missingCharMarker;
          goto getTrailByte;
      }
  
@@ -1820,12 +1850,40 @@ escape:
                  default:
                      /* G0 DBCS */
                      if(mySource < mySourceLimit) {
-                        char trailByte;
+                        int leadIsOk, trailIsOk;
+                        uint8_t trailByte;
  getTrailByte:
+                        trailByte = (uint8_t)*mySource;
+                        /* old
                          tempBuf[0] = (char) (mySourceChar);
                          tempBuf[1] = trailByte = *mySource++;
                          mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
                          targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
+                        */
+                        /*
+                         * Ticket 5691: consistent illegal sequences:
+                         * - We include at least the first byte in the illegal sequence.
+                         * - If any of the non-initial bytes could be the start of a character,
+                         *   we stop the illegal sequence before the first one of those.
+                         *
+                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                         * Otherwise we convert or report the pair of bytes.
+                         */
+                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
+                        if (leadIsOk && trailIsOk) {
+                            ++mySource;
+                            tempBuf[0] = (char) (mySourceChar);
+                            tempBuf[1] = trailByte;
+                            mySourceChar = (mySourceChar << 8) | trailByte;
+                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE);
+                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                            ++mySource;
+                            /* add another bit so that the code below writes 2 bytes in case of error */
+                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
+                        }
                      } else {
                          args->converter->toUBytes[0] = (uint8_t)mySourceChar;
                          args->converter->toULength = 1;
@@ -1966,7 +2024,12 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args
              MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,&length,MBCS_OUTPUT_2);
              /* only DBCS or SBCS characters are expected*/
              /* DB characters with high bit set to 1 are expected */
-            if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){
+            if( length > 2 || length==0 ||
+                (length == 1 && targetByteUnit > 0x7f) ||
+                (length == 2 &&
+                    ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) ||
+                    (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1)))
+            ) {
                  targetByteUnit=missingCharMarker;
              }
              if (targetByteUnit != missingCharMarker){
@@ -2294,17 +2357,42 @@ escape:
              myData->isEmptySegment = FALSE;    /* Any invalid char errors will be detected separately, so just reset this */
              if(myData->toU2022State.g == 1) {
                  if(mySource < mySourceLimit) {
-                    char trailByte;
+                    int leadIsOk, trailIsOk;
+                    uint8_t trailByte;
  getTrailByte:
+                    /* old
                      trailByte = *mySource++;
                      tempBuf[0] = (char)(mySourceChar + 0x80);
                      tempBuf[1] = (char)(trailByte + 0x80);
                      mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
                      if((mySourceChar & 0x8080) == 0) {
                          targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
-                    } else {
-                        /* illegal bytes > 0x7f */
-                        targetUniChar = missingCharMarker;
+                    */
+                    targetUniChar = missingCharMarker;
+                    trailByte = (uint8_t)*mySource;
+                    /*
+                     * Ticket 5691: consistent illegal sequences:
+                     * - We include at least the first byte in the illegal sequence.
+                     * - If any of the non-initial bytes could be the start of a character,
+                     *   we stop the illegal sequence before the first one of those.
+                     *
+                     * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                     * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                     * Otherwise we convert or report the pair of bytes.
+                     */
+                    leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+                    trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
+                    if (leadIsOk && trailIsOk) {
+                        ++mySource;
+                        tempBuf[0] = (char)(mySourceChar + 0x80);
+                        tempBuf[1] = (char)(trailByte + 0x80);
+                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback);
+                        mySourceChar = (mySourceChar << 8) | trailByte;
+                    } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                        ++mySource;
+                        /* add another bit so that the code below writes 2 bytes in case of error */
+                        mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                      }
                  } else {
                      args->converter->toUBytes[0] = (uint8_t)mySourceChar;
@@ -2312,8 +2400,10 @@ getTrailByte:
                      break;
                  }
              }
-            else{
+            else if(mySourceChar <= 0x7f) {
                  targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback);
+            } else {
+                targetUniChar = 0xffff;
              }
              if(targetUniChar < 0xfffe){
                  if(args->offsets) {
@@ -2778,6 +2868,7 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
          /* continue with a partial double-byte character */
          mySourceChar = args->converter->toUBytes[0];
          args->converter->toULength = 0;
+        targetUniChar = missingCharMarker;
          goto getTrailByte;
      }
  
@@ -2855,8 +2946,10 @@ escape:
                          UConverterSharedData *cnv;
                          StateEnum tempState;
                          int32_t tempBufLen;
-                        char trailByte;
+                        int leadIsOk, trailIsOk;
+                        uint8_t trailByte;
  getTrailByte:
+                        /* old
                          trailByte = *mySource++;
                          tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
                          if(tempState > CNS_11643_0) {
@@ -2871,13 +2964,48 @@ getTrailByte:
                              tempBuf[0] = (char) (mySourceChar);
                              tempBuf[1] = trailByte;
                              tempBufLen = 2;
+                        */
+                        trailByte = (uint8_t)*mySource;
+                        /*
+                         * Ticket 5691: consistent illegal sequences:
+                         * - We include at least the first byte in the illegal sequence.
+                         * - If any of the non-initial bytes could be the start of a character,
+                         *   we stop the illegal sequence before the first one of those.
+                         *
+                         * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is
+                         * an ESC/SO/SI, we report only the first byte as the illegal sequence.
+                         * Otherwise we convert or report the pair of bytes.
+                         */
+                        leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+                        trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21);
+                        if (leadIsOk && trailIsOk) {
+                            ++mySource;
+                            tempState = (StateEnum)pToU2022State->cs[pToU2022State->g];
+                            if(tempState >= CNS_11643_0) {
+                                cnv = myData->myConverterArray[CNS_11643];
+                                tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0));
+                                tempBuf[1] = (char) (mySourceChar);
+                                tempBuf[2] = (char) trailByte;
+                                tempBufLen = 3;
+
+                            }else{
+                                cnv = myData->myConverterArray[tempState];
+                                tempBuf[0] = (char) (mySourceChar);
+                                tempBuf[1] = (char) trailByte;
+                                tempBufLen = 2;
+                            }
+                            targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
+                            mySourceChar = (mySourceChar << 8) | trailByte;
+                        } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) {
+                            /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                            ++mySource;
+                            /* add another bit so that the code below writes 2 bytes in case of error */
+                            mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte;
                          }
-                        mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte);
                          if(pToU2022State->g>=2) {
                              /* return from a single-shift state to the previous one */
                              pToU2022State->g=pToU2022State->prevG;
                          }
-                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE);
                      } else {
                          args->converter->toUBytes[0] = (uint8_t)mySourceChar;
                          args->converter->toULength = 1;
diff --git a/icuSources/common/ucnvhz.c b/icuSources/common/ucnvhz.c

index c3f63fca1081a4f64d632f400b5d81cc9b016819..4bab29b13db8ec89795afe89e07eef4dafb004de 100644 (file)
--- a/icuSources/common/ucnvhz.c
+++ b/icuSources/common/ucnvhz.c
@@ -143,7 +143,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
      UChar *myTarget = args->target;
      const char *mySourceLimit = args->sourceLimit;
      UChar32 targetUniChar = 0x0000;
-    UChar mySourceChar = 0x0000;
+    int32_t mySourceChar = 0x0000;
      UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo);
      tempBuf[0]=0; 
      tempBuf[1]=0;
@@ -157,105 +157,136 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
          if(myTarget < args->targetLimit){
              
              mySourceChar= (unsigned char) *mySource++;
-
-            switch(mySourceChar){
-                case 0x0A:
-                    if(args->converter->mode ==UCNV_TILDE){
-                        args->converter->mode=0;
-                        
-                    }
-                    *(myTarget++)=(UChar)mySourceChar;
-                    myData->isEmptySegment = FALSE;
-                    continue;
              
-                case UCNV_TILDE:
-                    if(args->converter->mode ==UCNV_TILDE){
-                        *(myTarget++)=(UChar)mySourceChar;
-                        args->converter->mode=0;
-                        myData->isEmptySegment = FALSE;
-                        continue;
-                        
-                    }
-                    else if(args->converter->toUnicodeStatus !=0){
-                        args->converter->mode=0;
-                        break;
-                    }
-                    else{
+            if(args->converter->mode == UCNV_TILDE) {
+                /* second byte after ~ */
+                args->converter->mode=0;
+                switch(mySourceChar) {
+                               case 0x0A:
+                                       /* no output for ~\n (line-continuation marker) */
+                                       continue;
+                               case UCNV_TILDE:
+                                   if(args->offsets) {
+                                           args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2);
+                                   }
+                                       *(myTarget++)=(UChar)mySourceChar;
+                                       myData->isEmptySegment = FALSE;
+                                       continue;
+                               case UCNV_OPEN_BRACE:
+                               case UCNV_CLOSE_BRACE:
+                                        myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE);
+                                        if (myData->isEmptySegment) {
+                                               myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */
+                                               *err = U_PARSE_ERROR;   /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
+                                               args->converter->toUBytes[0] = UCNV_TILDE;
+                                               args->converter->toUBytes[1] = mySourceChar;
+                                               args->converter->toULength = 2;
+                                               args->target = myTarget;
+                                               args->source = mySource;
+                                               return;
+                                        }
+                                        myData->isEmptySegment = TRUE;
+                                       continue;
+                               default:
+                                       /* if the first byte is equal to TILDE and the trail byte
+                                        * is not a valid byte then it is an error condition
+                                        */
+                                       /* old
+                                       myData->isEmptySegment = FALSE;
+                                       mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
+                                       goto SAVE_STATE;
+                                       */
+                                       /*
+                                        * Ticket 5691: consistent illegal sequences:
+                                        * - We include at least the first byte in the illegal sequence.
+                                        * - If any of the non-initial bytes could be the start of a character,
+                                        *   we stop the illegal sequence before the first one of those.
+                                        */
+                                       myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
+                                       *err = U_ILLEGAL_ESCAPE_SEQUENCE;
+                                       args->converter->toUBytes[0] = UCNV_TILDE;
+                                       if( myData->isStateDBCS ?
+                                                       (0x21 <= mySourceChar && mySourceChar <= 0x7e) :
+                                                       mySourceChar <= 0x7f
+                                       ) {
+                                               /* The current byte could be the start of a character: Back it out. */
+                                               args->converter->toULength = 1;
+                                               --mySource;
+                                       } else {
+                                               /* Include the current byte in the illegal sequence. */
+                                               args->converter->toUBytes[1] = mySourceChar;
+                                               args->converter->toULength = 2;
+                                       }
+                                       args->target = myTarget;
+                                       args->source = mySource;
+                                       return;
+                }
+            } else if(myData->isStateDBCS) {
+                if(args->converter->toUnicodeStatus == 0x00){
+                    /* lead byte */
+                    if(mySourceChar == UCNV_TILDE) {
                          args->converter->mode = UCNV_TILDE;
-                        continue;
-                    }
-                
-                
-                case UCNV_OPEN_BRACE:
-                    if(args->converter->mode == UCNV_TILDE){
-                        args->converter->mode=0;
-                        myData->isStateDBCS = TRUE;
-                        myData->isEmptySegment = TRUE;
-                        continue;
-                    }
-                    else{
-                        break;
-                    }
-               
-                
-                case UCNV_CLOSE_BRACE:
-                    if(args->converter->mode == UCNV_TILDE){
-                        args->converter->mode=0;
-                         myData->isStateDBCS = FALSE;
-                         if (myData->isEmptySegment) {
-                            myData->isEmptySegment = FALSE;    /* we are handling it, reset to avoid future spurious errors */
-                            *err = U_PARSE_ERROR;      /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */
-                            args->converter->toUBytes[0] = UCNV_TILDE;
-                            args->converter->toUBytes[1] = mySourceChar;
-                            args->converter->toULength = 2;
-                               goto EXIT;
-                         }
-                         myData->isEmptySegment = TRUE;
-                        continue;
-                    }
-                    else{
-                        break;
-                    }
-                
-                default:
-                     /* if the first byte is equal to TILDE and the trail byte
-                     * is not a valid byte then it is an error condition
-                     */
-                    if(args->converter->mode == UCNV_TILDE){
-                        args->converter->mode=0;
-                        mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
-                        myData->isEmptySegment = FALSE;        /* different error here, reset this to avoid spurious future error */
-                        goto SAVE_STATE;
+                    } else {
+                        /* add another bit to distinguish a 0 byte from not having seen a lead byte */
+                        args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100);
+                        myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */
                      }
-                    
-                    break;
-
-            }
-             
-            myData->isEmptySegment = FALSE;    /* the segment has something, either valid or will produce a different error, so reset this */
-            if(myData->isStateDBCS){
-                if(args->converter->toUnicodeStatus == 0x00){
-                    args->converter->toUnicodeStatus = (UChar) mySourceChar;
                      continue;
                  }
                  else{
+                    /* trail byte */
+                    /* old
                      tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ;
                      tempBuf[1] = (char) (mySourceChar+0x80);
                      mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80));
                      args->converter->toUnicodeStatus =0x00;
                      targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
                          tempBuf, 2, args->converter->useFallback);
+                    */
+                    int leadIsOk, trailIsOk;
+                    uint32_t leadByte = args->converter->toUnicodeStatus & 0xff;
+                    targetUniChar = 0xffff;
+                    /*
+                     * Ticket 5691: consistent illegal sequences:
+                     * - We include at least the first byte in the illegal sequence.
+                     * - If any of the non-initial bytes could be the start of a character,
+                     *   we stop the illegal sequence before the first one of those.
+                     *
+                     * In HZ DBCS, if the second byte is in the 21..7e range,
+                     * we report only the first byte as the illegal sequence.
+                     * Otherwise we convert or report the pair of bytes.
+                     */
+                    leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21);
+                    trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21);
+                    if (leadIsOk && trailIsOk) {
+                        tempBuf[0] = (char) (leadByte+0x80) ;
+                        tempBuf[1] = (char) (mySourceChar+0x80);
+                        targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
+                            tempBuf, 2, args->converter->useFallback);
+                        mySourceChar= (leadByte << 8) | mySourceChar;
+                    } else if (trailIsOk) {
+                        /* report a single illegal byte and continue with the following DBCS starter byte */
+                        --mySource;
+                        mySourceChar = (int32_t)leadByte;
+                    } else {
+                        /* report a pair of illegal bytes if the second byte is not a DBCS starter */
+                        /* add another bit so that the code below writes 2 bytes in case of error */
+                        mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar;
+                    }
+                    args->converter->toUnicodeStatus =0x00;
                  }
              }
              else{
-                if(args->converter->fromUnicodeStatus == 0x00){
-                    targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData,
-                        mySource - 1, 1, args->converter->useFallback);
-                }
-                else{
-                    goto SAVE_STATE;
+                if(mySourceChar == UCNV_TILDE) {
+                    args->converter->mode = UCNV_TILDE;
+                    continue;
+                } else if(mySourceChar <= 0x7f) {
+                    targetUniChar = (UChar)mySourceChar;  /* ASCII */
+                    myData->isEmptySegment = FALSE; /* the segment has something valid */
+                } else {
+                    targetUniChar = 0xffff;
+                    myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */
                  }
-
              }
              if(targetUniChar < 0xfffe){
                  if(args->offsets) {
@@ -264,27 +295,18 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args,
  
                  *(myTarget++)=(UChar)targetUniChar;
              }
-            else if(targetUniChar>=0xfffe){
-SAVE_STATE:
+            else /* targetUniChar>=0xfffe */ {
                  if(targetUniChar == 0xfffe){
                      *err = U_INVALID_CHAR_FOUND;
                  }
                  else{
                      *err = U_ILLEGAL_CHAR_FOUND;
                  }
-                if(myData->isStateDBCS){
-                    /* this should never occur since isStateDBCS is set to true 
-                     * only after tempBuf[0] and tempBuf[1]
-                     * are set to the input ..  just to please BEAM 
-                     */
-                    if(tempBuf[0]==0 || tempBuf[1]==0){
-                        *err = U_INTERNAL_PROGRAM_ERROR;
-                    }else{
-                        args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80);
-                        args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80);
+                if(mySourceChar > 0xff){
+                    args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8);
+                    args->converter->toUBytes[1] = (uint8_t)mySourceChar;
                          args->converter->toULength=2;
                      }
-                }
                  else{
                      args->converter->toUBytes[0] = (uint8_t)mySourceChar;
                      args->converter->toULength=1;
@@ -297,7 +319,7 @@ SAVE_STATE:
              break;
          }
      }
-EXIT:
+
      args->target = myTarget;
      args->source = mySource;
  }
diff --git a/icuSources/common/ucnvmbcs.c b/icuSources/common/ucnvmbcs.c

index 84bae8a9f2596e6dc841ea64b7d92e3a383b6b17..4be8e2dd982ef08d63b148df5965a2661a367c92 100644 (file)
--- a/icuSources/common/ucnvmbcs.c
+++ b/icuSources/common/ucnvmbcs.c
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2000-2006, International Business Machines
+*   Copyright (C) 2000-2006,2008, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -1611,6 +1611,65 @@ unrolled:
      pArgs->offsets=offsets;
  }
  
+static UBool  /* TRUE if a final entry that is not illegal is reachable from this state, directly or via transition entries */
+hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) {  /* state: index of the row of stateTable to examine */
+    const int32_t *row=stateTable[state];  /* one entry per possible byte value (indexed 0..0xff below) */
+    int32_t b, entry;
+    /* First test for final entries in this state for some commonly valid byte values (shortcut only; the loop below rechecks them). */
+    entry=row[0xa1];
+    if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
+        MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
+    ) {
+        return TRUE;
+    }
+    entry=row[0x41];
+    if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
+        MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
+    ) {
+        return TRUE;
+    }
+    /* Then test all 256 byte values for final entries in this state whose action is not illegal. */
+    for(b=0; b<=0xff; ++b) {
+        entry=row[b];
+        if( !MBCS_ENTRY_IS_TRANSITION(entry) &&
+            MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL
+        ) {
+            return TRUE;
+        }
+    }
+    /* Then recurse for transition entries: a transition only counts if its target state has valid trail bytes itself. */
+    for(b=0; b<=0xff; ++b) {
+        entry=row[b];
+        if( MBCS_ENTRY_IS_TRANSITION(entry) &&
+            hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry))  /* NOTE(review): terminates only if transitions never cycle back -- presumably guaranteed by the table format; confirm */
+        ) {
+            return TRUE;
+        }
+    }
+    return FALSE;  /* neither this row nor any state reachable from it has a non-illegal final entry */
+}
+
+/*
+ * Is byte b a single/lead byte in this state? Transition entry: TRUE only if hasValidTrailBytes()
+ * accepts the target state, because here we don't want to say that b is a lead byte if all byte
+ * sequences that start with b are illegal. Final entry: TRUE unless the action is illegal, or is change-only while isDBCSOnly.
+ */
+static UBool
+isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) {
+    const int32_t *row=stateTable[state];  /* entries of the state in which b is examined */
+    int32_t entry=row[b];  /* the entry selected by byte b */
+    if(MBCS_ENTRY_IS_TRANSITION(entry)) {   /* lead byte */
+        return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry));  /* only a real lead byte if something valid can follow */
+    } else {
+        uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry));  /* final entry: b ends a sequence by itself */
+        if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) {
+            return FALSE;   /* SI/SO are illegal for DBCS-only conversion */
+        } else {
+            return action!=MBCS_STATE_ILLEGAL;  /* any other non-illegal final action makes b a valid single byte */
+        }
+    }
+}
+
  U_CFUNC void
  ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                            UErrorCode *pErrorCode) {
@@ -1966,6 +2025,34 @@ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
              sourceIndex=nextSourceIndex;
          } else if(U_FAILURE(*pErrorCode)) {
              /* callback(illegal) */
+            if(byteIndex>1) {
+                /*
+                 * Ticket 5691: consistent illegal sequences:
+                 * - We include at least the first byte in the illegal sequence.
+                 * - If any of the non-initial bytes could be the start of a character,
+                 *   we stop the illegal sequence before the first one of those.
+                 */
+                UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
+                int8_t i;
+                for(i=1;
+                    i<byteIndex && !isSingleOrLead(stateTable, state, isDBCSOnly, bytes[i]);
+                    ++i) {}
+                if(i<byteIndex) {
+                    /* Back out some bytes. */
+                    int8_t backOutDistance=byteIndex-i;
+                    int32_t bytesFromThisBuffer=(int32_t)(source-(const uint8_t *)pArgs->source);
+                    byteIndex=i;  /* length of reported illegal byte sequence */
+                    if(backOutDistance<=bytesFromThisBuffer) {
+                        source-=backOutDistance;
+                    } else {
+                        /* Back out bytes from the previous buffer: Need to replay them. */
+                        cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance);
+                        /* preToULength is negative! */
+                        uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength);
+                        source=(const uint8_t *)pArgs->source;
+                    }
+                }
+            }
              break;
          } else /* unassigned sequences indicated with byteIndex>0 */ {
              /* try an extension mapping */
@@ -1976,7 +2063,7 @@ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
                                &offsets, sourceIndex,
                                pArgs->flush,
                                pErrorCode);
-            sourceIndex=nextSourceIndex+(int32_t)(source-(const uint8_t *)pArgs->source);
+            sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source);
  
              if(U_FAILURE(*pErrorCode)) {
                  /* not mappable or buffer overflow */
@@ -2267,15 +2354,37 @@ ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs,
  
      if(c<0) {
          if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSource<source) {
-            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
-        }
-        if(U_FAILURE(*pErrorCode)) {
              /* incomplete character byte sequence */
              uint8_t *bytes=cnv->toUBytes;
              cnv->toULength=(int8_t)(source-lastSource);
              do {
                  *bytes++=*lastSource++;
              } while(lastSource<source);
+            *pErrorCode=U_TRUNCATED_CHAR_FOUND;
+        } else if(U_FAILURE(*pErrorCode)) {
+            /* callback(illegal) */
+            /*
+             * Ticket 5691: consistent illegal sequences:
+             * - We include at least the first byte in the illegal sequence.
+             * - If any of the non-initial bytes could be the start of a character,
+             *   we stop the illegal sequence before the first one of those.
+             */
+            UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0);
+            uint8_t *bytes=cnv->toUBytes;
+            *bytes++=*lastSource++;     /* first byte */
+            if(lastSource==source) {
+                cnv->toULength=1;
+            } else /* lastSource<source: multi-byte character */ {
+                int8_t i;
+                for(i=1;
+                    lastSource<source && !isSingleOrLead(stateTable, state, isDBCSOnly, *lastSource);
+                    ++i
+                ) {
+                    *bytes++=*lastSource++;
+                }
+                cnv->toULength=i;
+                source=lastSource;
+            }
          } else {
              /* no output because of empty input or only state changes */
              *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
diff --git a/icuSources/test/cintltst/nccbtst.c b/icuSources/test/cintltst/nccbtst.c

index d82d37b9e6dc4b65800b24f0f4bee96c2e0a5e6f..d426b989008b01cc523b397f5139e2e26590d471 100644 (file)
--- a/icuSources/test/cintltst/nccbtst.c
+++ b/icuSources/test/cintltst/nccbtst.c
@@ -1,6 +1,6 @@
  /********************************************************************
   * COPYRIGHT: 
- * Copyright (c) 1997-2006, International Business Machines Corporation and
+ * Copyright (c) 1997-2006,2008, International Business Machines Corporation and
   * others. All Rights Reserved.
   ********************************************************************/
  /*
@@ -2497,13 +2497,13 @@ static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
  
  
      static const uint8_t text943[] = {
-        0x82, 0xa9, 0x82, 0x20, /*0xc8,*/  0x61, 0x8a, 0xbf, 0x8e, 0x9a };
-    static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22,  0x5b57};
-    static const UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22,  0x5b57};
+        0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
+    static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
+    static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
      static const UChar toUnicode943stop[]= { 0x304b};
  
-    static const int32_t  fromIBM943Offssub[]  = {0, 2, 4, 5, 7};
-    static const int32_t  fromIBM943Offsskip[] = { 0, 4, 5, 7};
+    static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
+    static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
      static const int32_t  fromIBM943Offsstop[] = { 0};
  
      gInBufferSize = inputsize;
@@ -2537,9 +2537,9 @@ static void TestSingleByte(int32_t inputsize, int32_t outputsize)
  {
      static const uint8_t sampleText[] = {
          0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
-        0xff, /*0x82, 0xa9,*/ 0x32, 0x33};
-    static const UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063,  0xfffd,/*0x304b,*/ 0x0032, 0x0033};
-    static const int32_t  fromIBM943Offssub[]  = {0, 2, 3, 4, 5, 7, 8};
+        0xff, 0x32, 0x33};
+    static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
+    static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
      /*checking illegal value for ibm-943 with substitute*/ 
      gInBufferSize = inputsize;
      gOutBufferSize = outputsize;
diff --git a/icuSources/test/cintltst/nucnvtst.c b/icuSources/test/cintltst/nucnvtst.c

index 6791a1ebbec03761cc4bb2bbefdc0ebbc4c2dd22..cab2975d271379cde569a88ec49918d8d1fb72f2 100644 (file)
--- a/icuSources/test/cintltst/nucnvtst.c
+++ b/icuSources/test/cintltst/nucnvtst.c
@@ -2605,7 +2605,7 @@ TestMBCS() {
      TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
      /*Test for the condition where there is an invalid character*/
      {
-        static const uint8_t source2[]={0xa1, 0x01};
+        static const uint8_t source2[]={0xa1, 0x80};
          TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
      }
      /*Test for the condition where we have a truncated char*/
@@ -3898,11 +3898,11 @@ static void
  TestISO_2022_KR() {
      /* test input */
      static const uint16_t in[]={
-                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D
-                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04
+                    0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
+                   ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
                     ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
                     ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
-                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2
+                   ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
                     ,0x53E3,0x53E4,0x000A,0x000D};
      const UChar* uSource;
      const UChar* uSourceLimit;
diff --git a/icuSources/test/testdata/conversion.txt b/icuSources/test/testdata/conversion.txt

index a8cd8a90ac3f2379232718a2ded2f28aba178df9..45ae1e693b42fc6c18f1635e10d3f3a39928cea6 100644 (file)
--- a/icuSources/test/testdata/conversion.txt
+++ b/icuSources/test/testdata/conversion.txt
@@ -1,6 +1,6 @@
  //*******************************************************************************
  //
-//   Copyright (C) 2003-2006, International Business Machines
+//   Copyright (C) 2003-2006,2008 International Business Machines
  //   Corporation and others.  All Rights Reserved.
  //
  //   file name:  conversion.txt
@@ -48,6 +48,155 @@ conversion:table(nofallback) {
      toUnicode {
        Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" }
        Cases {
+        // Test ticket 5691: consistent illegal sequences
+        // The following test cases are for illegal character byte sequences.
+        //
+        // Unfortunately, we cannot use the Shift-JIS examples from the ticket
+        // comments because our Shift-JIS table is Windows-compatible and
+        // therefore has no illegal single bytes. Same for GBK.
+        // Instead, we use the stricter GB 18030 also for 2-byte examples.
+        // The byte sequences are generally slightly different from the ticket
+        // comment, simply using assigned characters rather than just
+        // theoretically valid sequences.
+        {
+          "gb18030",
+          :bin{ 618140813c81ff7a },
+          "a\u4e02\\x81<\\x81\\xFFz",
+          :intvector{ 0,1,3,3,3,3,4,5,5,5,5,5,5,5,5,7 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "EUC-JP",
+          :bin{ 618fb0a98fb03c8f3cb0a97a },
+          "a\u4e28\\x8F\\xB0<\\x8F<\u9022z",
+          :intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "gb18030",
+          :bin{ 618130fc318130fc8181303c3e813cfc817a },
+          "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z",
+          :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "UTF-8",
+          :bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a },
+          "a\U00040042\\xF1\\x80\\x81<\\xF1\\x80\\x81\\xFF\\xF1\\x80\\xFF<\\xF1\\xFF<>z",
+          :intvector{ 0,1,1,5,5,5,5,5,5,5,5,5,5,5,5,8,9,9,9,9,9,9,9,9,9,9,9,9,12,12,12,12,13,13,13,13,13,13,13,13,15,15,15,15,16,17,17,17,17,18,18,18,18,19,20,21 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "ISO-2022-JP",
+          :bin{ 1b24424141af4142affe41431b2842 },
+          "\u758f\\xAF\u758e\\xAF\\xFE\u790e",
+          :intvector{ 3,5,5,5,5,6,8,8,8,8,8,8,8,8,10 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "ibm-25546",
+          :bin{ 411b242943420e4141af4142affe41430f5a },
+          "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
+          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "ISO-2022-KR",
+          :bin{ 411b242943420e4141af4142affe41430f5a },
+          "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
+          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "ISO-2022-CN",
+          :bin{ 411b242941420e4141af4142affe41430f5a },
+          "AB\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
+          :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "HZ",
+          :bin{ 417e7b4141af4142affe41437e7d5a },
+          "A\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
+          :intvector{ 0,3,5,5,5,5,6,8,8,8,8,8,8,8,8,10,14 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        // Test ticket 5691: consistent illegal sequences
+        // The following test cases are for illegal escape/designator/shift sequences.
+        //
+        // ISO-2022-JP and -CN with illegal escape sequences.
+        {
+          "ISO-2022-JP",
+          :bin{ 611b24201b244241411b283f1b28427a },
+          "a\\x1B$ \u758f\\x1B\u2538z",
+          :intvector{ 0,1,1,1,1,2,3,7,9,9,9,9,10,15 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "ISO-2022-CN",
+          :bin{ 611b2429201b2429410e41410f7a },
+          "a\\x1B$) \u4eaez",
+          :intvector{ 0,1,1,1,1,2,3,4,10,13 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        // Test ticket 5691: ISO-2022-JP-2 with illegal single-shift SS2 and SS3 sequences.
+        // The first ESC N comes before its designator sequence, the last sequence is ESC+space.
+        {
+          "ISO-2022-JP-2",
+          :bin{ 4e1b4e4e1b2e414e1b4e4e4e1b204e },
+          "N\\x1BNNN\xceN\\x1B N",
+          :intvector{ 0,1,1,1,1,2,3,7,10,11,12,12,12,12,13,14 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "ISO-2022-CN-EXT",
+          :bin{ 4e1b4e4e1b242a484e1b4e4e4e4e1b204e },
+          "N\\x1BNNN\u8f0eN\\x1B N",
+          :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        {
+          "ISO-2022-CN-EXT",
+          :bin{ 4f1b4f4f1b242b494f1b4f4f4f4f1b204f },
+          "O\\x1BOOO\u492bO\\x1B O",
+          :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 },
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        // Test ticket 5691: HZ with illegal tilde sequences.
+        {
+          "HZ",
+          :bin{ 417e20427e21437e80447e7b41417e207e41427e7f41437e7d5a },
+          "A\\x7E B\\x7E!C\\x7E\\x80D\u4eae\\x7E\\x20\\x7E\u8c05\\x7E\\x7F\u64a9Z",
+          :intvector{ 0,1,1,1,1,2,3,4,4,4,4,5,6,7,7,7,7,7,7,7,7,9,                          // SBCS
+                      12,14,14,14,14,14,14,14,14,16,16,16,16,17,19,19,19,19,19,19,19,19,21, // DBCS
+                      25 },                                                                 // SBCS
+          :int{1}, :int{0}, "", "&C", :bin{""}
+        }
+        // Test ticket 5691: Example from Peter Edberg.
+        {
+          "ISO-2022-JP",
+          :bin{ 1b244230212f7e742630801b284a621b2458631b2842648061 },
+          "\u4e9c\ufffd\u7199\ufffdb\ufffd$Xcd\ufffda",
+          :intvector{ 3,5,7,9,14,15,16,17,18,22,23,24 },
+          :int{1}, :int{0}, "", "?", :bin{""}
+        }
+        // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e
+        {
+          "HZ",
+          :bin{ 7e7b21212120217e217f772100007e217e7e7d207e7e807e0a2b },
+          "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd\u3013 ~\ufffd+",
+          :intvector{ 2,4,6,8,10,12,14,15,19,20,22,25 },
+          :int{1}, :int{1}, "", "?", :bin{""}
+        }
+        // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
+        // using the Shift-JIS table for JIS X 0208 (ticket #5797)
+        {
+          "ISO-2022-JP",
+          :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 },
+          "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
+          :intvector{ 3,4,5,9,11,12,14,16,17,19,21,23,25,27 },
+          :int{1}, :int{1}, "", "?", :bin{""}
+        }
          // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets()
          {
            "ISO-8859-3",
@@ -324,7 +473,7 @@ conversion:table(nofallback) {
          {
            "ISO-2022-CN-EXT",
            :bin{ 411b4e2121 }, "\x41", :intvector{ 0 },
-          :int{1}, :int{1}, "illesc", ".", :bin{ 1b4e }
+          :int{1}, :int{1}, "illesc", ".", :bin{ 1b }
          }
          // G3 designator: recognized, but not supported for -CN (only for -CN-EXT)
          {
diff --git a/icuSources/tools/tzcode/icuzdump.vcproj b/icuSources/tools/tzcode/icuzdump.vcproj

index 87242b5f0bace105fe7a4dcdb7de090c408f050d..72122468097a2d64ec9a9d7684916552100ce7a1 100644 (file)
--- a/icuSources/tools/tzcode/icuzdump.vcproj
+++ b/icuSources/tools/tzcode/icuzdump.vcproj
@@ -1,11 +1,12 @@
  <?xml version="1.0" encoding="Windows-1252"?>
  <VisualStudioProject
         ProjectType="Visual C++"
-       Version="8.00"
+       Version="9.00"
         Name="icuzdump"
         ProjectGUID="{655F4481-B461-4DF0-AF10-0D01114A26C1}"
         RootNamespace="icuzdump"
         Keyword="Win32Proj"
+       TargetFrameworkVersion="131072"
         >
         <Platforms>
                 <Platform
@@ -77,6 +78,8 @@
                                 GenerateDebugInformation="true"
                                 ProgramDatabaseFile=".\x86\Debug/icuzdump.pdb"
                                 SubSystem="1"
+                               RandomizedBaseAddress="1"
+                               DataExecutionPrevention="0"
                                 TargetMachine="0"
                         />
                         <Tool
@@ -98,9 +101,6 @@
                         <Tool
                                 Name="VCAppVerifierTool"
                         />
-                       <Tool
-                               Name="VCWebDeploymentTool"
-                       />
                         <Tool
                                 Name="VCPostBuildEventTool"
                         />
@@ -153,6 +153,8 @@
                                 SubSystem="1"
                                 OptimizeReferences="2"
                                 EnableCOMDATFolding="2"
+                               RandomizedBaseAddress="1"
+                               DataExecutionPrevention="0"
                                 TargetMachine="1"
                         />
                         <Tool
@@ -173,9 +175,6 @@
                         <Tool
                                 Name="VCAppVerifierTool"
                         />
-                       <Tool
-                               Name="VCWebDeploymentTool"
-                       />
                         <Tool
                                 Name="VCPostBuildEventTool"
                         />
diff --git a/icuSources/tools/tzcode/localtime.c b/icuSources/tools/tzcode/localtime.c

index 1fdfbdbb312d83c5765886a1de97f00e9bd9ce6e..d186080bf351e235358278d48e8da452bfdf9049 100644 (file)
--- a/icuSources/tools/tzcode/localtime.c
+++ b/icuSources/tools/tzcode/localtime.c
@@ -5,7 +5,7 @@
  
  #ifndef lint
  #ifndef NOID
-static char    elsieid[] = "@(#)localtime.c    8.5";
+static char    elsieid[] = "@(#)localtime.c    8.9";
  #endif /* !defined NOID */
  #endif /* !defined lint */
  
@@ -136,51 +136,52 @@ struct rule {
  ** Prototypes for static functions.
  */
  
-static long            detzcode P((const char * codep));
-static time_t          detzcode64 P((const char * codep));
-static int             differ_by_repeat P((time_t t1, time_t t0));
-static const char *    getzname P((const char * strp));
-static const char *    getqzname P((const char * strp, const int delim));
-static const char *    getnum P((const char * strp, int * nump, int min,
-                               int max));
-static const char *    getsecs P((const char * strp, long * secsp));
-static const char *    getoffset P((const char * strp, long * offsetp));
-static const char *    getrule P((const char * strp, struct rule * rulep));
-static void            gmtload P((struct state * sp));
-static struct tm *     gmtsub P((const time_t * timep, long offset,
-                               struct tm * tmp));
-static struct tm *     localsub P((const time_t * timep, long offset,
-                               struct tm * tmp));
-static int             increment_overflow P((int * number, int delta));
-static int             leaps_thru_end_of P((int y));
-static int             long_increment_overflow P((long * number, int delta));
-static int             long_normalize_overflow P((long * tensptr,
-                               int * unitsptr, int base));
-static int             normalize_overflow P((int * tensptr, int * unitsptr,
-                               int base));
-static void            settzname P((void));
-static time_t          time1 P((struct tm * tmp,
-                               struct tm * (*funcp) P((const time_t *,
-                               long, struct tm *)),
-                               long offset));
-static time_t          time2 P((struct tm *tmp,
-                               struct tm * (*funcp) P((const time_t *,
-                               long, struct tm*)),
-                               long offset, int * okayp));
-static time_t          time2sub P((struct tm *tmp,
-                               struct tm * (*funcp) P((const time_t *,
-                               long, struct tm*)),
-                               long offset, int * okayp, int do_norm_secs));
-static struct tm *     timesub P((const time_t * timep, long offset,
-                               const struct state * sp, struct tm * tmp));
-static int             tmcomp P((const struct tm * atmp,
-                               const struct tm * btmp));
-static time_t          transtime P((time_t janfirst, int year,
-                               const struct rule * rulep, long offset));
-static int             tzload P((const char * name, struct state * sp,
-                               int doextend));
-static int             tzparse P((const char * name, struct state * sp,
-                               int lastditch));
+static long            detzcode(const char * codep);
+static time_t          detzcode64(const char * codep);
+static int             differ_by_repeat(time_t t1, time_t t0);
+static const char *    getzname(const char * strp);
+static const char *    getqzname(const char * strp, const int delim);
+static const char *    getnum(const char * strp, int * nump, int min,
+                               int max);
+static const char *    getsecs(const char * strp, long * secsp);
+static const char *    getoffset(const char * strp, long * offsetp);
+static const char *    getrule(const char * strp, struct rule * rulep);
+static void            gmtload(struct state * sp);
+static struct tm *     gmtsub(const time_t * timep, long offset,
+                               struct tm * tmp);
+static struct tm *     localsub(const time_t * timep, long offset,
+                               struct tm * tmp);
+static int             increment_overflow(int * number, int delta);
+static int             leaps_thru_end_of(int y);
+static int             long_increment_overflow(long * number, int delta);
+static int             long_normalize_overflow(long * tensptr,
+                               int * unitsptr, int base);
+static int             normalize_overflow(int * tensptr, int * unitsptr,
+                               int base);
+static void            settzname(void);
+static time_t          time1(struct tm * tmp,
+                               struct tm * (*funcp)(const time_t *,
+                               long, struct tm *),
+                               long offset);
+static time_t          time2(struct tm *tmp,
+                               struct tm * (*funcp)(const time_t *,
+                               long, struct tm*),
+                               long offset, int * okayp);
+static time_t          time2sub(struct tm *tmp,
+                               struct tm * (*funcp)(const time_t *,
+                               long, struct tm*),
+                               long offset, int * okayp, int do_norm_secs);
+static struct tm *     timesub(const time_t * timep, long offset,
+                               const struct state * sp, struct tm * tmp);
+static int             tmcomp(const struct tm * atmp,
+                               const struct tm * btmp);
+static time_t          transtime(time_t janfirst, int year,
+                               const struct rule * rulep, long offset);
+static int             typesequiv(const struct state * sp, int a, int b);
+static int             tzload(const char * name, struct state * sp,
+                               int doextend);
+static int             tzparse(const char * name, struct state * sp,
+                               int lastditch);
  
  #ifdef ALL_STATE
  static struct state *  lclptr;
@@ -253,7 +254,7 @@ const char * const  codep;
  }
  
  static void
-settzname P((void))
+settzname(void)
  {
         register struct state * const   sp = lclptr;
         register int                    i;
@@ -554,17 +555,51 @@ register const int                doextend;
                                         sp->ttis[sp->typecnt++] = ts.ttis[1];
                         }
         }
-       i = 2 * YEARSPERREPEAT;
-       sp->goback = sp->goahead = sp->timecnt > i;
-       sp->goback = sp->goback && sp->types[i] == sp->types[0] &&
-               differ_by_repeat(sp->ats[i], sp->ats[0]);
-       sp->goahead = sp->goahead &&
-               sp->types[sp->timecnt - 1] == sp->types[sp->timecnt - 1 - i] &&
-               differ_by_repeat(sp->ats[sp->timecnt - 1],
-                        sp->ats[sp->timecnt - 1 - i]);
+       sp->goback = sp->goahead = FALSE;
+       if (sp->timecnt > 1) {
+               for (i = 1; i < sp->timecnt; ++i)
+                       if (typesequiv(sp, sp->types[i], sp->types[0]) &&
+                               differ_by_repeat(sp->ats[i], sp->ats[0])) {
+                                       sp->goback = TRUE;
+                                       break;
+                               }
+               for (i = sp->timecnt - 2; i >= 0; --i)
+                       if (typesequiv(sp, sp->types[sp->timecnt - 1],
+                               sp->types[i]) &&
+                               differ_by_repeat(sp->ats[sp->timecnt - 1],
+                               sp->ats[i])) {
+                                       sp->goahead = TRUE;
+                                       break;
+               }
+       }
         return 0;
  }
  
+static int      /* result of comparing time types a and b of sp field by field */
+typesequiv(sp, a, b)
+const struct state * const     sp;     /* state whose ttis[] and chars[] are read */
+const int                      a;      /* first index into sp->ttis */
+const int                      b;      /* second index into sp->ttis */
+{
+       register int    result;
+
+       if (sp == NULL ||
+               a < 0 || a >= sp->typecnt ||
+               b < 0 || b >= sp->typecnt)
+                       result = FALSE; /* null sp or an index outside 0..typecnt-1 */
+       else {
+               register const struct ttinfo *  ap = &sp->ttis[a];
+               register const struct ttinfo *  bp = &sp->ttis[b];
+               result = ap->tt_gmtoff == bp->tt_gmtoff &&      /* every field must match */
+                       ap->tt_isdst == bp->tt_isdst &&
+                       ap->tt_ttisstd == bp->tt_ttisstd &&
+                       ap->tt_ttisgmt == bp->tt_ttisgmt &&
+                       strcmp(&sp->chars[ap->tt_abbrind],      /* compare string contents, not the indices */
+                       &sp->chars[bp->tt_abbrind]) == 0;
+       }
+       return result;
+}
+
  static const int       mon_lengths[2][MONSPERYEAR] = {
         { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 },
         { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }
@@ -1120,7 +1155,7 @@ struct state * const      sp;
  static
  #endif /* !defined STD_INSPIRED */
  void
-tzsetwall P((void))
+tzsetwall(void)
  {
         if (lcl_is_set < 0)
                 return;
@@ -1141,7 +1176,7 @@ tzsetwall P((void))
  }
  
  void
-tzset P((void))
+tzset(void)
  {
         register const char *   name;
  
@@ -1639,7 +1674,7 @@ register const struct tm * const btmp;
  static time_t
  time2sub(tmp, funcp, offset, okayp, do_norm_secs)
  struct tm * const      tmp;
-struct tm * (* const   funcp) P((const time_t*, long, struct tm*));
+struct tm * (* const   funcp)(const time_t*, long, struct tm*);
  const long             offset;
  int * const            okayp;
  const int              do_norm_secs;
@@ -1781,12 +1816,8 @@ const int                do_norm_secs;
                 ** It's okay to guess wrong since the guess
                 ** gets checked.
                 */
-               /*
-               ** The (void *) casts are the benefit of SunOS 3.3 on Sun 2's.
-               */
                 sp = (const struct state *)
-                       (((void *) funcp == (void *) localsub) ?
-                       lclptr : gmtptr);
+                       ((funcp == localsub) ? lclptr : gmtptr);
  #ifdef ALL_STATE
                 if (sp == NULL)
                         return WRONG;
@@ -1827,7 +1858,7 @@ label:
  static time_t
  time2(tmp, funcp, offset, okayp)
  struct tm * const      tmp;
-struct tm * (* const   funcp) P((const time_t*, long, struct tm*));
+struct tm * (* const   funcp)(const time_t*, long, struct tm*);
  const long             offset;
  int * const            okayp;
  {
@@ -1845,7 +1876,7 @@ int * const               okayp;
  static time_t
  time1(tmp, funcp, offset)
  struct tm * const      tmp;
-struct tm * (* const   funcp) P((const time_t *, long, struct tm *));
+struct tm * (* const   funcp)(const time_t *, long, struct tm *);
  const long             offset;
  {
         register time_t                 t;
@@ -1880,11 +1911,7 @@ const long               offset;
         ** We try to divine the type they started from and adjust to the
         ** type they need.
         */
-       /*
-       ** The (void *) casts are the benefit of SunOS 3.3 on Sun 2's.
-       */
-       sp = (const struct state *) (((void *) funcp == (void *) localsub) ?
-               lclptr : gmtptr);
+       sp = (const struct state *) ((funcp == localsub) ?  lclptr : gmtptr);
  #ifdef ALL_STATE
         if (sp == NULL)
                 return WRONG;
diff --git a/icuSources/tools/tzcode/private.h b/icuSources/tools/tzcode/private.h

index 2837b70c101e59df916d3bb49b44a21aa0d797f5..008d468ac7876d553156cbd03d9481811a648021 100644 (file)
--- a/icuSources/tools/tzcode/private.h
+++ b/icuSources/tools/tzcode/private.h
@@ -21,7 +21,7 @@
  
  #ifndef lint
  #ifndef NOID
-static char    privatehid[] = "@(#)private.h   8.2";
+static char    privatehid[] = "@(#)private.h   8.6";
  #endif /* !defined NOID */
  #endif /* !defined lint */
  
@@ -48,10 +48,6 @@ static char  privatehid[] = "@(#)private.h   8.2";
  #define HAVE_SETTIMEOFDAY      3
  #endif /* !defined HAVE_SETTIMEOFDAY */
  
-#ifndef HAVE_STRERROR
-#define HAVE_STRERROR          1
-#endif /* !defined HAVE_STRERROR */
-
  #ifndef HAVE_SYMLINK
  #define HAVE_SYMLINK           1
  #endif /* !defined HAVE_SYMLINK */
@@ -109,17 +105,15 @@ static char       privatehid[] = "@(#)private.h   8.2";
  #endif /* !defined WEXITSTATUS */
  
  #if HAVE_UNISTD_H
-#include "unistd.h"    /* for F_OK and R_OK */
+#include "unistd.h"    /* for F_OK, R_OK, and other POSIX goodness */
  #endif /* HAVE_UNISTD_H */
  
-#if !HAVE_UNISTD_H
  #ifndef F_OK
  #define F_OK   0
  #endif /* !defined F_OK */
  #ifndef R_OK
  #define R_OK   4
  #endif /* !defined R_OK */
-#endif /* !HAVE_UNISTD_H */
  
  /* Unlike <ctype.h>'s isdigit, this also works if c < 0 | c > UCHAR_MAX. */
  #define is_digit(c) ((unsigned)(c) - '0' <= 9)
@@ -164,70 +158,6 @@ typedef long               int_fast64_t;
  ** Workarounds for compilers/systems.
  */
  
-/*
-** If your compiler lacks prototypes, "#define P(x) ()".
-*/
-
-#ifndef P
-#define P(x)   x
-#endif /* !defined P */
-
-/*
-** SunOS 4.1.1 headers lack EXIT_SUCCESS.
-*/
-
-#ifndef EXIT_SUCCESS
-#define EXIT_SUCCESS   0
-#endif /* !defined EXIT_SUCCESS */
-
-/*
-** SunOS 4.1.1 headers lack EXIT_FAILURE.
-*/
-
-#ifndef EXIT_FAILURE
-#define EXIT_FAILURE   1
-#endif /* !defined EXIT_FAILURE */
-
-/*
-** SunOS 4.1.1 headers lack FILENAME_MAX.
-*/
-
-#ifndef FILENAME_MAX
-
-#ifndef MAXPATHLEN
-#ifdef unix
-#include "sys/param.h"
-#endif /* defined unix */
-#endif /* !defined MAXPATHLEN */
-
-#ifdef MAXPATHLEN
-#define FILENAME_MAX   MAXPATHLEN
-#endif /* defined MAXPATHLEN */
-#ifndef MAXPATHLEN
-#define FILENAME_MAX   1024            /* Pure guesswork */
-#endif /* !defined MAXPATHLEN */
-
-#endif /* !defined FILENAME_MAX */
-
-/*
-** SunOS 4.1.1 libraries lack remove.
-*/
-
-#ifndef remove
-extern int     unlink P((const char * filename));
-#define remove unlink
-#endif /* !defined remove */
-
-/*
-** Some ancient errno.h implementations don't declare errno.
-** But some newer errno.h implementations define it as a macro.
-** Fix the former without affecting the latter.
-*/
-
-#ifndef errno
-extern int errno;
-#endif /* !defined errno */
-
  /*
  ** Some time.h implementations don't declare asctime_r.
  ** Others might define it as a macro.
@@ -235,21 +165,21 @@ extern int errno;
  */
  
  #ifndef asctime_r
-extern char *  asctime_r();
+extern char *  asctime_r(struct tm const *, char *);
  #endif
  
  /*
  ** Private function declarations.
  */
  
-char *         icalloc P((int nelem, int elsize));
-char *         icatalloc P((char * old, const char * new));
-char *         icpyalloc P((const char * string));
-char *         imalloc P((int n));
-void *         irealloc P((void * pointer, int size));
-void           icfree P((char * pointer));
-void           ifree P((char * pointer));
-const char *   scheck P((const char * string, const char * format));
+char *         icalloc(int nelem, int elsize);
+char *         icatalloc(char * old, const char * new);
+char *         icpyalloc(const char * string);
+char *         imalloc(int n);
+void *         irealloc(void * pointer, int size);
+void           icfree(char * pointer);
+void           ifree(char * pointer);
+const char *   scheck(const char * string, const char * format);
  
  /*
  ** Finally, some convenience items.
@@ -337,8 +267,8 @@ const char *        scheck P((const char * string, const char * format));
  #if HAVE_INCOMPATIBLE_CTIME_R
  #undef asctime_r
  #undef ctime_r
-char *asctime_r P((struct tm const *, char *));
-char *ctime_r P((time_t const *, char *));
+char *asctime_r(struct tm const *, char *);
+char *ctime_r(time_t const *, char *);
  #endif /* HAVE_INCOMPATIBLE_CTIME_R */
  
  #ifndef YEARSPERREPEAT
@@ -356,7 +286,7 @@ char *ctime_r P((time_t const *, char *));
  #ifndef SECSPERREPEAT
  #define SECSPERREPEAT          ((int_fast64_t) YEARSPERREPEAT * (int_fast64_t) AVGSECSPERYEAR)
  #endif /* !defined SECSPERREPEAT */
- 
+
  #ifndef SECSPERREPEAT_BITS
  #define SECSPERREPEAT_BITS     34      /* ceil(log2(SECSPERREPEAT)) */
  #endif /* !defined SECSPERREPEAT_BITS */
diff --git a/icuSources/tools/tzcode/zdump.c b/icuSources/tools/tzcode/zdump.c

index c7199acb8725cc2a08d33c11acae1f2826186f85..b27480dd8894568f6c5a951edd245cab2d75641d 100644 (file)
--- a/icuSources/tools/tzcode/zdump.c
+++ b/icuSources/tools/tzcode/zdump.c
@@ -1,4 +1,4 @@
-static char    elsieid[] = "@(#)zdump.c        8.3";
+static char    elsieid[] = "@(#)zdump.c        8.8";
  
  /*
  ** This code has been made independent of the rest of the time
@@ -104,6 +104,9 @@ static char elsieid[] = "@(#)zdump.c        8.3";
  #define SECSPERNYEAR   (SECSPERDAY * DAYSPERNYEAR)
  #define SECSPERLYEAR   (SECSPERNYEAR + SECSPERDAY)
  
+#ifndef HAVE_GETTEXT
+#define HAVE_GETTEXT 0
+#endif
  #if HAVE_GETTEXT
  #include "locale.h"    /* for setlocale */
  #include "libintl.h"
@@ -145,13 +148,9 @@ static char        elsieid[] = "@(#)zdump.c        8.3";
  #define TZ_DOMAIN "tz"
  #endif /* !defined TZ_DOMAIN */
  
-#ifndef P
-#define P(x)   x
-#endif /* !defined P */
-
  extern char ** environ;
-extern int     getopt P((int argc, char * const argv[],
-                       const char * options));
+extern int     getopt(int argc, char * const argv[],
+                       const char * options);
  extern char *  optarg;
  extern int     optind;
  extern char *  tzname[2];
@@ -162,26 +161,26 @@ static size_t     longest;
  static char *  progname;
  static int     warned;
  
-static char *  abbr P((struct tm * tmp));
-static void    abbrok P((const char * abbrp, const char * zone));
-static long    delta P((struct tm * newp, struct tm * oldp));
-static void    dumptime P((const struct tm * tmp));
-static time_t  hunt P((char * name, time_t lot, time_t hit));
-static void    setabsolutes P((void));
-static void    show P((char * zone, time_t t, int v));
-static const char *    tformat P((void));
-static time_t  yeartot P((long y));
+static char *  abbr(struct tm * tmp);
+static void    abbrok(const char * abbrp, const char * zone);
+static long    delta(struct tm * newp, struct tm * oldp);
+static void    dumptime(const struct tm * tmp);
+static time_t  hunt(char * name, time_t lot, time_t    hit);
+static void    setabsolutes(void);
+static void    show(char * zone, time_t t, int v);
+static const char *    tformat(void);
+static time_t  yeartot(long y);
  #ifdef ICU
  typedef struct listentry {
         char *          name;
         struct listentry *      next;
  } listentry;
  
-static time_t  huntICU P((char * name, time_t lot, time_t      hit, FILE *fp));
-static void    dumptimeICU P((FILE * fp, time_t t));
-static void    showICU P((FILE * fp, char * zone, time_t t1, time_t t2));
-static int     getall P((struct listentry ** namelist));
-static void getzones P((char * basedir, char * subdir, struct listentry ** last, int * count));
+static time_t  huntICU(char * name, time_t lot, time_t hit, FILE *fp);
+static void    dumptimeICU(FILE * fp, time_t t);
+static void    showICU(FILE * fp, char * zone, time_t t1, time_t t2);
+static int     getall(struct listentry ** namelist);
+static void getzones(char * basedir, char * subdir, struct listentry ** last, int * count);
  #endif
  
  #ifndef TYPECHECK
@@ -259,6 +258,17 @@ const char * const zone;
         warned = TRUE;
  }
  
+static void
+usage(const char *progname, FILE *stream, int status)
+{
+       (void) fprintf(stream,
+_("%s: usage is %s [ --version ] [ --help ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n\
+\n\
+Report bugs to tz@elsie.nci.nih.gov.\n"),
+                      progname, progname);
+       exit(status);
+}
+
  int
  main(argc, argv)
  int    argc;
@@ -303,6 +313,8 @@ char *      argv[];
                 if (strcmp(argv[i], "--version") == 0) {
                         (void) printf("%s\n", elsieid);
                         exit(EXIT_SUCCESS);
+               } else if (strcmp(argv[i], "--help") == 0) {
+                       usage(progname, stdout, EXIT_SUCCESS);
                 }
         vflag = 0;
         cutarg = NULL;
@@ -359,10 +371,7 @@ char *     argv[];
                 else    cutarg = optarg;
         if ((c != EOF && c != -1) ||
                 (optind == argc - 1 && strcmp(argv[optind], "=") == 0)) {
-                       (void) fprintf(stderr,
-_("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"),
-                               progname, progname);
-                       exit(EXIT_FAILURE);
+                       usage(progname, stderr, EXIT_FAILURE);
         }
  #endif
         if (vflag) {
@@ -498,13 +507,9 @@ _("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"),
                         (void) strncpy(buf, abbr(&tm), (sizeof buf) - 1);
                 }
                 for ( ; ; ) {
-                       if (t >= cuthitime)
+                       if (t >= cuthitime || t >= cuthitime - SECSPERHOUR * 12)
                                 break;
                         newt = t + SECSPERHOUR * 12;
-                       if (newt >= cuthitime)
-                               break;
-                       if (newt <= t)
-                               break;
                         newtmp = localtime(&newt);
                         if (newtmp != NULL)
                                 newtm = *newtmp;
@@ -588,7 +593,7 @@ _("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"),
  }
  
  static void
-setabsolutes()
+setabsolutes(void)
  {
         if (0.5 == (time_t) 0.5) {
                 /*
@@ -617,7 +622,7 @@ _("%s: use of -v on system with floating time_t other than float or double\n"),
                         t = t1;
                         t1 = 2 * t1 + 1;
                 }
-                 
+
                 absolute_max_time = t;
                 t = -t;
                 absolute_min_time = t - 1;
@@ -786,7 +791,7 @@ struct tm * tmp;
  */
  
  static const char *
-tformat()
+tformat(void)
  {
         if (0.5 == (time_t) 0.5) {      /* floating */
                 if (sizeof (time_t) > sizeof (double))
diff --git a/icuSources/tools/tzcode/zic.c b/icuSources/tools/tzcode/zic.c

index a753515559cae533998bbb264bebf44a2fee0c2d..fcc3c82367119ac0c7360d7d8b07fa1638ff6fc2 100644 (file)
--- a/icuSources/tools/tzcode/zic.c
+++ b/icuSources/tools/tzcode/zic.c
@@ -3,7 +3,7 @@
  ** 2006-07-17 by Arthur David Olson.
  */
  
-static char    elsieid[] = "@(#)zic.c  8.7";
+static char    elsieid[] = "@(#)zic.c  8.18";
  
  #include "private.h"
  #include "locale.h"
@@ -113,76 +113,80 @@ struct zone {
         zic_t           z_untiltime;
  };
  
-extern int     getopt P((int argc, char * const argv[],
-                       const char * options));
-extern int     link P((const char * fromname, const char * toname));
+extern int     getopt(int argc, char * const argv[],
+                       const char * options);
+extern int     link(const char * fromname, const char * toname);
  extern char *  optarg;
  extern int     optind;
  
-static void    addtt P((zic_t starttime, int type));
+static void    addtt(zic_t starttime, int type);
  #ifdef ICU
-static int     addtype P((long gmtoff, long rawoff, long dstoff,
+static int     addtype(long gmtoff, long rawoff, long dstoff,
                                 const char * abbr, int isdst,
-                               int ttisstd, int ttisgmt));
+                               int ttisstd, int ttisgmt);
  #else
-static int     addtype P((long gmtoff, const char * abbr, int isdst,
-                               int ttisstd, int ttisgmt));
+static int     addtype(long gmtoff, const char * abbr, int isdst,
+                               int ttisstd, int ttisgmt);
  #endif
-static void    leapadd P((zic_t t, int positive, int rolling, int count));
-static void    adjleap P((void));
-static void    associate P((void));
-static int     ciequal P((const char * ap, const char * bp));
-static void    convert P((long val, char * buf));
-static void    convert64 P((zic_t val, char * buf));
-static void    dolink P((const char * fromfile, const char * tofile));
-static void    doabbr P((char * abbr, const char * format,
-                       const char * letters, int isdst, int doquotes));
-static void    eat P((const char * name, int num));
-static void    eats P((const char * name, int num,
-                       const char * rname, int rnum));
-static long    eitol P((int i));
-static void    error P((const char * message));
-static char ** getfields P((char * buf));
-static long    gethms P((const char * string, const char * errstrng,
-                       int signable));
-static void    infile P((const char * filename));
-static void    inleap P((char ** fields, int nfields));
-static void    inlink P((char ** fields, int nfields));
-static void    inrule P((char ** fields, int nfields));
-static int     inzcont P((char ** fields, int nfields));
-static int     inzone P((char ** fields, int nfields));
-static int     inzsub P((char ** fields, int nfields, int iscont));
-static int     is32 P((zic_t x));
-static int     itsabbr P((const char * abbr, const char * word));
-static int     itsdir P((const char * name));
-static int     lowerit P((int c));
-static char *  memcheck P((char * tocheck));
-static int     mkdirs P((char * filename));
-static void    newabbr P((const char * abbr));
-static long    oadd P((long t1, long t2));
-static void    outzone P((const struct zone * zp, int ntzones));
-static void    puttzcode P((long code, FILE * fp));
-static void    puttzcode64 P((zic_t code, FILE * fp));
-static int     rcomp P((const void * leftp, const void * rightp));
-static zic_t   rpytime P((const struct rule * rp, int wantedy));
-static void    rulesub P((struct rule * rp,
+static void    leapadd(zic_t t, int positive, int rolling, int count);
+static void    adjleap(void);
+static void    associate(void);
+static int     ciequal(const char * ap, const char * bp);
+static void    convert(long val, char * buf);
+static void    convert64(zic_t val, char * buf);
+static void    dolink(const char * fromfield, const char * tofield);
+static void    doabbr(char * abbr, const char * format,
+                       const char * letters, int isdst, int doquotes);
+static void    eat(const char * name, int num);
+static void    eats(const char * name, int num,
+                       const char * rname, int rnum);
+static long    eitol(int i);
+static void    error(const char * message);
+static char ** getfields(char * buf);
+static long    gethms(const char * string, const char * errstrng,
+                       int signable);
+static void    infile(const char * filename);
+static void    inleap(char ** fields, int nfields);
+static void    inlink(char ** fields, int nfields);
+static void    inrule(char ** fields, int nfields);
+static int     inzcont(char ** fields, int nfields);
+static int     inzone(char ** fields, int nfields);
+static int     inzsub(char ** fields, int nfields, int iscont);
+static int     is32(zic_t x);
+static int     itsabbr(const char * abbr, const char * word);
+static int     itsdir(const char * name);
+static int     lowerit(int c);
+static char *  memcheck(char * tocheck);
+static int     mkdirs(char * filename);
+static void    newabbr(const char * abbr);
+static long    oadd(long t1, long t2);
+static void    outzone(const struct zone * zp, int ntzones);
+static void    puttzcode(long code, FILE * fp);
+static void    puttzcode64(zic_t code, FILE * fp);
+static int     rcomp(const void * leftp, const void * rightp);
+static zic_t   rpytime(const struct rule * rp, int wantedy);
+static void    rulesub(struct rule * rp,
                         const char * loyearp, const char * hiyearp,
                         const char * typep, const char * monthp,
-                       const char * dayp, const char * timep));
-static int     stringoffset P((char * result, long offset));
-static int     stringrule P((char * result, const struct rule * rp,
-                       long dstoff, long gmtoff));
-static void    stringzone P((char * result,
-                       const struct zone * zp, int ntzones));
-static void    setboundaries P((void));
-static zic_t   tadd P((zic_t t1, long t2));
-static void    usage P((void));
-static void    writezone P((const char * name, const char * string));
-static int     yearistype P((int year, const char * type));
-
-#if !HAVE_STRERROR
-static char *  strerror P((int));
-#endif /* !HAVE_STRERROR */
+                       const char * dayp, const char * timep);
+static int     stringoffset(char * result, long offset);
+static int     stringrule(char * result, const struct rule * rp,
+                       long dstoff, long gmtoff);
+static void    stringzone(char * result,
+                       const struct zone * zp, int ntzones);
+static void    setboundaries(void);
+static zic_t   tadd(zic_t t1, long t2);
+static void    usage(FILE *stream, int status);
+static void    writezone(const char * name, const char * string);
+static int     yearistype(int year, const char * type);
+#ifdef ICU
+static void    emit_icu_zone(FILE* f, const char* zoneName, int zoneOffset,
+                                       const struct rule* rule,
+                                       int ruleIndex, int startYear);
+static void    emit_icu_link(FILE* f, const char* from, const char* to);
+static void    emit_icu_rule(FILE* f, const struct rule* r, int ruleIndex);
+static int     add_icu_final_rules(const struct rule* r1, const struct rule* r2);
+#endif
  
  static int             charcnt;
  static int             errors;
@@ -308,19 +312,16 @@ struct lookup {
  };
  
  #ifdef ICU
-
  /* Indices into rules[] for final rules.  They will occur in pairs,
   * with finalRules[i] occurring before finalRules[i+1] in the year.
   * Each zone need only store a start year, a standard offset, and an
   * index into finalRules[].  FinalRules[] are aliases into rules[]. */
-
-static const struct rule **   finalRules;
-static int                    finalRulesCount;
-
+static const struct rule **    finalRules;
+static int                                     finalRulesCount;
  #endif
  
-static struct lookup const *   byword P((const char * string,
-                                       const struct lookup * lp));
+static struct lookup const *   byword(const char * string,
+                                       const struct lookup * lp);
  
  static struct lookup const     line_codes[] = {
         { "Rule",       LC_RULE },
@@ -442,19 +443,6 @@ char * const       ptr;
  ** Error handling.
  */
  
-#if !HAVE_STRERROR
-static char *
-strerror(errnum)
-int    errnum;
-{
-       extern char *   sys_errlist[];
-       extern int      sys_nerr;
-
-       return (errnum > 0 && errnum <= sys_nerr) ?
-               sys_errlist[errnum] : _("Unknown system error");
-}
-#endif /* !HAVE_STRERROR */
-
  static void
  eats(name, num, rname, rnum)
  const char * const     name;
@@ -508,69 +496,87 @@ const char * const        string;
  }
  
  static void
-usage P((void))
+usage(FILE *stream, int status)
  {
-       (void) fprintf(stderr, _("%s: usage is %s \
-[ --version ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n\
-\t[ -d directory ] [ -L leapseconds ] [ -y yearistype ] [ filename ... ]\n"),
-               progname, progname);
-       exit(EXIT_FAILURE);
+       (void) fprintf(stream, _("%s: usage is %s \
+[ --version ] [ --help ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n\
+\t[ -d directory ] [ -L leapseconds ] [ -y yearistype ] [ filename ... ]\n\
+\n\
+Report bugs to tz@elsie.nci.nih.gov.\n"),
+                      progname, progname);
+       exit(status);
  }
  
  #ifdef ICU
-
  /* File into which we will write supplemental ICU data. */
-static FILE *                 icuFile;
-
-void emit_icu_zone(FILE* f, const char* zoneName, int zoneOffset,
-                   const struct rule* rule,
-                   int ruleIndex, int startYear) {
-    /* machine-readable section */
-    fprintf(f, "zone %s %d %d %s", zoneName, zoneOffset, startYear, rule->r_name);
-
-    /* human-readable section */
-    fprintf(f, " # zone %s, offset %d, year >= %d, rule %s (%d)\n",
-            zoneName, zoneOffset, startYear,
-            rule->r_name, ruleIndex);
+static FILE *  icuFile;
+
+static void
+emit_icu_zone(FILE* f, const char* zoneName, int zoneOffset,
+                                       const struct rule* rule,
+                                       int ruleIndex, int startYear) {
+       /* machine-readable section */
+       fprintf(f, "zone %s %d %d %s", zoneName, zoneOffset, startYear, rule->r_name);
+
+       /* human-readable section */
+       fprintf(f, " # zone %s, offset %d, year >= %d, rule %s (%d)\n",
+                       zoneName, zoneOffset, startYear,
+                       rule->r_name, ruleIndex);
  }
  
-void emit_icu_link(FILE* f, const char* from, const char* to) {
-    /* machine-readable section */
-    fprintf(f, "link %s %s\n", from, to);
+static void
+emit_icu_link(FILE* f, const char* from, const char* to) {
+       /* machine-readable section */
+       fprintf(f, "link %s %s\n", from, to);
  }
  
  static const char* DYCODE[] = {"DOM", "DOWGEQ", "DOWLEQ"};
  
-void emit_icu_rule(FILE* f, const struct rule* r, int ruleIndex) {
-    if (r->r_yrtype != NULL) {
-        warning("year types not supported by ICU");
-        fprintf(stderr, "rule %s, file %s, line %d\n",
-                r->r_name, r->r_filename, r->r_linenum);
+static void
+emit_icu_rule(FILE* f, const struct rule* r, int ruleIndex) {
+       if (r->r_yrtype != NULL) {
+               warning("year types not supported by ICU");
+               fprintf(stderr, "rule %s, file %s, line %d\n",
+                               r->r_name, r->r_filename, r->r_linenum);
      }
  
-    /* machine-readable section */
-    fprintf(f, "rule %s %s %d %d %d %d %d %d %d",
-            r->r_name, DYCODE[r->r_dycode],
-            r->r_month, r->r_dayofmonth,
-            (r->r_dycode == DC_DOM ? -1 : r->r_wday),
-            r->r_tod, r->r_todisstd, r->r_todisgmt, r->r_stdoff
-            );
-
-    /* human-readable section */
-    fprintf(f, " # %d: %s, file %s, line %d",
-            ruleIndex, r->r_name, r->r_filename, r->r_linenum);
-    fprintf(f, ", mode %s", DYCODE[r->r_dycode]);
-    fprintf(f, ", %s, dom %d", mon_names[r->r_month].l_word, r->r_dayofmonth);
-    if (r->r_dycode != DC_DOM) {
-        fprintf(f, ", %s", wday_names[r->r_wday].l_word);
-    }
-    fprintf(f, ", time %d", r->r_tod);
-    fprintf(f, ", isstd %d", r->r_todisstd);
-    fprintf(f, ", isgmt %d", r->r_todisgmt);
-    fprintf(f, ", offset %ld", r->r_stdoff);
-    fprintf(f, "\n");
+       /* machine-readable section */
+       fprintf(f, "rule %s %s %d %d %d %ld %d %d %ld",
+                       r->r_name, DYCODE[r->r_dycode],
+                       r->r_month, r->r_dayofmonth,
+                       (r->r_dycode == DC_DOM ? -1 : r->r_wday),
+                       r->r_tod, r->r_todisstd, r->r_todisgmt, r->r_stdoff
+                       );
+
+       /* human-readable section */
+       fprintf(f, " # %d: %s, file %s, line %d",
+                       ruleIndex, r->r_name, r->r_filename, r->r_linenum);
+       fprintf(f, ", mode %s", DYCODE[r->r_dycode]);
+       fprintf(f, ", %s, dom %d", mon_names[r->r_month].l_word, r->r_dayofmonth);
+       if (r->r_dycode != DC_DOM) {
+               fprintf(f, ", %s", wday_names[r->r_wday].l_word);
+       }
+       fprintf(f, ", time %ld", r->r_tod);
+       fprintf(f, ", isstd %d", r->r_todisstd);
+       fprintf(f, ", isgmt %d", r->r_todisgmt);
+       fprintf(f, ", offset %ld", r->r_stdoff);
+       fprintf(f, "\n");
  }
  
+static int
+add_icu_final_rules(const struct rule* r1, const struct rule* r2) {
+       int i;
+
+       for (i=0; i<finalRulesCount; ++i) { /* i+=2 should work too */
+               if (r1==finalRules[i]) return i; /* [sic] pointer comparison */
+       }
+
+       finalRules = (const struct rule**) (void*) erealloc((char *) finalRules,
+                               (finalRulesCount + 2) * sizeof(*finalRules));
+       finalRules[finalRulesCount++] = r1;
+       finalRules[finalRulesCount++] = r2;
+       return finalRulesCount - 2;
+}
  #endif
  
  static const char *    psxrules;
@@ -608,11 +614,13 @@ char *    argv[];
                 if (strcmp(argv[i], "--version") == 0) {
                         (void) printf("%s\n", elsieid);
                         exit(EXIT_SUCCESS);
+               } else if (strcmp(argv[i], "--help") == 0) {
+                       usage(stdout, EXIT_SUCCESS);
                 }
         while ((c = getopt(argc, argv, "d:l:p:L:vsy:")) != EOF && c != -1)
                 switch (c) {
                         default:
-                               usage();
+                               usage(stderr, EXIT_FAILURE);
                         case 'd':
                                 if (directory == NULL)
                                         directory = optarg;
@@ -671,7 +679,7 @@ _("%s: More than one -L option specified\n"),
                                 break;
                 }
         if (optind == argc - 1 && strcmp(argv[optind], "=") == 0)
-               usage();        /* usage message by request */
+               usage(stderr, EXIT_FAILURE);    /* usage message by request */
         if (directory == NULL)
                 directory = TZDIR;
         if (yitcommand == NULL)
@@ -686,11 +694,11 @@ _("%s: More than one -L option specified\n"),
  
  #ifdef ICU
         if ((icuFile = fopen(ICU_ZONE_FILE, "w")) == NULL) {
-            const char *e = strerror(errno);
-            (void) fprintf(stderr, _("%s: Can't open %s: %s\n"),
-                           progname, ICU_ZONE_FILE, e);
-            (void) exit(EXIT_FAILURE);
-        }
+               const char *e = strerror(errno);
+               (void) fprintf(stderr, _("%s: Can't open %s: %s\n"),
+                                               progname, ICU_ZONE_FILE, e);
+               (void) exit(EXIT_FAILURE);
+       }
  #endif
         for (i = optind; i < argc; ++i)
                 infile(argv[i]);
@@ -712,7 +720,7 @@ _("%s: More than one -L option specified\n"),
                 eat(links[i].l_filename, links[i].l_linenum);
                 dolink(links[i].l_from, links[i].l_to);
  #ifdef ICU
-                emit_icu_link(icuFile, links[i].l_from, links[i].l_to);
+               emit_icu_link(icuFile, links[i].l_from, links[i].l_to);
  #endif
                 if (noise)
                         for (j = 0; j < nlinks; ++j)
@@ -729,34 +737,34 @@ _("%s: More than one -L option specified\n"),
                 dolink(psxrules, TZDEFRULES);
         }
  #ifdef ICU
-        for (i=0; i<finalRulesCount; ++i) {
-            emit_icu_rule(icuFile, finalRules[i], i);
-        }
+       for (i=0; i<finalRulesCount; ++i) {
+               emit_icu_rule(icuFile, finalRules[i], i);
+       }
  #endif /*ICU*/
         return (errors == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
  }
  
  static void
-dolink(fromfile, tofile)
-const char * const     fromfile;
-const char * const     tofile;
+dolink(fromfield, tofield)
+const char * const     fromfield;
+const char * const     tofield;
  {
         register char * fromname;
         register char * toname;
  
-       if (fromfile[0] == '/')
-               fromname = ecpyalloc(fromfile);
+       if (fromfield[0] == '/')
+               fromname = ecpyalloc(fromfield);
         else {
                 fromname = ecpyalloc(directory);
                 fromname = ecatalloc(fromname, "/");
-               fromname = ecatalloc(fromname, fromfile);
+               fromname = ecatalloc(fromname, fromfield);
         }
-       if (tofile[0] == '/')
-               toname = ecpyalloc(tofile);
+       if (tofield[0] == '/')
+               toname = ecpyalloc(tofield);
         else {
                 toname = ecpyalloc(directory);
                 toname = ecatalloc(toname, "/");
-               toname = ecatalloc(toname, tofile);
+               toname = ecatalloc(toname, tofield);
         }
         /*
         ** We get to be careful here since
@@ -775,7 +783,7 @@ const char * const  tofile;
                 if (result != 0 &&
                         access(fromname, F_OK) == 0 &&
                         !itsdir(fromname)) {
-                               const char *s = tofile;
+                               const char *s = tofield;
                                 register char * symlinkcontents = NULL;
  
                                 while ((s = strchr(s+1, '/')) != NULL)
@@ -784,7 +792,7 @@ const char * const  tofile;
                                                 "../");
                                 symlinkcontents =
                                         ecatalloc(symlinkcontents,
-                                       fromfile);
+                                       fromname);
                                 result = symlink(symlinkcontents,
                                         toname);
                                 if (result == 0)
@@ -810,7 +818,7 @@ warning(_("hard link failed, symbolic link used"));
  #define TIME_T_BITS_IN_FILE    64
  
  static void
-setboundaries P((void))
+setboundaries(void)
  {
         register int    i;
  
@@ -852,7 +860,7 @@ const void *        cp2;
  }
  
  static void
-associate P((void))
+associate(void)
  {
         register struct zone *  zp;
         register struct rule *  rp;
@@ -1037,7 +1045,8 @@ const char *              string;
  const char * const     errstring;
  const int              signable;
  {
-       int     hh, mm, ss, sign;
+       long    hh;
+       int     mm, ss, sign;
  
         if (string == NULL || *string == '\0')
                 return 0;
@@ -1047,27 +1056,32 @@ const int               signable;
                 sign = -1;
                 ++string;
         } else  sign = 1;
-       if (sscanf(string, scheck(string, "%d"), &hh) == 1)
+       if (sscanf(string, scheck(string, "%ld"), &hh) == 1)
                 mm = ss = 0;
-       else if (sscanf(string, scheck(string, "%d:%d"), &hh, &mm) == 2)
+       else if (sscanf(string, scheck(string, "%ld:%d"), &hh, &mm) == 2)
                 ss = 0;
-       else if (sscanf(string, scheck(string, "%d:%d:%d"),
+       else if (sscanf(string, scheck(string, "%ld:%d:%d"),
                 &hh, &mm, &ss) != 3) {
                         error(errstring);
                         return 0;
         }
-       if ((hh < 0 || hh >= HOURSPERDAY ||
+       if (hh < 0 ||
                 mm < 0 || mm >= MINSPERHOUR ||
-               ss < 0 || ss > SECSPERMIN) &&
-               !(hh == HOURSPERDAY && mm == 0 && ss == 0)) {
+               ss < 0 || ss > SECSPERMIN) {
                         error(errstring);
                         return 0;
         }
-       if (noise && hh == HOURSPERDAY)
+       if (LONG_MAX / SECSPERHOUR < hh) {
+               error(_("time overflow"));
+               return 0;
+       }
+       if (noise && hh == HOURSPERDAY && mm == 0 && ss == 0)
                 warning(_("24:00 not handled by pre-1998 versions of zic"));
-       return eitol(sign) *
-               (eitol(hh * MINSPERHOUR + mm) *
-               eitol(SECSPERMIN) + eitol(ss));
+       if (noise && (hh > HOURSPERDAY ||
+               (hh == HOURSPERDAY && (mm != 0 || ss != 0))))
+warning(_("values over 24 hours not handled by pre-2007 versions of zic"));
+       return oadd(eitol(sign) * hh * eitol(SECSPERHOUR),
+                   eitol(sign) * (eitol(mm) * eitol(SECSPERMIN) + eitol(ss)));
  }
  
  static void
@@ -1767,8 +1781,8 @@ const char * const        string;
                                 (size_t) sizeof tzh.field, (size_t) 1, fp)
                 tzh = tzh0;
  #ifdef ICU
-                * (ICUZoneinfoVersion*) &tzh.tzh_reserved = TZ_ICU_VERSION;
-                (void) strncpy(tzh.tzh_magic, TZ_ICU_MAGIC, sizeof tzh.tzh_magic);
+               * (ICUZoneinfoVersion*) &tzh.tzh_reserved = TZ_ICU_VERSION;
+               (void) strncpy(tzh.tzh_magic, TZ_ICU_MAGIC, sizeof tzh.tzh_magic);
  #else
                 (void) strncpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic);
  #endif
@@ -1805,10 +1819,10 @@ const char * const      string;
                 for (i = 0; i < typecnt; ++i)
                         if (writetype[i]) {
  #ifdef ICU
-                                puttzcode((long) rawoffs[i], fp);
-                                puttzcode((long) dstoffs[i], fp);
+                               puttzcode((long) rawoffs[i], fp);
+                               puttzcode((long) dstoffs[i], fp);
  #else
-                                puttzcode((long) gmtoffs[i], fp);
+                               puttzcode(gmtoffs[i], fp);
  #endif
                                 (void) putc(isdsts[i], fp);
                                 (void) putc((unsigned char) indmap[abbrinds[i]], fp);
@@ -2078,24 +2092,6 @@ const int                        zonecount;
         }
  }
  
-#ifdef ICU
-
-int add_icu_final_rules(const struct rule* r1, const struct rule* r2) {
-    int i;
-
-    for (i=0; i<finalRulesCount; ++i) { /* i+=2 should work too */
-        if (r1==finalRules[i]) return i; /* [sic] pointer comparison */
-    }
-
-    finalRules = (const struct rule**) (void*) erealloc((char *) finalRules,
-                 (finalRulesCount + 2) * sizeof(*finalRules));
-    finalRules[finalRulesCount++] = r1;
-    finalRules[finalRulesCount++] = r2;
-    return finalRulesCount - 2;
-}
-
-#endif /*ICU*/
-
  static void
  outzone(zpfirst, zonecount)
  const struct zone * const      zpfirst;
@@ -2119,9 +2115,9 @@ const int                 zonecount;
         register int                    max_abbr_len;
         register int                    max_envvar_len;
  #ifdef ICU
-        int                  finalRuleYear, finalRuleIndex;
-        const struct rule*   finalRule1;
-        const struct rule*   finalRule2;
+       int                                             finalRuleYear, finalRuleIndex;
+       const struct rule*              finalRule1;
+       const struct rule*              finalRule2;
  #endif
  
         max_abbr_len = 2 + max_format_len + max_abbrvar_len;
@@ -2150,7 +2146,8 @@ const int                 zonecount;
         }
         for (i = 0; i < zonecount; ++i) {
                 zp = &zpfirst[i];
-               updateminmax(zp->z_untilrule.r_loyear);
+               if (i < zonecount - 1)
+                       updateminmax(zp->z_untilrule.r_loyear);
                 for (j = 0; j < zp->z_nrules; ++j) {
                         rp = &zp->z_rules[j];
                         if (rp->r_lowasnum)
@@ -2168,7 +2165,7 @@ const int                 zonecount;
  
  wp = ecpyalloc(_("no POSIX environment variable for zone"));
                 wp = ecatalloc(wp, " ");
-               wp = ecatalloc(wp, zpfirst->z_name); 
+               wp = ecatalloc(wp, zpfirst->z_name);
                 warning(wp);
                 ifree(wp);
         }
@@ -2181,8 +2178,11 @@ wp = ecpyalloc(_("no POSIX environment variable for zone"));
                 else    max_year = INT_MAX;
         }
         /*
-       ** For the benefit of older systems, generate data through 2037.
+       ** For the benefit of older systems,
+       ** generate data from 1900 through 2037.
         */
+       if (min_year > 1900)
+               min_year = 1900;
         if (max_year < 2037)
                 max_year = 2037;
         for (i = 0; i < zonecount; ++i) {
@@ -2200,45 +2200,53 @@ wp = ecpyalloc(_("no POSIX environment variable for zone"));
                 *startbuf = '\0';
                 startoff = zp->z_gmtoff;
  #ifdef ICU
-                finalRuleYear = finalRuleIndex = -1;
-                finalRule1 = finalRule2 = NULL;
-                if (i == (zonecount - 1)) { /* !useuntil */
-                    /* Look for exactly 2 rules that end at 'max' and
-                     * note them. Determine max(r_loyear) for the 2 of
-                     * them. */
-                    for (j=0; j<zp->z_nrules; ++j) {
-                        rp = &zp->z_rules[j];
-                        if (rp->r_hiyear == INT_MAX) {
-                            if (finalRule1 == NULL) {
-                                finalRule1 = rp;
-                                finalRuleYear = rp->r_loyear;
-                            } else if (finalRule2 == NULL) {
-                                finalRule2 = rp;
-                                if (rp->r_loyear > finalRuleYear) {
-                                    finalRuleYear = rp->r_loyear;
-                                }
-                            } else {
-                                error("more than two max rules found (ICU)");
-                                exit(EXIT_FAILURE);
-                            }
-                        }
-                    }
-                    if (finalRule1 != NULL && finalRule2 == NULL) {
-                        error("only one max rule found (ICU)");
-                        exit(EXIT_FAILURE);
-                    }
-                    if (finalRule1 != NULL) {
-                        /* Swap if necessary so finalRule1 occurs before
-                         * finalRule2 */
-                        if (finalRule1->r_month > finalRule2->r_month) {
-                            const struct rule* t = finalRule1;
-                            finalRule1 = finalRule2;
-                            finalRule2 = t;
-                        }
-                        /* Add final rule to our list */
-                        finalRuleIndex = add_icu_final_rules(finalRule1, finalRule2);
-                    }
-                }
+               finalRuleYear = finalRuleIndex = -1;
+               finalRule1 = finalRule2 = NULL;
+               if (i == (zonecount - 1)) { /* !useuntil */
+                       /* Look for exactly 2 rules that end at 'max' and
+                        * note them. Determine max(r_loyear) for the 2 of
+                        * them. */
+                       for (j=0; j<zp->z_nrules; ++j) {
+                               rp = &zp->z_rules[j];
+                               if (rp->r_hiyear == INT_MAX) {
+                                       if (finalRule1 == NULL) {
+                                               finalRule1 = rp;
+                                               finalRuleYear = rp->r_loyear;
+                               } else if (finalRule2 == NULL) {
+                                               finalRule2 = rp;
+                                               if (rp->r_loyear > finalRuleYear) {
+                                                       finalRuleYear = rp->r_loyear;
+                                               }
+                                       } else {
+                                               error("more than two max rules found (ICU)");
+                                               exit(EXIT_FAILURE);
+                                       }
+                               }
+                       }
+                       if (finalRule1 != NULL && finalRule2 == NULL) {
+                               error("only one max rule found (ICU)");
+                               exit(EXIT_FAILURE);
+                       }
+                       if (finalRule1 != NULL) {
+                               if (finalRule1->r_stdoff == finalRule2->r_stdoff) {
+                                       /* America/Resolute in 2009a uses a pair of rules
+                                        * which does not change the offset.  ICU ignores
+                                        * such rules without actual time transitions. */
+                                       finalRuleYear = finalRuleIndex = -1;
+                                       finalRule1 = finalRule2 = NULL; 
+                               } else {
+                                       /* Swap if necessary so finalRule1 occurs before
+                                        * finalRule2 */
+                                       if (finalRule1->r_month > finalRule2->r_month) {
+                                               const struct rule* t = finalRule1;
+                                               finalRule1 = finalRule2;
+                                               finalRule2 = t;
+                                       }
+                                       /* Add final rule to our list */
+                                       finalRuleIndex = add_icu_final_rules(finalRule1, finalRule2);
+                               }
+                       }
+               }
  #endif
  
                 if (zp->z_nrules == 0) {
@@ -2247,7 +2255,7 @@ wp = ecpyalloc(_("no POSIX environment variable for zone"));
                                 (char *) NULL, stdoff != 0, FALSE);
                         type = addtype(oadd(zp->z_gmtoff, stdoff),
  #ifdef ICU
-                                zp->z_gmtoff, stdoff,
+                               zp->z_gmtoff, stdoff,
  #endif
                                 startbuf, stdoff != 0, startttisstd,
                                 startttisgmt);
@@ -2348,40 +2356,40 @@ wp = ecpyalloc(_("no POSIX environment variable for zone"));
                                         }
                                 }
  #ifdef ICU
-                                if (year >= finalRuleYear && rp == finalRule1) {
-                                    /* We want to shift final year 1 year after
-                                     * the actual final rule takes effect (year + 1),
-                                     * because the previous type is valid until the first
-                                     * transition defined by the final rule.  Otherwise
-                                     * we may see unexpected offset shift at the
-                                     * begining of the year when the final rule takes
-                                     * effect. */
-
-                                    /* ICU currently can support signed int32 transition
-                                     * times.  Thus, the transitions in year 2038 may be
-                                     * truncated.  At this moment (tzdata2008g), only
-                                     * Rule Brazil is impacted by this limitation, because
-                                     * the final set of rules are starting in 2038.  Although
-                                     * this code put the first couple of transitions populated
-                                     * by the final rules, they will be dropped off when
-                                     * collecting transition times.  So, we need to keep
-                                     * the start year of the final rule in 2038, not 2039.
-                                     * Fortunately, the Brazil rules in 2038 and beyond use
-                                     * the same base offset/dst saving amount.  Thus, even
-                                     * we skip the first couple of transitions, the final
-                                     * rule set for 2038 works properly.  So for now,
-                                     * we do not increment the final rule start year only when
-                                     * it falls into year 2038. We need to revisit this code
-                                     * in future to fix the root cause of this problem (ICU
-                                     * resource type limitation - signed int32).
-                                     * Oct 7, 2008 - Yoshito */
-                                    int finalStartYear = (year == 2038) ? year : year + 1;
-                                    emit_icu_zone(icuFile,
-                                                  zpfirst->z_name, zp->z_gmtoff,
-                                                  rp, finalRuleIndex, finalStartYear);
-                                    /* only emit this for the first year */
-                                    finalRule1 = NULL;
-                                }
+                               if (year >= finalRuleYear && rp == finalRule1) {
+                                       /* We want to shift final year 1 year after
+                                        * the actual final rule takes effect (year + 1),
+                                        * because the previous type is valid until the first
+                                        * transition defined by the final rule.  Otherwise
+                                        * we may see unexpected offset shift at the
+                                        * beginning of the year when the final rule takes
+                                        * effect. */
+
+                                       /* ICU currently can support signed int32 transition
+                                        * times.  Thus, the transitions in year 2038 may be
+                                        * truncated.  At this moment (tzdata2008g), only
+                                        * Rule Brazil is impacted by this limitation, because
+                                        * the final set of rules are starting in 2038.  Although
+                                        * this code put the first couple of transitions populated
+                                        * by the final rules, they will be dropped off when
+                                        * collecting transition times.  So, we need to keep
+                                        * the start year of the final rule in 2038, not 2039.
+                                        * Fortunately, the Brazil rules in 2038 and beyond use
+                                        * the same base offset/dst saving amount.  Thus, even
+                                        * we skip the first couple of transitions, the final
+                                        * rule set for 2038 works properly.  So for now,
+                                        * we do not increment the final rule start year only when
+                                        * it falls into year 2038. We need to revisit this code
+                                        * in future to fix the root cause of this problem (ICU
+                                        * resource type limitation - signed int32).
+                                        * Oct 7, 2008 - Yoshito */
+                                       int finalStartYear = (year == 2038) ? year : year + 1;
+                                       emit_icu_zone(icuFile,
+                                                       zpfirst->z_name, zp->z_gmtoff,
+                                                       rp, finalRuleIndex, finalStartYear);
+                                       /* only emit this for the first year */
+                                       finalRule1 = NULL;
+                               }
  #endif
                                 eats(zp->z_filename, zp->z_linenum,
                                         rp->r_filename, rp->r_linenum);
@@ -2390,7 +2398,7 @@ wp = ecpyalloc(_("no POSIX environment variable for zone"));
                                 offset = oadd(zp->z_gmtoff, rp->r_stdoff);
  #ifdef ICU
                                 type = addtype(offset, zp->z_gmtoff, rp->r_stdoff,
-                                        ab, rp->r_stdoff != 0,
+                                       ab, rp->r_stdoff != 0,
                                         rp->r_todisstd, rp->r_todisgmt);
  #else
                                 type = addtype(offset, ab, rp->r_stdoff != 0,
@@ -2411,8 +2419,8 @@ error(_("can't determine time zone abbreviation to use just after until time"));
                         else    addtt(starttime,
  #ifdef ICU
                                         addtype(startoff,
-                                                zp->z_gmtoff, startoff - zp->z_gmtoff,
-                                                startbuf,
+                                               zp->z_gmtoff, startoff - zp->z_gmtoff,
+                                               startbuf,
                                                 startoff != zp->z_gmtoff,
                                                 startttisstd,
                                                 startttisgmt));
@@ -2508,10 +2516,10 @@ const int               ttisgmt;
                 error(_("internal error - addtype called with bad isdst/dstoff"));
                 (void) exit(EXIT_FAILURE);
         }
-        if (gmtoff != (rawoff + dstoff)) {
+       if (gmtoff != (rawoff + dstoff)) {
                 error(_("internal error - addtype called with bad gmt/raw/dstoff"));
                 (void) exit(EXIT_FAILURE);
-        }
+       }
  #endif
         /*
         ** See if there's already an entry for this zone type.
@@ -2520,7 +2528,7 @@ const int         ttisgmt;
         for (i = 0; i < typecnt; ++i) {
                 if (gmtoff == gmtoffs[i] && isdst == isdsts[i] &&
  #ifdef ICU
-                        rawoff == rawoffs[i] && dstoff == dstoffs[i] &&
+                       rawoff == rawoffs[i] && dstoff == dstoffs[i] &&
  #endif
                         strcmp(abbr, &chars[abbrinds[i]]) == 0 &&
                         ttisstd == ttisstds[i] &&
@@ -2535,10 +2543,14 @@ const int               ttisgmt;
                 error(_("too many local time types"));
                 exit(EXIT_FAILURE);
         }
+       if (! (-1L - 2147483647L <= gmtoff && gmtoff <= 2147483647L)) {
+               error(_("UTC offset out of range"));
+               exit(EXIT_FAILURE);
+       }
         gmtoffs[i] = gmtoff;
  #ifdef ICU
-        rawoffs[i] = rawoff;
-        dstoffs[i] = dstoff;
+       rawoffs[i] = rawoff;
+       dstoffs[i] = dstoff;
  #endif
         isdsts[i] = isdst;
         ttisstds[i] = ttisstd;
@@ -2589,7 +2601,7 @@ int               count;
  }
  
  static void
-adjleap P((void))
+adjleap(void)
  {
         register int    i;
         register long   last = 0;
@@ -2719,9 +2731,12 @@ register char *  cp;
                         else while ((*dp = *cp++) != '"')
                                 if (*dp != '\0')
                                         ++dp;
-                               else    error(_(
+                               else {
+                                       error(_(
                                                 "Odd number of quotation marks"
                                                 ));
+                                       exit(1);
+                               }
                 } while (*cp != '\0' && *cp != '#' &&
                         (!isascii(*cp) || !isspace((unsigned char) *cp)));
                 if (isascii(*cp) && isspace((unsigned char) *cp))
@@ -2909,7 +2924,7 @@ wp = _("time zone abbreviation differs from POSIX standard");
  
  static int
  mkdirs(argname)
-char * const   argname;
+char *         argname;
  {
         register char * name;
         register char * cp;
author	Apple <opensource@apple.com>
	Mon, 23 Feb 2009 03:45:05 +0000 (03:45 +0000)
committer	Apple <opensource@apple.com>
	Mon, 23 Feb 2009 03:45:05 +0000 (03:45 +0000)
icuSources/common/ucnv2022.c		patch \| blob \| blame \| history
icuSources/common/ucnvhz.c		patch \| blob \| blame \| history
icuSources/common/ucnvmbcs.c		patch \| blob \| blame \| history
icuSources/test/cintltst/nccbtst.c		patch \| blob \| blame \| history
icuSources/test/cintltst/nucnvtst.c		patch \| blob \| blame \| history
icuSources/test/testdata/conversion.txt		patch \| blob \| blame \| history
icuSources/tools/tzcode/icuzdump.vcproj		patch \| blob \| blame \| history
icuSources/tools/tzcode/localtime.c		patch \| blob \| blame \| history
icuSources/tools/tzcode/private.h		patch \| blob \| blame \| history
icuSources/tools/tzcode/zdump.c		patch \| blob \| blame \| history
icuSources/tools/tzcode/zic.c		patch \| blob \| blame \| history