From: Apple Date: Mon, 23 Feb 2009 03:45:05 +0000 (+0000) Subject: ICU-8.11.4.tar.gz X-Git-Tag: mac-os-x-1057^0 X-Git-Url: https://git.saurik.com/apple/icu.git/commitdiff_plain/fd0068a84e9996f225edba706498f6ed413d0673 ICU-8.11.4.tar.gz --- diff --git a/icuSources/common/ucnv2022.c b/icuSources/common/ucnv2022.c index cd83c69c..67c8da9f 100644 --- a/icuSources/common/ucnv2022.c +++ b/icuSources/common/ucnv2022.c @@ -735,6 +735,7 @@ changeState_2022(UConverter* _this, UConverterDataISO2022* myData2022 = ((UConverterDataISO2022*)_this->extraInfo); uint32_t key = myData2022->key; int32_t offset = 0; + int8_t initialToULength = _this->toULength; char c; value = VALID_NON_TERMINAL_2022; @@ -787,7 +788,6 @@ DONE: return; } else if (value == INVALID_2022 ) { *err = U_ILLEGAL_ESCAPE_SEQUENCE; - return; } else /* value == VALID_TERMINAL_2022 */ { switch(var){ #ifdef U_ENABLE_GENERIC_ISO_2022 @@ -918,6 +918,35 @@ DONE: } if(U_SUCCESS(*err)) { _this->toULength = 0; + } else if(*err==U_ILLEGAL_ESCAPE_SEQUENCE) { + if(_this->toULength>1) { + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte (ESC) in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * In escape sequences, all following bytes are "printable", that is, + * unless they are completely illegal (>7f in SBCS, outside 21..7e in DBCS), + * they are valid single/lead bytes. + * For simplicity, we always only report the initial ESC byte as the + * illegal sequence and back out all other bytes we looked at. + */ + /* Back out some bytes. */ + int8_t backOutDistance=_this->toULength-1; + int8_t bytesFromThisBuffer=_this->toULength-initialToULength; + if(backOutDistance<=bytesFromThisBuffer) { + /* same as initialToULength<=1 */ + *source-=backOutDistance; + } else { + /* Back out bytes from the previous buffer: Need to replay them. 
*/ + _this->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); + /* same as -(initialToULength-1) */ + /* preToULength is negative! */ + uprv_memcpy(_this->preToU, _this->toUBytes+1, -_this->preToULength); + *source-=bytesFromThisBuffer; + } + _this->toULength=1; + } } } @@ -1200,7 +1229,7 @@ toUnicodeCallback(UConverter *cnv, } else{ cnv->toUBytes[0] =(char) sourceChar; - cnv->toULength = 2; + cnv->toULength = 1; } if(targetUniChar == (missingCharMarker-1/*0xfffe*/)){ @@ -1689,6 +1718,7 @@ UConverter_toUnicode_ISO_2022_JP_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, mySourceChar = args->converter->toUBytes[0]; args->converter->toULength = 0; cs = (StateEnum)pToU2022State->cs[pToU2022State->g]; + targetUniChar = missingCharMarker; goto getTrailByte; } @@ -1820,12 +1850,40 @@ escape: default: /* G0 DBCS */ if(mySource < mySourceLimit) { - char trailByte; + int leadIsOk, trailIsOk; + uint8_t trailByte; getTrailByte: + trailByte = (uint8_t)*mySource; + /* old tempBuf[0] = (char) (mySourceChar); tempBuf[1] = trailByte = *mySource++; mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); + */ + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. 
+ */ + leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + ++mySource; + tempBuf[0] = (char) (mySourceChar); + tempBuf[1] = trailByte; + mySourceChar = (mySourceChar << 8) | trailByte; + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->myConverterArray[cs], tempBuf, 2, FALSE); + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + ++mySource; + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; + } } else { args->converter->toUBytes[0] = (uint8_t)mySourceChar; args->converter->toULength = 1; @@ -1966,7 +2024,12 @@ UConverter_fromUnicode_ISO_2022_KR_OFFSETS_LOGIC(UConverterFromUnicodeArgs* args MBCS_FROM_UCHAR32_ISO2022(sharedData,sourceChar,&targetByteUnit,useFallback,&length,MBCS_OUTPUT_2); /* only DBCS or SBCS characters are expected*/ /* DB characters with high bit set to 1 are expected */ - if(length > 2 || length==0 ||(((targetByteUnit & 0x8080) != 0x8080)&& length==2)){ + if( length > 2 || length==0 || + (length == 1 && targetByteUnit > 0x7f) || + (length == 2 && + ((uint16_t)(targetByteUnit - 0xa1a1) > (0xfefe - 0xa1a1) || + (uint8_t)(targetByteUnit - 0xa1) > (0xfe - 0xa1))) + ) { targetByteUnit=missingCharMarker; } if (targetByteUnit != missingCharMarker){ @@ -2294,17 +2357,42 @@ escape: myData->isEmptySegment = FALSE; /* Any invalid char errors will be detected separately, so just reset this */ if(myData->toU2022State.g == 1) { if(mySource < mySourceLimit) { - char trailByte; + int leadIsOk, trailIsOk; + uint8_t trailByte; getTrailByte: + /* old trailByte = *mySource++; tempBuf[0] = (char)(mySourceChar + 0x80); tempBuf[1] = (char)(trailByte + 0x80); mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); if((mySourceChar & 0x8080) == 0) { targetUniChar = 
ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); - } else { - /* illegal bytes > 0x7f */ - targetUniChar = missingCharMarker; + */ + targetUniChar = missingCharMarker; + trailByte = (uint8_t)*mySource; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + ++mySource; + tempBuf[0] = (char)(mySourceChar + 0x80); + tempBuf[1] = (char)(trailByte + 0x80); + targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, tempBuf, 2, useFallback); + mySourceChar = (mySourceChar << 8) | trailByte; + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + ++mySource; + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; } } else { args->converter->toUBytes[0] = (uint8_t)mySourceChar; @@ -2312,8 +2400,10 @@ getTrailByte: break; } } - else{ + else if(mySourceChar <= 0x7f) { targetUniChar = ucnv_MBCSSimpleGetNextUChar(sharedData, mySource - 1, 1, useFallback); + } else { + targetUniChar = 0xffff; } if(targetUniChar < 0xfffe){ if(args->offsets) { @@ -2778,6 +2868,7 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, /* continue with a partial double-byte character */ mySourceChar = args->converter->toUBytes[0]; args->converter->toULength = 0; + targetUniChar = missingCharMarker; goto getTrailByte; } @@ -2855,8 +2946,10 @@ 
escape: UConverterSharedData *cnv; StateEnum tempState; int32_t tempBufLen; - char trailByte; + int leadIsOk, trailIsOk; + uint8_t trailByte; getTrailByte: + /* old trailByte = *mySource++; tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; if(tempState > CNS_11643_0) { @@ -2871,13 +2964,48 @@ getTrailByte: tempBuf[0] = (char) (mySourceChar); tempBuf[1] = trailByte; tempBufLen = 2; + */ + trailByte = (uint8_t)*mySource; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In ISO-2022 DBCS, if the second byte is in the 21..7e range or is + * an ESC/SO/SI, we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. + */ + leadIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + trailIsOk = (uint8_t)(trailByte - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + ++mySource; + tempState = (StateEnum)pToU2022State->cs[pToU2022State->g]; + if(tempState >= CNS_11643_0) { + cnv = myData->myConverterArray[CNS_11643]; + tempBuf[0] = (char) (0x80+(tempState-CNS_11643_0)); + tempBuf[1] = (char) (mySourceChar); + tempBuf[2] = (char) trailByte; + tempBufLen = 3; + + }else{ + cnv = myData->myConverterArray[tempState]; + tempBuf[0] = (char) (mySourceChar); + tempBuf[1] = (char) trailByte; + tempBufLen = 2; + } + targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); + mySourceChar = (mySourceChar << 8) | trailByte; + } else if (!(trailIsOk || IS_2022_CONTROL(trailByte))) { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + ++mySource; + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar = 0x10000 | (mySourceChar << 8) | trailByte; } - mySourceChar = (mySourceChar << 8) | (uint8_t)(trailByte); 
if(pToU2022State->g>=2) { /* return from a single-shift state to the previous one */ pToU2022State->g=pToU2022State->prevG; } - targetUniChar = ucnv_MBCSSimpleGetNextUChar(cnv, tempBuf, tempBufLen, FALSE); } else { args->converter->toUBytes[0] = (uint8_t)mySourceChar; args->converter->toULength = 1; diff --git a/icuSources/common/ucnvhz.c b/icuSources/common/ucnvhz.c index c3f63fca..4bab29b1 100644 --- a/icuSources/common/ucnvhz.c +++ b/icuSources/common/ucnvhz.c @@ -143,7 +143,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UChar *myTarget = args->target; const char *mySourceLimit = args->sourceLimit; UChar32 targetUniChar = 0x0000; - UChar mySourceChar = 0x0000; + int32_t mySourceChar = 0x0000; UConverterDataHZ* myData=(UConverterDataHZ*)(args->converter->extraInfo); tempBuf[0]=0; tempBuf[1]=0; @@ -157,105 +157,136 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, if(myTarget < args->targetLimit){ mySourceChar= (unsigned char) *mySource++; - - switch(mySourceChar){ - case 0x0A: - if(args->converter->mode ==UCNV_TILDE){ - args->converter->mode=0; - - } - *(myTarget++)=(UChar)mySourceChar; - myData->isEmptySegment = FALSE; - continue; - case UCNV_TILDE: - if(args->converter->mode ==UCNV_TILDE){ - *(myTarget++)=(UChar)mySourceChar; - args->converter->mode=0; - myData->isEmptySegment = FALSE; - continue; - - } - else if(args->converter->toUnicodeStatus !=0){ - args->converter->mode=0; - break; - } - else{ + if(args->converter->mode == UCNV_TILDE) { + /* second byte after ~ */ + args->converter->mode=0; + switch(mySourceChar) { + case 0x0A: + /* no output for ~\n (line-continuation marker) */ + continue; + case UCNV_TILDE: + if(args->offsets) { + args->offsets[myTarget - args->target]=(int32_t)(mySource - args->source - 2); + } + *(myTarget++)=(UChar)mySourceChar; + myData->isEmptySegment = FALSE; + continue; + case UCNV_OPEN_BRACE: + case UCNV_CLOSE_BRACE: + myData->isStateDBCS = (mySourceChar == UCNV_OPEN_BRACE); + if 
(myData->isEmptySegment) { + myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ + *err = U_PARSE_ERROR; /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */ + args->converter->toUBytes[0] = UCNV_TILDE; + args->converter->toUBytes[1] = mySourceChar; + args->converter->toULength = 2; + args->target = myTarget; + args->source = mySource; + return; + } + myData->isEmptySegment = TRUE; + continue; + default: + /* if the first byte is equal to TILDE and the trail byte + * is not a valid byte then it is an error condition + */ + /* old + myData->isEmptySegment = FALSE; + mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); + goto SAVE_STATE; + */ + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ + *err = U_ILLEGAL_ESCAPE_SEQUENCE; + args->converter->toUBytes[0] = UCNV_TILDE; + if( myData->isStateDBCS ? + (0x21 <= mySourceChar && mySourceChar <= 0x7e) : + mySourceChar <= 0x7f + ) { + /* The current byte could be the start of a character: Back it out. */ + args->converter->toULength = 1; + --mySource; + } else { + /* Include the current byte in the illegal sequence. 
*/ + args->converter->toUBytes[1] = mySourceChar; + args->converter->toULength = 2; + } + args->target = myTarget; + args->source = mySource; + return; + } + } else if(myData->isStateDBCS) { + if(args->converter->toUnicodeStatus == 0x00){ + /* lead byte */ + if(mySourceChar == UCNV_TILDE) { args->converter->mode = UCNV_TILDE; - continue; - } - - - case UCNV_OPEN_BRACE: - if(args->converter->mode == UCNV_TILDE){ - args->converter->mode=0; - myData->isStateDBCS = TRUE; - myData->isEmptySegment = TRUE; - continue; - } - else{ - break; - } - - - case UCNV_CLOSE_BRACE: - if(args->converter->mode == UCNV_TILDE){ - args->converter->mode=0; - myData->isStateDBCS = FALSE; - if (myData->isEmptySegment) { - myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ - *err = U_PARSE_ERROR; /* temporary err to flag empty segment, will be reset to U_ILLEGAL_ESCAPE_SEQUENCE in _toUnicodeWithCallback */ - args->converter->toUBytes[0] = UCNV_TILDE; - args->converter->toUBytes[1] = mySourceChar; - args->converter->toULength = 2; - goto EXIT; - } - myData->isEmptySegment = TRUE; - continue; - } - else{ - break; - } - - default: - /* if the first byte is equal to TILDE and the trail byte - * is not a valid byte then it is an error condition - */ - if(args->converter->mode == UCNV_TILDE){ - args->converter->mode=0; - mySourceChar= (UChar)(((UCNV_TILDE+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); - myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ - goto SAVE_STATE; + } else { + /* add another bit to distinguish a 0 byte from not having seen a lead byte */ + args->converter->toUnicodeStatus = (uint32_t) (mySourceChar | 0x100); + myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this */ } - - break; - - } - - myData->isEmptySegment = FALSE; /* the segment has something, either valid or will produce a different error, so reset this 
*/ - if(myData->isStateDBCS){ - if(args->converter->toUnicodeStatus == 0x00){ - args->converter->toUnicodeStatus = (UChar) mySourceChar; continue; } else{ + /* trail byte */ + /* old tempBuf[0] = (char) (args->converter->toUnicodeStatus+0x80) ; tempBuf[1] = (char) (mySourceChar+0x80); mySourceChar= (UChar)(((args->converter->toUnicodeStatus+0x80) << 8) | ((mySourceChar & 0x00ff)+0x80)); args->converter->toUnicodeStatus =0x00; targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, tempBuf, 2, args->converter->useFallback); + */ + int leadIsOk, trailIsOk; + uint32_t leadByte = args->converter->toUnicodeStatus & 0xff; + targetUniChar = 0xffff; + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + * + * In HZ DBCS, if the second byte is in the 21..7e range, + * we report only the first byte as the illegal sequence. + * Otherwise we convert or report the pair of bytes. 
+ */ + leadIsOk = (uint8_t)(leadByte - 0x21) <= (0x7d - 0x21); + trailIsOk = (uint8_t)(mySourceChar - 0x21) <= (0x7e - 0x21); + if (leadIsOk && trailIsOk) { + tempBuf[0] = (char) (leadByte+0x80) ; + tempBuf[1] = (char) (mySourceChar+0x80); + targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, + tempBuf, 2, args->converter->useFallback); + mySourceChar= (leadByte << 8) | mySourceChar; + } else if (trailIsOk) { + /* report a single illegal byte and continue with the following DBCS starter byte */ + --mySource; + mySourceChar = (int32_t)leadByte; + } else { + /* report a pair of illegal bytes if the second byte is not a DBCS starter */ + /* add another bit so that the code below writes 2 bytes in case of error */ + mySourceChar= 0x10000 | (leadByte << 8) | mySourceChar; + } + args->converter->toUnicodeStatus =0x00; } } else{ - if(args->converter->fromUnicodeStatus == 0x00){ - targetUniChar = ucnv_MBCSSimpleGetNextUChar(myData->gbConverter->sharedData, - mySource - 1, 1, args->converter->useFallback); - } - else{ - goto SAVE_STATE; + if(mySourceChar == UCNV_TILDE) { + args->converter->mode = UCNV_TILDE; + continue; + } else if(mySourceChar <= 0x7f) { + targetUniChar = (UChar)mySourceChar; /* ASCII */ + myData->isEmptySegment = FALSE; /* the segment has something valid */ + } else { + targetUniChar = 0xffff; + myData->isEmptySegment = FALSE; /* different error here, reset this to avoid spurious future error */ } - } if(targetUniChar < 0xfffe){ if(args->offsets) { @@ -264,27 +295,18 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, *(myTarget++)=(UChar)targetUniChar; } - else if(targetUniChar>=0xfffe){ -SAVE_STATE: + else /* targetUniChar>=0xfffe */ { if(targetUniChar == 0xfffe){ *err = U_INVALID_CHAR_FOUND; } else{ *err = U_ILLEGAL_CHAR_FOUND; } - if(myData->isStateDBCS){ - /* this should never occur since isStateDBCS is set to true - * only after tempBuf[0] and tempBuf[1] - * are set to the input .. 
just to please BEAM - */ - if(tempBuf[0]==0 || tempBuf[1]==0){ - *err = U_INTERNAL_PROGRAM_ERROR; - }else{ - args->converter->toUBytes[0] = (uint8_t)(tempBuf[0]-0x80); - args->converter->toUBytes[1] = (uint8_t)(tempBuf[1]-0x80); + if(mySourceChar > 0xff){ + args->converter->toUBytes[0] = (uint8_t)(mySourceChar >> 8); + args->converter->toUBytes[1] = (uint8_t)mySourceChar; args->converter->toULength=2; } - } else{ args->converter->toUBytes[0] = (uint8_t)mySourceChar; args->converter->toULength=1; @@ -297,7 +319,7 @@ SAVE_STATE: break; } } -EXIT: + args->target = myTarget; args->source = mySource; } diff --git a/icuSources/common/ucnvmbcs.c b/icuSources/common/ucnvmbcs.c index 84bae8a9..4be8e2dd 100644 --- a/icuSources/common/ucnvmbcs.c +++ b/icuSources/common/ucnvmbcs.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 2000-2006, International Business Machines +* Copyright (C) 2000-2006,2008, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -1611,6 +1611,65 @@ unrolled: pArgs->offsets=offsets; } +static UBool +hasValidTrailBytes(const int32_t (*stateTable)[256], uint8_t state) { + const int32_t *row=stateTable[state]; + int32_t b, entry; + /* First test for final entries in this state for some commonly valid byte values. */ + entry=row[0xa1]; + if( !MBCS_ENTRY_IS_TRANSITION(entry) && + MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL + ) { + return TRUE; + } + entry=row[0x41]; + if( !MBCS_ENTRY_IS_TRANSITION(entry) && + MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL + ) { + return TRUE; + } + /* Then test for final entries in this state. */ + for(b=0; b<=0xff; ++b) { + entry=row[b]; + if( !MBCS_ENTRY_IS_TRANSITION(entry) && + MBCS_ENTRY_FINAL_ACTION(entry)!=MBCS_STATE_ILLEGAL + ) { + return TRUE; + } + } + /* Then recurse for transition entries. 
*/ + for(b=0; b<=0xff; ++b) { + entry=row[b]; + if( MBCS_ENTRY_IS_TRANSITION(entry) && + hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)) + ) { + return TRUE; + } + } + return FALSE; +} + +/* + * Is byte b a single/lead byte in this state? + * Recurse for transition states, because here we don't want to say that + * b is a lead byte if all byte sequences that start with b are illegal. + */ +static UBool +isSingleOrLead(const int32_t (*stateTable)[256], uint8_t state, UBool isDBCSOnly, uint8_t b) { + const int32_t *row=stateTable[state]; + int32_t entry=row[b]; + if(MBCS_ENTRY_IS_TRANSITION(entry)) { /* lead byte */ + return hasValidTrailBytes(stateTable, (uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry)); + } else { + uint8_t action=(uint8_t)(MBCS_ENTRY_FINAL_ACTION(entry)); + if(action==MBCS_STATE_CHANGE_ONLY && isDBCSOnly) { + return FALSE; /* SI/SO are illegal for DBCS-only conversion */ + } else { + return action!=MBCS_STATE_ILLEGAL; + } + } +} + U_CFUNC void ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, UErrorCode *pErrorCode) { @@ -1966,6 +2025,34 @@ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, sourceIndex=nextSourceIndex; } else if(U_FAILURE(*pErrorCode)) { /* callback(illegal) */ + if(byteIndex>1) { + /* + * Ticket 5691: consistent illegal sequences: + * - We include at least the first byte in the illegal sequence. + * - If any of the non-initial bytes could be the start of a character, + * we stop the illegal sequence before the first one of those. + */ + UBool isDBCSOnly=(UBool)(cnv->sharedData->mbcs.dbcsOnlyState!=0); + int8_t i; + for(i=1; + isource); + byteIndex=i; /* length of reported illegal byte sequence */ + if(backOutDistance<=bytesFromThisBuffer) { + source-=backOutDistance; + } else { + /* Back out bytes from the previous buffer: Need to replay them. */ + cnv->preToULength=(int8_t)(bytesFromThisBuffer-backOutDistance); + /* preToULength is negative! 
*/ + uprv_memcpy(cnv->preToU, bytes+i, -cnv->preToULength); + source=(const uint8_t *)pArgs->source; + } + } + } break; } else /* unassigned sequences indicated with byteIndex>0 */ { /* try an extension mapping */ @@ -1976,7 +2063,7 @@ ucnv_MBCSToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, &offsets, sourceIndex, pArgs->flush, pErrorCode); - sourceIndex=nextSourceIndex+(int32_t)(source-(const uint8_t *)pArgs->source); + sourceIndex=nextSourceIndex+=(int32_t)(source-(const uint8_t *)pArgs->source); if(U_FAILURE(*pErrorCode)) { /* not mappable or buffer overflow */ @@ -2267,15 +2354,37 @@ ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, if(c<0) { if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSourcetoUBytes; cnv->toULength=(int8_t)(source-lastSource); do { *bytes++=*lastSource++; } while(lastSourcesharedData->mbcs.dbcsOnlyState!=0); + uint8_t *bytes=cnv->toUBytes; + *bytes++=*lastSource++; /* first byte */ + if(lastSource==source) { + cnv->toULength=1; + } else /* lastSourcetoULength=i; + source=lastSource; + } } else { /* no output because of empty input or only state changes */ *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; diff --git a/icuSources/test/cintltst/nccbtst.c b/icuSources/test/cintltst/nccbtst.c index d82d37b9..d426b989 100644 --- a/icuSources/test/cintltst/nccbtst.c +++ b/icuSources/test/cintltst/nccbtst.c @@ -1,6 +1,6 @@ /******************************************************************** * COPYRIGHT: - * Copyright (c) 1997-2006, International Business Machines Corporation and + * Copyright (c) 1997-2006,2008, International Business Machines Corporation and * others. All Rights Reserved. 
********************************************************************/ /* @@ -2497,13 +2497,13 @@ static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize) static const uint8_t text943[] = { - 0x82, 0xa9, 0x82, 0x20, /*0xc8,*/ 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; - static const UChar toUnicode943sub[] = { 0x304b, 0xfffd, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57}; - static const UChar toUnicode943skip[]= { 0x304b, /*0xff88,*/ 0x0061, 0x6f22, 0x5b57}; + 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a }; + static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 }; + static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 }; static const UChar toUnicode943stop[]= { 0x304b}; - static const int32_t fromIBM943Offssub[] = {0, 2, 4, 5, 7}; - static const int32_t fromIBM943Offsskip[] = { 0, 4, 5, 7}; + static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 }; + static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 }; static const int32_t fromIBM943Offsstop[] = { 0}; gInBufferSize = inputsize; @@ -2537,9 +2537,9 @@ static void TestSingleByte(int32_t inputsize, int32_t outputsize) { static const uint8_t sampleText[] = { 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82, - 0xff, /*0x82, 0xa9,*/ 0x32, 0x33}; - static const UChar toUnicode943sub[] = {0x304b, 0x0061, 0x0062, 0x0063, 0xfffd,/*0x304b,*/ 0x0032, 0x0033}; - static const int32_t fromIBM943Offssub[] = {0, 2, 3, 4, 5, 7, 8}; + 0xff, 0x32, 0x33}; + static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 }; + static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 }; /*checking illegal value for ibm-943 with substitute*/ gInBufferSize = inputsize; gOutBufferSize = outputsize; diff --git a/icuSources/test/cintltst/nucnvtst.c b/icuSources/test/cintltst/nucnvtst.c index 6791a1eb..cab2975d 100644 --- a/icuSources/test/cintltst/nucnvtst.c +++ b/icuSources/test/cintltst/nucnvtst.c @@ -2605,7 +2605,7 @@ TestMBCS() { 
TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source"); /*Test for the condition where there is an invalid character*/ { - static const uint8_t source2[]={0xa1, 0x01}; + static const uint8_t source2[]={0xa1, 0x80}; TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character"); } /*Test for the condition where we have a truncated char*/ @@ -3898,11 +3898,11 @@ static void TestISO_2022_KR() { /* test input */ static const uint16_t in[]={ - 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F66,0x9F67,0x9F6A,0x000A,0x000D - ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC02,0xAC04 + 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D + ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB - ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53DF,0x53E1,0x53E2 + ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2 ,0x53E3,0x53E4,0x000A,0x000D}; const UChar* uSource; const UChar* uSourceLimit; diff --git a/icuSources/test/testdata/conversion.txt b/icuSources/test/testdata/conversion.txt index a8cd8a90..45ae1e69 100644 --- a/icuSources/test/testdata/conversion.txt +++ b/icuSources/test/testdata/conversion.txt @@ -1,6 +1,6 @@ //******************************************************************************* // -// Copyright (C) 2003-2006, International Business Machines +// Copyright (C) 2003-2006,2008 International Business Machines // Corporation and others. All Rights Reserved. 
// // file name: conversion.txt @@ -48,6 +48,155 @@ conversion:table(nofallback) { toUnicode { Headers { "charset", "bytes", "unicode", "offsets", "flush", "fallbacks", "errorCode", "callback", "invalidChars" } Cases { + // Test ticket 5691: consistent illegal sequences + // The following test cases are for illegal character byte sequences. + // + // Unfortunately, we cannot use the Shift-JIS examples from the ticket + // comments because our Shift-JIS table is Windows-compatible and + // therefore has no illegal single bytes. Same for GBK. + // Instead, we use the stricter GB 18030 also for 2-byte examples. + // The byte sequences are generally slightly different from the ticket + // comment, simply using assigned characters rather than just + // theoretically valid sequences. + { + "gb18030", + :bin{ 618140813c81ff7a }, + "a\u4e02\\x81<\\x81\\xFFz", + :intvector{ 0,1,3,3,3,3,4,5,5,5,5,5,5,5,5,7 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "EUC-JP", + :bin{ 618fb0a98fb03c8f3cb0a97a }, + "a\u4e28\\x8F\\xB0<\\x8F<\u9022z", + :intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "gb18030", + :bin{ 618130fc318130fc8181303c3e813cfc817a }, + "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z", + :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "UTF-8", + :bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a }, + "a\U00040042\\xF1\\x80\\x81<\\xF1\\x80\\x81\\xFF\\xF1\\x80\\xFF<\\xF1\\xFF<>z", + :intvector{ 0,1,1,5,5,5,5,5,5,5,5,5,5,5,5,8,9,9,9,9,9,9,9,9,9,9,9,9,12,12,12,12,13,13,13,13,13,13,13,13,15,15,15,15,16,17,17,17,17,18,18,18,18,19,20,21 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "ISO-2022-JP", + :bin{ 1b24424141af4142affe41431b2842 }, + "\u758f\\xAF\u758e\\xAF\\xFE\u790e", + :intvector{ 3,5,5,5,5,6,8,8,8,8,8,8,8,8,10 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "ibm-25546", + :bin{ 411b242943420e4141af4142affe41430f5a }, + 
"AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ", + :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "ISO-2022-KR", + :bin{ 411b242943420e4141af4142affe41430f5a }, + "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ", + :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "ISO-2022-CN", + :bin{ 411b242941420e4141af4142affe41430f5a }, + "AB\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z", + :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "HZ", + :bin{ 417e7b4141af4142affe41437e7d5a }, + "A\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z", + :intvector{ 0,3,5,5,5,5,6,8,8,8,8,8,8,8,8,10,14 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + // Test ticket 5691: consistent illegal sequences + // The following test cases are for illegal escape/designator/shift sequences. + // + // ISO-2022-JP and -CN with illegal escape sequences. + { + "ISO-2022-JP", + :bin{ 611b24201b244241411b283f1b28427a }, + "a\\x1B$ \u758f\\x1B\u2538z", + :intvector{ 0,1,1,1,1,2,3,7,9,9,9,9,10,15 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "ISO-2022-CN", + :bin{ 611b2429201b2429410e41410f7a }, + "a\\x1B$) \u4eaez", + :intvector{ 0,1,1,1,1,2,3,4,10,13 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + // Test ticket 5691: ISO-2022-JP-2 with illegal single-shift SS2 and SS3 sequences. + // The first ESC N comes before its designator sequence, the last sequence is ESC+space. 
+ { + "ISO-2022-JP-2", + :bin{ 4e1b4e4e1b2e414e1b4e4e4e1b204e }, + "N\\x1BNNN\xceN\\x1B N", + :intvector{ 0,1,1,1,1,2,3,7,10,11,12,12,12,12,13,14 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "ISO-2022-CN-EXT", + :bin{ 4e1b4e4e1b242a484e1b4e4e4e4e1b204e }, + "N\\x1BNNN\u8f0eN\\x1B N", + :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + { + "ISO-2022-CN-EXT", + :bin{ 4f1b4f4f1b242b494f1b4f4f4f4f1b204f }, + "O\\x1BOOO\u492bO\\x1B O", + :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 }, + :int{1}, :int{0}, "", "&C", :bin{""} + } + // Test ticket 5691: HZ with illegal tilde sequences. + { + "HZ", + :bin{ 417e20427e21437e80447e7b41417e207e41427e7f41437e7d5a }, + "A\\x7E B\\x7E!C\\x7E\\x80D\u4eae\\x7E\\x20\\x7E\u8c05\\x7E\\x7F\u64a9Z", + :intvector{ 0,1,1,1,1,2,3,4,4,4,4,5,6,7,7,7,7,7,7,7,7,9, // SBCS + 12,14,14,14,14,14,14,14,14,16,16,16,16,17,19,19,19,19,19,19,19,19,21, // DBCS + 25 }, // SBCS + :int{1}, :int{0}, "", "&C", :bin{""} + } + // Test ticket 5691: Example from Peter Edberg. 
+ { + "ISO-2022-JP", + :bin{ 1b244230212f7e742630801b284a621b2458631b2842648061 }, + "\u4e9c\ufffd\u7199\ufffdb\ufffd$Xcd\ufffda", + :intvector{ 3,5,7,9,14,15,16,17,18,22,23,24 }, + :int{1}, :int{0}, "", "?", :bin{""} + } + // test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e + { + "HZ", + :bin{ 7e7b21212120217e217f772100007e217e7e7d207e7e807e0a2b }, + "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd\u3013 ~\ufffd+", + :intvector{ 2,4,6,8,10,12,14,15,19,20,22,25 }, + :int{1}, :int{1}, "", "?", :bin{""} + } + // improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and + // using the Shift-JIS table for JIS X 0208 (ticket #5797) + { + "ISO-2022-JP", + :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 }, + "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e", + :intvector{ 3,4,5,9,11,12,14,16,17,19,21,23,25,27 }, + :int{1}, :int{1}, "", "?", :bin{""} + } // improve coverage of unrolled loops in ucnvmbcs.c/ucnv_MBCSSingleToBMPWithOffsets() { "ISO-8859-3", @@ -324,7 +473,7 @@ conversion:table(nofallback) { { "ISO-2022-CN-EXT", :bin{ 411b4e2121 }, "\x41", :intvector{ 0 }, - :int{1}, :int{1}, "illesc", ".", :bin{ 1b4e } + :int{1}, :int{1}, "illesc", ".", :bin{ 1b } } // G3 designator: recognized, but not supported for -CN (only for -CN-EXT) { diff --git a/icuSources/tools/tzcode/icuzdump.vcproj b/icuSources/tools/tzcode/icuzdump.vcproj index 87242b5f..72122468 100644 --- a/icuSources/tools/tzcode/icuzdump.vcproj +++ b/icuSources/tools/tzcode/icuzdump.vcproj @@ -1,11 +1,12 @@ - @@ -153,6 +153,8 @@ SubSystem="1" OptimizeReferences="2" EnableCOMDATFolding="2" + RandomizedBaseAddress="1" + DataExecutionPrevention="0" TargetMachine="1" /> - diff --git a/icuSources/tools/tzcode/localtime.c b/icuSources/tools/tzcode/localtime.c index 1fdfbdbb..d186080b 100644 --- a/icuSources/tools/tzcode/localtime.c +++ b/icuSources/tools/tzcode/localtime.c @@ -5,7 +5,7 @@ #ifndef lint #ifndef NOID -static 
char elsieid[] = "@(#)localtime.c 8.5"; +static char elsieid[] = "@(#)localtime.c 8.9"; #endif /* !defined NOID */ #endif /* !defined lint */ @@ -136,51 +136,52 @@ struct rule { ** Prototypes for static functions. */ -static long detzcode P((const char * codep)); -static time_t detzcode64 P((const char * codep)); -static int differ_by_repeat P((time_t t1, time_t t0)); -static const char * getzname P((const char * strp)); -static const char * getqzname P((const char * strp, const int delim)); -static const char * getnum P((const char * strp, int * nump, int min, - int max)); -static const char * getsecs P((const char * strp, long * secsp)); -static const char * getoffset P((const char * strp, long * offsetp)); -static const char * getrule P((const char * strp, struct rule * rulep)); -static void gmtload P((struct state * sp)); -static struct tm * gmtsub P((const time_t * timep, long offset, - struct tm * tmp)); -static struct tm * localsub P((const time_t * timep, long offset, - struct tm * tmp)); -static int increment_overflow P((int * number, int delta)); -static int leaps_thru_end_of P((int y)); -static int long_increment_overflow P((long * number, int delta)); -static int long_normalize_overflow P((long * tensptr, - int * unitsptr, int base)); -static int normalize_overflow P((int * tensptr, int * unitsptr, - int base)); -static void settzname P((void)); -static time_t time1 P((struct tm * tmp, - struct tm * (*funcp) P((const time_t *, - long, struct tm *)), - long offset)); -static time_t time2 P((struct tm *tmp, - struct tm * (*funcp) P((const time_t *, - long, struct tm*)), - long offset, int * okayp)); -static time_t time2sub P((struct tm *tmp, - struct tm * (*funcp) P((const time_t *, - long, struct tm*)), - long offset, int * okayp, int do_norm_secs)); -static struct tm * timesub P((const time_t * timep, long offset, - const struct state * sp, struct tm * tmp)); -static int tmcomp P((const struct tm * atmp, - const struct tm * btmp)); -static time_t 
transtime P((time_t janfirst, int year, - const struct rule * rulep, long offset)); -static int tzload P((const char * name, struct state * sp, - int doextend)); -static int tzparse P((const char * name, struct state * sp, - int lastditch)); +static long detzcode(const char * codep); +static time_t detzcode64(const char * codep); +static int differ_by_repeat(time_t t1, time_t t0); +static const char * getzname(const char * strp); +static const char * getqzname(const char * strp, const int delim); +static const char * getnum(const char * strp, int * nump, int min, + int max); +static const char * getsecs(const char * strp, long * secsp); +static const char * getoffset(const char * strp, long * offsetp); +static const char * getrule(const char * strp, struct rule * rulep); +static void gmtload(struct state * sp); +static struct tm * gmtsub(const time_t * timep, long offset, + struct tm * tmp); +static struct tm * localsub(const time_t * timep, long offset, + struct tm * tmp); +static int increment_overflow(int * number, int delta); +static int leaps_thru_end_of(int y); +static int long_increment_overflow(long * number, int delta); +static int long_normalize_overflow(long * tensptr, + int * unitsptr, int base); +static int normalize_overflow(int * tensptr, int * unitsptr, + int base); +static void settzname(void); +static time_t time1(struct tm * tmp, + struct tm * (*funcp)(const time_t *, + long, struct tm *), + long offset); +static time_t time2(struct tm *tmp, + struct tm * (*funcp)(const time_t *, + long, struct tm*), + long offset, int * okayp); +static time_t time2sub(struct tm *tmp, + struct tm * (*funcp)(const time_t *, + long, struct tm*), + long offset, int * okayp, int do_norm_secs); +static struct tm * timesub(const time_t * timep, long offset, + const struct state * sp, struct tm * tmp); +static int tmcomp(const struct tm * atmp, + const struct tm * btmp); +static time_t transtime(time_t janfirst, int year, + const struct rule * rulep, long offset); 
+static int typesequiv(const struct state * sp, int a, int b); +static int tzload(const char * name, struct state * sp, + int doextend); +static int tzparse(const char * name, struct state * sp, + int lastditch); #ifdef ALL_STATE static struct state * lclptr; @@ -253,7 +254,7 @@ const char * const codep; } static void -settzname P((void)) +settzname(void) { register struct state * const sp = lclptr; register int i; @@ -554,17 +555,51 @@ register const int doextend; sp->ttis[sp->typecnt++] = ts.ttis[1]; } } - i = 2 * YEARSPERREPEAT; - sp->goback = sp->goahead = sp->timecnt > i; - sp->goback = sp->goback && sp->types[i] == sp->types[0] && - differ_by_repeat(sp->ats[i], sp->ats[0]); - sp->goahead = sp->goahead && - sp->types[sp->timecnt - 1] == sp->types[sp->timecnt - 1 - i] && - differ_by_repeat(sp->ats[sp->timecnt - 1], - sp->ats[sp->timecnt - 1 - i]); + sp->goback = sp->goahead = FALSE; + if (sp->timecnt > 1) { + for (i = 1; i < sp->timecnt; ++i) + if (typesequiv(sp, sp->types[i], sp->types[0]) && + differ_by_repeat(sp->ats[i], sp->ats[0])) { + sp->goback = TRUE; + break; + } + for (i = sp->timecnt - 2; i >= 0; --i) + if (typesequiv(sp, sp->types[sp->timecnt - 1], + sp->types[i]) && + differ_by_repeat(sp->ats[sp->timecnt - 1], + sp->ats[i])) { + sp->goahead = TRUE; + break; + } + } return 0; } +static int +typesequiv(sp, a, b) +const struct state * const sp; +const int a; +const int b; +{ + register int result; + + if (sp == NULL || + a < 0 || a >= sp->typecnt || + b < 0 || b >= sp->typecnt) + result = FALSE; + else { + register const struct ttinfo * ap = &sp->ttis[a]; + register const struct ttinfo * bp = &sp->ttis[b]; + result = ap->tt_gmtoff == bp->tt_gmtoff && + ap->tt_isdst == bp->tt_isdst && + ap->tt_ttisstd == bp->tt_ttisstd && + ap->tt_ttisgmt == bp->tt_ttisgmt && + strcmp(&sp->chars[ap->tt_abbrind], + &sp->chars[bp->tt_abbrind]) == 0; + } + return result; +} + static const int mon_lengths[2][MONSPERYEAR] = { { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 
}, { 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 } @@ -1120,7 +1155,7 @@ struct state * const sp; static #endif /* !defined STD_INSPIRED */ void -tzsetwall P((void)) +tzsetwall(void) { if (lcl_is_set < 0) return; @@ -1141,7 +1176,7 @@ tzsetwall P((void)) } void -tzset P((void)) +tzset(void) { register const char * name; @@ -1639,7 +1674,7 @@ register const struct tm * const btmp; static time_t time2sub(tmp, funcp, offset, okayp, do_norm_secs) struct tm * const tmp; -struct tm * (* const funcp) P((const time_t*, long, struct tm*)); +struct tm * (* const funcp)(const time_t*, long, struct tm*); const long offset; int * const okayp; const int do_norm_secs; @@ -1781,12 +1816,8 @@ const int do_norm_secs; ** It's okay to guess wrong since the guess ** gets checked. */ - /* - ** The (void *) casts are the benefit of SunOS 3.3 on Sun 2's. - */ sp = (const struct state *) - (((void *) funcp == (void *) localsub) ? - lclptr : gmtptr); + ((funcp == localsub) ? lclptr : gmtptr); #ifdef ALL_STATE if (sp == NULL) return WRONG; @@ -1827,7 +1858,7 @@ label: static time_t time2(tmp, funcp, offset, okayp) struct tm * const tmp; -struct tm * (* const funcp) P((const time_t*, long, struct tm*)); +struct tm * (* const funcp)(const time_t*, long, struct tm*); const long offset; int * const okayp; { @@ -1845,7 +1876,7 @@ int * const okayp; static time_t time1(tmp, funcp, offset) struct tm * const tmp; -struct tm * (* const funcp) P((const time_t *, long, struct tm *)); +struct tm * (* const funcp)(const time_t *, long, struct tm *); const long offset; { register time_t t; @@ -1880,11 +1911,7 @@ const long offset; ** We try to divine the type they started from and adjust to the ** type they need. */ - /* - ** The (void *) casts are the benefit of SunOS 3.3 on Sun 2's. - */ - sp = (const struct state *) (((void *) funcp == (void *) localsub) ? - lclptr : gmtptr); + sp = (const struct state *) ((funcp == localsub) ? 
lclptr : gmtptr); #ifdef ALL_STATE if (sp == NULL) return WRONG; diff --git a/icuSources/tools/tzcode/private.h b/icuSources/tools/tzcode/private.h index 2837b70c..008d468a 100644 --- a/icuSources/tools/tzcode/private.h +++ b/icuSources/tools/tzcode/private.h @@ -21,7 +21,7 @@ #ifndef lint #ifndef NOID -static char privatehid[] = "@(#)private.h 8.2"; +static char privatehid[] = "@(#)private.h 8.6"; #endif /* !defined NOID */ #endif /* !defined lint */ @@ -48,10 +48,6 @@ static char privatehid[] = "@(#)private.h 8.2"; #define HAVE_SETTIMEOFDAY 3 #endif /* !defined HAVE_SETTIMEOFDAY */ -#ifndef HAVE_STRERROR -#define HAVE_STRERROR 1 -#endif /* !defined HAVE_STRERROR */ - #ifndef HAVE_SYMLINK #define HAVE_SYMLINK 1 #endif /* !defined HAVE_SYMLINK */ @@ -109,17 +105,15 @@ static char privatehid[] = "@(#)private.h 8.2"; #endif /* !defined WEXITSTATUS */ #if HAVE_UNISTD_H -#include "unistd.h" /* for F_OK and R_OK */ +#include "unistd.h" /* for F_OK, R_OK, and other POSIX goodness */ #endif /* HAVE_UNISTD_H */ -#if !HAVE_UNISTD_H #ifndef F_OK #define F_OK 0 #endif /* !defined F_OK */ #ifndef R_OK #define R_OK 4 #endif /* !defined R_OK */ -#endif /* !HAVE_UNISTD_H */ /* Unlike <ctype.h>'s isdigit, this also works if c < 0 | c > UCHAR_MAX. */ #define is_digit(c) ((unsigned)(c) - '0' <= 9) @@ -164,70 +158,6 @@ typedef long int_fast64_t; ** Workarounds for compilers/systems. */ -/* -** If your compiler lacks prototypes, "#define P(x) ()". -*/ - -#ifndef P -#define P(x) x -#endif /* !defined P */ - -/* -** SunOS 4.1.1 headers lack EXIT_SUCCESS. -*/ - -#ifndef EXIT_SUCCESS -#define EXIT_SUCCESS 0 -#endif /* !defined EXIT_SUCCESS */ - -/* -** SunOS 4.1.1 headers lack EXIT_FAILURE. -*/ - -#ifndef EXIT_FAILURE -#define EXIT_FAILURE 1 -#endif /* !defined EXIT_FAILURE */ - -/* -** SunOS 4.1.1 headers lack FILENAME_MAX.
-*/ - -#ifndef FILENAME_MAX - -#ifndef MAXPATHLEN -#ifdef unix -#include "sys/param.h" -#endif /* defined unix */ -#endif /* !defined MAXPATHLEN */ - -#ifdef MAXPATHLEN -#define FILENAME_MAX MAXPATHLEN -#endif /* defined MAXPATHLEN */ -#ifndef MAXPATHLEN -#define FILENAME_MAX 1024 /* Pure guesswork */ -#endif /* !defined MAXPATHLEN */ - -#endif /* !defined FILENAME_MAX */ - -/* -** SunOS 4.1.1 libraries lack remove. -*/ - -#ifndef remove -extern int unlink P((const char * filename)); -#define remove unlink -#endif /* !defined remove */ - -/* -** Some ancient errno.h implementations don't declare errno. -** But some newer errno.h implementations define it as a macro. -** Fix the former without affecting the latter. -*/ - -#ifndef errno -extern int errno; -#endif /* !defined errno */ - /* ** Some time.h implementations don't declare asctime_r. ** Others might define it as a macro. @@ -235,21 +165,21 @@ extern int errno; */ #ifndef asctime_r -extern char * asctime_r(); +extern char * asctime_r(struct tm const *, char *); #endif /* ** Private function declarations. */ -char * icalloc P((int nelem, int elsize)); -char * icatalloc P((char * old, const char * new)); -char * icpyalloc P((const char * string)); -char * imalloc P((int n)); -void * irealloc P((void * pointer, int size)); -void icfree P((char * pointer)); -void ifree P((char * pointer)); -const char * scheck P((const char * string, const char * format)); +char * icalloc(int nelem, int elsize); +char * icatalloc(char * old, const char * new); +char * icpyalloc(const char * string); +char * imalloc(int n); +void * irealloc(void * pointer, int size); +void icfree(char * pointer); +void ifree(char * pointer); +const char * scheck(const char * string, const char * format); /* ** Finally, some convenience items. 
@@ -337,8 +267,8 @@ const char * scheck P((const char * string, const char * format)); #if HAVE_INCOMPATIBLE_CTIME_R #undef asctime_r #undef ctime_r -char *asctime_r P((struct tm const *, char *)); -char *ctime_r P((time_t const *, char *)); +char *asctime_r(struct tm const *, char *); +char *ctime_r(time_t const *, char *); #endif /* HAVE_INCOMPATIBLE_CTIME_R */ #ifndef YEARSPERREPEAT @@ -356,7 +286,7 @@ char *ctime_r P((time_t const *, char *)); #ifndef SECSPERREPEAT #define SECSPERREPEAT ((int_fast64_t) YEARSPERREPEAT * (int_fast64_t) AVGSECSPERYEAR) #endif /* !defined SECSPERREPEAT */ - + #ifndef SECSPERREPEAT_BITS #define SECSPERREPEAT_BITS 34 /* ceil(log2(SECSPERREPEAT)) */ #endif /* !defined SECSPERREPEAT_BITS */ diff --git a/icuSources/tools/tzcode/zdump.c b/icuSources/tools/tzcode/zdump.c index c7199acb..b27480dd 100644 --- a/icuSources/tools/tzcode/zdump.c +++ b/icuSources/tools/tzcode/zdump.c @@ -1,4 +1,4 @@ -static char elsieid[] = "@(#)zdump.c 8.3"; +static char elsieid[] = "@(#)zdump.c 8.8"; /* ** This code has been made independent of the rest of the time @@ -104,6 +104,9 @@ static char elsieid[] = "@(#)zdump.c 8.3"; #define SECSPERNYEAR (SECSPERDAY * DAYSPERNYEAR) #define SECSPERLYEAR (SECSPERNYEAR + SECSPERDAY) +#ifndef HAVE_GETTEXT +#define HAVE_GETTEXT 0 +#endif #if HAVE_GETTEXT #include "locale.h" /* for setlocale */ #include "libintl.h" @@ -145,13 +148,9 @@ static char elsieid[] = "@(#)zdump.c 8.3"; #define TZ_DOMAIN "tz" #endif /* !defined TZ_DOMAIN */ -#ifndef P -#define P(x) x -#endif /* !defined P */ - extern char ** environ; -extern int getopt P((int argc, char * const argv[], - const char * options)); +extern int getopt(int argc, char * const argv[], + const char * options); extern char * optarg; extern int optind; extern char * tzname[2]; @@ -162,26 +161,26 @@ static size_t longest; static char * progname; static int warned; -static char * abbr P((struct tm * tmp)); -static void abbrok P((const char * abbrp, const char * zone)); -static 
long delta P((struct tm * newp, struct tm * oldp)); -static void dumptime P((const struct tm * tmp)); -static time_t hunt P((char * name, time_t lot, time_t hit)); -static void setabsolutes P((void)); -static void show P((char * zone, time_t t, int v)); -static const char * tformat P((void)); -static time_t yeartot P((long y)); +static char * abbr(struct tm * tmp); +static void abbrok(const char * abbrp, const char * zone); +static long delta(struct tm * newp, struct tm * oldp); +static void dumptime(const struct tm * tmp); +static time_t hunt(char * name, time_t lot, time_t hit); +static void setabsolutes(void); +static void show(char * zone, time_t t, int v); +static const char * tformat(void); +static time_t yeartot(long y); #ifdef ICU typedef struct listentry { char * name; struct listentry * next; } listentry; -static time_t huntICU P((char * name, time_t lot, time_t hit, FILE *fp)); -static void dumptimeICU P((FILE * fp, time_t t)); -static void showICU P((FILE * fp, char * zone, time_t t1, time_t t2)); -static int getall P((struct listentry ** namelist)); -static void getzones P((char * basedir, char * subdir, struct listentry ** last, int * count)); +static time_t huntICU(char * name, time_t lot, time_t hit, FILE *fp); +static void dumptimeICU(FILE * fp, time_t t); +static void showICU(FILE * fp, char * zone, time_t t1, time_t t2); +static int getall(struct listentry ** namelist); +static void getzones(char * basedir, char * subdir, struct listentry ** last, int * count); #endif #ifndef TYPECHECK @@ -259,6 +258,17 @@ const char * const zone; warned = TRUE; } +static void +usage(const char *progname, FILE *stream, int status) +{ + (void) fprintf(stream, +_("%s: usage is %s [ --version ] [ --help ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n\ +\n\ +Report bugs to tz@elsie.nci.nih.gov.\n"), + progname, progname); + exit(status); +} + int main(argc, argv) int argc; @@ -303,6 +313,8 @@ char * argv[]; if (strcmp(argv[i], "--version") == 0) { (void) 
printf("%s\n", elsieid); exit(EXIT_SUCCESS); + } else if (strcmp(argv[i], "--help") == 0) { + usage(progname, stdout, EXIT_SUCCESS); } vflag = 0; cutarg = NULL; @@ -359,10 +371,7 @@ char * argv[]; else cutarg = optarg; if ((c != EOF && c != -1) || (optind == argc - 1 && strcmp(argv[optind], "=") == 0)) { - (void) fprintf(stderr, -_("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"), - progname, progname); - exit(EXIT_FAILURE); + usage(progname, stderr, EXIT_FAILURE); } #endif if (vflag) { @@ -498,13 +507,9 @@ _("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"), (void) strncpy(buf, abbr(&tm), (sizeof buf) - 1); } for ( ; ; ) { - if (t >= cuthitime) + if (t >= cuthitime || t >= cuthitime - SECSPERHOUR * 12) break; newt = t + SECSPERHOUR * 12; - if (newt >= cuthitime) - break; - if (newt <= t) - break; newtmp = localtime(&newt); if (newtmp != NULL) newtm = *newtmp; @@ -588,7 +593,7 @@ _("%s: usage is %s [ --version ] [ -v ] [ -c [loyear,]hiyear ] zonename ...\n"), } static void -setabsolutes() +setabsolutes(void) { if (0.5 == (time_t) 0.5) { /* @@ -617,7 +622,7 @@ _("%s: use of -v on system with floating time_t other than float or double\n"), t = t1; t1 = 2 * t1 + 1; } - + absolute_max_time = t; t = -t; absolute_min_time = t - 1; @@ -786,7 +791,7 @@ struct tm * tmp; */ static const char * -tformat() +tformat(void) { if (0.5 == (time_t) 0.5) { /* floating */ if (sizeof (time_t) > sizeof (double)) diff --git a/icuSources/tools/tzcode/zic.c b/icuSources/tools/tzcode/zic.c index a7535155..fcc3c823 100644 --- a/icuSources/tools/tzcode/zic.c +++ b/icuSources/tools/tzcode/zic.c @@ -3,7 +3,7 @@ ** 2006-07-17 by Arthur David Olson. 
*/ -static char elsieid[] = "@(#)zic.c 8.7"; +static char elsieid[] = "@(#)zic.c 8.18"; #include "private.h" #include "locale.h" @@ -113,76 +113,80 @@ struct zone { zic_t z_untiltime; }; -extern int getopt P((int argc, char * const argv[], - const char * options)); -extern int link P((const char * fromname, const char * toname)); +extern int getopt(int argc, char * const argv[], + const char * options); +extern int link(const char * fromname, const char * toname); extern char * optarg; extern int optind; -static void addtt P((zic_t starttime, int type)); +static void addtt(zic_t starttime, int type); #ifdef ICU -static int addtype P((long gmtoff, long rawoff, long dstoff, +static int addtype(long gmtoff, long rawoff, long dstoff, const char * abbr, int isdst, - int ttisstd, int ttisgmt)); + int ttisstd, int ttisgmt); #else -static int addtype P((long gmtoff, const char * abbr, int isdst, - int ttisstd, int ttisgmt)); +static int addtype(long gmtoff, const char * abbr, int isdst, + int ttisstd, int ttisgmt); #endif -static void leapadd P((zic_t t, int positive, int rolling, int count)); -static void adjleap P((void)); -static void associate P((void)); -static int ciequal P((const char * ap, const char * bp)); -static void convert P((long val, char * buf)); -static void convert64 P((zic_t val, char * buf)); -static void dolink P((const char * fromfile, const char * tofile)); -static void doabbr P((char * abbr, const char * format, - const char * letters, int isdst, int doquotes)); -static void eat P((const char * name, int num)); -static void eats P((const char * name, int num, - const char * rname, int rnum)); -static long eitol P((int i)); -static void error P((const char * message)); -static char ** getfields P((char * buf)); -static long gethms P((const char * string, const char * errstrng, - int signable)); -static void infile P((const char * filename)); -static void inleap P((char ** fields, int nfields)); -static void inlink P((char ** fields, int nfields)); 
-static void inrule P((char ** fields, int nfields)); -static int inzcont P((char ** fields, int nfields)); -static int inzone P((char ** fields, int nfields)); -static int inzsub P((char ** fields, int nfields, int iscont)); -static int is32 P((zic_t x)); -static int itsabbr P((const char * abbr, const char * word)); -static int itsdir P((const char * name)); -static int lowerit P((int c)); -static char * memcheck P((char * tocheck)); -static int mkdirs P((char * filename)); -static void newabbr P((const char * abbr)); -static long oadd P((long t1, long t2)); -static void outzone P((const struct zone * zp, int ntzones)); -static void puttzcode P((long code, FILE * fp)); -static void puttzcode64 P((zic_t code, FILE * fp)); -static int rcomp P((const void * leftp, const void * rightp)); -static zic_t rpytime P((const struct rule * rp, int wantedy)); -static void rulesub P((struct rule * rp, +static void leapadd(zic_t t, int positive, int rolling, int count); +static void adjleap(void); +static void associate(void); +static int ciequal(const char * ap, const char * bp); +static void convert(long val, char * buf); +static void convert64(zic_t val, char * buf); +static void dolink(const char * fromfield, const char * tofield); +static void doabbr(char * abbr, const char * format, + const char * letters, int isdst, int doquotes); +static void eat(const char * name, int num); +static void eats(const char * name, int num, + const char * rname, int rnum); +static long eitol(int i); +static void error(const char * message); +static char ** getfields(char * buf); +static long gethms(const char * string, const char * errstrng, + int signable); +static void infile(const char * filename); +static void inleap(char ** fields, int nfields); +static void inlink(char ** fields, int nfields); +static void inrule(char ** fields, int nfields); +static int inzcont(char ** fields, int nfields); +static int inzone(char ** fields, int nfields); +static int inzsub(char ** fields, int 
nfields, int iscont); +static int is32(zic_t x); +static int itsabbr(const char * abbr, const char * word); +static int itsdir(const char * name); +static int lowerit(int c); +static char * memcheck(char * tocheck); +static int mkdirs(char * filename); +static void newabbr(const char * abbr); +static long oadd(long t1, long t2); +static void outzone(const struct zone * zp, int ntzones); +static void puttzcode(long code, FILE * fp); +static void puttzcode64(zic_t code, FILE * fp); +static int rcomp(const void * leftp, const void * rightp); +static zic_t rpytime(const struct rule * rp, int wantedy); +static void rulesub(struct rule * rp, const char * loyearp, const char * hiyearp, const char * typep, const char * monthp, - const char * dayp, const char * timep)); -static int stringoffset P((char * result, long offset)); -static int stringrule P((char * result, const struct rule * rp, - long dstoff, long gmtoff)); -static void stringzone P((char * result, - const struct zone * zp, int ntzones)); -static void setboundaries P((void)); -static zic_t tadd P((zic_t t1, long t2)); -static void usage P((void)); -static void writezone P((const char * name, const char * string)); -static int yearistype P((int year, const char * type)); - -#if !HAVE_STRERROR -static char * strerror P((int)); -#endif /* !HAVE_STRERROR */ + const char * dayp, const char * timep); +static int stringoffset(char * result, long offset); +static int stringrule(char * result, const struct rule * rp, + long dstoff, long gmtoff); +static void stringzone(char * result, + const struct zone * zp, int ntzones); +static void setboundaries(void); +static zic_t tadd(zic_t t1, long t2); +static void usage(FILE *stream, int status); +static void writezone(const char * name, const char * string); +static int yearistype(int year, const char * type); +#ifdef ICU +static void emit_icu_zone(FILE* f, const char* zoneName, int zoneOffset, + const struct rule* rule, + int ruleIndex, int startYear); +static void 
emit_icu_link(FILE* f, const char* from, const char* to); +static void emit_icu_rule(FILE* f, const struct rule* r, int ruleIndex); +static int add_icu_final_rules(const struct rule* r1, const struct rule* r2); +#endif static int charcnt; static int errors; @@ -308,19 +312,16 @@ struct lookup { }; #ifdef ICU - /* Indices into rules[] for final rules. They will occur in pairs, * with finalRules[i] occurring before finalRules[i+1] in the year. * Each zone need only store a start year, a standard offset, and an * index into finalRules[]. FinalRules[] are aliases into rules[]. */ - -static const struct rule ** finalRules; -static int finalRulesCount; - +static const struct rule ** finalRules; +static int finalRulesCount; #endif -static struct lookup const * byword P((const char * string, - const struct lookup * lp)); +static struct lookup const * byword(const char * string, + const struct lookup * lp); static struct lookup const line_codes[] = { { "Rule", LC_RULE }, @@ -442,19 +443,6 @@ char * const ptr; ** Error handling. */ -#if !HAVE_STRERROR -static char * -strerror(errnum) -int errnum; -{ - extern char * sys_errlist[]; - extern int sys_nerr; - - return (errnum > 0 && errnum <= sys_nerr) ? - sys_errlist[errnum] : _("Unknown system error"); -} -#endif /* !HAVE_STRERROR */ - static void eats(name, num, rname, rnum) const char * const name; @@ -508,69 +496,87 @@ const char * const string; } static void -usage P((void)) +usage(FILE *stream, int status) { - (void) fprintf(stderr, _("%s: usage is %s \ -[ --version ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n\ -\t[ -d directory ] [ -L leapseconds ] [ -y yearistype ] [ filename ... ]\n"), - progname, progname); - exit(EXIT_FAILURE); + (void) fprintf(stream, _("%s: usage is %s \ +[ --version ] [ --help ] [ -v ] [ -l localtime ] [ -p posixrules ] \\\n\ +\t[ -d directory ] [ -L leapseconds ] [ -y yearistype ] [ filename ... 
]\n\ +\n\ +Report bugs to tz@elsie.nci.nih.gov.\n"), + progname, progname); + exit(status); } #ifdef ICU - /* File into which we will write supplemental ICU data. */ -static FILE * icuFile; - -void emit_icu_zone(FILE* f, const char* zoneName, int zoneOffset, - const struct rule* rule, - int ruleIndex, int startYear) { - /* machine-readable section */ - fprintf(f, "zone %s %d %d %s", zoneName, zoneOffset, startYear, rule->r_name); - - /* human-readable section */ - fprintf(f, " # zone %s, offset %d, year >= %d, rule %s (%d)\n", - zoneName, zoneOffset, startYear, - rule->r_name, ruleIndex); +static FILE * icuFile; + +static void +emit_icu_zone(FILE* f, const char* zoneName, int zoneOffset, + const struct rule* rule, + int ruleIndex, int startYear) { + /* machine-readable section */ + fprintf(f, "zone %s %d %d %s", zoneName, zoneOffset, startYear, rule->r_name); + + /* human-readable section */ + fprintf(f, " # zone %s, offset %d, year >= %d, rule %s (%d)\n", + zoneName, zoneOffset, startYear, + rule->r_name, ruleIndex); } -void emit_icu_link(FILE* f, const char* from, const char* to) { - /* machine-readable section */ - fprintf(f, "link %s %s\n", from, to); +static void +emit_icu_link(FILE* f, const char* from, const char* to) { + /* machine-readable section */ + fprintf(f, "link %s %s\n", from, to); } static const char* DYCODE[] = {"DOM", "DOWGEQ", "DOWLEQ"}; -void emit_icu_rule(FILE* f, const struct rule* r, int ruleIndex) { - if (r->r_yrtype != NULL) { - warning("year types not supported by ICU"); - fprintf(stderr, "rule %s, file %s, line %d\n", - r->r_name, r->r_filename, r->r_linenum); +static void +emit_icu_rule(FILE* f, const struct rule* r, int ruleIndex) { + if (r->r_yrtype != NULL) { + warning("year types not supported by ICU"); + fprintf(stderr, "rule %s, file %s, line %d\n", + r->r_name, r->r_filename, r->r_linenum); } - /* machine-readable section */ - fprintf(f, "rule %s %s %d %d %d %d %d %d %d", - r->r_name, DYCODE[r->r_dycode], - r->r_month, 
r->r_dayofmonth, - (r->r_dycode == DC_DOM ? -1 : r->r_wday), - r->r_tod, r->r_todisstd, r->r_todisgmt, r->r_stdoff - ); - - /* human-readable section */ - fprintf(f, " # %d: %s, file %s, line %d", - ruleIndex, r->r_name, r->r_filename, r->r_linenum); - fprintf(f, ", mode %s", DYCODE[r->r_dycode]); - fprintf(f, ", %s, dom %d", mon_names[r->r_month].l_word, r->r_dayofmonth); - if (r->r_dycode != DC_DOM) { - fprintf(f, ", %s", wday_names[r->r_wday].l_word); - } - fprintf(f, ", time %d", r->r_tod); - fprintf(f, ", isstd %d", r->r_todisstd); - fprintf(f, ", isgmt %d", r->r_todisgmt); - fprintf(f, ", offset %ld", r->r_stdoff); - fprintf(f, "\n"); + /* machine-readable section */ + fprintf(f, "rule %s %s %d %d %d %ld %d %d %ld", + r->r_name, DYCODE[r->r_dycode], + r->r_month, r->r_dayofmonth, + (r->r_dycode == DC_DOM ? -1 : r->r_wday), + r->r_tod, r->r_todisstd, r->r_todisgmt, r->r_stdoff + ); + + /* human-readable section */ + fprintf(f, " # %d: %s, file %s, line %d", + ruleIndex, r->r_name, r->r_filename, r->r_linenum); + fprintf(f, ", mode %s", DYCODE[r->r_dycode]); + fprintf(f, ", %s, dom %d", mon_names[r->r_month].l_word, r->r_dayofmonth); + if (r->r_dycode != DC_DOM) { + fprintf(f, ", %s", wday_names[r->r_wday].l_word); + } + fprintf(f, ", time %ld", r->r_tod); + fprintf(f, ", isstd %d", r->r_todisstd); + fprintf(f, ", isgmt %d", r->r_todisgmt); + fprintf(f, ", offset %ld", r->r_stdoff); + fprintf(f, "\n"); } +static int +add_icu_final_rules(const struct rule* r1, const struct rule* r2) { + int i; + + for (i=0; i= HOURSPERDAY || + if (hh < 0 || mm < 0 || mm >= MINSPERHOUR || - ss < 0 || ss > SECSPERMIN) && - !(hh == HOURSPERDAY && mm == 0 && ss == 0)) { + ss < 0 || ss > SECSPERMIN) { error(errstring); return 0; } - if (noise && hh == HOURSPERDAY) + if (LONG_MAX / SECSPERHOUR < hh) { + error(_("time overflow")); + return 0; + } + if (noise && hh == HOURSPERDAY && mm == 0 && ss == 0) warning(_("24:00 not handled by pre-1998 versions of zic")); - return eitol(sign) * - 
(eitol(hh * MINSPERHOUR + mm) * - eitol(SECSPERMIN) + eitol(ss)); + if (noise && (hh > HOURSPERDAY || + (hh == HOURSPERDAY && (mm != 0 || ss != 0)))) +warning(_("values over 24 hours not handled by pre-2007 versions of zic")); + return oadd(eitol(sign) * hh * eitol(SECSPERHOUR), + eitol(sign) * (eitol(mm) * eitol(SECSPERMIN) + eitol(ss))); } static void @@ -1767,8 +1781,8 @@ const char * const string; (size_t) sizeof tzh.field, (size_t) 1, fp) tzh = tzh0; #ifdef ICU - * (ICUZoneinfoVersion*) &tzh.tzh_reserved = TZ_ICU_VERSION; - (void) strncpy(tzh.tzh_magic, TZ_ICU_MAGIC, sizeof tzh.tzh_magic); + * (ICUZoneinfoVersion*) &tzh.tzh_reserved = TZ_ICU_VERSION; + (void) strncpy(tzh.tzh_magic, TZ_ICU_MAGIC, sizeof tzh.tzh_magic); #else (void) strncpy(tzh.tzh_magic, TZ_MAGIC, sizeof tzh.tzh_magic); #endif @@ -1805,10 +1819,10 @@ const char * const string; for (i = 0; i < typecnt; ++i) if (writetype[i]) { #ifdef ICU - puttzcode((long) rawoffs[i], fp); - puttzcode((long) dstoffs[i], fp); + puttzcode((long) rawoffs[i], fp); + puttzcode((long) dstoffs[i], fp); #else - puttzcode((long) gmtoffs[i], fp); + puttzcode(gmtoffs[i], fp); #endif (void) putc(isdsts[i], fp); (void) putc((unsigned char) indmap[abbrinds[i]], fp); @@ -2078,24 +2092,6 @@ const int zonecount; } } -#ifdef ICU - -int add_icu_final_rules(const struct rule* r1, const struct rule* r2) { - int i; - - for (i=0; iz_untilrule.r_loyear); + if (i < zonecount - 1) + updateminmax(zp->z_untilrule.r_loyear); for (j = 0; j < zp->z_nrules; ++j) { rp = &zp->z_rules[j]; if (rp->r_lowasnum) @@ -2168,7 +2165,7 @@ const int zonecount; wp = ecpyalloc(_("no POSIX environment variable for zone")); wp = ecatalloc(wp, " "); - wp = ecatalloc(wp, zpfirst->z_name); + wp = ecatalloc(wp, zpfirst->z_name); warning(wp); ifree(wp); } @@ -2181,8 +2178,11 @@ wp = ecpyalloc(_("no POSIX environment variable for zone")); else max_year = INT_MAX; } /* - ** For the benefit of older systems, generate data through 2037. 
+ ** For the benefit of older systems, + ** generate data from 1900 through 2037. */ + if (min_year > 1900) + min_year = 1900; if (max_year < 2037) max_year = 2037; for (i = 0; i < zonecount; ++i) { @@ -2200,45 +2200,53 @@ wp = ecpyalloc(_("no POSIX environment variable for zone")); *startbuf = '\0'; startoff = zp->z_gmtoff; #ifdef ICU - finalRuleYear = finalRuleIndex = -1; - finalRule1 = finalRule2 = NULL; - if (i == (zonecount - 1)) { /* !useuntil */ - /* Look for exactly 2 rules that end at 'max' and - * note them. Determine max(r_loyear) for the 2 of - * them. */ - for (j=0; j<zp->z_nrules; ++j) { - rp = &zp->z_rules[j]; - if (rp->r_hiyear == INT_MAX) { - if (finalRule1 == NULL) { - finalRule1 = rp; - finalRuleYear = rp->r_loyear; - } else if (finalRule2 == NULL) { - finalRule2 = rp; - if (rp->r_loyear > finalRuleYear) { - finalRuleYear = rp->r_loyear; - } - } else { - error("more than two max rules found (ICU)"); - exit(EXIT_FAILURE); - } - } - } - if (finalRule1 != NULL && finalRule2 == NULL) { - error("only one max rule found (ICU)"); - exit(EXIT_FAILURE); - } - if (finalRule1 != NULL) { - /* Swap if necessary so finalRule1 occurs before - * finalRule2 */ - if (finalRule1->r_month > finalRule2->r_month) { - const struct rule* t = finalRule1; - finalRule1 = finalRule2; - finalRule2 = t; - } - /* Add final rule to our list */ - finalRuleIndex = add_icu_final_rules(finalRule1, finalRule2); - } - } + finalRuleYear = finalRuleIndex = -1; + finalRule1 = finalRule2 = NULL; + if (i == (zonecount - 1)) { /* !useuntil */ + /* Look for exactly 2 rules that end at 'max' and + * note them. Determine max(r_loyear) for the 2 of + * them.
*/ + for (j=0; j<zp->z_nrules; ++j) { + rp = &zp->z_rules[j]; + if (rp->r_hiyear == INT_MAX) { + if (finalRule1 == NULL) { + finalRule1 = rp; + finalRuleYear = rp->r_loyear; + } else if (finalRule2 == NULL) { + finalRule2 = rp; + if (rp->r_loyear > finalRuleYear) { + finalRuleYear = rp->r_loyear; + } + } else { + error("more than two max rules found (ICU)"); + exit(EXIT_FAILURE); + } + } + } + if (finalRule1 != NULL && finalRule2 == NULL) { + error("only one max rule found (ICU)"); + exit(EXIT_FAILURE); + } + if (finalRule1 != NULL) { + if (finalRule1->r_stdoff == finalRule2->r_stdoff) { + /* America/Resolute in 2009a uses a pair of rules + * which does not change the offset. ICU ignores + * such rules without actual time transitions. */ + finalRuleYear = finalRuleIndex = -1; + finalRule1 = finalRule2 = NULL; + } else { + /* Swap if necessary so finalRule1 occurs before + * finalRule2 */ + if (finalRule1->r_month > finalRule2->r_month) { + const struct rule* t = finalRule1; + finalRule1 = finalRule2; + finalRule2 = t; + } + /* Add final rule to our list */ + finalRuleIndex = add_icu_final_rules(finalRule1, finalRule2); + } + } + } #endif if (zp->z_nrules == 0) { @@ -2247,7 +2255,7 @@ wp = ecpyalloc(_("no POSIX environment variable for zone")); (char *) NULL, stdoff != 0, FALSE); type = addtype(oadd(zp->z_gmtoff, stdoff), #ifdef ICU - zp->z_gmtoff, stdoff, + zp->z_gmtoff, stdoff, #endif startbuf, stdoff != 0, startttisstd, startttisgmt); @@ -2348,40 +2356,40 @@ wp = ecpyalloc(_("no POSIX environment variable for zone")); } } #ifdef ICU - if (year >= finalRuleYear && rp == finalRule1) { - /* We want to shift final year 1 year after - * the actual final rule takes effect (year + 1), - * because the previous type is valid until the first - * transition defined by the final rule. Otherwise - * we may see unexpected offset shift at the - * begining of the year when the final rule takes - * effect. */ - - /* ICU currently can support signed int32 transition - * times. 
Thus, the transitions in year 2038 may be - * truncated. At this moment (tzdata2008g), only - * Rule Brazil is impacted by this limitation, because - * the final set of rules are starting in 2038. Although - * this code put the first couple of transitions populated - * by the final rules, they will be dropped off when - * collecting transition times. So, we need to keep - * the start year of the final rule in 2038, not 2039. - * Fortunately, the Brazil rules in 2038 and beyond use - * the same base offset/dst saving amount. Thus, even - * we skip the first couple of transitions, the final - * rule set for 2038 works properly. So for now, - * we do not increment the final rule start year only when - * it falls into year 2038. We need to revisit this code - * in future to fix the root cause of this problem (ICU - * resource type limitation - signed int32). - * Oct 7, 2008 - Yoshito */ - int finalStartYear = (year == 2038) ? year : year + 1; - emit_icu_zone(icuFile, - zpfirst->z_name, zp->z_gmtoff, - rp, finalRuleIndex, finalStartYear); - /* only emit this for the first year */ - finalRule1 = NULL; - } + if (year >= finalRuleYear && rp == finalRule1) { + /* We want to shift final year 1 year after + * the actual final rule takes effect (year + 1), + * because the previous type is valid until the first + * transition defined by the final rule. Otherwise + * we may see unexpected offset shift at the + * begining of the year when the final rule takes + * effect. */ + + /* ICU currently can support signed int32 transition + * times. Thus, the transitions in year 2038 may be + * truncated. At this moment (tzdata2008g), only + * Rule Brazil is impacted by this limitation, because + * the final set of rules are starting in 2038. Although + * this code put the first couple of transitions populated + * by the final rules, they will be dropped off when + * collecting transition times. So, we need to keep + * the start year of the final rule in 2038, not 2039. 
+ * Fortunately, the Brazil rules in 2038 and beyond use + * the same base offset/dst saving amount. Thus, even + * we skip the first couple of transitions, the final + * rule set for 2038 works properly. So for now, + * we do not increment the final rule start year only when + * it falls into year 2038. We need to revisit this code + * in future to fix the root cause of this problem (ICU + * resource type limitation - signed int32). + * Oct 7, 2008 - Yoshito */ + int finalStartYear = (year == 2038) ? year : year + 1; + emit_icu_zone(icuFile, + zpfirst->z_name, zp->z_gmtoff, + rp, finalRuleIndex, finalStartYear); + /* only emit this for the first year */ + finalRule1 = NULL; + } #endif eats(zp->z_filename, zp->z_linenum, rp->r_filename, rp->r_linenum); @@ -2390,7 +2398,7 @@ wp = ecpyalloc(_("no POSIX environment variable for zone")); offset = oadd(zp->z_gmtoff, rp->r_stdoff); #ifdef ICU type = addtype(offset, zp->z_gmtoff, rp->r_stdoff, - ab, rp->r_stdoff != 0, + ab, rp->r_stdoff != 0, rp->r_todisstd, rp->r_todisgmt); #else type = addtype(offset, ab, rp->r_stdoff != 0, @@ -2411,8 +2419,8 @@ error(_("can't determine time zone abbreviation to use just after until time")); else addtt(starttime, #ifdef ICU addtype(startoff, - zp->z_gmtoff, startoff - zp->z_gmtoff, - startbuf, + zp->z_gmtoff, startoff - zp->z_gmtoff, + startbuf, startoff != zp->z_gmtoff, startttisstd, startttisgmt)); @@ -2508,10 +2516,10 @@ const int ttisgmt; error(_("internal error - addtype called with bad isdst/dstoff")); (void) exit(EXIT_FAILURE); } - if (gmtoff != (rawoff + dstoff)) { + if (gmtoff != (rawoff + dstoff)) { error(_("internal error - addtype called with bad gmt/raw/dstoff")); (void) exit(EXIT_FAILURE); - } + } #endif /* ** See if there's already an entry for this zone type. 
@@ -2520,7 +2528,7 @@ const int ttisgmt; for (i = 0; i < typecnt; ++i) { if (gmtoff == gmtoffs[i] && isdst == isdsts[i] && #ifdef ICU - rawoff == rawoffs[i] && dstoff == dstoffs[i] && + rawoff == rawoffs[i] && dstoff == dstoffs[i] && #endif strcmp(abbr, &chars[abbrinds[i]]) == 0 && ttisstd == ttisstds[i] && @@ -2535,10 +2543,14 @@ const int ttisgmt; error(_("too many local time types")); exit(EXIT_FAILURE); } + if (! (-1L - 2147483647L <= gmtoff && gmtoff <= 2147483647L)) { + error(_("UTC offset out of range")); + exit(EXIT_FAILURE); + } gmtoffs[i] = gmtoff; #ifdef ICU - rawoffs[i] = rawoff; - dstoffs[i] = dstoff; + rawoffs[i] = rawoff; + dstoffs[i] = dstoff; #endif isdsts[i] = isdst; ttisstds[i] = ttisstd; @@ -2589,7 +2601,7 @@ int count; } static void -adjleap P((void)) +adjleap(void) { register int i; register long last = 0; @@ -2719,9 +2731,12 @@ register char * cp; else while ((*dp = *cp++) != '"') if (*dp != '\0') ++dp; - else error(_( + else { + error(_( "Odd number of quotation marks" )); + exit(1); + } } while (*cp != '\0' && *cp != '#' && (!isascii(*cp) || !isspace((unsigned char) *cp))); if (isascii(*cp) && isspace((unsigned char) *cp)) @@ -2909,7 +2924,7 @@ wp = _("time zone abbreviation differs from POSIX standard"); static int mkdirs(argname) -char * const argname; +char * argname; { register char * name; register char * cp;