bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23         File:           UnicodeWrappers.c
  24
  25         Contains:       Wrapper routines for Unicode conversion and comparison.
  26
  27 */
  28 #include <sys/param.h>
  29 #include <sys/utfconv.h>
  30
  31 #include "../../hfs_macos_defs.h"
  32 #include "UCStringCompareData.h"
  33
  34 #include "../headers/FileMgrInternal.h"
  35 #include "../headers/HFSUnicodeWrappers.h"
  36
  37 enum {
  38         kMinFileExtensionChars = 1,     /* does not include dot */
  39         kMaxFileExtensionChars = 5      /* does not include dot */
  40 };
  41
  42
  43 #define EXTENSIONCHAR(c)        (((c) >= 0x61 && (c) <= 0x7A) || \
  44                                  ((c) >= 0x41 && (c) <= 0x5A) || \
  45                                  ((c) >= 0x30 && (c) <= 0x39))
  46
  47
  48 #define IsHexDigit(c)           (((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
  49                                  ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
  50
  51
  52 static void     GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
  53
  54 static void     GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );
  55
  56 static UInt32   HexStringToInteger( UInt32 length, const UInt8 *hexStr );
  57
  58
  59
  60 /*
  61  * Convert file ID into a hexidecimal string with no leading zeros
  62  */
  63 static void
  64 GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
  65 {
  66         SInt32  i, b;
  67         UInt8   *translate = (UInt8 *) "0123456789ABCDEF";
  68         UInt8   c;
  69
  70         fileIDStr[0] = '#';
  71
  72         for ( i = 0, b = 28; b >= 0; b -= 4 ) {
  73                 c = *(translate + ((fileID >> b) & 0x0000000F));
  74
  75                 /* if its not a leading zero add it to our string */
  76                 if ( (c != (UInt8) '0') || (i > 1) || (b == 0) )
  77                         fileIDStr[++i] = c;
  78         }
  79
  80         fileIDStr[++i] = '\0';
  81 }
  82
  83
  84 /*
  85  * Get filename extension (if any) as a C string
  86  */
  87 static void
  88 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
  89 {
  90         UInt32  i;
  91         UniChar c;
  92         UInt16  extChars;       /* number of extension chars (excluding dot) */
  93         UInt16  maxExtChars;
  94         Boolean foundExtension;
  95
  96         extStr[0] = '\0';       /* assume there's no extension */
  97
  98         if ( length < 3 )
  99                 return;         /* "x.y" is smallest possible extension */
 100
 101         if ( length < (kMaxFileExtensionChars + 2) )
 102                 maxExtChars = length - 2;       /* save room for prefix + dot */
 103         else
 104                 maxExtChars = kMaxFileExtensionChars;
 105
 106         i = length;
 107         extChars = 0;
 108         foundExtension = false;
 109
 110         while ( extChars <= maxExtChars ) {
 111                 c = unicodeStr[--i];
 112
 113                 /* look for leading dot */
 114                 if ( c == (UniChar) '.' ) {
 115                         if ( extChars > 0 )     /* cannot end with a dot */
 116                                 foundExtension = true;
 117                         break;
 118                 }
 119
 120                 if ( EXTENSIONCHAR(c) )
 121                         ++extChars;
 122                 else
 123                         break;
 124         }
 125
 126         /* if we found one then copy it */
 127         if ( foundExtension ) {
 128                 UInt8 *extStrPtr = extStr;
 129                 const UniChar *unicodeStrPtr = &unicodeStr[i];
 130
 131                 for ( i = 0; i <= extChars; ++i )
 132                         *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
 133                 extStr[extChars + 1] = '\0';    /* terminate extension + dot */
 134         }
 135 }
 136
 137
 138
 139 /*
 140  * Count filename extension characters (if any)
 141  */
 142 static UInt32
 143 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
 144 {
 145         UInt32  i;
 146         UniChar c;
 147         UInt32  extChars;       /* number of extension chars (excluding dot) */
 148         UInt16  maxExtChars;
 149         Boolean foundExtension;
 150
 151         if ( length < 3 )
 152                 return 0;       /* "x.y" is smallest possible extension */
 153
 154         if ( length < (kMaxFileExtensionChars + 2) )
 155                 maxExtChars = length - 2;       /* save room for prefix + dot */
 156         else
 157                 maxExtChars = kMaxFileExtensionChars;
 158
 159         extChars = 0;           /* assume there's no extension */
 160         i = length - 1;         /* index to last ascii character */
 161         foundExtension = false;
 162
 163         while ( extChars <= maxExtChars ) {
 164                 c = filename[i--];
 165
 166                 /* look for leading dot */
 167                 if ( c == (UInt8) '.' ) {
 168                         if ( extChars > 0 )     /* cannot end with a dot */
 169                                 return (extChars);
 170
 171                         break;
 172                 }
 173
 174                 if ( EXTENSIONCHAR(c) )
 175                         ++extChars;
 176                 else
 177                         break;
 178         }
 179
 180         return 0;
 181 }
 182
 183
 184 /*
 185  * extract the file id from a mangled name
 186  */
 187 HFSCatalogNodeID
 188 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
 189 {
 190         short   extChars;
 191         short   i;
 192         UInt8   c;
 193
 194         *prefixLength = 0;
 195
 196         if ( filename == NULL )
 197                 return 0;
 198
 199         if ( length < 28 )
 200                 return 0;       /* too small to have been mangled */
 201
 202         /* big enough for a file ID (#10) and an extension (.x) ? */
 203         if ( length > 5 )
 204                 extChars = CountFilenameExtensionChars(filename, length);
 205         else
 206                 extChars = 0;
 207
 208         /* skip over dot plus extension characters */
 209         if ( extChars > 0 )
 210                 length -= (extChars + 1);
 211
 212         /* scan for file id digits */
 213         for ( i = length - 1; i >= 0; --i) {
 214                 c = filename[i];
 215
 216                 /* look for file ID marker */
 217                 if ( c == '#' ) {
 218                         if ( (length - i) < 3 )
 219                                 break;  /* too small to be a file ID */
 220
 221                         *prefixLength = i;
 222                         return HexStringToInteger(length - i - 1, &filename[i+1]);
 223                 }
 224
 225                 if ( !IsHexDigit(c) )
 226                         break;  /* file ID string must have hex digits */
 227         }
 228
 229         return 0;
 230 }
 231
 232
 233
 234 static UInt32
 235 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
 236 {
 237         UInt32          value;
 238         short           i;
 239         UInt8           c;
 240         const UInt8     *p;
 241
 242         value = 0;
 243         p = hexStr;
 244
 245         for ( i = 0; i < length; ++i ) {
 246                 c = *p++;
 247
 248                 if (c >= '0' && c <= '9') {
 249                         value = value << 4;
 250                         value += (UInt32) c - (UInt32) '0';
 251                 } else if (c >= 'A' && c <= 'F') {
 252                         value = value << 4;
 253                         value += 10 + ((unsigned int) c - (unsigned int) 'A');
 254                 } else {
 255                         return 0;       /* bad character */
 256                 }
 257         }
 258
 259         return value;
 260 }
 261
 262
 263 /*
 264  * Routine:     FastRelString
 265  *
 266  * Output:      returns -1 if str1 < str2
 267  *              returns  1 if str1 > str2
 268  *              return   0 if equal
 269  *
 270  */
 271 SInt32  FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
 272 {
 273         UInt16*                 compareTable;
 274         SInt32                  bestGuess;
 275         UInt8                   length, length2;
 276         UInt8                   delta;
 277
 278         delta = 0;
 279         length = *(str1++);
 280         length2 = *(str2++);
 281
 282         if (length == length2)
 283                 bestGuess = 0;
 284         else if (length < length2)
 285         {
 286                 bestGuess = -1;
 287                 delta = length2 - length;
 288         }
 289         else
 290         {
 291                 bestGuess = 1;
 292                 length = length2;
 293         }
 294
 295         compareTable = (UInt16*) gCompareTable;
 296
 297         while (length--)
 298         {
 299                 UInt8   aChar, bChar;
 300
 301                 aChar = *(str1++);
 302                 bChar = *(str2++);
 303
 304                 if (aChar != bChar)             //      If they don't match exacly, do case conversion
 305                 {
 306                         UInt16  aSortWord, bSortWord;
 307
 308                         aSortWord = compareTable[aChar];
 309                         bSortWord = compareTable[bChar];
 310
 311                         if (aSortWord > bSortWord)
 312                                 return 1;
 313
 314                         if (aSortWord < bSortWord)
 315                                 return -1;
 316                 }
 317
 318                 //      If characters match exactly, then go on to next character immediately without
 319                 //      doing any extra work.
 320         }
 321
 322         //      if you got to here, then return bestGuess
 323         return bestGuess;
 324 }
 325
 326
 327
 328 //
 329 //      FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
 330 //
 331 //          IF                          RESULT
 332 //      --------------------------
 333 //      str1 < str2             =>      -1
 334 //      str1 = str2             =>       0
 335 //      str1 > str2             =>      +1
 336 //
 337 //      The lower case table starts with 256 entries (one for each of the upper bytes
 338 //      of the original Unicode char).  If that entry is zero, then all characters with
 339 //      that upper byte are already case folded.  If the entry is non-zero, then it is
 340 //      the _index_ (not byte offset) of the start of the sub-table for the characters
 341 //      with that upper byte.  All ignorable characters are folded to the value zero.
 342 //
 343 //      In pseudocode:
 344 //
 345 //              Let c = source Unicode character
 346 //              Let table[] = lower case table
 347 //
 348 //              lower = table[highbyte(c)]
 349 //              if (lower == 0)
 350 //                      lower = c
 351 //              else
 352 //                      lower = table[lower+lowbyte(c)]
 353 //
 354 //              if (lower == 0)
 355 //                      ignore this character
 356 //
 357 //      To handle ignorable characters, we now need a loop to find the next valid character.
 358 //      Also, we can't pre-compute the number of characters to compare; the string length might
 359 //      be larger than the number of non-ignorable characters.  Further, we must be able to handle
 360 //      ignorable characters at any point in the string, including as the first or last characters.
 361 //      We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
 362 //      Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
 363 //      the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
 364 //      an invalid Unicode character).
 365 //
 366 //      Pseudocode:
 367 //
 368 //              while (1) {
 369 //                      c1 = GetNextValidChar(str1)                     //      returns zero if at end of string
 370 //                      c2 = GetNextValidChar(str2)
 371 //
 372 //                      if (c1 != c2) break                                     //      found a difference
 373 //
 374 //                      if (c1 == 0)                                            //      reached end of string on both strings at once?
 375 //                              return 0;                                               //      yes, so strings are equal
 376 //              }
 377 //
 378 //              // When we get here, c1 != c2.  So, we just need to determine which one is less.
 379 //              if (c1 < c2)
 380 //                      return -1;
 381 //              else
 382 //                      return 1;
 383 //
 384
 385 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
 386                                                         register ConstUniCharArrayPtr str2, register ItemCount length2)
 387 {
 388         register UInt16         c1,c2;
 389         register UInt16         temp;
 390         register UInt16*        lowerCaseTable;
 391
 392         lowerCaseTable = (UInt16*) gLowerCaseTable;
 393
 394         while (1) {
 395                 /* Set default values for c1, c2 in case there are no more valid chars */
 396                 c1 = 0;
 397                 c2 = 0;
 398
 399                 /* Find next non-ignorable char from str1, or zero if no more */
 400                 while (length1 && c1 == 0) {
 401                         c1 = *(str1++);
 402                         --length1;
 403                         /* check for basic latin first */
 404                         if (c1 < 0x0100) {
 405                                 c1 = gLatinCaseFold[c1];
 406                                 break;
 407                         }
 408                         /* case fold if neccessary */
 409                         if ((temp = lowerCaseTable[c1>>8]) != 0)
 410                                 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
 411                 }
 412
 413
 414                 /* Find next non-ignorable char from str2, or zero if no more */
 415                 while (length2 && c2 == 0) {
 416                         c2 = *(str2++);
 417                         --length2;
 418                         /* check for basic latin first */
 419                         if (c2 < 0x0100) {
 420                                 c2 = gLatinCaseFold[c2];
 421                                 break;
 422                         }
 423                         /* case fold if neccessary */
 424                         if ((temp = lowerCaseTable[c2>>8]) != 0)
 425                                 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
 426                 }
 427
 428                 if (c1 != c2)           //      found a difference, so stop looping
 429                         break;
 430
 431                 if (c1 == 0)            //      did we reach the end of both strings at the same time?
 432                         return 0;               //      yes, so strings are equal
 433         }
 434
 435         if (c1 < c2)
 436                 return -1;
 437         else
 438                 return 1;
 439 }
 440
 441
 442 OSErr
 443 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
 444                                          ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
 445 {
 446         ByteCount subMaxLen;
 447         size_t utf8len;
 448         char fileIDStr[15];
 449         char extStr[15];
 450
 451         GetFileIDString(cnid, fileIDStr);
 452         GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
 453
 454         /* remove extension chars from source */
 455         srcLen -= strlen(extStr) * sizeof(UniChar);
 456         subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
 457
 458         (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
 459
 460         strcat(dstStr, fileIDStr);
 461         strcat(dstStr, extStr);
 462         *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
 463
 464         return noErr;
 465 }
 466