bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23         File:           UnicodeWrappers.c
  24
  25         Contains:       Wrapper routines for Unicode conversion and comparison.
  26
  27 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/utfconv.h>

#include "../../hfs_macos_defs.h"
#include "UCStringCompareData.h"

#include "../headers/FileMgrInternal.h"
#include "../headers/HFSUnicodeWrappers.h"
  36
  37 enum {
  38         kMinFileExtensionChars = 1,     /* does not include dot */
  39         kMaxFileExtensionChars = 5      /* does not include dot */
  40 };
  41
  42
  43 #define EXTENSIONCHAR(c)        (((c) >= 0x61 && (c) <= 0x7A) || \
  44                                  ((c) >= 0x41 && (c) <= 0x5A) || \
  45                                  ((c) >= 0x30 && (c) <= 0x39))
  46
  47
  48 #define IsHexDigit(c)           (((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
  49                                  ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
  50
  51
  52 static void     GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
  53
  54 static void     GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );
  55
  56 static UInt32   HexStringToInteger( UInt32 length, const UInt8 *hexStr );
  57
  58
  59
  60 /*
  61  * Convert file ID into a hexidecimal string with no leading zeros
  62  */
  63 static void
  64 GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
  65 {
  66         SInt32  i, b;
  67         UInt8   *translate = (UInt8 *) "0123456789ABCDEF";
  68         UInt8   c;
  69
  70         fileIDStr[0] = '#';
  71
  72         for ( i = 0, b = 28; b >= 0; b -= 4 ) {
  73                 c = *(translate + ((fileID >> b) & 0x0000000F));
  74
  75                 /* if its not a leading zero add it to our string */
  76                 if ( (c != (UInt8) '0') || (i > 1) || (b == 0) )
  77                         fileIDStr[++i] = c;
  78         }
  79
  80         fileIDStr[++i] = '\0';
  81 }
  82
  83
  84 /*
  85  * Get filename extension (if any) as a C string
  86  */
  87 static void
  88 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
  89 {
  90         UInt32  i;
  91         UniChar c;
  92         UInt16  extChars;       /* number of extension chars (excluding dot) */
  93         UInt16  maxExtChars;
  94         Boolean foundExtension;
  95
  96         extStr[0] = '\0';       /* assume there's no extension */
  97
  98         if ( length < 3 )
  99                 return;         /* "x.y" is smallest possible extension */
 100
 101         if ( length < (kMaxFileExtensionChars + 2) )
 102                 maxExtChars = length - 2;       /* save room for prefix + dot */
 103         else
 104                 maxExtChars = kMaxFileExtensionChars;
 105
 106         i = length;
 107         extChars = 0;
 108         foundExtension = false;
 109
 110         while ( extChars <= maxExtChars ) {
 111                 c = unicodeStr[--i];
 112
 113                 /* look for leading dot */
 114                 if ( c == (UniChar) '.' ) {
 115                         if ( extChars > 0 )     /* cannot end with a dot */
 116                                 foundExtension = true;
 117                         break;
 118                 }
 119
 120                 if ( EXTENSIONCHAR(c) )
 121                         ++extChars;
 122                 else
 123                         break;
 124         }
 125
 126         /* if we found one then copy it */
 127         if ( foundExtension ) {
 128                 UInt8 *extStrPtr = extStr;
 129                 const UniChar *unicodeStrPtr = &unicodeStr[i];
 130
 131                 for ( i = 0; i <= extChars; ++i )
 132                         *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
 133                 extStr[extChars + 1] = '\0';    /* terminate extension + dot */
 134         }
 135 }
 136
 137
 138
 139 /*
 140  * Count filename extension characters (if any)
 141  */
 142 static UInt32
 143 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
 144 {
 145         UInt32  i;
 146         UniChar c;
 147         UInt32  extChars;       /* number of extension chars (excluding dot) */
 148         UInt16  maxExtChars;
 149         Boolean foundExtension;
 150
 151         if (length == kUndefinedStrLen)
 152                 length = strlen(filename);
 153
 154         if ( length < 3 )
 155                 return 0;       /* "x.y" is smallest possible extension */
 156
 157         if ( length < (kMaxFileExtensionChars + 2) )
 158                 maxExtChars = length - 2;       /* save room for prefix + dot */
 159         else
 160                 maxExtChars = kMaxFileExtensionChars;
 161
 162         extChars = 0;           /* assume there's no extension */
 163         i = length - 1;         /* index to last ascii character */
 164         foundExtension = false;
 165
 166         while ( extChars <= maxExtChars ) {
 167                 c = filename[i--];
 168
 169                 /* look for leading dot */
 170                 if ( c == (UInt8) '.' ) {
 171                         if ( extChars > 0 )     /* cannot end with a dot */
 172                                 return (extChars);
 173
 174                         break;
 175                 }
 176
 177                 if ( EXTENSIONCHAR(c) )
 178                         ++extChars;
 179                 else
 180                         break;
 181         }
 182
 183         return 0;
 184 }
 185
 186
 187 /*
 188  * extract the file id from a mangled name
 189  */
 190 HFSCatalogNodeID
 191 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
 192 {
 193         short   extChars;
 194         short   i;
 195         UInt8   c;
 196
 197         *prefixLength = 0;
 198
 199         if ( filename == NULL )
 200                 return 0;
 201
 202         if (length == kUndefinedStrLen)
 203                 length = strlen(filename);
 204
 205         if ( length < 28 )
 206                 return 0;       /* too small to have been mangled */
 207
 208         /* big enough for a file ID (#10) and an extension (.x) ? */
 209         if ( length > 5 )
 210                 extChars = CountFilenameExtensionChars(filename, length);
 211         else
 212                 extChars = 0;
 213
 214         /* skip over dot plus extension characters */
 215         if ( extChars > 0 )
 216                 length -= (extChars + 1);
 217
 218         /* scan for file id digits */
 219         for ( i = length - 1; i >= 0; --i) {
 220                 c = filename[i];
 221
 222                 /* look for file ID marker */
 223                 if ( c == '#' ) {
 224                         if ( (length - i) < 3 )
 225                                 break;  /* too small to be a file ID */
 226
 227                         *prefixLength = i;
 228                         return HexStringToInteger(length - i - 1, &filename[i+1]);
 229                 }
 230
 231                 if ( !IsHexDigit(c) )
 232                         break;  /* file ID string must have hex digits */
 233         }
 234
 235         return 0;
 236 }
 237
 238
 239
 240 static UInt32
 241 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
 242 {
 243         UInt32          value;
 244         short           i;
 245         UInt8           c;
 246         const UInt8     *p;
 247
 248         value = 0;
 249         p = hexStr;
 250
 251         for ( i = 0; i < length; ++i ) {
 252                 c = *p++;
 253
 254                 if (c >= '0' && c <= '9') {
 255                         value = value << 4;
 256                         value += (UInt32) c - (UInt32) '0';
 257                 } else if (c >= 'A' && c <= 'F') {
 258                         value = value << 4;
 259                         value += 10 + ((unsigned int) c - (unsigned int) 'A');
 260                 } else {
 261                         return 0;       /* bad character */
 262                 }
 263         }
 264
 265         return value;
 266 }
 267
 268
 269 /*
 270  * Routine:     FastRelString
 271  *
 272  * Output:      returns -1 if str1 < str2
 273  *              returns  1 if str1 > str2
 274  *              return   0 if equal
 275  *
 276  */
 277 extern unsigned short gCompareTable[];
 278
 279 SInt32  FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
 280 {
 281         UInt16*                 compareTable;
 282         SInt32                  bestGuess;
 283         UInt8                   length, length2;
 284         UInt8                   delta;
 285
 286         delta = 0;
 287         length = *(str1++);
 288         length2 = *(str2++);
 289
 290         if (length == length2)
 291                 bestGuess = 0;
 292         else if (length < length2)
 293         {
 294                 bestGuess = -1;
 295                 delta = length2 - length;
 296         }
 297         else
 298         {
 299                 bestGuess = 1;
 300                 length = length2;
 301         }
 302
 303         compareTable = (UInt16*) gCompareTable;
 304
 305         while (length--)
 306         {
 307                 UInt8   aChar, bChar;
 308
 309                 aChar = *(str1++);
 310                 bChar = *(str2++);
 311
 312                 if (aChar != bChar)             //      If they don't match exacly, do case conversion
 313                 {
 314                         UInt16  aSortWord, bSortWord;
 315
 316                         aSortWord = compareTable[aChar];
 317                         bSortWord = compareTable[bChar];
 318
 319                         if (aSortWord > bSortWord)
 320                                 return 1;
 321
 322                         if (aSortWord < bSortWord)
 323                                 return -1;
 324                 }
 325
 326                 //      If characters match exactly, then go on to next character immediately without
 327                 //      doing any extra work.
 328         }
 329
 330         //      if you got to here, then return bestGuess
 331         return bestGuess;
 332 }
 333
 334
 335
 336 //
 337 //      FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
 338 //
 339 //          IF                          RESULT
 340 //      --------------------------
 341 //      str1 < str2             =>      -1
 342 //      str1 = str2             =>       0
 343 //      str1 > str2             =>      +1
 344 //
 345 //      The lower case table starts with 256 entries (one for each of the upper bytes
 346 //      of the original Unicode char).  If that entry is zero, then all characters with
 347 //      that upper byte are already case folded.  If the entry is non-zero, then it is
 348 //      the _index_ (not byte offset) of the start of the sub-table for the characters
 349 //      with that upper byte.  All ignorable characters are folded to the value zero.
 350 //
 351 //      In pseudocode:
 352 //
 353 //              Let c = source Unicode character
 354 //              Let table[] = lower case table
 355 //
 356 //              lower = table[highbyte(c)]
 357 //              if (lower == 0)
 358 //                      lower = c
 359 //              else
 360 //                      lower = table[lower+lowbyte(c)]
 361 //
 362 //              if (lower == 0)
 363 //                      ignore this character
 364 //
 365 //      To handle ignorable characters, we now need a loop to find the next valid character.
 366 //      Also, we can't pre-compute the number of characters to compare; the string length might
 367 //      be larger than the number of non-ignorable characters.  Further, we must be able to handle
 368 //      ignorable characters at any point in the string, including as the first or last characters.
 369 //      We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
 370 //      Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
 371 //      the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
 372 //      an invalid Unicode character).
 373 //
 374 //      Pseudocode:
 375 //
 376 //              while (1) {
 377 //                      c1 = GetNextValidChar(str1)                     //      returns zero if at end of string
 378 //                      c2 = GetNextValidChar(str2)
 379 //
 380 //                      if (c1 != c2) break                                     //      found a difference
 381 //
 382 //                      if (c1 == 0)                                            //      reached end of string on both strings at once?
 383 //                              return 0;                                               //      yes, so strings are equal
 384 //              }
 385 //
 386 //              // When we get here, c1 != c2.  So, we just need to determine which one is less.
 387 //              if (c1 < c2)
 388 //                      return -1;
 389 //              else
 390 //                      return 1;
 391 //
 392
 393 extern UInt16 gLowerCaseTable[];
 394 extern UInt16 gLatinCaseFold[];
 395
 396 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
 397                                                         register ConstUniCharArrayPtr str2, register ItemCount length2)
 398 {
 399         register UInt16         c1,c2;
 400         register UInt16         temp;
 401         register UInt16*        lowerCaseTable;
 402
 403         lowerCaseTable = (UInt16*) gLowerCaseTable;
 404
 405         while (1) {
 406                 /* Set default values for c1, c2 in case there are no more valid chars */
 407                 c1 = 0;
 408                 c2 = 0;
 409
 410                 /* Find next non-ignorable char from str1, or zero if no more */
 411                 while (length1 && c1 == 0) {
 412                         c1 = *(str1++);
 413                         --length1;
 414                         /* check for basic latin first */
 415                         if (c1 < 0x0100) {
 416                                 c1 = gLatinCaseFold[c1];
 417                                 break;
 418                         }
 419                         /* case fold if neccessary */
 420                         if ((temp = lowerCaseTable[c1>>8]) != 0)
 421                                 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
 422                 }
 423
 424
 425                 /* Find next non-ignorable char from str2, or zero if no more */
 426                 while (length2 && c2 == 0) {
 427                         c2 = *(str2++);
 428                         --length2;
 429                         /* check for basic latin first */
 430                         if (c2 < 0x0100) {
 431                                 c2 = gLatinCaseFold[c2];
 432                                 break;
 433                         }
 434                         /* case fold if neccessary */
 435                         if ((temp = lowerCaseTable[c2>>8]) != 0)
 436                                 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
 437                 }
 438
 439                 if (c1 != c2)           //      found a difference, so stop looping
 440                         break;
 441
 442                 if (c1 == 0)            //      did we reach the end of both strings at the same time?
 443                         return 0;               //      yes, so strings are equal
 444         }
 445
 446         if (c1 < c2)
 447                 return -1;
 448         else
 449                 return 1;
 450 }
 451
 452
 453 OSErr
 454 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
 455                                          ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
 456 {
 457         ByteCount subMaxLen;
 458         size_t utf8len;
 459         char fileIDStr[15];
 460         char extStr[15];
 461
 462         GetFileIDString(cnid, fileIDStr);
 463         GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
 464
 465         /* remove extension chars from source */
 466         srcLen -= strlen(extStr) * sizeof(UniChar);
 467         subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
 468
 469         (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
 470
 471         strcat(dstStr, fileIDStr);
 472         strcat(dstStr, extStr);
 473         *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
 474
 475         return noErr;
 476 }
 477