bsd/hfs/hfscommon/Unicode/UnicodeWrappers.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /*
  26         File:           UnicodeWrappers.c
  27
  28         Contains:       Wrapper routines for Unicode conversion and comparison.
  29
  30 */
  31 #include <sys/param.h>
  32 #include <sys/utfconv.h>
  33
  34 #include "../../hfs_macos_defs.h"
  35 #include "UCStringCompareData.h"
  36
  37 #include "../headers/FileMgrInternal.h"
  38 #include "../headers/HFSUnicodeWrappers.h"
  39
  40 enum {
  41         kMinFileExtensionChars = 1,     /* does not include dot */
  42         kMaxFileExtensionChars = 5      /* does not include dot */
  43 };
  44
  45
  46 #define EXTENSIONCHAR(c)        (((c) >= 0x61 && (c) <= 0x7A) || \
  47                                  ((c) >= 0x41 && (c) <= 0x5A) || \
  48                                  ((c) >= 0x30 && (c) <= 0x39))
  49
  50
  51 #define IsHexDigit(c)           (((c) >= (UInt8) '0' && (c) <= (UInt8) '9') || \
  52                                  ((c) >= (UInt8) 'A' && (c) <= (UInt8) 'F'))
  53
  54
  55 static void     GetFilenameExtension( ItemCount length, ConstUniCharArrayPtr unicodeStr, char* extStr );
  56
  57 static void     GetFileIDString( HFSCatalogNodeID fileID, char* fileIDStr );
  58
  59 static UInt32   HexStringToInteger( UInt32 length, const UInt8 *hexStr );
  60
  61
  62
  63 /*
  64  * Convert file ID into a hexidecimal string with no leading zeros
  65  */
  66 static void
  67 GetFileIDString( HFSCatalogNodeID fileID, char * fileIDStr )
  68 {
  69         SInt32  i, b;
  70         UInt8   *translate = (UInt8 *) "0123456789ABCDEF";
  71         UInt8   c;
  72
  73         fileIDStr[0] = '#';
  74
  75         for ( i = 0, b = 28; b >= 0; b -= 4 ) {
  76                 c = *(translate + ((fileID >> b) & 0x0000000F));
  77
  78                 /* if its not a leading zero add it to our string */
  79                 if ( (c != (UInt8) '0') || (i > 1) || (b == 0) )
  80                         fileIDStr[++i] = c;
  81         }
  82
  83         fileIDStr[++i] = '\0';
  84 }
  85
  86
  87 /*
  88  * Get filename extension (if any) as a C string
  89  */
  90 static void
  91 GetFilenameExtension(ItemCount length, ConstUniCharArrayPtr unicodeStr, char * extStr)
  92 {
  93         UInt32  i;
  94         UniChar c;
  95         UInt16  extChars;       /* number of extension chars (excluding dot) */
  96         UInt16  maxExtChars;
  97         Boolean foundExtension;
  98
  99         extStr[0] = '\0';       /* assume there's no extension */
 100
 101         if ( length < 3 )
 102                 return;         /* "x.y" is smallest possible extension */
 103
 104         if ( length < (kMaxFileExtensionChars + 2) )
 105                 maxExtChars = length - 2;       /* save room for prefix + dot */
 106         else
 107                 maxExtChars = kMaxFileExtensionChars;
 108
 109         i = length;
 110         extChars = 0;
 111         foundExtension = false;
 112
 113         while ( extChars <= maxExtChars ) {
 114                 c = unicodeStr[--i];
 115
 116                 /* look for leading dot */
 117                 if ( c == (UniChar) '.' ) {
 118                         if ( extChars > 0 )     /* cannot end with a dot */
 119                                 foundExtension = true;
 120                         break;
 121                 }
 122
 123                 if ( EXTENSIONCHAR(c) )
 124                         ++extChars;
 125                 else
 126                         break;
 127         }
 128
 129         /* if we found one then copy it */
 130         if ( foundExtension ) {
 131                 UInt8 *extStrPtr = extStr;
 132                 const UniChar *unicodeStrPtr = &unicodeStr[i];
 133
 134                 for ( i = 0; i <= extChars; ++i )
 135                         *(extStrPtr++) = (UInt8) *(unicodeStrPtr++);
 136                 extStr[extChars + 1] = '\0';    /* terminate extension + dot */
 137         }
 138 }
 139
 140
 141
 142 /*
 143  * Count filename extension characters (if any)
 144  */
 145 static UInt32
 146 CountFilenameExtensionChars( const unsigned char * filename, UInt32 length )
 147 {
 148         UInt32  i;
 149         UniChar c;
 150         UInt32  extChars;       /* number of extension chars (excluding dot) */
 151         UInt16  maxExtChars;
 152         Boolean foundExtension;
 153
 154         if ( length < 3 )
 155                 return 0;       /* "x.y" is smallest possible extension */
 156
 157         if ( length < (kMaxFileExtensionChars + 2) )
 158                 maxExtChars = length - 2;       /* save room for prefix + dot */
 159         else
 160                 maxExtChars = kMaxFileExtensionChars;
 161
 162         extChars = 0;           /* assume there's no extension */
 163         i = length - 1;         /* index to last ascii character */
 164         foundExtension = false;
 165
 166         while ( extChars <= maxExtChars ) {
 167                 c = filename[i--];
 168
 169                 /* look for leading dot */
 170                 if ( c == (UInt8) '.' ) {
 171                         if ( extChars > 0 )     /* cannot end with a dot */
 172                                 return (extChars);
 173
 174                         break;
 175                 }
 176
 177                 if ( EXTENSIONCHAR(c) )
 178                         ++extChars;
 179                 else
 180                         break;
 181         }
 182
 183         return 0;
 184 }
 185
 186
 187 /*
 188  * extract the file id from a mangled name
 189  */
 190 HFSCatalogNodeID
 191 GetEmbeddedFileID(const unsigned char * filename, UInt32 length, UInt32 *prefixLength)
 192 {
 193         short   extChars;
 194         short   i;
 195         UInt8   c;
 196
 197         *prefixLength = 0;
 198
 199         if ( filename == NULL )
 200                 return 0;
 201
 202         if ( length < 28 )
 203                 return 0;       /* too small to have been mangled */
 204
 205         /* big enough for a file ID (#10) and an extension (.x) ? */
 206         if ( length > 5 )
 207                 extChars = CountFilenameExtensionChars(filename, length);
 208         else
 209                 extChars = 0;
 210
 211         /* skip over dot plus extension characters */
 212         if ( extChars > 0 )
 213                 length -= (extChars + 1);
 214
 215         /* scan for file id digits */
 216         for ( i = length - 1; i >= 0; --i) {
 217                 c = filename[i];
 218
 219                 /* look for file ID marker */
 220                 if ( c == '#' ) {
 221                         if ( (length - i) < 3 )
 222                                 break;  /* too small to be a file ID */
 223
 224                         *prefixLength = i;
 225                         return HexStringToInteger(length - i - 1, &filename[i+1]);
 226                 }
 227
 228                 if ( !IsHexDigit(c) )
 229                         break;  /* file ID string must have hex digits */
 230         }
 231
 232         return 0;
 233 }
 234
 235
 236
 237 static UInt32
 238 HexStringToInteger(UInt32 length, const UInt8 *hexStr)
 239 {
 240         UInt32          value;
 241         short           i;
 242         UInt8           c;
 243         const UInt8     *p;
 244
 245         value = 0;
 246         p = hexStr;
 247
 248         for ( i = 0; i < length; ++i ) {
 249                 c = *p++;
 250
 251                 if (c >= '0' && c <= '9') {
 252                         value = value << 4;
 253                         value += (UInt32) c - (UInt32) '0';
 254                 } else if (c >= 'A' && c <= 'F') {
 255                         value = value << 4;
 256                         value += 10 + ((unsigned int) c - (unsigned int) 'A');
 257                 } else {
 258                         return 0;       /* bad character */
 259                 }
 260         }
 261
 262         return value;
 263 }
 264
 265
 266 /*
 267  * Routine:     FastRelString
 268  *
 269  * Output:      returns -1 if str1 < str2
 270  *              returns  1 if str1 > str2
 271  *              return   0 if equal
 272  *
 273  */
 274 SInt32  FastRelString( ConstStr255Param str1, ConstStr255Param str2 )
 275 {
 276         UInt16*                 compareTable;
 277         SInt32                  bestGuess;
 278         UInt8                   length, length2;
 279         UInt8                   delta;
 280
 281         delta = 0;
 282         length = *(str1++);
 283         length2 = *(str2++);
 284
 285         if (length == length2)
 286                 bestGuess = 0;
 287         else if (length < length2)
 288         {
 289                 bestGuess = -1;
 290                 delta = length2 - length;
 291         }
 292         else
 293         {
 294                 bestGuess = 1;
 295                 length = length2;
 296         }
 297
 298         compareTable = (UInt16*) gCompareTable;
 299
 300         while (length--)
 301         {
 302                 UInt8   aChar, bChar;
 303
 304                 aChar = *(str1++);
 305                 bChar = *(str2++);
 306
 307                 if (aChar != bChar)             //      If they don't match exacly, do case conversion
 308                 {
 309                         UInt16  aSortWord, bSortWord;
 310
 311                         aSortWord = compareTable[aChar];
 312                         bSortWord = compareTable[bChar];
 313
 314                         if (aSortWord > bSortWord)
 315                                 return 1;
 316
 317                         if (aSortWord < bSortWord)
 318                                 return -1;
 319                 }
 320
 321                 //      If characters match exactly, then go on to next character immediately without
 322                 //      doing any extra work.
 323         }
 324
 325         //      if you got to here, then return bestGuess
 326         return bestGuess;
 327 }
 328
 329
 330
 331 //
 332 //      FastUnicodeCompare - Compare two Unicode strings; produce a relative ordering
 333 //
 334 //          IF                          RESULT
 335 //      --------------------------
 336 //      str1 < str2             =>      -1
 337 //      str1 = str2             =>       0
 338 //      str1 > str2             =>      +1
 339 //
 340 //      The lower case table starts with 256 entries (one for each of the upper bytes
 341 //      of the original Unicode char).  If that entry is zero, then all characters with
 342 //      that upper byte are already case folded.  If the entry is non-zero, then it is
 343 //      the _index_ (not byte offset) of the start of the sub-table for the characters
 344 //      with that upper byte.  All ignorable characters are folded to the value zero.
 345 //
 346 //      In pseudocode:
 347 //
 348 //              Let c = source Unicode character
 349 //              Let table[] = lower case table
 350 //
 351 //              lower = table[highbyte(c)]
 352 //              if (lower == 0)
 353 //                      lower = c
 354 //              else
 355 //                      lower = table[lower+lowbyte(c)]
 356 //
 357 //              if (lower == 0)
 358 //                      ignore this character
 359 //
 360 //      To handle ignorable characters, we now need a loop to find the next valid character.
 361 //      Also, we can't pre-compute the number of characters to compare; the string length might
 362 //      be larger than the number of non-ignorable characters.  Further, we must be able to handle
 363 //      ignorable characters at any point in the string, including as the first or last characters.
 364 //      We use a zero value as a sentinel to detect both end-of-string and ignorable characters.
 365 //      Since the File Manager doesn't prevent the NUL character (value zero) as part of a filename,
 366 //      the case mapping table is assumed to map u+0000 to some non-zero value (like 0xFFFF, which is
 367 //      an invalid Unicode character).
 368 //
 369 //      Pseudocode:
 370 //
 371 //              while (1) {
 372 //                      c1 = GetNextValidChar(str1)                     //      returns zero if at end of string
 373 //                      c2 = GetNextValidChar(str2)
 374 //
 375 //                      if (c1 != c2) break                                     //      found a difference
 376 //
 377 //                      if (c1 == 0)                                            //      reached end of string on both strings at once?
 378 //                              return 0;                                               //      yes, so strings are equal
 379 //              }
 380 //
 381 //              // When we get here, c1 != c2.  So, we just need to determine which one is less.
 382 //              if (c1 < c2)
 383 //                      return -1;
 384 //              else
 385 //                      return 1;
 386 //
 387
 388 SInt32 FastUnicodeCompare ( register ConstUniCharArrayPtr str1, register ItemCount length1,
 389                                                         register ConstUniCharArrayPtr str2, register ItemCount length2)
 390 {
 391         register UInt16         c1,c2;
 392         register UInt16         temp;
 393         register UInt16*        lowerCaseTable;
 394
 395         lowerCaseTable = (UInt16*) gLowerCaseTable;
 396
 397         while (1) {
 398                 /* Set default values for c1, c2 in case there are no more valid chars */
 399                 c1 = 0;
 400                 c2 = 0;
 401
 402                 /* Find next non-ignorable char from str1, or zero if no more */
 403                 while (length1 && c1 == 0) {
 404                         c1 = *(str1++);
 405                         --length1;
 406                         /* check for basic latin first */
 407                         if (c1 < 0x0100) {
 408                                 c1 = gLatinCaseFold[c1];
 409                                 break;
 410                         }
 411                         /* case fold if neccessary */
 412                         if ((temp = lowerCaseTable[c1>>8]) != 0)
 413                                 c1 = lowerCaseTable[temp + (c1 & 0x00FF)];
 414                 }
 415
 416
 417                 /* Find next non-ignorable char from str2, or zero if no more */
 418                 while (length2 && c2 == 0) {
 419                         c2 = *(str2++);
 420                         --length2;
 421                         /* check for basic latin first */
 422                         if (c2 < 0x0100) {
 423                                 c2 = gLatinCaseFold[c2];
 424                                 break;
 425                         }
 426                         /* case fold if neccessary */
 427                         if ((temp = lowerCaseTable[c2>>8]) != 0)
 428                                 c2 = lowerCaseTable[temp + (c2 & 0x00FF)];
 429                 }
 430
 431                 if (c1 != c2)           //      found a difference, so stop looping
 432                         break;
 433
 434                 if (c1 == 0)            //      did we reach the end of both strings at the same time?
 435                         return 0;               //      yes, so strings are equal
 436         }
 437
 438         if (c1 < c2)
 439                 return -1;
 440         else
 441                 return 1;
 442 }
 443
 444
 445 OSErr
 446 ConvertUnicodeToUTF8Mangled(ByteCount srcLen, ConstUniCharArrayPtr srcStr, ByteCount maxDstLen,
 447                                          ByteCount *actualDstLen, unsigned char* dstStr, HFSCatalogNodeID cnid)
 448 {
 449         ByteCount subMaxLen;
 450         size_t utf8len;
 451         char fileIDStr[15];
 452         char extStr[15];
 453
 454         GetFileIDString(cnid, fileIDStr);
 455         GetFilenameExtension(srcLen/sizeof(UniChar), srcStr, extStr);
 456
 457         /* remove extension chars from source */
 458         srcLen -= strlen(extStr) * sizeof(UniChar);
 459         subMaxLen = maxDstLen - (strlen(extStr) + strlen(fileIDStr));
 460
 461         (void) utf8_encodestr(srcStr, srcLen, dstStr, &utf8len, subMaxLen, ':', 0);
 462
 463         strcat(dstStr, fileIDStr);
 464         strcat(dstStr, extStr);
 465         *actualDstLen = utf8len + (strlen(extStr) + strlen(fileIDStr));
 466
 467         return noErr;
 468 }
 469