apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include <apti18n.h>
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before display some information for enduser  */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf;
  47   size_t insize, bufsize;
  48   dest->clear();
  49
  50   cd = iconv_open(codeset, "UTF-8");
  51   if (cd == (iconv_t)(-1)) {
  52      // Something went wrong
  53      if (errno == EINVAL)
  54         _error->Error("conversion from 'UTF-8' to '%s' not available",
  55                codeset);
  56      else
  57         perror("iconv_open");
  58
  59      return false;
  60   }
  61
  62   insize = bufsize = orig.size();
  63   inbuf = orig.data();
  64   inptr = (char *)inbuf;
  65   outbuf = new char[bufsize];
  66   size_t lastError = -1;
  67
  68   while (insize != 0)
  69   {
  70      char *outptr = outbuf;
  71      size_t outsize = bufsize;
  72      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  73      dest->append(outbuf, outptr - outbuf);
  74      if (err == (size_t)(-1))
  75      {
  76         switch (errno)
  77         {
  78         case EILSEQ:
  79            insize--;
  80            inptr++;
  81            // replace a series of unknown multibytes with a single "?"
  82            if (lastError != insize) {
  83               lastError = insize - 1;
  84               dest->append("?");
  85            }
  86            break;
  87         case EINVAL:
  88            insize = 0;
  89            break;
  90         case E2BIG:
  91            if (outptr == outbuf)
  92            {
  93               bufsize *= 2;
  94               delete[] outbuf;
  95               outbuf = new char[bufsize];
  96            }
  97            break;
  98         }
  99      }
 100   }
 101
 102   delete[] outbuf;
 103
 104   iconv_close(cd);
 105
 106   return true;
 107 }
 108                                                                         /*}}}*/
 109 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 110 // ---------------------------------------------------------------------
 111 /* This is handy to use when parsing a file. It also removes \n's left
 112    over from fgets and company */
 113 char *_strstrip(char *String)
 114 {
 115    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 116
 117    if (*String == 0)
 118       return String;
 119
 120    char *End = String + strlen(String) - 1;
 121    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 122                                *End == '\r'); End--);
 123    End++;
 124    *End = 0;
 125    return String;
 126 };
 127                                                                         /*}}}*/
 128 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 129 // ---------------------------------------------------------------------
 130 /* */
 131 char *_strtabexpand(char *String,size_t Len)
 132 {
 133    for (char *I = String; I != I + Len && *I != 0; I++)
 134    {
 135       if (*I != '\t')
 136          continue;
 137       if (I + 8 > String + Len)
 138       {
 139          *I = 0;
 140          return String;
 141       }
 142
 143       /* Assume the start of the string is 0 and find the next 8 char
 144          division */
 145       int Len;
 146       if (String == I)
 147          Len = 1;
 148       else
 149          Len = 8 - ((String - I) % 8);
 150       Len -= 2;
 151       if (Len <= 0)
 152       {
 153          *I = ' ';
 154          continue;
 155       }
 156
 157       memmove(I + Len,I + 1,strlen(I) + 1);
 158       for (char *J = I; J + Len != I; *I = ' ', I++);
 159    }
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 164 // ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It also
   understands [] brackets. */
 169 bool ParseQuoteWord(const char *&String,string &Res)
 170 {
 171    // Skip leading whitespace
 172    const char *C = String;
 173    for (;*C != 0 && *C == ' '; C++);
 174    if (*C == 0)
 175       return false;
 176
 177    // Jump to the next word
 178    for (;*C != 0 && isspace(*C) == 0; C++)
 179    {
 180       if (*C == '"')
 181       {
 182          for (C++; *C != 0 && *C != '"'; C++);
 183          if (*C == 0)
 184             return false;
 185       }
 186       if (*C == '[')
 187       {
 188          for (C++; *C != 0 && *C != ']'; C++);
 189          if (*C == 0)
 190             return false;
 191       }
 192    }
 193
 194    // Now de-quote characters
 195    char Buffer[1024];
 196    char Tmp[3];
 197    const char *Start = String;
 198    char *I;
 199    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 200    {
 201       if (*Start == '%' && Start + 2 < C &&
 202           isxdigit(Start[1]) && isxdigit(Start[2]))
 203       {
 204          Tmp[0] = Start[1];
 205          Tmp[1] = Start[2];
 206          Tmp[2] = 0;
 207          *I = (char)strtol(Tmp,0,16);
 208          Start += 3;
 209          continue;
 210       }
 211       if (*Start != '"')
 212          *I = *Start;
 213       else
 214          I--;
 215       Start++;
 216    }
 217    *I = 0;
 218    Res = Buffer;
 219
 220    // Skip ending white space
 221    for (;*C != 0 && isspace(*C) != 0; C++);
 222    String = C;
 223    return true;
 224 }
 225                                                                         /*}}}*/
 226 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 227 // ---------------------------------------------------------------------
 228 /* This expects a series of space separated strings enclosed in ""'s.
 229    It concatenates the ""'s into a single string. */
 230 bool ParseCWord(const char *&String,string &Res)
 231 {
 232    // Skip leading whitespace
 233    const char *C = String;
 234    for (;*C != 0 && *C == ' '; C++);
 235    if (*C == 0)
 236       return false;
 237
 238    char Buffer[1024];
 239    char *Buf = Buffer;
 240    if (strlen(String) >= sizeof(Buffer))
 241        return false;
 242
 243    for (; *C != 0; C++)
 244    {
 245       if (*C == '"')
 246       {
 247          for (C++; *C != 0 && *C != '"'; C++)
 248             *Buf++ = *C;
 249
 250          if (*C == 0)
 251             return false;
 252
 253          continue;
 254       }
 255
 256       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 257          continue;
 258       if (isspace(*C) == 0)
 259          return false;
 260       *Buf++ = ' ';
 261    }
 262    *Buf = 0;
 263    Res = Buffer;
 264    String = C;
 265    return true;
 266 }
 267                                                                         /*}}}*/
 268 // QuoteString - Convert a string into quoted from                      /*{{{*/
 269 // ---------------------------------------------------------------------
 270 /* */
 271 string QuoteString(const string &Str, const char *Bad)
 272 {
 273    string Res;
 274    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 275    {
 276       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 277           *I == 0x25 || // percent '%' char
 278           *I <= 0x20 || *I >= 0x7F) // control chars
 279       {
 280          char Buf[10];
 281          sprintf(Buf,"%%%02x",(int)*I);
 282          Res += Buf;
 283       }
 284       else
 285          Res += *I;
 286    }
 287    return Res;
 288 }
 289                                                                         /*}}}*/
 290 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 291 // ---------------------------------------------------------------------
 292 /* This undoes QuoteString */
 293 string DeQuoteString(const string &Str)
 294 {
 295    return DeQuoteString(Str.begin(),Str.end());
 296 }
 297 string DeQuoteString(string::const_iterator const &begin,
 298                         string::const_iterator const &end)
 299 {
 300    string Res;
 301    for (string::const_iterator I = begin; I != end; ++I)
 302    {
 303       if (*I == '%' && I + 2 < end &&
 304           isxdigit(I[1]) && isxdigit(I[2]))
 305       {
 306          char Tmp[3];
 307          Tmp[0] = I[1];
 308          Tmp[1] = I[2];
 309          Tmp[2] = 0;
 310          Res += (char)strtol(Tmp,0,16);
 311          I += 2;
 312          continue;
 313       }
 314       else
 315          Res += *I;
 316    }
 317    return Res;
 318 }
 319
 320                                                                         /*}}}*/
 321 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 322 // ---------------------------------------------------------------------
 323 /* A max of 4 digits are shown before conversion to the next highest unit.
 324    The max length of the string will be 5 chars unless the size is > 10
 325    YottaBytes (E24) */
 326 string SizeToStr(double Size)
 327 {
 328    char S[300];
 329    double ASize;
 330    if (Size >= 0)
 331       ASize = Size;
 332    else
 333       ASize = -1*Size;
 334
 335    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 336       ExaBytes, ZettaBytes, YottaBytes */
 337    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 338    int I = 0;
 339    while (I <= 8)
 340    {
 341       if (ASize < 100 && I != 0)
 342       {
 343          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 344          break;
 345       }
 346
 347       if (ASize < 10000)
 348       {
 349          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 350          break;
 351       }
 352       ASize /= 1000.0;
 353       I++;
 354    }
 355
 356    return S;
 357 }
 358                                                                         /*}}}*/
 359 // TimeToStr - Convert the time into a string                           /*{{{*/
 360 // ---------------------------------------------------------------------
 361 /* Converts a number of seconds to a hms format */
 362 string TimeToStr(unsigned long Sec)
 363 {
 364    char S[300];
 365
 366    while (1)
 367    {
 368       if (Sec > 60*60*24)
 369       {
 370          //d means days, h means hours, min means minutes, s means seconds
 371          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 372          break;
 373       }
 374
 375       if (Sec > 60*60)
 376       {
 377          //h means hours, min means minutes, s means seconds
 378          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 379          break;
 380       }
 381
 382       if (Sec > 60)
 383       {
 384          //min means minutes, s means seconds
 385          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 386          break;
 387       }
 388
 389       //s means seconds
 390       sprintf(S,_("%lis"),Sec);
 391       break;
 392    }
 393
 394    return S;
 395 }
 396                                                                         /*}}}*/
 397 // SubstVar - Substitute a string for another string                    /*{{{*/
 398 // ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str. */
 400 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 401 {
 402    string::size_type Pos = 0;
 403    string::size_type OldPos = 0;
 404    string Temp;
 405
 406    while (OldPos < Str.length() &&
 407           (Pos = Str.find(Subst,OldPos)) != string::npos)
 408    {
 409       Temp += string(Str,OldPos,Pos) + Contents;
 410       OldPos = Pos + Subst.length();
 411    }
 412
 413    if (OldPos == 0)
 414       return Str;
 415
 416    return Temp + string(Str,OldPos);
 417 }
 418
 419 string SubstVar(string Str,const struct SubstVar *Vars)
 420 {
 421    for (; Vars->Subst != 0; Vars++)
 422       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 423    return Str;
 424 }
 425                                                                         /*}}}*/
 426 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 427 // ---------------------------------------------------------------------
 428 /* Returns a string with the supplied separator depth + 1 times in it */
 429 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 430 {
 431    std::string output = "";
 432    for(unsigned long d=Depth+1; d > 0; d--)
 433       output.append(Separator);
 434    return output;
 435 }
 436                                                                         /*}}}*/
 437 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 438 // ---------------------------------------------------------------------
 439 /* This converts a URI into a safe filename. It quotes all unsafe characters
 440    and converts / to _ and removes the scheme identifier. The resulting
 441    file name should be unique and never occur again for a different file */
 442 string URItoFileName(const string &URI)
 443 {
 444    // Nuke 'sensitive' items
 445    ::URI U(URI);
 446    U.User.clear();
 447    U.Password.clear();
 448    U.Access.clear();
 449
 450    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 451    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 452    replace(NewURI.begin(),NewURI.end(),'/','_');
 453    return NewURI;
 454 }
 455                                                                         /*}}}*/
 456 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 457 // ---------------------------------------------------------------------
 458 /* This routine performs a base64 transformation on a string. It was ripped
 459    from wget and then patched and bug fixed.
 460
 461    This spec can be found in rfc2045 */
 462 string Base64Encode(const string &S)
 463 {
 464    // Conversion table.
 465    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 466                           'I','J','K','L','M','N','O','P',
 467                           'Q','R','S','T','U','V','W','X',
 468                           'Y','Z','a','b','c','d','e','f',
 469                           'g','h','i','j','k','l','m','n',
 470                           'o','p','q','r','s','t','u','v',
 471                           'w','x','y','z','0','1','2','3',
 472                           '4','5','6','7','8','9','+','/'};
 473
 474    // Pre-allocate some space
 475    string Final;
 476    Final.reserve((4*S.length() + 2)/3 + 2);
 477
 478    /* Transform the 3x8 bits to 4x6 bits, as required by
 479       base64.  */
 480    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 481    {
 482       char Bits[3] = {0,0,0};
 483       Bits[0] = I[0];
 484       if (I + 1 < S.end())
 485          Bits[1] = I[1];
 486       if (I + 2 < S.end())
 487          Bits[2] = I[2];
 488
 489       Final += tbl[Bits[0] >> 2];
 490       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 491
 492       if (I + 1 >= S.end())
 493          break;
 494
 495       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 496
 497       if (I + 2 >= S.end())
 498          break;
 499
 500       Final += tbl[Bits[2] & 0x3f];
 501    }
 502
 503    /* Apply the padding elements, this tells how many bytes the remote
 504       end should discard */
 505    if (S.length() % 3 == 2)
 506       Final += '=';
 507    if (S.length() % 3 == 1)
 508       Final += "==";
 509
 510    return Final;
 511 }
 512                                                                         /*}}}*/
 513 // stringcmp - Arbitrary string compare                                 /*{{{*/
 514 // ---------------------------------------------------------------------
 515 /* This safely compares two non-null terminated strings of arbitrary
 516    length */
 517 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 518 {
 519    for (; A != AEnd && B != BEnd; A++, B++)
 520       if (*A != *B)
 521          break;
 522
 523    if (A == AEnd && B == BEnd)
 524       return 0;
 525    if (A == AEnd)
 526       return 1;
 527    if (B == BEnd)
 528       return -1;
 529    if (*A < *B)
 530       return -1;
 531    return 1;
 532 }
 533
 534 #if __GNUC__ >= 3
 535 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 536               const char *B,const char *BEnd)
 537 {
 538    for (; A != AEnd && B != BEnd; A++, B++)
 539       if (*A != *B)
 540          break;
 541
 542    if (A == AEnd && B == BEnd)
 543       return 0;
 544    if (A == AEnd)
 545       return 1;
 546    if (B == BEnd)
 547       return -1;
 548    if (*A < *B)
 549       return -1;
 550    return 1;
 551 }
 552 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 553               string::const_iterator B,string::const_iterator BEnd)
 554 {
 555    for (; A != AEnd && B != BEnd; A++, B++)
 556       if (*A != *B)
 557          break;
 558
 559    if (A == AEnd && B == BEnd)
 560       return 0;
 561    if (A == AEnd)
 562       return 1;
 563    if (B == BEnd)
 564       return -1;
 565    if (*A < *B)
 566       return -1;
 567    return 1;
 568 }
 569 #endif
 570                                                                         /*}}}*/
 571 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 572 // ---------------------------------------------------------------------
 573 /* */
 574 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 575 {
 576    for (; A != AEnd && B != BEnd; A++, B++)
 577       if (tolower_ascii(*A) != tolower_ascii(*B))
 578          break;
 579
 580    if (A == AEnd && B == BEnd)
 581       return 0;
 582    if (A == AEnd)
 583       return 1;
 584    if (B == BEnd)
 585       return -1;
 586    if (tolower_ascii(*A) < tolower_ascii(*B))
 587       return -1;
 588    return 1;
 589 }
 590 #if __GNUC__ >= 3
 591 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 592                   const char *B,const char *BEnd)
 593 {
 594    for (; A != AEnd && B != BEnd; A++, B++)
 595       if (tolower_ascii(*A) != tolower_ascii(*B))
 596          break;
 597
 598    if (A == AEnd && B == BEnd)
 599       return 0;
 600    if (A == AEnd)
 601       return 1;
 602    if (B == BEnd)
 603       return -1;
 604    if (tolower_ascii(*A) < tolower_ascii(*B))
 605       return -1;
 606    return 1;
 607 }
 608 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 609                   string::const_iterator B,string::const_iterator BEnd)
 610 {
 611    for (; A != AEnd && B != BEnd; A++, B++)
 612       if (tolower_ascii(*A) != tolower_ascii(*B))
 613          break;
 614
 615    if (A == AEnd && B == BEnd)
 616       return 0;
 617    if (A == AEnd)
 618       return 1;
 619    if (B == BEnd)
 620       return -1;
 621    if (tolower_ascii(*A) < tolower_ascii(*B))
 622       return -1;
 623    return 1;
 624 }
 625 #endif
 626                                                                         /*}}}*/
 627 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 628 // ---------------------------------------------------------------------
 629 /* The format is like those used in package files and the method
 630    communication system */
 631 string LookupTag(const string &Message,const char *Tag,const char *Default)
 632 {
 633    // Look for a matching tag.
 634    int Length = strlen(Tag);
 635    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 636    {
 637       // Found the tag
 638       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 639       {
 640          // Find the end of line and strip the leading/trailing spaces
 641          string::const_iterator J;
 642          I += Length + 1;
 643          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 644          for (J = I; *J != '\n' && J < Message.end(); ++J);
 645          for (; J > I && isspace(J[-1]) != 0; --J);
 646
 647          return string(I,J);
 648       }
 649
 650       for (; *I != '\n' && I < Message.end(); ++I);
 651    }
 652
 653    // Failed to find a match
 654    if (Default == 0)
 655       return string();
 656    return Default;
 657 }
 658                                                                         /*}}}*/
 659 // StringToBool - Converts a string into a boolean                      /*{{{*/
 660 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
 663 int StringToBool(const string &Text,int Default)
 664 {
 665    char *End;
 666    int Res = strtol(Text.c_str(),&End,0);
 667    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 668       return Res;
 669
 670    // Check for positives
 671    if (strcasecmp(Text.c_str(),"no") == 0 ||
 672        strcasecmp(Text.c_str(),"false") == 0 ||
 673        strcasecmp(Text.c_str(),"without") == 0 ||
 674        strcasecmp(Text.c_str(),"off") == 0 ||
 675        strcasecmp(Text.c_str(),"disable") == 0)
 676       return 0;
 677
 678    // Check for negatives
 679    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 680        strcasecmp(Text.c_str(),"true") == 0 ||
 681        strcasecmp(Text.c_str(),"with") == 0 ||
 682        strcasecmp(Text.c_str(),"on") == 0 ||
 683        strcasecmp(Text.c_str(),"enable") == 0)
 684       return 1;
 685
 686    return Default;
 687 }
 688                                                                         /*}}}*/
 689 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 690 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
 693 string TimeRFC1123(time_t Date)
 694 {
 695    struct tm Conv;
 696    if (gmtime_r(&Date, &Conv) == NULL)
 697       return "";
 698
 699    char Buf[300];
 700    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 701    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 702                           "Aug","Sep","Oct","Nov","Dec"};
 703
 704    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 705            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 706            Conv.tm_min,Conv.tm_sec);
 707    return Buf;
 708 }
 709                                                                         /*}}}*/
 710 // ReadMessages - Read messages from the FD                             /*{{{*/
 711 // ---------------------------------------------------------------------
 712 /* This pulls full messages from the input FD into the message buffer.
 713    It assumes that messages will not pause during transit so no
 714    fancy buffering is used.
 715
 716    In particular: this reads blocks from the input until it believes
 717    that it's run out of input text.  Each block is terminated by a
 718    double newline ('\n' followed by '\n').  As noted below, there is a
 719    bug in this code: it assumes that all the blocks have been read if
 720    it doesn't see additional text in the buffer after the last one is
 721    parsed, which will cause it to lose blocks if the last block
 722    coincides with the end of the buffer.
 723  */
 724 bool ReadMessages(int Fd, vector<string> &List)
 725 {
 726    char Buffer[64000];
 727    char *End = Buffer;
 728    // Represents any left-over from the previous iteration of the
 729    // parse loop.  (i.e., if a message is split across the end
 730    // of the buffer, it goes here)
 731    string PartialMessage;
 732
 733    while (1)
 734    {
 735       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 736       if (Res < 0 && errno == EINTR)
 737          continue;
 738
 739       // Process is dead, this is kind of bad..
 740       if (Res == 0)
 741          return false;
 742
 743       // No data
 744       if (Res < 0 && errno == EAGAIN)
 745          return true;
 746       if (Res < 0)
 747          return false;
 748
 749       End += Res;
 750
 751       // Look for the end of the message
 752       for (char *I = Buffer; I + 1 < End; I++)
 753       {
 754          if (I[0] != '\n' || I[1] != '\n')
 755             continue;
 756
 757          // Pull the message out
 758          string Message(Buffer,I-Buffer);
 759          PartialMessage += Message;
 760
 761          // Fix up the buffer
 762          for (; I < End && *I == '\n'; I++);
 763          End -= I-Buffer;
 764          memmove(Buffer,I,End-Buffer);
 765          I = Buffer;
 766
 767          List.push_back(PartialMessage);
 768          PartialMessage.clear();
 769       }
 770       if (End != Buffer)
 771         {
 772           // If there's text left in the buffer, store it
 773           // in PartialMessage and throw the rest of the buffer
 774           // away.  This allows us to handle messages that
 775           // are longer than the static buffer size.
 776           PartialMessage += string(Buffer, End);
 777           End = Buffer;
 778         }
 779       else
 780         {
 781           // BUG ALERT: if a message block happens to end at a
 782           // multiple of 64000 characters, this will cause it to
 783           // terminate early, leading to a badly formed block and
 784           // probably crashing the method.  However, this is the only
 785           // way we have to find the end of the message block.  I have
 786           // an idea of how to fix this, but it will require changes
 787           // to the protocol (essentially to mark the beginning and
 788           // end of the block).
 789           //
 790           //  -- dburrows 2008-04-02
 791           return true;
 792         }
 793
 794       if (WaitFd(Fd) == false)
 795          return false;
 796    }
 797 }
 798                                                                         /*}}}*/
 799 // MonthConv - Converts a month string into a number                    /*{{{*/
 800 // ---------------------------------------------------------------------
 801 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 802    Made it a bit more robust with a few tolower_ascii though. */
 803 static int MonthConv(char *Month)
 804 {
 805    switch (tolower_ascii(*Month))
 806    {
 807       case 'a':
 808       return tolower_ascii(Month[1]) == 'p'?3:7;
 809       case 'd':
 810       return 11;
 811       case 'f':
 812       return 1;
 813       case 'j':
 814       if (tolower_ascii(Month[1]) == 'a')
 815          return 0;
 816       return tolower_ascii(Month[2]) == 'n'?5:6;
 817       case 'm':
 818       return tolower_ascii(Month[2]) == 'r'?2:4;
 819       case 'n':
 820       return 10;
 821       case 'o':
 822       return 9;
 823       case 's':
 824       return 8;
 825
 826       // Pretend it is January..
 827       default:
 828       return 0;
 829    }
 830 }
 831                                                                         /*}}}*/
 832 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 833 // ---------------------------------------------------------------------
 834 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 835    than local timezone (mktime assumes the latter).
 836
 837    This function is a nonstandard GNU extension that is also present on
 838    the BSDs and maybe other systems. For others we follow the advice of
 839    the manpage of timegm and use his portable replacement. */
 840 #ifndef HAVE_TIMEGM
 841 static time_t timegm(struct tm *t)
 842 {
 843    char *tz = getenv("TZ");
 844    setenv("TZ", "", 1);
 845    tzset();
 846    time_t ret = mktime(t);
 847    if (tz)
 848       setenv("TZ", tz, 1);
 849    else
 850       unsetenv("TZ");
 851    tzset();
 852    return ret;
 853 }
 854 #endif
 855                                                                         /*}}}*/
// RFC1123StrToTime - Converts a HTTP1.1 full date string into a time_t /*{{{*/
// ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC2616 Section 3.3.1
   with one exception: all timezones (%Z) are accepted. The protocol
   says that it MUST be GMT, but GMT is equal to UTC, which we will
   encounter from time to time (e.g. in Release files), so we accept
   everything here and just assume it is GMT (or UTC) later on. */
 863 bool RFC1123StrToTime(const char* const str,time_t &time)
 864 {
 865    struct tm Tm;
 866    setlocale (LC_ALL,"C");
 867    bool const invalid =
 868    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 869       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 870    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 871        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 872    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 873        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 874    setlocale (LC_ALL,"");
 875    if (invalid == true)
 876       return false;
 877
 878    time = timegm(&Tm);
 879    return true;
 880 }
 881                                                                         /*}}}*/
 882 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 883 // ---------------------------------------------------------------------
 884 /* */
 885 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 886 {
 887    struct tm Tm;
 888    // MDTM includes no whitespaces but recommend and ignored by strptime
 889    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 890       return false;
 891
 892    time = timegm(&Tm);
 893    return true;
 894 }
 895                                                                         /*}}}*/
 896 // StrToTime - Converts a string into a time_t                          /*{{{*/
 897 // ---------------------------------------------------------------------
 898 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
 899    and the C library asctime format. It requires the GNU library function
 900    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 901    reason the C library does not provide any such function :< This also
 902    handles the weird, but unambiguous FTP time format*/
 903 bool StrToTime(const string &Val,time_t &Result)
 904 {
 905    struct tm Tm;
 906    char Month[10];
 907    const char *I = Val.c_str();
 908
 909    // Skip the day of the week
 910    for (;*I != 0  && *I != ' '; I++);
 911
 912    // Handle RFC 1123 time
 913    Month[0] = 0;
 914    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 915               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 916    {
 917       // Handle RFC 1036 time
 918       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 919                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 920          Tm.tm_year += 1900;
 921       else
 922       {
 923          // asctime format
 924          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 925                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 926          {
 927             // 'ftp' time
 928             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 929                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 930                return false;
 931             Tm.tm_mon--;
 932          }
 933       }
 934    }
 935
 936    Tm.tm_isdst = 0;
 937    if (Month[0] != 0)
 938       Tm.tm_mon = MonthConv(Month);
 939    Tm.tm_year -= 1900;
 940
 941    // Convert to local time and then to GMT
 942    Result = timegm(&Tm);
 943    return true;
 944 }
 945                                                                         /*}}}*/
 946 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 947 // ---------------------------------------------------------------------
 948 /* This is used in decoding the crazy fixed length string headers in
 949    tar and ar files. */
 950 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 951 {
 952    char S[30];
 953    if (Len >= sizeof(S))
 954       return false;
 955    memcpy(S,Str,Len);
 956    S[Len] = 0;
 957
 958    // All spaces is a zero
 959    Res = 0;
 960    unsigned I;
 961    for (I = 0; S[I] == ' '; I++);
 962    if (S[I] == 0)
 963       return true;
 964
 965    char *End;
 966    Res = strtoul(S,&End,Base);
 967    if (End == S)
 968       return false;
 969
 970    return true;
 971 }
 972                                                                         /*}}}*/
 973 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 974 // ---------------------------------------------------------------------
 975 /* This is used in decoding the crazy fixed length string headers in
 976    tar and ar files. */
 977 bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
 978 {
 979    char S[30];
 980    if (Len >= sizeof(S))
 981       return false;
 982    memcpy(S,Str,Len);
 983    S[Len] = 0;
 984
 985    // All spaces is a zero
 986    Res = 0;
 987    unsigned I;
 988    for (I = 0; S[I] == ' '; I++);
 989    if (S[I] == 0)
 990       return true;
 991
 992    char *End;
 993    Res = strtoull(S,&End,Base);
 994    if (End == S)
 995       return false;
 996
 997    return true;
 998 }
 999                                                                         /*}}}*/
1000
// Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files.

   The field is only accepted if the top bit of its first byte is set;
   the remaining seven bits of that byte and all following bytes form a
   big-endian number. Returns false, leaving Res untouched, if the field
   is empty, lacks the marker bit or does not fit into an unsigned long. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   unsigned long Value = Str[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
   {
      // The next shift would push set bits out of the top: too big for us
      if ((Value >> (sizeof(Value)*8 - 8)) != 0)
         return false;

      // char may be signed, go through unsigned char so that bytes >= 0x80
      // are not sign-extended into the upper bits
      Value = (Value << 8) | static_cast<unsigned char>(Str[i]);
   }

   Res = Value;
   return true;
}
1017                                                                         /*}}}*/
// HexDigit - Convert a hex character into an integer                   /*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: maps '0'-'9', 'a'-'f' and 'A'-'F' to 0..15.
   Every other input yields 0. */
static int HexDigit(int c)
{
   // Pick the value that has to be subtracted from the character; for
   // anything that is no hex digit this is the character itself
   int const Origin = (c >= '0' && c <= '9') ? '0' :
                      (c >= 'a' && c <= 'f') ? 'a' - 10 :
                      (c >= 'A' && c <= 'F') ? 'A' - 10 : c;
   return c - Origin;
}
1031                                                                         /*}}}*/
1032 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1033 // ---------------------------------------------------------------------
1034 /* The length of the buffer must be exactly 1/2 the length of the string. */
1035 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1036 {
1037    if (Str.length() != Length*2)
1038       return false;
1039
1040    // Convert each digit. We store it in the same order as the string
1041    int J = 0;
1042    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1043    {
1044       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1045          return false;
1046
1047       Num[J] = HexDigit(I[0]) << 4;
1048       Num[J] += HexDigit(I[1]);
1049    }
1050
1051    return true;
1052 }
1053                                                                         /*}}}*/
// TokSplitString - Split a string up by a given token                  /*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memories. Input is changed to insert nulls at each token location.

   Leading whitespace of Input and whitespace around each token is
   dropped. List receives pointers into Input followed by a terminating
   null entry; ListMax is the number of entries List can hold including
   that terminator. Returns false if the list is too small; the entries
   stored so far are null terminated then (nothing is stored at all if
   ListMax is zero). */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without room for even the terminating entry nothing may be written
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. The casts are needed because isspace() is
   // only defined for unsigned char values
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The slot just used is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos, also stepping over the null that was just written
      for (; Pos != Stop && (*Pos == Tok ||
             isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1093                                                                         /*}}}*/
// VectorizeString - Split a string up into a vector of strings         /*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.

   Empty fields are kept, with one exception: a separator that is the very
   last character does not open another (empty) field. An empty haystack
   yields a vector holding one empty string. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   string::size_type start = 0;
   while (true)
   {
      string::size_type const stop = haystack.find(split, start);
      if (stop == string::npos)
      {
         // No further separator: the rest is the last field
         exploded.push_back(haystack.substr(start));
         break;
      }

      exploded.push_back(haystack.substr(start, stop - start));
      start = stop + 1;

      // The separator was the final character, there is no field after it
      if (start == haystack.size())
         break;
   }
   return exploded;
}
1111                                                                         /*}}}*/
1112 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1113 // ---------------------------------------------------------------------
1114 /* */
1115 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1116                       const char **ListEnd)
1117 {
1118    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1119       R->Hit = false;
1120
1121    unsigned long Hits = 0;
1122    for (; ListBegin != ListEnd; ListBegin++)
1123    {
1124       // Check if the name is a regex
1125       const char *I;
1126       bool Regex = true;
1127       for (I = *ListBegin; *I != 0; I++)
1128          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1129             break;
1130       if (*I == 0)
1131          Regex = false;
1132
1133       // Compile the regex pattern
1134       regex_t Pattern;
1135       if (Regex == true)
1136          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1137                      REG_NOSUB) != 0)
1138             Regex = false;
1139
1140       // Search the list
1141       bool Done = false;
1142       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1143       {
1144          if (R->Str[0] == 0)
1145             continue;
1146
1147          if (strcasecmp(R->Str,*ListBegin) != 0)
1148          {
1149             if (Regex == false)
1150                continue;
1151             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1152                continue;
1153          }
1154          Done = true;
1155
1156          if (R->Hit == false)
1157             Hits++;
1158
1159          R->Hit = true;
1160       }
1161
1162       if (Regex == true)
1163          regfree(&Pattern);
1164
1165       if (Done == false)
1166          _error->Warning(_("Selection %s not found"),*ListBegin);
1167    }
1168
1169    return Hits;
1170 }
1171                                                                         /*}}}*/
// ioprintf - C format string outputter to C++ iostreams                /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters.

   The arguments are formatted printf-style and the result is written to
   out. Output of any length is supported; nothing is written if the
   formatting itself fails. */
void ioprintf(ostream &out,const char *format,...)
{
   // Most messages are short, so try a buffer on the stack first
   char S[4096];
   va_list args;
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
      return;

   if (static_cast<size_t>(Needed) < sizeof(S))
   {
      out << S;
      return;
   }

   /* The text was truncated, but vsnprintf told us how long it really is.
      Format again into a buffer of the right size; the argument list has
      been consumed by the first pass and has to be started anew */
   vector<char> Big(static_cast<size_t>(Needed) + 1);
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << &Big[0];
}
1186                                                                         /*}}}*/
// strprintf - C format string outputter to C++ strings                 /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters.

   The arguments are formatted printf-style and the result replaces the
   content of out. Output of any length is supported; out is left
   untouched if the formatting itself fails. */
void strprintf(string &out,const char *format,...)
{
   // Most messages are short, so try a buffer on the stack first
   char S[4096];
   va_list args;
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
      return;

   if (static_cast<size_t>(Needed) < sizeof(S))
   {
      out = string(S);
      return;
   }

   /* The text was truncated, but vsnprintf told us how long it really is.
      Format again into a buffer of the right size; the argument list has
      been consumed by the first pass and has to be started anew */
   vector<char> Big(static_cast<size_t>(Needed) + 1);
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out = string(&Big[0]);
}
1201                                                                         /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs.

   End is returned if there is no room at all, if the formatting fails or
   if the output had to be truncated, so the result can be fed into the
   next call as Buffer without further checks. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // Check before va_start so that every va_start is paired with a va_end
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths rather than pointers: Buffer + Did may lie far
   // outside of the buffer if the output was truncated
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1223                                                                         /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
// ---------------------------------------------------------------------
/* Everything up to and including the first colon is dropped. A version
   without a colon is returned unchanged. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(":");
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
                                                                        /*}}}*/
1233
// tolower_ascii - tolower() function that ignores the locale           /*{{{*/
// ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is noticeably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway.

   'A'-'Z' are mapped to 'a'-'z', every other value is returned as is. */
int tolower_ascii(int const c)
{
   bool const IsUpper = (c >= 'A' && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1246                                                                         /*}}}*/
1247
1248 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1249 // ---------------------------------------------------------------------
1250 /* The domain list is a comma seperate list of domains that are suffix
1251    matched against the argument */
1252 bool CheckDomainList(const string &Host,const string &List)
1253 {
1254    string::const_iterator Start = List.begin();
1255    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1256    {
1257       if (Cur < List.end() && *Cur != ',')
1258          continue;
1259
1260       // Match the end of the string..
1261       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1262           Cur - Start != 0 &&
1263           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1264          return true;
1265
1266       Start = Cur + 1;
1267    }
1268    return false;
1269 }
1270                                                                         /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which
    - \\ is replaced by a single backslash,
    - \0XX is replaced by the character with the octal code XX,
    - \xXX is replaced by the character with the hexadecimal code XX.
   Any other backslash sequence is dropped together with the character
   following the backslash, as is a lone backslash at the very end. If an
   \0 or \x is not followed by two more characters only the two character
   introducer is dropped and the remainder is copied as is. */
string DeEscapeString(const string &input)
{
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (input.end() - it < 2)
         continue;

      // read it
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
         output += '\\';
         continue;
      }

      int Base = 0;
      if (*it == '0')
         Base = 8;
      else if (*it == 'x')
         Base = 16;

      // FIXME: raise exception for unknown escapes here?
      if (Base == 0)
         continue;

      /* Both digits must lie inside of the string. Comparing distances
         (instead of 'it + 2 <= end') keeps us from reading the end
         position and from moving the iterator beyond it */
      if (input.end() - it <= 2)
         continue;

      tmp[0] = it[1];
      tmp[1] = it[2];
      tmp[2] = 0;
      output += (char)strtol(tmp, 0, Base);
      it += 2;
   }
   return output;
}
1332                                                                         /*}}}*/
1333 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1334 // ---------------------------------------------------------------------
1335 /* This parses the URI into all of its components */
1336 void URI::CopyFrom(const string &U)
1337 {
1338    string::const_iterator I = U.begin();
1339
1340    // Locate the first colon, this separates the scheme
1341    for (; I < U.end() && *I != ':' ; ++I);
1342    string::const_iterator FirstColon = I;
1343
1344    /* Determine if this is a host type URI with a leading double //
1345       and then search for the first single / */
1346    string::const_iterator SingleSlash = I;
1347    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1348       SingleSlash += 3;
1349
1350    /* Find the / indicating the end of the hostname, ignoring /'s in the
1351       square brackets */
1352    bool InBracket = false;
1353    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1354    {
1355       if (*SingleSlash == '[')
1356          InBracket = true;
1357       if (InBracket == true && *SingleSlash == ']')
1358          InBracket = false;
1359    }
1360
1361    if (SingleSlash > U.end())
1362       SingleSlash = U.end();
1363
1364    // We can now write the access and path specifiers
1365    Access.assign(U.begin(),FirstColon);
1366    if (SingleSlash != U.end())
1367       Path.assign(SingleSlash,U.end());
1368    if (Path.empty() == true)
1369       Path = "/";
1370
1371    // Now we attempt to locate a user:pass@host fragment
1372    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1373       FirstColon += 3;
1374    else
1375       FirstColon += 1;
1376    if (FirstColon >= U.end())
1377       return;
1378
1379    if (FirstColon > SingleSlash)
1380       FirstColon = SingleSlash;
1381
1382    // Find the colon...
1383    I = FirstColon + 1;
1384    if (I > SingleSlash)
1385       I = SingleSlash;
1386    for (; I < SingleSlash && *I != ':'; ++I);
1387    string::const_iterator SecondColon = I;
1388
1389    // Search for the @ after the colon
1390    for (; I < SingleSlash && *I != '@'; ++I);
1391    string::const_iterator At = I;
1392
1393    // Now write the host and user/pass
1394    if (At == SingleSlash)
1395    {
1396       if (FirstColon < SingleSlash)
1397          Host.assign(FirstColon,SingleSlash);
1398    }
1399    else
1400    {
1401       Host.assign(At+1,SingleSlash);
1402       // username and password must be encoded (RFC 3986)
1403       User.assign(DeQuoteString(FirstColon,SecondColon));
1404       if (SecondColon < At)
1405          Password.assign(DeQuoteString(SecondColon+1,At));
1406    }
1407
1408    // Now we parse the RFC 2732 [] hostnames.
1409    unsigned long PortEnd = 0;
1410    InBracket = false;
1411    for (unsigned I = 0; I != Host.length();)
1412    {
1413       if (Host[I] == '[')
1414       {
1415          InBracket = true;
1416          Host.erase(I,1);
1417          continue;
1418       }
1419
1420       if (InBracket == true && Host[I] == ']')
1421       {
1422          InBracket = false;
1423          Host.erase(I,1);
1424          PortEnd = I;
1425          continue;
1426       }
1427       I++;
1428    }
1429
1430    // Tsk, weird.
1431    if (InBracket == true)
1432    {
1433       Host.clear();
1434       return;
1435    }
1436
1437    // Now we parse off a port number from the hostname
1438    Port = 0;
1439    string::size_type Pos = Host.rfind(':');
1440    if (Pos == string::npos || Pos < PortEnd)
1441       return;
1442
1443    Port = atoi(string(Host,Pos+1).c_str());
1444    Host.assign(Host,0,Pos);
1445 }
1446                                                                         /*}}}*/
1447 // URI::operator string - Convert the URI to a string                   /*{{{*/
1448 // ---------------------------------------------------------------------
1449 /* */
1450 URI::operator string()
1451 {
1452    string Res;
1453
1454    if (Access.empty() == false)
1455       Res = Access + ':';
1456
1457    if (Host.empty() == false)
1458    {
1459       if (Access.empty() == false)
1460          Res += "//";
1461
1462       if (User.empty() == false)
1463       {
1464          Res +=  User;
1465          if (Password.empty() == false)
1466             Res += ":" + Password;
1467          Res += "@";
1468       }
1469
1470       // Add RFC 2732 escaping characters
1471       if (Access.empty() == false &&
1472           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1473          Res += '[' + Host + ']';
1474       else
1475          Res += Host;
1476
1477       if (Port != 0)
1478       {
1479          char S[30];
1480          sprintf(S,":%u",Port);
1481          Res += S;
1482       }
1483    }
1484
1485    if (Path.empty() == false)
1486    {
1487       if (Path[0] != '/')
1488          Res += "/" + Path;
1489       else
1490          Res += Path;
1491    }
1492
1493    return Res;
1494 }
1495                                                                         /*}}}*/
1496 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1497 // ---------------------------------------------------------------------
1498 /* */
1499 string URI::SiteOnly(const string &URI)
1500 {
1501    ::URI U(URI);
1502    U.User.clear();
1503    U.Password.clear();
1504    U.Path.clear();
1505    U.Port = 0;
1506    return U;
1507 }
1508                                                                         /*}}}*/
1509 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1510 // ---------------------------------------------------------------------
1511 /* */
1512 string URI::NoUserPassword(const string &URI)
1513 {
1514    ::URI U(URI);
1515    U.User.clear();
1516    U.Password.clear();
1517    U.Port = 0;
1518    return U;
1519 }
1520                                                                         /*}}}*/