apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include <apti18n.h>
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before display some information for enduser  */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf;
  47   size_t insize, bufsize;
  48   dest->clear();
  49
  50   cd = iconv_open(codeset, "UTF-8");
  51   if (cd == (iconv_t)(-1)) {
  52      // Something went wrong
  53      if (errno == EINVAL)
  54         _error->Error("conversion from 'UTF-8' to '%s' not available",
  55                codeset);
  56      else
  57         perror("iconv_open");
  58
  59      return false;
  60   }
  61
  62   insize = bufsize = orig.size();
  63   inbuf = orig.data();
  64   inptr = (char *)inbuf;
  65   outbuf = new char[bufsize];
  66   size_t lastError = -1;
  67
  68   while (insize != 0)
  69   {
  70      char *outptr = outbuf;
  71      size_t outsize = bufsize;
  72      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  73      dest->append(outbuf, outptr - outbuf);
  74      if (err == (size_t)(-1))
  75      {
  76         switch (errno)
  77         {
  78         case EILSEQ:
  79            insize--;
  80            inptr++;
  81            // replace a series of unknown multibytes with a single "?"
  82            if (lastError != insize) {
  83               lastError = insize - 1;
  84               dest->append("?");
  85            }
  86            break;
  87         case EINVAL:
  88            insize = 0;
  89            break;
  90         case E2BIG:
  91            if (outptr == outbuf)
  92            {
  93               bufsize *= 2;
  94               delete[] outbuf;
  95               outbuf = new char[bufsize];
  96            }
  97            break;
  98         }
  99      }
 100   }
 101
 102   delete[] outbuf;
 103
 104   iconv_close(cd);
 105
 106   return true;
 107 }
 108                                                                         /*}}}*/
 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 // ---------------------------------------------------------------------
 /* This is handy to use when parsing a file. Leading spaces and tabs are
    skipped; trailing spaces, tabs, \n and \r (as left over from fgets and
    company) are cut off by writing a terminator into the buffer.

    Returns a pointer to the first remaining character inside String. The
    buffer is not written to if nothing remains after the leading skip. */
 char *_strstrip(char *String)
 {
    while (*String == ' ' || *String == '\t')
       ++String;

    if (*String == 0)
       return String;

    // Walk back from the terminator, always looking at the character in
    // front of End so that no pointer before String is ever formed
    char *End = String + strlen(String);
    while (End != String &&
           (End[-1] == ' ' || End[-1] == '\t' ||
            End[-1] == '\n' || End[-1] == '\r'))
       --End;
    *End = 0;
    return String;
 }
 127                                                                         /*}}}*/
 // strtabexpand - Converts tabs into spaces                             /*{{{*/
 // ---------------------------------------------------------------------
 /* Replaces every tab in String, in place, by a run of spaces. Len is
    treated as the size of the buffer String lives in.

    A tab at the very start becomes a single space. If fewer than 8 bytes
    are left between a tab and the end of the buffer, or the expanded text
    would not fit, the string is cut at that tab instead.

    NOTE(review): the width is taken from 8 - ((String - I) % 8) - 2, kept
    exactly as before so existing output does not change. It does not look
    like it produces real 8 column tab stops - confirm the intent. */
 char *_strtabexpand(char *String,size_t Len)
 {
    for (char *I = String; static_cast<size_t>(I - String) < Len && *I != 0; ++I)
    {
       if (*I != '\t')
          continue;

       size_t const Offset = I - String;
       if (Offset + 8 > Len)
       {
          *I = 0;
          return String;
       }

       /* Assume the start of the string is 0 and find the next 8 char
          division */
       int Width;
       if (Offset == 0)
          Width = 1;
       else
          Width = 8 - ((String - I) % 8);
       Width -= 2;
       if (Width <= 0)
       {
          *I = ' ';
          continue;
       }

       // Text after the tab, including its terminator
       size_t const Tail = strlen(I + 1) + 1;
       if (Offset + Width + Tail > Len)
       {
          // Expanding would run over the end of the buffer
          *I = 0;
          return String;
       }

       memmove(I + Width,I + 1,Tail);
       memset(I,' ',Width);
       // Continue with the first character after the inserted spaces
       I += Width - 1;
    }
    return String;
 }
 162                                                                         /*}}}*/
 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 // ---------------------------------------------------------------------
 /* This grabs a single word, converts any % escaped characters (%XX with
    two hex digits) to their proper values and advances String past the
    word and the whitespace following it. Double quotes are understood and
    stripped out as well; whitespace inside "" or [] does not end the word.
    This is for URI/URL parsing.

    Returns false, leaving String untouched, if only spaces are left or a
    quote or bracket is not closed. The word is copied starting at String,
    so spaces in front of the word end up in Res. An escaped NUL (%00) ends
    the result early. */
 bool ParseQuoteWord(const char *&String,string &Res)
 {
    // Skip leading whitespace
    const char *C = String;
    while (*C == ' ')
       ++C;
    if (*C == 0)
       return false;

    // Jump to the next word
    for (;*C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; C++)
    {
       if (*C == '"')
       {
          C = strchr(C + 1, '"');
          if (C == NULL)
             return false;
       }
       if (*C == '[')
       {
          C = strchr(C + 1, ']');
          if (C == NULL)
             return false;
       }
    }

    // Now de-quote characters. A string is used so that words of any
    // length fit
    string Word;
    Word.reserve(C - String);
    for (const char *Start = String; Start != C;)
    {
       if (*Start == '%' && C - Start > 2 &&
           isxdigit(static_cast<unsigned char>(Start[1])) &&
           isxdigit(static_cast<unsigned char>(Start[2])))
       {
          char const Hex[3] = {Start[1], Start[2], 0};
          Word += (char)strtol(Hex,0,16);
          Start += 3;
          continue;
       }
       if (*Start != '"')
          Word += *Start;
       Start++;
    }
    // Assign as C string: the result has always ended at an unescaped NUL
    Res = Word.c_str();

    // Skip ending white space
    for (;*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0; C++);
    String = C;
    return true;
 }
 225                                                                         /*}}}*/
 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 // ---------------------------------------------------------------------
 /* This expects a series of whitespace separated strings enclosed in ""'s
    and joins their contents into a single string. Each run of whitespace
    between two quoted parts contributes one space to the result.

    Returns false if only spaces are left, the input is 1024 characters or
    longer, a quote is not closed or anything but whitespace is found
    outside of quotes. On success String is moved to the end of the input. */
 bool ParseCWord(const char *&String,string &Res)
 {
    // Skip leading whitespace
    const char *Cur = String;
    while (*Cur == ' ')
       ++Cur;
    if (*Cur == 0)
       return false;

    // The result is never longer than the input, so this check is enough
    // to keep all writes inside Scratch
    char Scratch[1024];
    if (strlen(String) >= sizeof(Scratch))
       return false;
    char *Out = Scratch;

    while (*Cur != 0)
    {
       if (*Cur == '"')
       {
          // Copy everything up to the closing quote
          ++Cur;
          while (*Cur != 0 && *Cur != '"')
             *Out++ = *Cur++;

          if (*Cur == 0)
             return false;
       }
       else
       {
          bool const Blank = isspace(*Cur) != 0;
          bool const RepeatedBlank = Cur != String && Blank &&
                                     isspace(Cur[-1]) != 0;
          if (RepeatedBlank == false)
          {
             if (Blank == false)
                return false;
             *Out++ = ' ';
          }
       }
       ++Cur;
    }

    *Out = 0;
    Res = Scratch;
    String = Cur;
    return true;
 }
 267                                                                         /*}}}*/
 // QuoteString - Convert a string into quoted form                      /*{{{*/
 // ---------------------------------------------------------------------
 /* Returns a copy of Str in which every byte that is listed in Bad, is not
    printable, is '%', or lies outside the range 0x21 - 0x7E is replaced by
    %XX, XX being the two digit lowercase hex value of the byte. */
 string QuoteString(const string &Str, const char *Bad)
 {
    static const char HexDigits[] = "0123456789abcdef";

    string Res;
    Res.reserve(Str.size());
    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
    {
       // Work on the unsigned value: a plain char may be negative, which
       // is neither valid for isprint() nor usable as a two digit hex value
       unsigned char const Ch = static_cast<unsigned char>(*I);
       if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
           Ch == 0x25 || // percent '%' char
           Ch <= 0x20 || Ch >= 0x7F) // control chars
       {
          Res += '%';
          Res += HexDigits[Ch >> 4];
          Res += HexDigits[Ch & 0xF];
       }
       else
          Res += *I;
    }
    return Res;
 }
 289                                                                         /*}}}*/
 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 // ---------------------------------------------------------------------
 /* This undoes QuoteString: every %XX with two hex digits is turned back
    into the byte it stands for, everything else is copied as it is. A '%'
    which is not followed by two hex digits is kept literally. */
 string DeQuoteString(string::const_iterator const &begin,
                      string::const_iterator const &end)
 {
    string Decoded;
    string::const_iterator Cur = begin;
    while (Cur != end)
    {
       bool const Escaped = *Cur == '%' && end - Cur > 2 &&
                            isxdigit(Cur[1]) && isxdigit(Cur[2]);
       if (Escaped == false)
       {
          Decoded += *Cur;
          ++Cur;
          continue;
       }

       char const Hex[3] = {Cur[1], Cur[2], 0};
       Decoded += (char)strtol(Hex,0,16);
       // Move over the percent sign and both digits
       Cur += 3;
    }
    return Decoded;
 }
 string DeQuoteString(const string &Str)
 {
    return DeQuoteString(Str.begin(),Str.end());
 }
 319
 320                                                                         /*}}}*/
 // SizeToStr - Convert a size into a human readable string              /*{{{*/
 // ---------------------------------------------------------------------
 /* The absolute value of Size is divided by 1000 until it is below 10000
    or the largest unit ('Y') is reached, then printed followed by a space
    and the unit letter (nothing for plain bytes, so the result ends in the
    space). Scaled values below 100 get one decimal place. A max of 4
    digits are shown before the decimal point unless the size is beyond
    10000 Y, which is printed in 'Y' with as many digits as it needs. */
 string SizeToStr(double Size)
 {
    double ASize = Size;
    if (Size < 0)
       ASize = -1*Size;

    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
       ExaBytes, ZettaBytes, YottaBytes */
    static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
    int const Largest = sizeof(Ext)/sizeof(Ext[0]) - 1;

    // Stop at the largest unit so that a result is always produced, even
    // for values which do not get below 10000 (huge sizes, NaN)
    int I = 0;
    while (I < Largest && ASize >= 10000)
    {
       ASize /= 1000.0;
       I++;
    }

    char S[300];
    if (I != 0 && ASize < 100)
       snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
    else
       snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);

    return S;
 }
 358                                                                         /*}}}*/
 359 // TimeToStr - Convert the time into a string                           /*{{{*/
 360 // ---------------------------------------------------------------------
 361 /* Converts a number of seconds to a hms format */
 362 string TimeToStr(unsigned long Sec)
 363 {
 364    char S[300];
 365
 366    while (1)
 367    {
 368       if (Sec > 60*60*24)
 369       {
 370          //d means days, h means hours, min means minutes, s means seconds
 371          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 372          break;
 373       }
 374
 375       if (Sec > 60*60)
 376       {
 377          //h means hours, min means minutes, s means seconds
 378          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 379          break;
 380       }
 381
 382       if (Sec > 60)
 383       {
 384          //min means minutes, s means seconds
 385          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 386          break;
 387       }
 388
 389       //s means seconds
 390       sprintf(S,_("%lis"),Sec);
 391       break;
 392    }
 393
 394    return S;
 395 }
 396                                                                         /*}}}*/
 // SubstVar - Substitute a string for another string                    /*{{{*/
 // ---------------------------------------------------------------------
 /* This replaces all occurrences of Subst with Contents in Str and returns
    the result. The search continues behind each replacement, so text
    brought in by Contents is never replaced again. Str is returned
    unchanged if Subst is empty or does not occur. */
 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 {
    // An empty pattern matches everywhere without moving forward
    if (Subst.empty() == true)
       return Str;

    string::size_type Pos = 0;
    string::size_type OldPos = 0;
    string Temp;

    while (OldPos < Str.length() &&
           (Pos = Str.find(Subst,OldPos)) != string::npos)
    {
       // Copy the text between the previous match and this one; the third
       // argument is a length, not an end position
       Temp.append(Str,OldPos,Pos - OldPos);
       Temp += Contents;
       OldPos = Pos + Subst.length();
    }

    if (OldPos == 0)
       return Str;

    // Whatever follows the last match
    Temp.append(Str,OldPos,string::npos);
    return Temp;
 }
 418
 419 string SubstVar(string Str,const struct SubstVar *Vars)
 420 {
 421    for (; Vars->Subst != 0; Vars++)
 422       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 423    return Str;
 424 }
 425                                                                         /*}}}*/
 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 // ---------------------------------------------------------------------
 /* Returns a string made of Separator repeated Depth + 1 times */
 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 {
    std::string Result;
    unsigned long Remaining = Depth + 1;
    while (Remaining != 0)
    {
       Result += Separator;
       --Remaining;
    }
    return Result;
 }
 436                                                                         /*}}}*/
 437 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 438 // ---------------------------------------------------------------------
 439 /* This converts a URI into a safe filename. It quotes all unsafe characters
 440    and converts / to _ and removes the scheme identifier. The resulting
 441    file name should be unique and never occur again for a different file */
 442 string URItoFileName(const string &URI)
 443 {
 444    // Nuke 'sensitive' items
 445    ::URI U(URI);
 446    U.User.clear();
 447    U.Password.clear();
 448    U.Access.clear();
 449
 450    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 451    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 452    replace(NewURI.begin(),NewURI.end(),'/','_');
 453    return NewURI;
 454 }
 455                                                                         /*}}}*/
 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 // ---------------------------------------------------------------------
 /* This routine performs a base64 transformation on a string and returns
    the encoded text, padded with '=' to a multiple of four characters. It
    was ripped from wget and then patched and bug fixed.

    This spec can be found in rfc2045 */
 string Base64Encode(const string &S)
 {
    // Conversion table.
    static const char tbl[] =
       "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Pre-allocate some space
    string::size_type const Len = S.length();
    string Final;
    Final.reserve((4*Len + 2)/3 + 2);

    /* Transform the 3x8 bits to 4x6 bits, as required by
       base64.  */
    for (string::size_type I = 0; I < Len; I += 3)
    {
       // The bytes must be unsigned: shifting a negative char would give
       // a negative index into the table
       unsigned char const B0 = S[I];
       unsigned char const B1 = (I + 1 < Len) ? S[I + 1] : 0;
       unsigned char const B2 = (I + 2 < Len) ? S[I + 2] : 0;

       Final += tbl[B0 >> 2];
       Final += tbl[((B0 & 3) << 4) + (B1 >> 4)];

       if (I + 1 >= Len)
          break;

       Final += tbl[((B1 & 0xf) << 2) + (B2 >> 6)];

       if (I + 2 >= Len)
          break;

       Final += tbl[B2 & 0x3f];
    }

    /* Apply the padding elements, this tells how many bytes the remote
       end should discard */
    if (Len % 3 == 2)
       Final += '=';
    if (Len % 3 == 1)
       Final += "==";

    return Final;
 }
 512                                                                         /*}}}*/
 // stringcmp - Arbitrary string compare                                 /*{{{*/
 // ---------------------------------------------------------------------
 /* This compares two strings given as [begin,end) ranges, so neither has
    to be null terminated. The result is 0 if both are equal. At the first
    differing character it is -1 if A's character is the smaller one and 1
    otherwise. If one string is a proper prefix of the other the result is
    1 when A is the shorter one and -1 when B is. */
 template <typename IterA, typename IterB>
 static int stringcmp_range(IterA A, IterA const AEnd, IterB B, IterB const BEnd)
 {
    // Skip over the common prefix
    while (A != AEnd && B != BEnd && *A == *B)
    {
       ++A;
       ++B;
    }

    bool const AtEndA = (A == AEnd);
    bool const AtEndB = (B == BEnd);
    if (AtEndA == true)
       return AtEndB == true ? 0 : 1;
    if (AtEndB == true)
       return -1;
    return (*A < *B) ? -1 : 1;
 }

 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 {
    return stringcmp_range(A,AEnd,B,BEnd);
 }

 #if __GNUC__ >= 3
 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
               const char *B,const char *BEnd)
 {
    return stringcmp_range(A,AEnd,B,BEnd);
 }
 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
               string::const_iterator B,string::const_iterator BEnd)
 {
    return stringcmp_range(A,AEnd,B,BEnd);
 }
 #endif
 570                                                                         /*}}}*/
 571 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 572 // ---------------------------------------------------------------------
 573 /* */
 574 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 575 {
 576    for (; A != AEnd && B != BEnd; A++, B++)
 577       if (tolower_ascii(*A) != tolower_ascii(*B))
 578          break;
 579
 580    if (A == AEnd && B == BEnd)
 581       return 0;
 582    if (A == AEnd)
 583       return 1;
 584    if (B == BEnd)
 585       return -1;
 586    if (tolower_ascii(*A) < tolower_ascii(*B))
 587       return -1;
 588    return 1;
 589 }
 590 #if __GNUC__ >= 3
 591 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 592                   const char *B,const char *BEnd)
 593 {
 594    for (; A != AEnd && B != BEnd; A++, B++)
 595       if (tolower_ascii(*A) != tolower_ascii(*B))
 596          break;
 597
 598    if (A == AEnd && B == BEnd)
 599       return 0;
 600    if (A == AEnd)
 601       return 1;
 602    if (B == BEnd)
 603       return -1;
 604    if (tolower_ascii(*A) < tolower_ascii(*B))
 605       return -1;
 606    return 1;
 607 }
 608 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 609                   string::const_iterator B,string::const_iterator BEnd)
 610 {
 611    for (; A != AEnd && B != BEnd; A++, B++)
 612       if (tolower_ascii(*A) != tolower_ascii(*B))
 613          break;
 614
 615    if (A == AEnd && B == BEnd)
 616       return 0;
 617    if (A == AEnd)
 618       return 1;
 619    if (B == BEnd)
 620       return -1;
 621    if (tolower_ascii(*A) < tolower_ascii(*B))
 622       return -1;
 623    return 1;
 624 }
 625 #endif
 626                                                                         /*}}}*/
 627 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 628 // ---------------------------------------------------------------------
 629 /* The format is like those used in package files and the method
 630    communication system */
 631 string LookupTag(const string &Message,const char *Tag,const char *Default)
 632 {
 633    // Look for a matching tag.
 634    int Length = strlen(Tag);
 635    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 636    {
 637       // Found the tag
 638       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 639       {
 640          // Find the end of line and strip the leading/trailing spaces
 641          string::const_iterator J;
 642          I += Length + 1;
 643          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 644          for (J = I; *J != '\n' && J < Message.end(); ++J);
 645          for (; J > I && isspace(J[-1]) != 0; --J);
 646
 647          return string(I,J);
 648       }
 649
 650       for (; *I != '\n' && I < Message.end(); ++I);
 651    }
 652
 653    // Failed to find a match
 654    if (Default == 0)
 655       return string();
 656    return Default;
 657 }
 658                                                                         /*}}}*/
 // StringToBool - Converts a string into a boolean                      /*{{{*/
 // ---------------------------------------------------------------------
 /* This inspects the string to see if it is true or if it is false and
    then returns the result: 1 for true, 0 for false and Default if the
    text is not recognised. A leading number with the value 0 or 1 is
    accepted, as are several variants of yes/no, compared ignoring case. */
 int StringToBool(const string &Text,int Default)
 {
    const char * const Str = Text.c_str();

    // Keep the full long value: cutting it down to int first would let
    // large numbers wrap around into the accepted range
    char *End;
    long const Res = strtol(Str,&End,0);
    if (End != Str && Res >= 0 && Res <= 1)
       return static_cast<int>(Res);

    static const char * const Negatives[] = {"no","false","without","off","disable"};
    static const char * const Positives[] = {"yes","true","with","on","enable"};

    // Check for negatives
    for (size_t I = 0; I != sizeof(Negatives)/sizeof(Negatives[0]); ++I)
       if (strcasecmp(Str,Negatives[I]) == 0)
          return 0;

    // Check for positives
    for (size_t I = 0; I != sizeof(Positives)/sizeof(Positives[0]); ++I)
       if (strcasecmp(Str,Positives[I]) == 0)
          return 1;

    return Default;
 }
 688                                                                         /*}}}*/
 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 // ---------------------------------------------------------------------
 /* This converts a time_t into a string of the form
    "Sun, 06 Nov 1994 08:49:37 GMT", which is year 2000 compliant and
    timezone neutral. Day and month names come from fixed English tables.
    An empty string is returned if gmtime_r fails. */
 string TimeRFC1123(time_t Date)
 {
    static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
    static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                              "Aug","Sep","Oct","Nov","Dec"};

    struct tm Utc;
    if (gmtime_r(&Date, &Utc) == NULL)
       return "";

    char Out[300];
    snprintf(Out, sizeof(Out), "%s, %02i %s %i %02i:%02i:%02i GMT",
             DayNames[Utc.tm_wday], Utc.tm_mday, MonthNames[Utc.tm_mon],
             Utc.tm_year+1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
    return Out;
 }
 709                                                                         /*}}}*/
 710 // ReadMessages - Read messages from the FD                             /*{{{*/
 711 // ---------------------------------------------------------------------
 712 /* This pulls full messages from the input FD into the message buffer.
 713    It assumes that messages will not pause during transit so no
 714    fancy buffering is used.
 715
 716    In particular: this reads blocks from the input until it believes
 717    that it's run out of input text.  Each block is terminated by a
 718    double newline ('\n' followed by '\n').  As noted below, there is a
 719    bug in this code: it assumes that all the blocks have been read if
 720    it doesn't see additional text in the buffer after the last one is
 721    parsed, which will cause it to lose blocks if the last block
 722    coincides with the end of the buffer.
 723  */
 724 bool ReadMessages(int Fd, vector<string> &List)
 725 {
 726    char Buffer[64000];
 727    char *End = Buffer;
 728    // Represents any left-over from the previous iteration of the
 729    // parse loop.  (i.e., if a message is split across the end
 730    // of the buffer, it goes here)
 731    string PartialMessage;
 732
 733    while (1)
 734    {
 735       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 736       if (Res < 0 && errno == EINTR)
 737          continue;
 738
 739       // Process is dead, this is kind of bad..
 740       if (Res == 0)
 741          return false;
 742
 743       // No data
 744       if (Res < 0 && errno == EAGAIN)
 745          return true;
 746       if (Res < 0)
 747          return false;
 748
 749       End += Res;
 750
 751       // Look for the end of the message
 752       for (char *I = Buffer; I + 1 < End; I++)
 753       {
 754          if (I[0] != '\n' || I[1] != '\n')
 755             continue;
 756
 757          // Pull the message out
 758          string Message(Buffer,I-Buffer);
 759          PartialMessage += Message;
 760
 761          // Fix up the buffer
 762          for (; I < End && *I == '\n'; I++);
 763          End -= I-Buffer;
 764          memmove(Buffer,I,End-Buffer);
 765          I = Buffer;
 766
 767          List.push_back(PartialMessage);
 768          PartialMessage.clear();
 769       }
 770       if (End != Buffer)
 771         {
 772           // If there's text left in the buffer, store it
 773           // in PartialMessage and throw the rest of the buffer
 774           // away.  This allows us to handle messages that
 775           // are longer than the static buffer size.
 776           PartialMessage += string(Buffer, End);
 777           End = Buffer;
 778         }
 779       else
 780         {
 781           // BUG ALERT: if a message block happens to end at a
 782           // multiple of 64000 characters, this will cause it to
 783           // terminate early, leading to a badly formed block and
 784           // probably crashing the method.  However, this is the only
 785           // way we have to find the end of the message block.  I have
 786           // an idea of how to fix this, but it will require changes
 787           // to the protocol (essentially to mark the beginning and
 788           // end of the block).
 789           //
 790           //  -- dburrows 2008-04-02
 791           return true;
 792         }
 793
 794       if (WaitFd(Fd) == false)
 795          return false;
 796    }
 797 }
 798                                                                         /*}}}*/
 799 // MonthConv - Converts a month string into a number                    /*{{{*/
 800 // ---------------------------------------------------------------------
 801 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 802    Made it a bit more robust with a few tolower_ascii though. */
 803 static int MonthConv(char *Month)
 804 {
 805    switch (tolower_ascii(*Month))
 806    {
 807       case 'a':
 808       return tolower_ascii(Month[1]) == 'p'?3:7;
 809       case 'd':
 810       return 11;
 811       case 'f':
 812       return 1;
 813       case 'j':
 814       if (tolower_ascii(Month[1]) == 'a')
 815          return 0;
 816       return tolower_ascii(Month[2]) == 'n'?5:6;
 817       case 'm':
 818       return tolower_ascii(Month[2]) == 'r'?2:4;
 819       case 'n':
 820       return 10;
 821       case 'o':
 822       return 9;
 823       case 's':
 824       return 8;
 825
 826       // Pretend it is January..
 827       default:
 828       return 0;
 829    }
 830 }
 831                                                                         /*}}}*/
 832 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 833 // ---------------------------------------------------------------------
 834 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 835    than local timezone (mktime assumes the latter).
 836
 837    This function is a nonstandard GNU extension that is also present on
 838    the BSDs and maybe other systems. For others we follow the advice of
 839    the manpage of timegm and use his portable replacement. */
 840 #ifndef HAVE_TIMEGM
 841 static time_t timegm(struct tm *t)
 842 {
 843    char *tz = getenv("TZ");
 844    setenv("TZ", "", 1);
 845    tzset();
 846    time_t ret = mktime(t);
 847    if (tz)
 848       setenv("TZ", tz, 1);
 849    else
 850       unsetenv("TZ");
 851    tzset();
 852    return ret;
 853 }
 854 #endif
 855                                                                         /*}}}*/
 // RFC1123StrToTime - Converts a HTTP1.1 full date string into a time_t /*{{{*/
 // ---------------------------------------------------------------------
 /* Tries to parse a full date as specified in RFC2616 Section 3.3.1
    with one exception: All timezones (%Z) are accepted but the protocol
    says that it MUST be GMT, but this one is equal to UTC which we will
    encounter from time to time (e.g. in Release files) so we accept all
    here and just assume it is GMT (or UTC) later on.

    The three formats are tried in turn with the locale switched to "C";
    the locale active before the call is put back afterwards. Returns
    false, leaving time untouched, if none of them matches. */
 bool RFC1123StrToTime(const char* const str,time_t &time)
 {
    // strptime only fills in the fields it parses
    struct tm Tm;
    memset(&Tm,0,sizeof(Tm));

    // Remember the active locale so it can be restored instead of being
    // replaced by the environment default
    char const * const active = setlocale(LC_ALL,NULL);
    std::string const oldLocale = (active != NULL) ? active : "";

    setlocale (LC_ALL,"C");
    bool const invalid =
    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
    setlocale (LC_ALL,oldLocale.c_str());
    if (invalid == true)
       return false;

    time = timegm(&Tm);
    return true;
 }
 881                                                                         /*}}}*/
 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 // ---------------------------------------------------------------------
 /* Parses a timestamp of the form YYYYMMDDHHMMSS and converts it with
    timegm. Returns false, leaving time untouched, if str does not match. */
 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 {
    // strptime only fills in the fields it parses
    struct tm Tm;
    memset(&Tm,0,sizeof(Tm));

    // MDTM contains no whitespace; the spaces in the format are the
    // recommended way to separate the fields and are ignored by strptime
    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
       return false;

    time = timegm(&Tm);
    return true;
 }
 895                                                                         /*}}}*/
 896 // StrToTime - Converts a string into a time_t                          /*{{{*/
 897 // ---------------------------------------------------------------------
 898 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
 899    and the C library asctime format. It requires the GNU library function
 900    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 901    reason the C library does not provide any such function :< This also
 902    handles the weird, but unambiguous FTP time format*/
 903 bool StrToTime(const string &Val,time_t &Result)
 904 {
 905    struct tm Tm;
 906    char Month[10];
 907
 908    // Skip the day of the week
 909    const char *I = strchr(Val.c_str(), ' ');
 910
 911    // Handle RFC 1123 time
 912    Month[0] = 0;
 913    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 914               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 915    {
 916       // Handle RFC 1036 time
 917       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 918                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 919          Tm.tm_year += 1900;
 920       else
 921       {
 922          // asctime format
 923          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 924                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 925          {
 926             // 'ftp' time
 927             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 928                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 929                return false;
 930             Tm.tm_mon--;
 931          }
 932       }
 933    }
 934
 935    Tm.tm_isdst = 0;
 936    if (Month[0] != 0)
 937       Tm.tm_mon = MonthConv(Month);
 938    Tm.tm_year -= 1900;
 939
 940    // Convert to local time and then to GMT
 941    Result = timegm(&Tm);
 942    return true;
 943 }
 944                                                                         /*}}}*/
 945 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 946 // ---------------------------------------------------------------------
 947 /* This is used in decoding the crazy fixed length string headers in
 948    tar and ar files. */
 949 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 950 {
 951    char S[30];
 952    if (Len >= sizeof(S))
 953       return false;
 954    memcpy(S,Str,Len);
 955    S[Len] = 0;
 956
 957    // All spaces is a zero
 958    Res = 0;
 959    unsigned I;
 960    for (I = 0; S[I] == ' '; I++);
 961    if (S[I] == 0)
 962       return true;
 963
 964    char *End;
 965    Res = strtoul(S,&End,Base);
 966    if (End == S)
 967       return false;
 968
 969    return true;
 970 }
 971                                                                         /*}}}*/
 972 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 973 // ---------------------------------------------------------------------
 974 /* This is used in decoding the crazy fixed length string headers in
 975    tar and ar files. */
 976 bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
 977 {
 978    char S[30];
 979    if (Len >= sizeof(S))
 980       return false;
 981    memcpy(S,Str,Len);
 982    S[Len] = 0;
 983
 984    // All spaces is a zero
 985    Res = 0;
 986    unsigned I;
 987    for (I = 0; S[I] == ' '; I++);
 988    if (S[I] == 0)
 989       return true;
 990
 991    char *End;
 992    Res = strtoull(S,&End,Base);
 993    if (End == S)
 994       return false;
 995
 996    return true;
 997 }
 998                                                                         /*}}}*/
 999
// Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files. Such a field is marked by the top bit of its first byte;
   the remaining 7 bits and all following bytes form a big endian number.
   Returns false if the marker bit is missing or Len is zero. Bits which
   do not fit into Res are shifted out silently. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   /* Each byte has to be taken as unsigned: a plain char may be signed
      and bytes >= 0x80 would otherwise be sign extended into the result */
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) | static_cast<unsigned char>(Str[i]);
   return true;
}
1016                                                                         /*}}}*/
// HexDigit - Convert a hex character into an integer                   /*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num. Maps '0'-'9', 'a'-'f' and 'A'-'F' to 0-15, every
   other character yields 0. */
static int HexDigit(int c)
{
   int Value = 0;
   if ('A' <= c && c <= 'F')
      Value = 10 + (c - 'A');
   else if ('a' <= c && c <= 'f')
      Value = 10 + (c - 'a');
   else if ('0' <= c && c <= '9')
      Value = c - '0';
   return Value;
}
1030                                                                         /*}}}*/
1031 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1032 // ---------------------------------------------------------------------
1033 /* The length of the buffer must be exactly 1/2 the length of the string. */
1034 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1035 {
1036    if (Str.length() != Length*2)
1037       return false;
1038
1039    // Convert each digit. We store it in the same order as the string
1040    int J = 0;
1041    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1042    {
1043       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1044          return false;
1045
1046       Num[J] = HexDigit(I[0]) << 4;
1047       Num[J] += HexDigit(I[1]);
1048    }
1049
1050    return true;
1051 }
1052                                                                         /*}}}*/
// TokSplitString - Split a string up by a given token                  /*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at each token location and
   List receives pointers into Input, terminated by a null pointer.
   Whitespace around the items is stripped. Returns false if List (which
   has room for ListMax pointers including the terminator) is too small;
   the list is still null terminated in that case unless ListMax is 0. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Not even room for the terminator
   if (ListMax == 0)
      return false;

   /* Strip any leading spaces. The casts are needed as the ctype
      functions are only defined for unsigned char values */
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over the separator (which might be a null by now)
      for (; Pos != Stop && (*Pos == Tok ||
             isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1092                                                                         /*}}}*/
// VectorizeString - Split a string up into a vector of strings         /*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector. Empty
   items are kept, with the exception of the one after a trailing split
   character: "a,b," gives "a" and "b". An empty haystack gives a vector
   with one empty string. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (true)
   {
      string::const_iterator const end = std::find(start, last, split);
      exploded.push_back(string(start, end));
      // No further split character: this was the last item
      if (end == last)
         break;
      /* Step over the split character - but never beyond the end of the
         string, nothing follows a trailing split character */
      start = end + 1;
      if (start == last)
         break;
   }
   return exploded;
}
1110                                                                         /*}}}*/
1111 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1112 // ---------------------------------------------------------------------
1113 /* */
1114 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1115                       const char **ListEnd)
1116 {
1117    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1118       R->Hit = false;
1119
1120    unsigned long Hits = 0;
1121    for (; ListBegin != ListEnd; ListBegin++)
1122    {
1123       // Check if the name is a regex
1124       const char *I;
1125       bool Regex = true;
1126       for (I = *ListBegin; *I != 0; I++)
1127          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1128             break;
1129       if (*I == 0)
1130          Regex = false;
1131
1132       // Compile the regex pattern
1133       regex_t Pattern;
1134       if (Regex == true)
1135          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1136                      REG_NOSUB) != 0)
1137             Regex = false;
1138
1139       // Search the list
1140       bool Done = false;
1141       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1142       {
1143          if (R->Str[0] == 0)
1144             continue;
1145
1146          if (strcasecmp(R->Str,*ListBegin) != 0)
1147          {
1148             if (Regex == false)
1149                continue;
1150             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1151                continue;
1152          }
1153          Done = true;
1154
1155          if (R->Hit == false)
1156             Hits++;
1157
1158          R->Hit = true;
1159       }
1160
1161       if (Regex == true)
1162          regfree(&Pattern);
1163
1164       if (Done == false)
1165          _error->Warning(_("Selection %s not found"),*ListBegin);
1166    }
1167
1168    return Hits;
1169 }
1170                                                                         /*}}}*/
// ioprintf - C format string outputter to C++ iostreams                /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The formatted text is limited
   to the size of the internal buffer (4096 bytes including the
   terminator), longer output is truncated. */
void ioprintf(ostream &out,const char *format,...)
{
   char S[4096];

   // sprintf the description
   va_list args;
   va_start(args,format);
   vsnprintf(S,sizeof(S),format,args);
   // Every va_start needs its va_end before the function returns
   va_end(args);

   out << S;
}
1185                                                                         /*}}}*/
// strprintf - C format string outputter to C++ strings                 /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The previous content of out is
   replaced. The formatted text is limited to the size of the internal
   buffer (4096 bytes including the terminator), longer output is
   truncated. */
void strprintf(string &out,const char *format,...)
{
   char S[4096];

   // sprintf the description
   va_list args;
   va_start(args,format);
   vsnprintf(S,sizeof(S),format,args);
   // Every va_start needs its va_end before the function returns
   va_end(args);

   out = S;
}
1200                                                                         /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. If the output had to be truncated (or
   formatting failed) End is returned, so that consecutive calls become
   no-ops. This is a better alternative to using consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room left at all, checked before the argument list is started
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* Did is the length the complete output would have had. Compare the
      lengths rather than forming a pointer which might be beyond End */
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1222                                                                         /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
// ---------------------------------------------------------------------
/* Returns everything after the first colon of VerStr, or VerStr itself
   if it contains no colon. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
                                                                        /*}}}*/
1232
// tolower_ascii - tolower() function that ignores the locale           /*{{{*/
// ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is noticeably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. Everything which is
   not in 'A'-'Z' is returned unchanged. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1245                                                                         /*}}}*/
1246
1247 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1248 // ---------------------------------------------------------------------
1249 /* The domain list is a comma seperate list of domains that are suffix
1250    matched against the argument */
1251 bool CheckDomainList(const string &Host,const string &List)
1252 {
1253    string::const_iterator Start = List.begin();
1254    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1255    {
1256       if (Cur < List.end() && *Cur != ',')
1257          continue;
1258
1259       // Match the end of the string..
1260       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1261           Cur - Start != 0 &&
1262           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1263          return true;
1264
1265       Start = Cur + 1;
1266    }
1267    return false;
1268 }
1269                                                                         /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
// ---------------------------------------------------------------------
/* Replaces \0XX (two octal digits) and \xXX (two hex digits) by the
   character they encode and \\ by a single backslash. A \0 or \x which
   is not followed by two more characters is dropped, as is any other
   escape sequence and a lone backslash at the end of the input. */
string DeEscapeString(const string &input)
{
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // a backslash as very last char escapes nothing
      if (it + 1 == input.end())
         break;

      // deal with double escape
      if (it[1] == '\\')
      {
         output += '\\';
         // advance iterator one step further
         ++it;
         continue;
      }

      // read the escape type
      ++it;
      switch (*it)
      {
         case '0':
         case 'x':
            /* Two digits have to follow the type char. Both it[1] and
               it[2] are read, so more than two chars must be left
               counting from it - otherwise we would read (and then
               iterate) beyond the end of the string */
            if (input.end() - it > 2)
            {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += static_cast<char>(strtol(tmp, 0, (*it == '0') ? 8 : 16));
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1331                                                                         /*}}}*/
1332 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1333 // ---------------------------------------------------------------------
1334 /* This parses the URI into all of its components */
1335 void URI::CopyFrom(const string &U)
1336 {
1337    string::const_iterator I = U.begin();
1338
1339    // Locate the first colon, this separates the scheme
1340    for (; I < U.end() && *I != ':' ; ++I);
1341    string::const_iterator FirstColon = I;
1342
1343    /* Determine if this is a host type URI with a leading double //
1344       and then search for the first single / */
1345    string::const_iterator SingleSlash = I;
1346    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1347       SingleSlash += 3;
1348
1349    /* Find the / indicating the end of the hostname, ignoring /'s in the
1350       square brackets */
1351    bool InBracket = false;
1352    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1353    {
1354       if (*SingleSlash == '[')
1355          InBracket = true;
1356       if (InBracket == true && *SingleSlash == ']')
1357          InBracket = false;
1358    }
1359
1360    if (SingleSlash > U.end())
1361       SingleSlash = U.end();
1362
1363    // We can now write the access and path specifiers
1364    Access.assign(U.begin(),FirstColon);
1365    if (SingleSlash != U.end())
1366       Path.assign(SingleSlash,U.end());
1367    if (Path.empty() == true)
1368       Path = "/";
1369
1370    // Now we attempt to locate a user:pass@host fragment
1371    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1372       FirstColon += 3;
1373    else
1374       FirstColon += 1;
1375    if (FirstColon >= U.end())
1376       return;
1377
1378    if (FirstColon > SingleSlash)
1379       FirstColon = SingleSlash;
1380
1381    // Find the colon...
1382    I = FirstColon + 1;
1383    if (I > SingleSlash)
1384       I = SingleSlash;
1385    for (; I < SingleSlash && *I != ':'; ++I);
1386    string::const_iterator SecondColon = I;
1387
1388    // Search for the @ after the colon
1389    for (; I < SingleSlash && *I != '@'; ++I);
1390    string::const_iterator At = I;
1391
1392    // Now write the host and user/pass
1393    if (At == SingleSlash)
1394    {
1395       if (FirstColon < SingleSlash)
1396          Host.assign(FirstColon,SingleSlash);
1397    }
1398    else
1399    {
1400       Host.assign(At+1,SingleSlash);
1401       // username and password must be encoded (RFC 3986)
1402       User.assign(DeQuoteString(FirstColon,SecondColon));
1403       if (SecondColon < At)
1404          Password.assign(DeQuoteString(SecondColon+1,At));
1405    }
1406
1407    // Now we parse the RFC 2732 [] hostnames.
1408    unsigned long PortEnd = 0;
1409    InBracket = false;
1410    for (unsigned I = 0; I != Host.length();)
1411    {
1412       if (Host[I] == '[')
1413       {
1414          InBracket = true;
1415          Host.erase(I,1);
1416          continue;
1417       }
1418
1419       if (InBracket == true && Host[I] == ']')
1420       {
1421          InBracket = false;
1422          Host.erase(I,1);
1423          PortEnd = I;
1424          continue;
1425       }
1426       I++;
1427    }
1428
1429    // Tsk, weird.
1430    if (InBracket == true)
1431    {
1432       Host.clear();
1433       return;
1434    }
1435
1436    // Now we parse off a port number from the hostname
1437    Port = 0;
1438    string::size_type Pos = Host.rfind(':');
1439    if (Pos == string::npos || Pos < PortEnd)
1440       return;
1441
1442    Port = atoi(string(Host,Pos+1).c_str());
1443    Host.assign(Host,0,Pos);
1444 }
1445                                                                         /*}}}*/
1446 // URI::operator string - Convert the URI to a string                   /*{{{*/
1447 // ---------------------------------------------------------------------
1448 /* */
1449 URI::operator string()
1450 {
1451    string Res;
1452
1453    if (Access.empty() == false)
1454       Res = Access + ':';
1455
1456    if (Host.empty() == false)
1457    {
1458       if (Access.empty() == false)
1459          Res += "//";
1460
1461       if (User.empty() == false)
1462       {
1463          Res +=  User;
1464          if (Password.empty() == false)
1465             Res += ":" + Password;
1466          Res += "@";
1467       }
1468
1469       // Add RFC 2732 escaping characters
1470       if (Access.empty() == false &&
1471           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1472          Res += '[' + Host + ']';
1473       else
1474          Res += Host;
1475
1476       if (Port != 0)
1477       {
1478          char S[30];
1479          sprintf(S,":%u",Port);
1480          Res += S;
1481       }
1482    }
1483
1484    if (Path.empty() == false)
1485    {
1486       if (Path[0] != '/')
1487          Res += "/" + Path;
1488       else
1489          Res += Path;
1490    }
1491
1492    return Res;
1493 }
1494                                                                         /*}}}*/
1495 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1496 // ---------------------------------------------------------------------
1497 /* */
1498 string URI::SiteOnly(const string &URI)
1499 {
1500    ::URI U(URI);
1501    U.User.clear();
1502    U.Password.clear();
1503    U.Path.clear();
1504    U.Port = 0;
1505    return U;
1506 }
1507                                                                         /*}}}*/
1508 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1509 // ---------------------------------------------------------------------
1510 /* */
1511 string URI::NoUserPassword(const string &URI)
1512 {
1513    ::URI U(URI);
1514    U.User.clear();
1515    U.Password.clear();
1516    U.Port = 0;
1517    return U;
1518 }
1519                                                                         /*}}}*/