apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <sstream>
  27 #include <stdio.h>
  28 #include <algorithm>
  29 #include <unistd.h>
  30 #include <regex.h>
  31 #include <errno.h>
  32 #include <stdarg.h>
  33 #include <iconv.h>
  34
  35 #include <apti18n.h>
  36
  37 using namespace std;
  38                                                                         /*}}}*/
  39
  40 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  41 // ---------------------------------------------------------------------
/* This is handy to use before displaying information to the end user */
  43 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  44 {
  45   iconv_t cd;
  46   const char *inbuf;
  47   char *inptr, *outbuf;
  48   size_t insize, bufsize;
  49   dest->clear();
  50
  51   cd = iconv_open(codeset, "UTF-8");
  52   if (cd == (iconv_t)(-1)) {
  53      // Something went wrong
  54      if (errno == EINVAL)
  55         _error->Error("conversion from 'UTF-8' to '%s' not available",
  56                codeset);
  57      else
  58         perror("iconv_open");
  59
  60      return false;
  61   }
  62
  63   insize = bufsize = orig.size();
  64   inbuf = orig.data();
  65   inptr = (char *)inbuf;
  66   outbuf = new char[bufsize];
  67   size_t lastError = -1;
  68
  69   while (insize != 0)
  70   {
  71      char *outptr = outbuf;
  72      size_t outsize = bufsize;
  73      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  74      dest->append(outbuf, outptr - outbuf);
  75      if (err == (size_t)(-1))
  76      {
  77         switch (errno)
  78         {
  79         case EILSEQ:
  80            insize--;
  81            inptr++;
  82            // replace a series of unknown multibytes with a single "?"
  83            if (lastError != insize) {
  84               lastError = insize - 1;
  85               dest->append("?");
  86            }
  87            break;
  88         case EINVAL:
  89            insize = 0;
  90            break;
  91         case E2BIG:
  92            if (outptr == outbuf)
  93            {
  94               bufsize *= 2;
  95               delete[] outbuf;
  96               outbuf = new char[bufsize];
  97            }
  98            break;
  99         }
 100      }
 101   }
 102
 103   delete[] outbuf;
 104
 105   iconv_close(cd);
 106
 107   return true;
 108 }
 109                                                                         /*}}}*/
 110 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 111 // ---------------------------------------------------------------------
 112 /* This is handy to use when parsing a file. It also removes \n's left
 113    over from fgets and company */
 114 char *_strstrip(char *String)
 115 {
 116    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 117
 118    if (*String == 0)
 119       return String;
 120
 121    char *End = String + strlen(String) - 1;
 122    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 123                                *End == '\r'); End--);
 124    End++;
 125    *End = 0;
 126    return String;
 127 };
 128                                                                         /*}}}*/
 129 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 130 // ---------------------------------------------------------------------
 131 /* */
 132 char *_strtabexpand(char *String,size_t Len)
 133 {
 134    for (char *I = String; I != I + Len && *I != 0; I++)
 135    {
 136       if (*I != '\t')
 137          continue;
 138       if (I + 8 > String + Len)
 139       {
 140          *I = 0;
 141          return String;
 142       }
 143
 144       /* Assume the start of the string is 0 and find the next 8 char
 145          division */
 146       int Len;
 147       if (String == I)
 148          Len = 1;
 149       else
 150          Len = 8 - ((String - I) % 8);
 151       Len -= 2;
 152       if (Len <= 0)
 153       {
 154          *I = ' ';
 155          continue;
 156       }
 157
 158       memmove(I + Len,I + 1,strlen(I) + 1);
 159       for (char *J = I; J + Len != I; *I = ' ', I++);
 160    }
 161    return String;
 162 }
 163                                                                         /*}}}*/
 164 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 165 // ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It can also
   understand [] brackets. */
 170 bool ParseQuoteWord(const char *&String,string &Res)
 171 {
 172    // Skip leading whitespace
 173    const char *C = String;
 174    for (;*C != 0 && *C == ' '; C++);
 175    if (*C == 0)
 176       return false;
 177
 178    // Jump to the next word
 179    for (;*C != 0 && isspace(*C) == 0; C++)
 180    {
 181       if (*C == '"')
 182       {
 183          C = strchr(C + 1, '"');
 184          if (C == NULL)
 185             return false;
 186       }
 187       if (*C == '[')
 188       {
 189          C = strchr(C + 1, ']');
 190          if (C == NULL)
 191             return false;
 192       }
 193    }
 194
 195    // Now de-quote characters
 196    char Buffer[1024];
 197    char Tmp[3];
 198    const char *Start = String;
 199    char *I;
 200    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 201    {
 202       if (*Start == '%' && Start + 2 < C &&
 203           isxdigit(Start[1]) && isxdigit(Start[2]))
 204       {
 205          Tmp[0] = Start[1];
 206          Tmp[1] = Start[2];
 207          Tmp[2] = 0;
 208          *I = (char)strtol(Tmp,0,16);
 209          Start += 3;
 210          continue;
 211       }
 212       if (*Start != '"')
 213          *I = *Start;
 214       else
 215          I--;
 216       Start++;
 217    }
 218    *I = 0;
 219    Res = Buffer;
 220
 221    // Skip ending white space
 222    for (;*C != 0 && isspace(*C) != 0; C++);
 223    String = C;
 224    return true;
 225 }
 226                                                                         /*}}}*/
 227 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 228 // ---------------------------------------------------------------------
 229 /* This expects a series of space separated strings enclosed in ""'s.
 230    It concatenates the ""'s into a single string. */
 231 bool ParseCWord(const char *&String,string &Res)
 232 {
 233    // Skip leading whitespace
 234    const char *C = String;
 235    for (;*C != 0 && *C == ' '; C++);
 236    if (*C == 0)
 237       return false;
 238
 239    char Buffer[1024];
 240    char *Buf = Buffer;
 241    if (strlen(String) >= sizeof(Buffer))
 242        return false;
 243
 244    for (; *C != 0; C++)
 245    {
 246       if (*C == '"')
 247       {
 248          for (C++; *C != 0 && *C != '"'; C++)
 249             *Buf++ = *C;
 250
 251          if (*C == 0)
 252             return false;
 253
 254          continue;
 255       }
 256
 257       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 258          continue;
 259       if (isspace(*C) == 0)
 260          return false;
 261       *Buf++ = ' ';
 262    }
 263    *Buf = 0;
 264    Res = Buffer;
 265    String = C;
 266    return true;
 267 }
 268                                                                         /*}}}*/
// QuoteString - Convert a string into quoted form                      /*{{{*/
 270 // ---------------------------------------------------------------------
 271 /* */
 272 string QuoteString(const string &Str, const char *Bad)
 273 {
 274    string Res;
 275    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 276    {
 277       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 278           *I == 0x25 || // percent '%' char
 279           *I <= 0x20 || *I >= 0x7F) // control chars
 280       {
 281          char Buf[10];
 282          sprintf(Buf,"%%%02x",(int)*I);
 283          Res += Buf;
 284       }
 285       else
 286          Res += *I;
 287    }
 288    return Res;
 289 }
 290                                                                         /*}}}*/
// DeQuoteString - Convert a string from quoted form                    /*{{{*/
 292 // ---------------------------------------------------------------------
 293 /* This undoes QuoteString */
 294 string DeQuoteString(const string &Str)
 295 {
 296    return DeQuoteString(Str.begin(),Str.end());
 297 }
 298 string DeQuoteString(string::const_iterator const &begin,
 299                         string::const_iterator const &end)
 300 {
 301    string Res;
 302    for (string::const_iterator I = begin; I != end; ++I)
 303    {
 304       if (*I == '%' && I + 2 < end &&
 305           isxdigit(I[1]) && isxdigit(I[2]))
 306       {
 307          char Tmp[3];
 308          Tmp[0] = I[1];
 309          Tmp[1] = I[2];
 310          Tmp[2] = 0;
 311          Res += (char)strtol(Tmp,0,16);
 312          I += 2;
 313          continue;
 314       }
 315       else
 316          Res += *I;
 317    }
 318    return Res;
 319 }
 320
 321                                                                         /*}}}*/
 322 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 323 // ---------------------------------------------------------------------
 324 /* A max of 4 digits are shown before conversion to the next highest unit.
 325    The max length of the string will be 5 chars unless the size is > 10
 326    YottaBytes (E24) */
 327 string SizeToStr(double Size)
 328 {
 329    char S[300];
 330    double ASize;
 331    if (Size >= 0)
 332       ASize = Size;
 333    else
 334       ASize = -1*Size;
 335
 336    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 337       ExaBytes, ZettaBytes, YottaBytes */
 338    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 339    int I = 0;
 340    while (I <= 8)
 341    {
 342       if (ASize < 100 && I != 0)
 343       {
 344          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 345          break;
 346       }
 347
 348       if (ASize < 10000)
 349       {
 350          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 351          break;
 352       }
 353       ASize /= 1000.0;
 354       I++;
 355    }
 356
 357    return S;
 358 }
 359                                                                         /*}}}*/
 360 // TimeToStr - Convert the time into a string                           /*{{{*/
 361 // ---------------------------------------------------------------------
 362 /* Converts a number of seconds to a hms format */
 363 string TimeToStr(unsigned long Sec)
 364 {
 365    char S[300];
 366
 367    while (1)
 368    {
 369       if (Sec > 60*60*24)
 370       {
 371          //d means days, h means hours, min means minutes, s means seconds
 372          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 373          break;
 374       }
 375
 376       if (Sec > 60*60)
 377       {
 378          //h means hours, min means minutes, s means seconds
 379          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 380          break;
 381       }
 382
 383       if (Sec > 60)
 384       {
 385          //min means minutes, s means seconds
 386          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 387          break;
 388       }
 389
 390       //s means seconds
 391       sprintf(S,_("%lis"),Sec);
 392       break;
 393    }
 394
 395    return S;
 396 }
 397                                                                         /*}}}*/
 398 // SubstVar - Substitute a string for another string                    /*{{{*/
 399 // ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str. */
 401 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 402 {
 403    string::size_type Pos = 0;
 404    string::size_type OldPos = 0;
 405    string Temp;
 406
 407    while (OldPos < Str.length() &&
 408           (Pos = Str.find(Subst,OldPos)) != string::npos)
 409    {
 410       Temp += string(Str,OldPos,Pos) + Contents;
 411       OldPos = Pos + Subst.length();
 412    }
 413
 414    if (OldPos == 0)
 415       return Str;
 416
 417    return Temp + string(Str,OldPos);
 418 }
 419
 420 string SubstVar(string Str,const struct SubstVar *Vars)
 421 {
 422    for (; Vars->Subst != 0; Vars++)
 423       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 424    return Str;
 425 }
 426                                                                         /*}}}*/
 427 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 428 // ---------------------------------------------------------------------
 429 /* Returns a string with the supplied separator depth + 1 times in it */
 430 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 431 {
 432    std::string output = "";
 433    for(unsigned long d=Depth+1; d > 0; d--)
 434       output.append(Separator);
 435    return output;
 436 }
 437                                                                         /*}}}*/
 438 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 439 // ---------------------------------------------------------------------
 440 /* This converts a URI into a safe filename. It quotes all unsafe characters
 441    and converts / to _ and removes the scheme identifier. The resulting
 442    file name should be unique and never occur again for a different file */
 443 string URItoFileName(const string &URI)
 444 {
 445    // Nuke 'sensitive' items
 446    ::URI U(URI);
 447    U.User.clear();
 448    U.Password.clear();
 449    U.Access.clear();
 450
 451    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 452    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 453    replace(NewURI.begin(),NewURI.end(),'/','_');
 454    return NewURI;
 455 }
 456                                                                         /*}}}*/
 457 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 458 // ---------------------------------------------------------------------
 459 /* This routine performs a base64 transformation on a string. It was ripped
 460    from wget and then patched and bug fixed.
 461
 462    This spec can be found in rfc2045 */
 463 string Base64Encode(const string &S)
 464 {
 465    // Conversion table.
 466    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 467                           'I','J','K','L','M','N','O','P',
 468                           'Q','R','S','T','U','V','W','X',
 469                           'Y','Z','a','b','c','d','e','f',
 470                           'g','h','i','j','k','l','m','n',
 471                           'o','p','q','r','s','t','u','v',
 472                           'w','x','y','z','0','1','2','3',
 473                           '4','5','6','7','8','9','+','/'};
 474
 475    // Pre-allocate some space
 476    string Final;
 477    Final.reserve((4*S.length() + 2)/3 + 2);
 478
 479    /* Transform the 3x8 bits to 4x6 bits, as required by
 480       base64.  */
 481    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 482    {
 483       char Bits[3] = {0,0,0};
 484       Bits[0] = I[0];
 485       if (I + 1 < S.end())
 486          Bits[1] = I[1];
 487       if (I + 2 < S.end())
 488          Bits[2] = I[2];
 489
 490       Final += tbl[Bits[0] >> 2];
 491       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 492
 493       if (I + 1 >= S.end())
 494          break;
 495
 496       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 497
 498       if (I + 2 >= S.end())
 499          break;
 500
 501       Final += tbl[Bits[2] & 0x3f];
 502    }
 503
 504    /* Apply the padding elements, this tells how many bytes the remote
 505       end should discard */
 506    if (S.length() % 3 == 2)
 507       Final += '=';
 508    if (S.length() % 3 == 1)
 509       Final += "==";
 510
 511    return Final;
 512 }
 513                                                                         /*}}}*/
 514 // stringcmp - Arbitrary string compare                                 /*{{{*/
 515 // ---------------------------------------------------------------------
 516 /* This safely compares two non-null terminated strings of arbitrary
 517    length */
 518 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 519 {
 520    for (; A != AEnd && B != BEnd; A++, B++)
 521       if (*A != *B)
 522          break;
 523
 524    if (A == AEnd && B == BEnd)
 525       return 0;
 526    if (A == AEnd)
 527       return 1;
 528    if (B == BEnd)
 529       return -1;
 530    if (*A < *B)
 531       return -1;
 532    return 1;
 533 }
 534
 535 #if __GNUC__ >= 3
 536 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 537               const char *B,const char *BEnd)
 538 {
 539    for (; A != AEnd && B != BEnd; A++, B++)
 540       if (*A != *B)
 541          break;
 542
 543    if (A == AEnd && B == BEnd)
 544       return 0;
 545    if (A == AEnd)
 546       return 1;
 547    if (B == BEnd)
 548       return -1;
 549    if (*A < *B)
 550       return -1;
 551    return 1;
 552 }
 553 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 554               string::const_iterator B,string::const_iterator BEnd)
 555 {
 556    for (; A != AEnd && B != BEnd; A++, B++)
 557       if (*A != *B)
 558          break;
 559
 560    if (A == AEnd && B == BEnd)
 561       return 0;
 562    if (A == AEnd)
 563       return 1;
 564    if (B == BEnd)
 565       return -1;
 566    if (*A < *B)
 567       return -1;
 568    return 1;
 569 }
 570 #endif
 571                                                                         /*}}}*/
 572 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 573 // ---------------------------------------------------------------------
 574 /* */
 575 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 576 {
 577    for (; A != AEnd && B != BEnd; A++, B++)
 578       if (tolower_ascii(*A) != tolower_ascii(*B))
 579          break;
 580
 581    if (A == AEnd && B == BEnd)
 582       return 0;
 583    if (A == AEnd)
 584       return 1;
 585    if (B == BEnd)
 586       return -1;
 587    if (tolower_ascii(*A) < tolower_ascii(*B))
 588       return -1;
 589    return 1;
 590 }
 591 #if __GNUC__ >= 3
 592 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 593                   const char *B,const char *BEnd)
 594 {
 595    for (; A != AEnd && B != BEnd; A++, B++)
 596       if (tolower_ascii(*A) != tolower_ascii(*B))
 597          break;
 598
 599    if (A == AEnd && B == BEnd)
 600       return 0;
 601    if (A == AEnd)
 602       return 1;
 603    if (B == BEnd)
 604       return -1;
 605    if (tolower_ascii(*A) < tolower_ascii(*B))
 606       return -1;
 607    return 1;
 608 }
 609 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 610                   string::const_iterator B,string::const_iterator BEnd)
 611 {
 612    for (; A != AEnd && B != BEnd; A++, B++)
 613       if (tolower_ascii(*A) != tolower_ascii(*B))
 614          break;
 615
 616    if (A == AEnd && B == BEnd)
 617       return 0;
 618    if (A == AEnd)
 619       return 1;
 620    if (B == BEnd)
 621       return -1;
 622    if (tolower_ascii(*A) < tolower_ascii(*B))
 623       return -1;
 624    return 1;
 625 }
 626 #endif
 627                                                                         /*}}}*/
// LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 629 // ---------------------------------------------------------------------
 630 /* The format is like those used in package files and the method
 631    communication system */
 632 string LookupTag(const string &Message,const char *Tag,const char *Default)
 633 {
 634    // Look for a matching tag.
 635    int Length = strlen(Tag);
 636    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 637    {
 638       // Found the tag
 639       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 640       {
 641          // Find the end of line and strip the leading/trailing spaces
 642          string::const_iterator J;
 643          I += Length + 1;
 644          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 645          for (J = I; *J != '\n' && J < Message.end(); ++J);
 646          for (; J > I && isspace(J[-1]) != 0; --J);
 647
 648          return string(I,J);
 649       }
 650
 651       for (; *I != '\n' && I < Message.end(); ++I);
 652    }
 653
 654    // Failed to find a match
 655    if (Default == 0)
 656       return string();
 657    return Default;
 658 }
 659                                                                         /*}}}*/
 660 // StringToBool - Converts a string into a boolean                      /*{{{*/
 661 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
 664 int StringToBool(const string &Text,int Default)
 665 {
 666    char *End;
 667    int Res = strtol(Text.c_str(),&End,0);
 668    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 669       return Res;
 670
 671    // Check for positives
 672    if (strcasecmp(Text.c_str(),"no") == 0 ||
 673        strcasecmp(Text.c_str(),"false") == 0 ||
 674        strcasecmp(Text.c_str(),"without") == 0 ||
 675        strcasecmp(Text.c_str(),"off") == 0 ||
 676        strcasecmp(Text.c_str(),"disable") == 0)
 677       return 0;
 678
 679    // Check for negatives
 680    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 681        strcasecmp(Text.c_str(),"true") == 0 ||
 682        strcasecmp(Text.c_str(),"with") == 0 ||
 683        strcasecmp(Text.c_str(),"on") == 0 ||
 684        strcasecmp(Text.c_str(),"enable") == 0)
 685       return 1;
 686
 687    return Default;
 688 }
 689                                                                         /*}}}*/
 690 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 691 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
 694 string TimeRFC1123(time_t Date)
 695 {
 696    struct tm Conv;
 697    if (gmtime_r(&Date, &Conv) == NULL)
 698       return "";
 699
 700    char Buf[300];
 701    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 702    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 703                           "Aug","Sep","Oct","Nov","Dec"};
 704
 705    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 706            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 707            Conv.tm_min,Conv.tm_sec);
 708    return Buf;
 709 }
 710                                                                         /*}}}*/
 711 // ReadMessages - Read messages from the FD                             /*{{{*/
 712 // ---------------------------------------------------------------------
 713 /* This pulls full messages from the input FD into the message buffer.
 714    It assumes that messages will not pause during transit so no
 715    fancy buffering is used.
 716
 717    In particular: this reads blocks from the input until it believes
 718    that it's run out of input text.  Each block is terminated by a
 719    double newline ('\n' followed by '\n').  As noted below, there is a
 720    bug in this code: it assumes that all the blocks have been read if
 721    it doesn't see additional text in the buffer after the last one is
 722    parsed, which will cause it to lose blocks if the last block
 723    coincides with the end of the buffer.
 724  */
 725 bool ReadMessages(int Fd, vector<string> &List)
 726 {
 727    char Buffer[64000];
 728    char *End = Buffer;
 729    // Represents any left-over from the previous iteration of the
 730    // parse loop.  (i.e., if a message is split across the end
 731    // of the buffer, it goes here)
 732    string PartialMessage;
 733
 734    while (1)
 735    {
 736       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 737       if (Res < 0 && errno == EINTR)
 738          continue;
 739
 740       // Process is dead, this is kind of bad..
 741       if (Res == 0)
 742          return false;
 743
 744       // No data
 745       if (Res < 0 && errno == EAGAIN)
 746          return true;
 747       if (Res < 0)
 748          return false;
 749
 750       End += Res;
 751
 752       // Look for the end of the message
 753       for (char *I = Buffer; I + 1 < End; I++)
 754       {
 755          if (I[1] != '\n' ||
 756              (strncmp(I, "\n\n", 2) != 0 && strncmp(I, "\r\n\r\n", 4) != 0))
 757             continue;
 758
 759          // Pull the message out
 760          string Message(Buffer,I-Buffer);
 761          PartialMessage += Message;
 762
 763          // Fix up the buffer
 764          for (; I < End && (*I == '\r' || *I == '\n'); ++I);
 765          End -= I-Buffer;
 766          memmove(Buffer,I,End-Buffer);
 767          I = Buffer;
 768
 769          List.push_back(PartialMessage);
 770          PartialMessage.clear();
 771       }
 772       if (End != Buffer)
 773         {
 774           // If there's text left in the buffer, store it
 775           // in PartialMessage and throw the rest of the buffer
 776           // away.  This allows us to handle messages that
 777           // are longer than the static buffer size.
 778           PartialMessage += string(Buffer, End);
 779           End = Buffer;
 780         }
 781       else
 782         {
 783           // BUG ALERT: if a message block happens to end at a
 784           // multiple of 64000 characters, this will cause it to
 785           // terminate early, leading to a badly formed block and
 786           // probably crashing the method.  However, this is the only
 787           // way we have to find the end of the message block.  I have
 788           // an idea of how to fix this, but it will require changes
 789           // to the protocol (essentially to mark the beginning and
 790           // end of the block).
 791           //
 792           //  -- dburrows 2008-04-02
 793           return true;
 794         }
 795
 796       if (WaitFd(Fd) == false)
 797          return false;
 798    }
 799 }
 800                                                                         /*}}}*/
 801 // MonthConv - Converts a month string into a number                    /*{{{*/
 802 // ---------------------------------------------------------------------
 803 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 804    Made it a bit more robust with a few tolower_ascii though. */
 805 static int MonthConv(char *Month)
 806 {
 807    switch (tolower_ascii(*Month))
 808    {
 809       case 'a':
 810       return tolower_ascii(Month[1]) == 'p'?3:7;
 811       case 'd':
 812       return 11;
 813       case 'f':
 814       return 1;
 815       case 'j':
 816       if (tolower_ascii(Month[1]) == 'a')
 817          return 0;
 818       return tolower_ascii(Month[2]) == 'n'?5:6;
 819       case 'm':
 820       return tolower_ascii(Month[2]) == 'r'?2:4;
 821       case 'n':
 822       return 10;
 823       case 'o':
 824       return 9;
 825       case 's':
 826       return 8;
 827
 828       // Pretend it is January..
 829       default:
 830       return 0;
 831    }
 832 }
 833                                                                         /*}}}*/
 834 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 835 // ---------------------------------------------------------------------
 836 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 837    than local timezone (mktime assumes the latter).
 838
 839    This function is a nonstandard GNU extension that is also present on
 840    the BSDs and maybe other systems. For others we follow the advice of
 841    the manpage of timegm and use his portable replacement. */
 842 #ifndef HAVE_TIMEGM
 843 static time_t timegm(struct tm *t)
 844 {
 845    char *tz = getenv("TZ");
 846    setenv("TZ", "", 1);
 847    tzset();
 848    time_t ret = mktime(t);
 849    if (tz)
 850       setenv("TZ", tz, 1);
 851    else
 852       unsetenv("TZ");
 853    tzset();
 854    return ret;
 855 }
 856 #endif
 857                                                                         /*}}}*/
// RFC1123StrToTime - Converts a HTTP1.1 full date string into a time_t /*{{{*/
// ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC2616 Section 3.3.1
   with one exception: the protocol says the timezone MUST be GMT, but
   any timezone name (%Z) is accepted here. UTC is equivalent to GMT and
   does show up from time to time (e.g. in Release files), so we accept
   everything and simply treat the result as GMT (or UTC) later on */
 865 bool RFC1123StrToTime(const char* const str,time_t &time)
 866 {
 867    struct tm Tm;
 868    setlocale (LC_ALL,"C");
 869    bool const invalid =
 870    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 871       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 872    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 873        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 874    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 875        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 876    setlocale (LC_ALL,"");
 877    if (invalid == true)
 878       return false;
 879
 880    time = timegm(&Tm);
 881    return true;
 882 }
 883                                                                         /*}}}*/
 884 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 885 // ---------------------------------------------------------------------
 886 /* */
 887 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 888 {
 889    struct tm Tm;
 890    // MDTM includes no whitespaces but recommend and ignored by strptime
 891    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 892       return false;
 893
 894    time = timegm(&Tm);
 895    return true;
 896 }
 897                                                                         /*}}}*/
 898 // StrToTime - Converts a string into a time_t                          /*{{{*/
 899 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format */
 905 bool StrToTime(const string &Val,time_t &Result)
 906 {
 907    struct tm Tm;
 908    char Month[10];
 909
 910    // Skip the day of the week
 911    const char *I = strchr(Val.c_str(), ' ');
 912
 913    // Handle RFC 1123 time
 914    Month[0] = 0;
 915    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 916               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 917    {
 918       // Handle RFC 1036 time
 919       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 920                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 921          Tm.tm_year += 1900;
 922       else
 923       {
 924          // asctime format
 925          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 926                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 927          {
 928             // 'ftp' time
 929             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 930                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 931                return false;
 932             Tm.tm_mon--;
 933          }
 934       }
 935    }
 936
 937    Tm.tm_isdst = 0;
 938    if (Month[0] != 0)
 939       Tm.tm_mon = MonthConv(Month);
 940    Tm.tm_year -= 1900;
 941
 942    // Convert to local time and then to GMT
 943    Result = timegm(&Tm);
 944    return true;
 945 }
 946                                                                         /*}}}*/
 947 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 948 // ---------------------------------------------------------------------
 949 /* This is used in decoding the crazy fixed length string headers in
 950    tar and ar files. */
 951 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 952 {
 953    char S[30];
 954    if (Len >= sizeof(S))
 955       return false;
 956    memcpy(S,Str,Len);
 957    S[Len] = 0;
 958
 959    // All spaces is a zero
 960    Res = 0;
 961    unsigned I;
 962    for (I = 0; S[I] == ' '; I++);
 963    if (S[I] == 0)
 964       return true;
 965
 966    char *End;
 967    Res = strtoul(S,&End,Base);
 968    if (End == S)
 969       return false;
 970
 971    return true;
 972 }
 973                                                                         /*}}}*/
 974 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 975 // ---------------------------------------------------------------------
 976 /* This is used in decoding the crazy fixed length string headers in
 977    tar and ar files. */
 978 bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
 979 {
 980    char S[30];
 981    if (Len >= sizeof(S))
 982       return false;
 983    memcpy(S,Str,Len);
 984    S[Len] = 0;
 985
 986    // All spaces is a zero
 987    Res = 0;
 988    unsigned I;
 989    for (I = 0; S[I] == ' '; I++);
 990    if (S[I] == 0)
 991       return true;
 992
 993    char *End;
 994    Res = strtoull(S,&End,Base);
 995    if (End == S)
 996       return false;
 997
 998    return true;
 999 }
1000                                                                         /*}}}*/
1001
// Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   /* Decodes a big-endian binary field of 'Len' bytes into 'Res'. The top
      bit of the first byte must be set (it only marks the encoding and is
      not part of the value); otherwise, or for an empty field, false is
      returned and 'Res' is left untouched. */
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Every byte has to be taken as unsigned: where plain char is signed,
   // bytes >= 0x80 would otherwise be sign-extended and wreck the sum.
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res<<8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1018                                                                         /*}}}*/
1019 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1020 // ---------------------------------------------------------------------
1021 /* Helper for Hex2Num */
static int HexDigit(int c)
{
   /* Returns the numeric value (0-15) of the hexadecimal digit 'c'. Any
      other character maps to 0, so callers have to validate beforehand. */
   int Value = 0;
   if (c >= 'A' && c <= 'F')
      Value = 10 + (c - 'A');
   else if (c >= 'a' && c <= 'f')
      Value = 10 + (c - 'a');
   else if (c >= '0' && c <= '9')
      Value = c - '0';
   return Value;
}
1032                                                                         /*}}}*/
1033 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1034 // ---------------------------------------------------------------------
1035 /* The length of the buffer must be exactly 1/2 the length of the string. */
1036 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1037 {
1038    if (Str.length() != Length*2)
1039       return false;
1040
1041    // Convert each digit. We store it in the same order as the string
1042    int J = 0;
1043    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1044    {
1045       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1046          return false;
1047
1048       Num[J] = HexDigit(I[0]) << 4;
1049       Num[J] += HexDigit(I[1]);
1050    }
1051
1052    return true;
1053 }
1054                                                                         /*}}}*/
1055 // TokSplitString - Split a string up by a given token                  /*{{{*/
1056 // ---------------------------------------------------------------------
1057 /* This is intended to be a faster splitter, it does not use dynamic
1058    memories. Input is changed to insert nulls at each token location. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   /* Splits 'Input' in place at every 'Tok', trimming whitespace around the
      pieces, and stores pointers to the pieces in 'List' followed by a null
      pointer. 'ListMax' is the capacity of 'List' including that terminator.
      Returns false if the pieces do not fit; 'List' is still terminated
      then, unless it has no capacity at all. */

   // Without room for even the terminator nothing may be written
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. The <ctype.h> functions need values that are
   // representable as unsigned char, hence the casts throughout.
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace((unsigned char)*Start) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok || isspace((unsigned char)End[-1]) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The slot just used is needed for the terminator instead
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over separators, whitespace and the null written above
      for (; Pos != Stop && (*Pos == Tok || isspace((unsigned char)*Pos) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1094                                                                         /*}}}*/
1095 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1096 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we have an iterable vector */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   /* Splits 'haystack' at every occurrence of 'split' and returns the
      pieces in order. Empty pieces are kept, except that a single trailing
      separator does not produce a final empty piece. An empty input yields
      one empty string. */
   vector<string> exploded;
   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (true)
   {
      string::const_iterator const stop = std::find(start, last, split);
      exploded.push_back(string(start, stop));

      // No separator left: that was the final piece. Stopping here also
      // avoids stepping an iterator beyond the end of the string.
      if (stop == last)
         break;

      // Continue behind the separator; if nothing follows it we are done
      start = stop + 1;
      if (start == last)
         break;
   }
   return exploded;
}
1112                                                                         /*}}}*/
1113 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1114 // ---------------------------------------------------------------------
1115 /* */
1116 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1117                       const char **ListEnd)
1118 {
1119    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1120       R->Hit = false;
1121
1122    unsigned long Hits = 0;
1123    for (; ListBegin != ListEnd; ListBegin++)
1124    {
1125       // Check if the name is a regex
1126       const char *I;
1127       bool Regex = true;
1128       for (I = *ListBegin; *I != 0; I++)
1129          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1130             break;
1131       if (*I == 0)
1132          Regex = false;
1133
1134       // Compile the regex pattern
1135       regex_t Pattern;
1136       if (Regex == true)
1137          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1138                      REG_NOSUB) != 0)
1139             Regex = false;
1140
1141       // Search the list
1142       bool Done = false;
1143       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1144       {
1145          if (R->Str[0] == 0)
1146             continue;
1147
1148          if (strcasecmp(R->Str,*ListBegin) != 0)
1149          {
1150             if (Regex == false)
1151                continue;
1152             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1153                continue;
1154          }
1155          Done = true;
1156
1157          if (R->Hit == false)
1158             Hits++;
1159
1160          R->Hit = true;
1161       }
1162
1163       if (Regex == true)
1164          regfree(&Pattern);
1165
1166       if (Done == false)
1167          _error->Warning(_("Selection %s not found"),*ListBegin);
1168    }
1169
1170    return Hits;
1171 }
1172                                                                         /*}}}*/
1173 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1174 // ---------------------------------------------------------------------
1175 /* This is used to make the internationalization strings easier to translate
1176    and to allow reordering of parameters */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   /* Formats into a scratch buffer of 'size' bytes. If the result fits it
      is written to 'out' and true is returned. Otherwise 'size' is raised
      (to the exact need if vsnprintf reported it, doubled if not) and false
      is returned so the caller can retry with a freshly started va_list. */
   // The vector releases the buffer on every path and reports allocation
   // failure by exception instead of handing vsnprintf a null pointer.
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }
   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
void ioprintf(ostream &out,const char *format,...)
{
   /* printf-style output to the stream 'out'. */
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      // vsnprintf consumes the argument list, so every attempt needs its
      // own va_start and, on success as well as failure, a matching va_end.
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}
void strprintf(string &out,const char *format,...)
{
   /* printf-style formatting; 'out' is replaced by the formatted text. */
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      // See ioprintf: one va_start/va_end pair per attempt
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1218                                                                         /*}}}*/
1219 // safe_snprintf - Safer snprintf                                       /*{{{*/
1220 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   /* Formats into [Buffer,End) and returns a pointer to the terminating
      null of the result, so calls can be chained. 'End' is returned if
      there is no room at all, if formatting fails or if the output had to
      be truncated. */
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths rather than pointers: Buffer + Did could point far
   // beyond the buffer if the output was truncated.
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1240                                                                         /*}}}*/
1241 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1242 // ---------------------------------------------------------------------
string StripEpoch(const string &VerStr)
{
   /* Returns everything behind the first ':' of 'VerStr', or the whole
      string if it contains no ':' at all. */
   string::size_type const Colon = VerStr.find(':');
   return (Colon != string::npos) ? VerStr.substr(Colon + 1) : VerStr;
}
1250
1251 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1252 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is noticeably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
int tolower_ascii(int const c)
{
   /* Maps 'A'-'Z' to 'a'-'z' and returns every other value unchanged. */
   bool const IsUpper = (c >= 'A' && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1263                                                                         /*}}}*/
1264
// CheckDomainList - See if Host is in a , separated list               /*{{{*/
// ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1269 bool CheckDomainList(const string &Host,const string &List)
1270 {
1271    string::const_iterator Start = List.begin();
1272    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1273    {
1274       if (Cur < List.end() && *Cur != ',')
1275          continue;
1276
1277       // Match the end of the string..
1278       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1279           Cur - Start != 0 &&
1280           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1281          return true;
1282
1283       Start = Cur + 1;
1284    }
1285    return false;
1286 }
1287                                                                         /*}}}*/
1288 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1289 // ---------------------------------------------------------------------
1290 /* */
string DeEscapeString(const string &input)
{
   /* Returns a copy of 'input' with "\\" replaced by a single backslash and
      "\0XX" (two octal digits) / "\xXX" (two hex digits) replaced by the
      byte they encode. A backslash followed by any other character is
      dropped together with that character. A truncated "\0" or "\x" escape
      drops the backslash and the introducer; the characters after it are
      copied as they are. */
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // a trailing lone backslash has nothing to escape and is dropped
      if (input.end() - it < 2)
         continue;

      // deal with double escape
      if (it[1] == '\\')
      {
         output += *it;
         // skip the second backslash as well
         ++it;
         continue;
      }

      // read the character selecting the kind of escape
      ++it;
      switch (*it)
      {
         case '0':
         case 'x':
         {
            // Both digits must really exist: it[1] and it[2] are only valid
            // if more than two characters remain from 'it' on. Consuming an
            // incomplete escape would move 'it' beyond end().
            if (input.end() - it > 2) {
               int const base = (*it == '0') ? 8 : 16;
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += (char)strtol(tmp, 0, base);
               it += 2;
            }
            break;
         }
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1349                                                                         /*}}}*/
1350 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1351 // ---------------------------------------------------------------------
1352 /* This parses the URI into all of its components */
void URI::CopyFrom(const string &U)
{
   /* Splits U into the members Access (text before the first ':'), User,
      Password, Host, Port and Path. The members are only assigned on the
      paths shown below.
      NOTE(review): nothing is cleared up front, so this presumably expects
      a freshly constructed object - confirm against the callers. */
   string::const_iterator I = U.begin();

   // Locate the first colon, this separates the scheme
   for (; I < U.end() && *I != ':' ; ++I);
   string::const_iterator FirstColon = I;

   /* Determine if this is a host type URI with a leading double //
      and then search for the first single / */
   // NOTE(review): I + 3 is formed even when I is already at U.end()
   string::const_iterator SingleSlash = I;
   if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
      SingleSlash += 3;

   /* Find the / indicating the end of the hostname, ignoring /'s in the
      square brackets */
   bool InBracket = false;
   for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
   {
      if (*SingleSlash == '[')
         InBracket = true;
      if (InBracket == true && *SingleSlash == ']')
         InBracket = false;
   }

   if (SingleSlash > U.end())
      SingleSlash = U.end();

   // We can now write the access and path specifiers
   // Path is everything from SingleSlash on; an empty Path becomes "/"
   Access.assign(U.begin(),FirstColon);
   if (SingleSlash != U.end())
      Path.assign(SingleSlash,U.end());
   if (Path.empty() == true)
      Path = "/";

   // Now we attempt to locate a user:pass@host fragment
   // FirstColon is moved behind "://" (or behind the ':' alone) and from
   // here on marks the start of the user/host part.
   // NOTE(review): if FirstColon + 2 == U.end() the test below indexes the
   // end position via FirstColon[2].
   if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
      FirstColon += 3;
   else
      FirstColon += 1;
   // Nothing behind the scheme: Host, User, Password and Port stay as they were
   if (FirstColon >= U.end())
      return;

   if (FirstColon > SingleSlash)
      FirstColon = SingleSlash;

   // Find the colon...
   I = FirstColon + 1;
   if (I > SingleSlash)
      I = SingleSlash;
   for (; I < SingleSlash && *I != ':'; ++I);
   string::const_iterator SecondColon = I;

   // Search for the @ after the colon
   for (; I < SingleSlash && *I != '@'; ++I);
   string::const_iterator At = I;

   // Now write the host and user/pass
   if (At == SingleSlash)
   {
      // No '@' found: the whole part before the path is the host
      if (FirstColon < SingleSlash)
         Host.assign(FirstColon,SingleSlash);
   }
   else
   {
      Host.assign(At+1,SingleSlash);
      // username and password must be encoded (RFC 3986)
      User.assign(DeQuoteString(FirstColon,SecondColon));
      if (SecondColon < At)
         Password.assign(DeQuoteString(SecondColon+1,At));
   }

   // Now we parse the RFC 2732 [] hostnames.
   // Both brackets are erased from Host. PortEnd ends up as the index of
   // the character that followed the last ']' so that colons inside the
   // brackets are not taken for the port separator further down.
   unsigned long PortEnd = 0;
   InBracket = false;
   for (unsigned I = 0; I != Host.length();)
   {
      if (Host[I] == '[')
      {
         InBracket = true;
         Host.erase(I,1);
         continue;
      }

      if (InBracket == true && Host[I] == ']')
      {
         InBracket = false;
         Host.erase(I,1);
         PortEnd = I;
         continue;
      }
      I++;
   }

   // Tsk, weird.
   // An opening bracket without its closing one: give up on the host
   if (InBracket == true)
   {
      Host.clear();
      return;
   }

   // Now we parse off a port number from the hostname
   // NOTE(review): Port is only reset here; both early returns above leave
   // it untouched.
   Port = 0;
   string::size_type Pos = Host.rfind(':');
   if (Pos == string::npos || Pos < PortEnd)
      return;

   // Text behind the last ':' is the port (via atoi), the rest stays the host
   Port = atoi(string(Host,Pos+1).c_str());
   Host.assign(Host,0,Pos);
}
1463                                                                         /*}}}*/
1464 // URI::operator string - Convert the URI to a string                   /*{{{*/
1465 // ---------------------------------------------------------------------
1466 /* */
1467 URI::operator string()
1468 {
1469    string Res;
1470
1471    if (Access.empty() == false)
1472       Res = Access + ':';
1473
1474    if (Host.empty() == false)
1475    {
1476       if (Access.empty() == false)
1477          Res += "//";
1478
1479       if (User.empty() == false)
1480       {
1481          Res +=  User;
1482          if (Password.empty() == false)
1483             Res += ":" + Password;
1484          Res += "@";
1485       }
1486
1487       // Add RFC 2732 escaping characters
1488       if (Access.empty() == false &&
1489           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1490          Res += '[' + Host + ']';
1491       else
1492          Res += Host;
1493
1494       if (Port != 0)
1495       {
1496          char S[30];
1497          sprintf(S,":%u",Port);
1498          Res += S;
1499       }
1500    }
1501
1502    if (Path.empty() == false)
1503    {
1504       if (Path[0] != '/')
1505          Res += "/" + Path;
1506       else
1507          Res += Path;
1508    }
1509
1510    return Res;
1511 }
1512                                                                         /*}}}*/
1513 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1514 // ---------------------------------------------------------------------
1515 /* */
1516 string URI::SiteOnly(const string &URI)
1517 {
1518    ::URI U(URI);
1519    U.User.clear();
1520    U.Password.clear();
1521    U.Path.clear();
1522    U.Port = 0;
1523    return U;
1524 }
1525                                                                         /*}}}*/
1526 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1527 // ---------------------------------------------------------------------
1528 /* */
1529 string URI::NoUserPassword(const string &URI)
1530 {
1531    ::URI U(URI);
1532    U.User.clear();
1533    U.Password.clear();
1534    U.Port = 0;
1535    return U;
1536 }
1537                                                                         /*}}}*/