apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <sstream>
  27 #include <stdio.h>
  28 #include <algorithm>
  29 #include <unistd.h>
  30 #include <regex.h>
  31 #include <errno.h>
  32 #include <stdarg.h>
  33 #include <iconv.h>
  34
  35 #include <apti18n.h>
  36
  37 using namespace std;
  38                                                                         /*}}}*/
  39
  40 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  41 // ---------------------------------------------------------------------
  42 /* This is handy to use before displaying some information to the end user */
  43 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  44 {
  45   iconv_t cd;
  46   const char *inbuf;
  47   char *inptr, *outbuf;
  48   size_t insize, bufsize;
  49   dest->clear();
  50
  51   cd = iconv_open(codeset, "UTF-8");
  52   if (cd == (iconv_t)(-1)) {
  53      // Something went wrong
  54      if (errno == EINVAL)
  55         _error->Error("conversion from 'UTF-8' to '%s' not available",
  56                codeset);
  57      else
  58         perror("iconv_open");
  59
  60      return false;
  61   }
  62
  63   insize = bufsize = orig.size();
  64   inbuf = orig.data();
  65   inptr = (char *)inbuf;
  66   outbuf = new char[bufsize];
  67   size_t lastError = -1;
  68
  69   while (insize != 0)
  70   {
  71      char *outptr = outbuf;
  72      size_t outsize = bufsize;
  73      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  74      dest->append(outbuf, outptr - outbuf);
  75      if (err == (size_t)(-1))
  76      {
  77         switch (errno)
  78         {
  79         case EILSEQ:
  80            insize--;
  81            inptr++;
  82            // replace a series of unknown multibytes with a single "?"
  83            if (lastError != insize) {
  84               lastError = insize - 1;
  85               dest->append("?");
  86            }
  87            break;
  88         case EINVAL:
  89            insize = 0;
  90            break;
  91         case E2BIG:
  92            if (outptr == outbuf)
  93            {
  94               bufsize *= 2;
  95               delete[] outbuf;
  96               outbuf = new char[bufsize];
  97            }
  98            break;
  99         }
 100      }
 101   }
 102
 103   delete[] outbuf;
 104
 105   iconv_close(cd);
 106
 107   return true;
 108 }
 109                                                                         /*}}}*/
 110 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 111 // ---------------------------------------------------------------------
 112 /* This is handy to use when parsing a file. It also removes \n's left
 113    over from fgets and company */
 114 char *_strstrip(char *String)
 115 {
 116    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 117
 118    if (*String == 0)
 119       return String;
 120
 121    char *End = String + strlen(String) - 1;
 122    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 123                                *End == '\r'); End--);
 124    End++;
 125    *End = 0;
 126    return String;
 127 };
 128                                                                         /*}}}*/
 129 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 130 // ---------------------------------------------------------------------
 131 /* */
 132 char *_strtabexpand(char *String,size_t Len)
 133 {
 134    for (char *I = String; I != I + Len && *I != 0; I++)
 135    {
 136       if (*I != '\t')
 137          continue;
 138       if (I + 8 > String + Len)
 139       {
 140          *I = 0;
 141          return String;
 142       }
 143
 144       /* Assume the start of the string is 0 and find the next 8 char
 145          division */
 146       int Len;
 147       if (String == I)
 148          Len = 1;
 149       else
 150          Len = 8 - ((String - I) % 8);
 151       Len -= 2;
 152       if (Len <= 0)
 153       {
 154          *I = ' ';
 155          continue;
 156       }
 157
 158       memmove(I + Len,I + 1,strlen(I) + 1);
 159       for (char *J = I; J + Len != I; *I = ' ', I++);
 160    }
 161    return String;
 162 }
 163                                                                         /*}}}*/
 164 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 165 // ---------------------------------------------------------------------
 166 /* This grabs a single word, converts any % escaped characters to their
 167    proper values and advances the pointer. Double quotes are understood
 168    and stripped out as well. This is for URI/URL parsing. It also can
 169    understand [] brackets.*/
 170 bool ParseQuoteWord(const char *&String,string &Res)
 171 {
 172    // Skip leading whitespace
 173    const char *C = String;
 174    for (;*C != 0 && *C == ' '; C++);
 175    if (*C == 0)
 176       return false;
 177
 178    // Jump to the next word
 179    for (;*C != 0 && isspace(*C) == 0; C++)
 180    {
 181       if (*C == '"')
 182       {
 183          C = strchr(C + 1, '"');
 184          if (C == NULL)
 185             return false;
 186       }
 187       if (*C == '[')
 188       {
 189          C = strchr(C + 1, ']');
 190          if (C == NULL)
 191             return false;
 192       }
 193    }
 194
 195    // Now de-quote characters
 196    char Buffer[1024];
 197    char Tmp[3];
 198    const char *Start = String;
 199    char *I;
 200    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 201    {
 202       if (*Start == '%' && Start + 2 < C &&
 203           isxdigit(Start[1]) && isxdigit(Start[2]))
 204       {
 205          Tmp[0] = Start[1];
 206          Tmp[1] = Start[2];
 207          Tmp[2] = 0;
 208          *I = (char)strtol(Tmp,0,16);
 209          Start += 3;
 210          continue;
 211       }
 212       if (*Start != '"')
 213          *I = *Start;
 214       else
 215          I--;
 216       Start++;
 217    }
 218    *I = 0;
 219    Res = Buffer;
 220
 221    // Skip ending white space
 222    for (;*C != 0 && isspace(*C) != 0; C++);
 223    String = C;
 224    return true;
 225 }
 226                                                                         /*}}}*/
 227 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 228 // ---------------------------------------------------------------------
 229 /* This expects a series of space separated strings enclosed in ""'s.
 230    It concatenates the ""'s into a single string. */
 231 bool ParseCWord(const char *&String,string &Res)
 232 {
 233    // Skip leading whitespace
 234    const char *C = String;
 235    for (;*C != 0 && *C == ' '; C++);
 236    if (*C == 0)
 237       return false;
 238
 239    char Buffer[1024];
 240    char *Buf = Buffer;
 241    if (strlen(String) >= sizeof(Buffer))
 242        return false;
 243
 244    for (; *C != 0; C++)
 245    {
 246       if (*C == '"')
 247       {
 248          for (C++; *C != 0 && *C != '"'; C++)
 249             *Buf++ = *C;
 250
 251          if (*C == 0)
 252             return false;
 253
 254          continue;
 255       }
 256
 257       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 258          continue;
 259       if (isspace(*C) == 0)
 260          return false;
 261       *Buf++ = ' ';
 262    }
 263    *Buf = 0;
 264    Res = Buffer;
 265    String = C;
 266    return true;
 267 }
 268                                                                         /*}}}*/
 269 // QuoteString - Convert a string into quoted form                      /*{{{*/
 270 // ---------------------------------------------------------------------
 271 /* */
 272 string QuoteString(const string &Str, const char *Bad)
 273 {
 274    string Res;
 275    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 276    {
 277       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 278           *I == 0x25 || // percent '%' char
 279           *I <= 0x20 || *I >= 0x7F) // control chars
 280       {
 281          char Buf[10];
 282          sprintf(Buf,"%%%02x",(int)*I);
 283          Res += Buf;
 284       }
 285       else
 286          Res += *I;
 287    }
 288    return Res;
 289 }
 290                                                                         /*}}}*/
 291 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 292 // ---------------------------------------------------------------------
 293 /* This undoes QuoteString */
 294 string DeQuoteString(const string &Str)
 295 {
 296    return DeQuoteString(Str.begin(),Str.end());
 297 }
 298 string DeQuoteString(string::const_iterator const &begin,
 299                         string::const_iterator const &end)
 300 {
 301    string Res;
 302    for (string::const_iterator I = begin; I != end; ++I)
 303    {
 304       if (*I == '%' && I + 2 < end &&
 305           isxdigit(I[1]) && isxdigit(I[2]))
 306       {
 307          char Tmp[3];
 308          Tmp[0] = I[1];
 309          Tmp[1] = I[2];
 310          Tmp[2] = 0;
 311          Res += (char)strtol(Tmp,0,16);
 312          I += 2;
 313          continue;
 314       }
 315       else
 316          Res += *I;
 317    }
 318    return Res;
 319 }
 320
 321                                                                         /*}}}*/
 322 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 323 // ---------------------------------------------------------------------
 324 /* A max of 4 digits are shown before conversion to the next highest unit.
 325    The max length of the string will be 5 chars unless the size is > 10
 326    YottaBytes (E24) */
 327 string SizeToStr(double Size)
 328 {
 329    char S[300];
 330    double ASize;
 331    if (Size >= 0)
 332       ASize = Size;
 333    else
 334       ASize = -1*Size;
 335
 336    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 337       ExaBytes, ZettaBytes, YottaBytes */
 338    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 339    int I = 0;
 340    while (I <= 8)
 341    {
 342       if (ASize < 100 && I != 0)
 343       {
 344          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 345          break;
 346       }
 347
 348       if (ASize < 10000)
 349       {
 350          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 351          break;
 352       }
 353       ASize /= 1000.0;
 354       I++;
 355    }
 356
 357    return S;
 358 }
 359                                                                         /*}}}*/
 360 // TimeToStr - Convert the time into a string                           /*{{{*/
 361 // ---------------------------------------------------------------------
 362 /* Converts a number of seconds to a hms format */
 363 string TimeToStr(unsigned long Sec)
 364 {
 365    char S[300];
 366
 367    while (1)
 368    {
 369       if (Sec > 60*60*24)
 370       {
 371          //d means days, h means hours, min means minutes, s means seconds
 372          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 373          break;
 374       }
 375
 376       if (Sec > 60*60)
 377       {
 378          //h means hours, min means minutes, s means seconds
 379          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 380          break;
 381       }
 382
 383       if (Sec > 60)
 384       {
 385          //min means minutes, s means seconds
 386          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 387          break;
 388       }
 389
 390       //s means seconds
 391       sprintf(S,_("%lis"),Sec);
 392       break;
 393    }
 394
 395    return S;
 396 }
 397                                                                         /*}}}*/
 398 // SubstVar - Substitute a string for another string                    /*{{{*/
 399 // ---------------------------------------------------------------------
 400 /* This replaces all occurrences of Subst with Contents in Str. */
 401 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 402 {
 403    string::size_type Pos = 0;
 404    string::size_type OldPos = 0;
 405    string Temp;
 406
 407    while (OldPos < Str.length() &&
 408           (Pos = Str.find(Subst,OldPos)) != string::npos)
 409    {
 410       Temp += string(Str,OldPos,Pos) + Contents;
 411       OldPos = Pos + Subst.length();
 412    }
 413
 414    if (OldPos == 0)
 415       return Str;
 416
 417    return Temp + string(Str,OldPos);
 418 }
 419
 420 string SubstVar(string Str,const struct SubstVar *Vars)
 421 {
 422    for (; Vars->Subst != 0; Vars++)
 423       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 424    return Str;
 425 }
 426                                                                         /*}}}*/
 427 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 428 // ---------------------------------------------------------------------
 429 /* Returns a string with the supplied separator depth + 1 times in it */
 430 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 431 {
 432    std::string output = "";
 433    for(unsigned long d=Depth+1; d > 0; d--)
 434       output.append(Separator);
 435    return output;
 436 }
 437                                                                         /*}}}*/
 438 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 439 // ---------------------------------------------------------------------
 440 /* This converts a URI into a safe filename. It quotes all unsafe characters
 441    and converts / to _ and removes the scheme identifier. The resulting
 442    file name should be unique and never occur again for a different file */
 443 string URItoFileName(const string &URI)
 444 {
 445    // Nuke 'sensitive' items
 446    ::URI U(URI);
 447    U.User.clear();
 448    U.Password.clear();
 449    U.Access.clear();
 450
 451    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 452    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 453    replace(NewURI.begin(),NewURI.end(),'/','_');
 454    return NewURI;
 455 }
 456                                                                         /*}}}*/
 457 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 458 // ---------------------------------------------------------------------
 459 /* This routine performs a base64 transformation on a string. It was ripped
 460    from wget and then patched and bug fixed.
 461
 462    This spec can be found in rfc2045 */
 463 string Base64Encode(const string &S)
 464 {
 465    // Conversion table.
 466    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 467                           'I','J','K','L','M','N','O','P',
 468                           'Q','R','S','T','U','V','W','X',
 469                           'Y','Z','a','b','c','d','e','f',
 470                           'g','h','i','j','k','l','m','n',
 471                           'o','p','q','r','s','t','u','v',
 472                           'w','x','y','z','0','1','2','3',
 473                           '4','5','6','7','8','9','+','/'};
 474
 475    // Pre-allocate some space
 476    string Final;
 477    Final.reserve((4*S.length() + 2)/3 + 2);
 478
 479    /* Transform the 3x8 bits to 4x6 bits, as required by
 480       base64.  */
 481    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 482    {
 483       char Bits[3] = {0,0,0};
 484       Bits[0] = I[0];
 485       if (I + 1 < S.end())
 486          Bits[1] = I[1];
 487       if (I + 2 < S.end())
 488          Bits[2] = I[2];
 489
 490       Final += tbl[Bits[0] >> 2];
 491       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 492
 493       if (I + 1 >= S.end())
 494          break;
 495
 496       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 497
 498       if (I + 2 >= S.end())
 499          break;
 500
 501       Final += tbl[Bits[2] & 0x3f];
 502    }
 503
 504    /* Apply the padding elements, this tells how many bytes the remote
 505       end should discard */
 506    if (S.length() % 3 == 2)
 507       Final += '=';
 508    if (S.length() % 3 == 1)
 509       Final += "==";
 510
 511    return Final;
 512 }
 513                                                                         /*}}}*/
 514 // stringcmp - Arbitrary string compare                                 /*{{{*/
 515 // ---------------------------------------------------------------------
 516 /* This safely compares two non-null terminated strings of arbitrary
 517    length */
 518 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 519 {
 520    for (; A != AEnd && B != BEnd; A++, B++)
 521       if (*A != *B)
 522          break;
 523
 524    if (A == AEnd && B == BEnd)
 525       return 0;
 526    if (A == AEnd)
 527       return 1;
 528    if (B == BEnd)
 529       return -1;
 530    if (*A < *B)
 531       return -1;
 532    return 1;
 533 }
 534
 535 #if __GNUC__ >= 3
 536 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 537               const char *B,const char *BEnd)
 538 {
 539    for (; A != AEnd && B != BEnd; A++, B++)
 540       if (*A != *B)
 541          break;
 542
 543    if (A == AEnd && B == BEnd)
 544       return 0;
 545    if (A == AEnd)
 546       return 1;
 547    if (B == BEnd)
 548       return -1;
 549    if (*A < *B)
 550       return -1;
 551    return 1;
 552 }
 553 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 554               string::const_iterator B,string::const_iterator BEnd)
 555 {
 556    for (; A != AEnd && B != BEnd; A++, B++)
 557       if (*A != *B)
 558          break;
 559
 560    if (A == AEnd && B == BEnd)
 561       return 0;
 562    if (A == AEnd)
 563       return 1;
 564    if (B == BEnd)
 565       return -1;
 566    if (*A < *B)
 567       return -1;
 568    return 1;
 569 }
 570 #endif
 571                                                                         /*}}}*/
 572 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 573 // ---------------------------------------------------------------------
 574 /* */
 575 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 576 {
 577    for (; A != AEnd && B != BEnd; A++, B++)
 578       if (tolower_ascii(*A) != tolower_ascii(*B))
 579          break;
 580
 581    if (A == AEnd && B == BEnd)
 582       return 0;
 583    if (A == AEnd)
 584       return 1;
 585    if (B == BEnd)
 586       return -1;
 587    if (tolower_ascii(*A) < tolower_ascii(*B))
 588       return -1;
 589    return 1;
 590 }
 591 #if __GNUC__ >= 3
 592 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 593                   const char *B,const char *BEnd)
 594 {
 595    for (; A != AEnd && B != BEnd; A++, B++)
 596       if (tolower_ascii(*A) != tolower_ascii(*B))
 597          break;
 598
 599    if (A == AEnd && B == BEnd)
 600       return 0;
 601    if (A == AEnd)
 602       return 1;
 603    if (B == BEnd)
 604       return -1;
 605    if (tolower_ascii(*A) < tolower_ascii(*B))
 606       return -1;
 607    return 1;
 608 }
 609 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 610                   string::const_iterator B,string::const_iterator BEnd)
 611 {
 612    for (; A != AEnd && B != BEnd; A++, B++)
 613       if (tolower_ascii(*A) != tolower_ascii(*B))
 614          break;
 615
 616    if (A == AEnd && B == BEnd)
 617       return 0;
 618    if (A == AEnd)
 619       return 1;
 620    if (B == BEnd)
 621       return -1;
 622    if (tolower_ascii(*A) < tolower_ascii(*B))
 623       return -1;
 624    return 1;
 625 }
 626 #endif
 627                                                                         /*}}}*/
 628 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 629 // ---------------------------------------------------------------------
 630 /* The format is like those used in package files and the method
 631    communication system */
 632 string LookupTag(const string &Message,const char *Tag,const char *Default)
 633 {
 634    // Look for a matching tag.
 635    int Length = strlen(Tag);
 636    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 637    {
 638       // Found the tag
 639       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 640       {
 641          // Find the end of line and strip the leading/trailing spaces
 642          string::const_iterator J;
 643          I += Length + 1;
 644          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 645          for (J = I; *J != '\n' && J < Message.end(); ++J);
 646          for (; J > I && isspace(J[-1]) != 0; --J);
 647
 648          return string(I,J);
 649       }
 650
 651       for (; *I != '\n' && I < Message.end(); ++I);
 652    }
 653
 654    // Failed to find a match
 655    if (Default == 0)
 656       return string();
 657    return Default;
 658 }
 659                                                                         /*}}}*/
 660 // StringToBool - Converts a string into a boolean                      /*{{{*/
 661 // ---------------------------------------------------------------------
 662 /* This inspects the string to see if it is true or if it is false and
 663    then returns the result. Several variants on true/false are checked. */
 664 int StringToBool(const string &Text,int Default)
 665 {
 666    char *End;
 667    int Res = strtol(Text.c_str(),&End,0);
 668    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 669       return Res;
 670
 671    // Check for positives
 672    if (strcasecmp(Text.c_str(),"no") == 0 ||
 673        strcasecmp(Text.c_str(),"false") == 0 ||
 674        strcasecmp(Text.c_str(),"without") == 0 ||
 675        strcasecmp(Text.c_str(),"off") == 0 ||
 676        strcasecmp(Text.c_str(),"disable") == 0)
 677       return 0;
 678
 679    // Check for negatives
 680    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 681        strcasecmp(Text.c_str(),"true") == 0 ||
 682        strcasecmp(Text.c_str(),"with") == 0 ||
 683        strcasecmp(Text.c_str(),"on") == 0 ||
 684        strcasecmp(Text.c_str(),"enable") == 0)
 685       return 1;
 686
 687    return Default;
 688 }
 689                                                                         /*}}}*/
 690 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 691 // ---------------------------------------------------------------------
 692 /* This converts a time_t into a string time representation that is
 693    year 2000 compliant and timezone neutral */
 694 string TimeRFC1123(time_t Date)
 695 {
 696    struct tm Conv;
 697    if (gmtime_r(&Date, &Conv) == NULL)
 698       return "";
 699
 700    char Buf[300];
 701    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 702    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 703                           "Aug","Sep","Oct","Nov","Dec"};
 704
 705    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 706            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 707            Conv.tm_min,Conv.tm_sec);
 708    return Buf;
 709 }
 710                                                                         /*}}}*/
 711 // ReadMessages - Read messages from the FD                             /*{{{*/
 712 // ---------------------------------------------------------------------
 713 /* This pulls full messages from the input FD into the message buffer.
 714    It assumes that messages will not pause during transit so no
 715    fancy buffering is used.
 716
 717    In particular: this reads blocks from the input until it believes
 718    that it's run out of input text.  Each block is terminated by a
 719    double newline ('\n' followed by '\n').  As noted below, there is a
 720    bug in this code: it assumes that all the blocks have been read if
 721    it doesn't see additional text in the buffer after the last one is
 722    parsed, which will cause it to lose blocks if the last block
 723    coincides with the end of the buffer.
 724  */
 725 bool ReadMessages(int Fd, vector<string> &List)
 726 {
 727    char Buffer[64000];
 728    char *End = Buffer;
 729    // Represents any left-over from the previous iteration of the
 730    // parse loop.  (i.e., if a message is split across the end
 731    // of the buffer, it goes here)
 732    string PartialMessage;
 733
 734    while (1)
 735    {
 736       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 737       if (Res < 0 && errno == EINTR)
 738          continue;
 739
 740       // Process is dead, this is kind of bad..
 741       if (Res == 0)
 742          return false;
 743
 744       // No data
 745       if (Res < 0 && errno == EAGAIN)
 746          return true;
 747       if (Res < 0)
 748          return false;
 749
 750       End += Res;
 751
 752       // Look for the end of the message
 753       for (char *I = Buffer; I + 1 < End; I++)
 754       {
 755          if (I[0] != '\n' || I[1] != '\n')
 756             continue;
 757
 758          // Pull the message out
 759          string Message(Buffer,I-Buffer);
 760          PartialMessage += Message;
 761
 762          // Fix up the buffer
 763          for (; I < End && *I == '\n'; I++);
 764          End -= I-Buffer;
 765          memmove(Buffer,I,End-Buffer);
 766          I = Buffer;
 767
 768          List.push_back(PartialMessage);
 769          PartialMessage.clear();
 770       }
 771       if (End != Buffer)
 772         {
 773           // If there's text left in the buffer, store it
 774           // in PartialMessage and throw the rest of the buffer
 775           // away.  This allows us to handle messages that
 776           // are longer than the static buffer size.
 777           PartialMessage += string(Buffer, End);
 778           End = Buffer;
 779         }
 780       else
 781         {
 782           // BUG ALERT: if a message block happens to end at a
 783           // multiple of 64000 characters, this will cause it to
 784           // terminate early, leading to a badly formed block and
 785           // probably crashing the method.  However, this is the only
 786           // way we have to find the end of the message block.  I have
 787           // an idea of how to fix this, but it will require changes
 788           // to the protocol (essentially to mark the beginning and
 789           // end of the block).
 790           //
 791           //  -- dburrows 2008-04-02
 792           return true;
 793         }
 794
 795       if (WaitFd(Fd) == false)
 796          return false;
 797    }
 798 }
 799                                                                         /*}}}*/
 800 // MonthConv - Converts a month string into a number                    /*{{{*/
 801 // ---------------------------------------------------------------------
 802 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 803    Made it a bit more robust with a few tolower_ascii though. */
 804 static int MonthConv(char *Month)
 805 {
 806    switch (tolower_ascii(*Month))
 807    {
 808       case 'a':
 809       return tolower_ascii(Month[1]) == 'p'?3:7;
 810       case 'd':
 811       return 11;
 812       case 'f':
 813       return 1;
 814       case 'j':
 815       if (tolower_ascii(Month[1]) == 'a')
 816          return 0;
 817       return tolower_ascii(Month[2]) == 'n'?5:6;
 818       case 'm':
 819       return tolower_ascii(Month[2]) == 'r'?2:4;
 820       case 'n':
 821       return 10;
 822       case 'o':
 823       return 9;
 824       case 's':
 825       return 8;
 826
 827       // Pretend it is January..
 828       default:
 829       return 0;
 830    }
 831 }
 832                                                                         /*}}}*/
 833 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 834 // ---------------------------------------------------------------------
 835 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 836    than local timezone (mktime assumes the latter).
 837
 838    This function is a nonstandard GNU extension that is also present on
 839    the BSDs and maybe other systems. For others we follow the advice of
 840    the manpage of timegm and use his portable replacement. */
 841 #ifndef HAVE_TIMEGM
 842 static time_t timegm(struct tm *t)
 843 {
 844    char *tz = getenv("TZ");
 845    setenv("TZ", "", 1);
 846    tzset();
 847    time_t ret = mktime(t);
 848    if (tz)
 849       setenv("TZ", tz, 1);
 850    else
 851       unsetenv("TZ");
 852    tzset();
 853    return ret;
 854 }
 855 #endif
 856                                                                         /*}}}*/
 857 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 858 // ---------------------------------------------------------------------
 859 /* Tries to parse a full date as specified in RFC2616 Section 3.3.1
 860    with one exception: All timezones (%Z) are accepted but the protocol
 861    says that it MUST be GMT, but this one is equal to UTC which we will
 862    encounter from time to time (e.g. in Release files) so we accept all
 863    here and just assume it is GMT (or UTC) later on */
 864 bool RFC1123StrToTime(const char* const str,time_t &time)
 865 {
 866    struct tm Tm;
 867    setlocale (LC_ALL,"C");
 868    bool const invalid =
 869    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 870       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 871    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 872        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 873    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 874        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 875    setlocale (LC_ALL,"");
 876    if (invalid == true)
 877       return false;
 878
 879    time = timegm(&Tm);
 880    return true;
 881 }
 882                                                                         /*}}}*/
 883 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 884 // ---------------------------------------------------------------------
 885 /* */
 886 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 887 {
 888    struct tm Tm;
 889    // MDTM includes no whitespaces but recommend and ignored by strptime
 890    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 891       return false;
 892
 893    time = timegm(&Tm);
 894    return true;
 895 }
 896                                                                         /*}}}*/
 897 // StrToTime - Converts a string into a time_t                          /*{{{*/
 898 // ---------------------------------------------------------------------
 899 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
 900    and the C library asctime format. It requires the GNU library function
 901    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 902    reason the C library does not provide any such function :< This also
 903    handles the weird, but unambiguous FTP time format*/
 904 bool StrToTime(const string &Val,time_t &Result)
 905 {
 906    struct tm Tm;
 907    char Month[10];
 908
 909    // Skip the day of the week
 910    const char *I = strchr(Val.c_str(), ' ');
 911
 912    // Handle RFC 1123 time
 913    Month[0] = 0;
 914    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 915               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 916    {
 917       // Handle RFC 1036 time
 918       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 919                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 920          Tm.tm_year += 1900;
 921       else
 922       {
 923          // asctime format
 924          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 925                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 926          {
 927             // 'ftp' time
 928             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 929                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 930                return false;
 931             Tm.tm_mon--;
 932          }
 933       }
 934    }
 935
 936    Tm.tm_isdst = 0;
 937    if (Month[0] != 0)
 938       Tm.tm_mon = MonthConv(Month);
 939    Tm.tm_year -= 1900;
 940
 941    // Convert to local time and then to GMT
 942    Result = timegm(&Tm);
 943    return true;
 944 }
 945                                                                         /*}}}*/
 946 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 947 // ---------------------------------------------------------------------
 948 /* This is used in decoding the crazy fixed length string headers in
 949    tar and ar files. */
 950 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 951 {
 952    char S[30];
 953    if (Len >= sizeof(S))
 954       return false;
 955    memcpy(S,Str,Len);
 956    S[Len] = 0;
 957
 958    // All spaces is a zero
 959    Res = 0;
 960    unsigned I;
 961    for (I = 0; S[I] == ' '; I++);
 962    if (S[I] == 0)
 963       return true;
 964
 965    char *End;
 966    Res = strtoul(S,&End,Base);
 967    if (End == S)
 968       return false;
 969
 970    return true;
 971 }
 972                                                                         /*}}}*/
 973 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 974 // ---------------------------------------------------------------------
 975 /* This is used in decoding the crazy fixed length string headers in
 976    tar and ar files. */
 977 bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
 978 {
 979    char S[30];
 980    if (Len >= sizeof(S))
 981       return false;
 982    memcpy(S,Str,Len);
 983    S[Len] = 0;
 984
 985    // All spaces is a zero
 986    Res = 0;
 987    unsigned I;
 988    for (I = 0; S[I] == ' '; I++);
 989    if (S[I] == 0)
 990       return true;
 991
 992    char *End;
 993    Res = strtoull(S,&End,Base);
 994    if (End == S)
 995       return false;
 996
 997    return true;
 998 }
 999                                                                         /*}}}*/
1000
// Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files: the top bit of the first byte marks the encoding, the
   remaining bits and bytes form a big-endian number.

   Returns false (leaving Res untouched) if Len is zero, if the marker
   bit is not set or if the number does not fit into Res. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   unsigned long Value = static_cast<unsigned char>(Str[0]) & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
   {
      // the next shift would push significant bits out of the result
      if ((Value >> (sizeof(Value) * 8 - 8)) != 0)
         return false;
      // char may be signed: go through unsigned char so that bytes
      // >= 0x80 are not sign-extended
      Value = (Value << 8) | static_cast<unsigned char>(Str[i]);
   }

   Res = Value;
   return true;
}
1017                                                                         /*}}}*/
// HexDigit - Convert a hex character into an integer                   /*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: maps '0'-'9', 'a'-'f' and 'A'-'F' to 0-15. Every
   other character yields 0, so callers have to validate beforehand. */
static int HexDigit(int c)
{
   int Value = 0;
   if ('0' <= c && c <= '9')
      Value = c - '0';
   else if ('a' <= c && c <= 'f')
      Value = 10 + (c - 'a');
   else if ('A' <= c && c <= 'F')
      Value = 10 + (c - 'A');
   return Value;
}
1031                                                                         /*}}}*/
1032 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1033 // ---------------------------------------------------------------------
1034 /* The length of the buffer must be exactly 1/2 the length of the string. */
1035 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1036 {
1037    if (Str.length() != Length*2)
1038       return false;
1039
1040    // Convert each digit. We store it in the same order as the string
1041    int J = 0;
1042    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1043    {
1044       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1045          return false;
1046
1047       Num[J] = HexDigit(I[0]) << 4;
1048       Num[J] += HexDigit(I[1]);
1049    }
1050
1051    return true;
1052 }
1053                                                                         /*}}}*/
// TokSplitString - Split a string up by a given token                  /*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at each token location.

   List receives pointers into Input, one per piece with surrounding
   whitespace removed, followed by a null entry. ListMax is the capacity
   of List including that terminating entry. Returns false if the pieces
   do not fit; List then holds the first ListMax-1 pieces and is still
   null terminated (nothing is stored if ListMax is zero). */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // No room for even the terminating entry
   if (ListMax == 0)
      return false;

   // Strip any leading spaces
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos: skip tokens, whitespace and the nulls written above
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1093                                                                         /*}}}*/
// VectorizeString - Split a string up into a vector of strings         /*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.

   Empty pieces between two separators are kept. A separator at the very
   end does not produce a trailing empty piece, and an empty haystack
   yields a vector with a single empty string. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   string::size_type start = 0;
   while (true)
   {
      string::size_type const pos = haystack.find(split, start);
      if (pos == string::npos)
      {
         // last piece: everything up to the end of the string
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, pos - start));
      start = pos + 1;
      // the separator was the last character: nothing follows it
      if (start == haystack.size())
         break;
   }
   return exploded;
}
1111                                                                         /*}}}*/
1112 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1113 // ---------------------------------------------------------------------
1114 /* */
1115 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1116                       const char **ListEnd)
1117 {
1118    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1119       R->Hit = false;
1120
1121    unsigned long Hits = 0;
1122    for (; ListBegin != ListEnd; ListBegin++)
1123    {
1124       // Check if the name is a regex
1125       const char *I;
1126       bool Regex = true;
1127       for (I = *ListBegin; *I != 0; I++)
1128          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1129             break;
1130       if (*I == 0)
1131          Regex = false;
1132
1133       // Compile the regex pattern
1134       regex_t Pattern;
1135       if (Regex == true)
1136          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1137                      REG_NOSUB) != 0)
1138             Regex = false;
1139
1140       // Search the list
1141       bool Done = false;
1142       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1143       {
1144          if (R->Str[0] == 0)
1145             continue;
1146
1147          if (strcasecmp(R->Str,*ListBegin) != 0)
1148          {
1149             if (Regex == false)
1150                continue;
1151             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1152                continue;
1153          }
1154          Done = true;
1155
1156          if (R->Hit == false)
1157             Hits++;
1158
1159          R->Hit = true;
1160       }
1161
1162       if (Regex == true)
1163          regfree(&Pattern);
1164
1165       if (Done == false)
1166          _error->Warning(_("Selection %s not found"),*ListBegin);
1167    }
1168
1169    return Hits;
1170 }
1171                                                                         /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* One formatting attempt with a buffer of size bytes. Returns true if the
   caller is done: either the text fitted and was written to out, or no
   buffer could be allocated (out is then flagged with badbit). Returns
   false if the buffer was too small; size has then been updated to the
   size the next attempt should use. args is consumed by the attempt, so
   the caller has to hand in a freshly started va_list every time. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   char * const S = static_cast<char*>(malloc(size));
   if (S == NULL)
   {
      // Retrying can not help; report the failure on the stream instead
      // of handing a null buffer to vsnprintf
      out.setstate(ios_base::badbit);
      return true;
   }

   ssize_t const n = vsnprintf(S, size, format, args);
   bool const fits = (n > -1 && n < size);
   if (fits == true)
      out << S;
   else if (n > -1)
      // vsnprintf told us how long the text is: room for it and the NUL
      size = n + 1;
   else
      // no length reported, just try with a bigger buffer
      size *= 2;
   free(S);
   return fits;
}
/* Formats like printf and writes the result to out. */
void ioprintf(ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      // every va_start needs its va_end, also for the successful attempt
      va_end(args);
   }
}
/* Formats like printf and stores the result in out, replacing its
   previous content. */
void strprintf(string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      // every va_start needs its va_end, also for the successful attempt
      va_end(args);
   }
   out = outstr.str();
}
1217                                                                         /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs.

   End is returned if there is no room at all, if formatting failed or if
   the output had to be truncated, so further calls with the returned
   pointer as Buffer do nothing. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths rather than forming a pointer beyond End
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1239                                                                         /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
// ---------------------------------------------------------------------
/* Returns what follows the first colon of VerStr, or VerStr itself if
   it contains no colon. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
                                                                        /*}}}*/
1249
// tolower_ascii - tolower() function that ignores the locale           /*{{{*/
// ---------------------------------------------------------------------
/* Kept deliberately minimal: only 'A'-'Z' are mapped to their lower
   case counterpart (by adding 32), every other value is returned as is.
   The result therefore never depends on the locale - we only operate
   on ascii chars anyway. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + 32 : c;
}
1262                                                                         /*}}}*/
1263
1264 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1265 // ---------------------------------------------------------------------
1266 /* The domain list is a comma seperate list of domains that are suffix
1267    matched against the argument */
1268 bool CheckDomainList(const string &Host,const string &List)
1269 {
1270    string::const_iterator Start = List.begin();
1271    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1272    {
1273       if (Cur < List.end() && *Cur != ',')
1274          continue;
1275
1276       // Match the end of the string..
1277       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1278           Cur - Start != 0 &&
1279           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1280          return true;
1281
1282       Start = Cur + 1;
1283    }
1284    return false;
1285 }
1286                                                                         /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which
     \\    is replaced by a single backslash,
     \0XX  is replaced by the character with the octal code XX,
     \xXX  is replaced by the character with the hexadecimal code XX.
   A backslash followed by any other character is dropped together with
   that character. An incomplete \0 or \x escape loses its backslash and
   letter, the characters behind it are copied as usual. A backslash at
   the very end of the input is dropped. */
string DeEscapeString(const string &input)
{
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (input.end() - it < 2)
         break;

      // read it
      ++it;

      // characters available behind the escape letter; both digits have
      // to be inside the string before they are looked at
      string::difference_type const remaining = input.end() - it - 1;
      switch (*it)
      {
         case '\\':
            // double escape
            output += '\\';
            break;
         case '0':
            if (remaining >= 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += static_cast<char>(strtol(tmp, 0, 8));
               it += 2;
            }
            break;
         case 'x':
            if (remaining >= 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += static_cast<char>(strtol(tmp, 0, 16));
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1348                                                                         /*}}}*/
1349 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1350 // ---------------------------------------------------------------------
1351 /* This parses the URI into all of its components */
1352 void URI::CopyFrom(const string &U)
1353 {
1354    string::const_iterator I = U.begin();
1355
1356    // Locate the first colon, this separates the scheme
1357    for (; I < U.end() && *I != ':' ; ++I);
1358    string::const_iterator FirstColon = I;
1359
1360    /* Determine if this is a host type URI with a leading double //
1361       and then search for the first single / */
1362    string::const_iterator SingleSlash = I;
1363    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1364       SingleSlash += 3;
1365
1366    /* Find the / indicating the end of the hostname, ignoring /'s in the
1367       square brackets */
1368    bool InBracket = false;
1369    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1370    {
1371       if (*SingleSlash == '[')
1372          InBracket = true;
1373       if (InBracket == true && *SingleSlash == ']')
1374          InBracket = false;
1375    }
1376
1377    if (SingleSlash > U.end())
1378       SingleSlash = U.end();
1379
1380    // We can now write the access and path specifiers
1381    Access.assign(U.begin(),FirstColon);
1382    if (SingleSlash != U.end())
1383       Path.assign(SingleSlash,U.end());
1384    if (Path.empty() == true)
1385       Path = "/";
1386
1387    // Now we attempt to locate a user:pass@host fragment
1388    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1389       FirstColon += 3;
1390    else
1391       FirstColon += 1;
1392    if (FirstColon >= U.end())
1393       return;
1394
1395    if (FirstColon > SingleSlash)
1396       FirstColon = SingleSlash;
1397
1398    // Find the colon...
1399    I = FirstColon + 1;
1400    if (I > SingleSlash)
1401       I = SingleSlash;
1402    for (; I < SingleSlash && *I != ':'; ++I);
1403    string::const_iterator SecondColon = I;
1404
1405    // Search for the @ after the colon
1406    for (; I < SingleSlash && *I != '@'; ++I);
1407    string::const_iterator At = I;
1408
1409    // Now write the host and user/pass
1410    if (At == SingleSlash)
1411    {
1412       if (FirstColon < SingleSlash)
1413          Host.assign(FirstColon,SingleSlash);
1414    }
1415    else
1416    {
1417       Host.assign(At+1,SingleSlash);
1418       // username and password must be encoded (RFC 3986)
1419       User.assign(DeQuoteString(FirstColon,SecondColon));
1420       if (SecondColon < At)
1421          Password.assign(DeQuoteString(SecondColon+1,At));
1422    }
1423
1424    // Now we parse the RFC 2732 [] hostnames.
1425    unsigned long PortEnd = 0;
1426    InBracket = false;
1427    for (unsigned I = 0; I != Host.length();)
1428    {
1429       if (Host[I] == '[')
1430       {
1431          InBracket = true;
1432          Host.erase(I,1);
1433          continue;
1434       }
1435
1436       if (InBracket == true && Host[I] == ']')
1437       {
1438          InBracket = false;
1439          Host.erase(I,1);
1440          PortEnd = I;
1441          continue;
1442       }
1443       I++;
1444    }
1445
1446    // Tsk, weird.
1447    if (InBracket == true)
1448    {
1449       Host.clear();
1450       return;
1451    }
1452
1453    // Now we parse off a port number from the hostname
1454    Port = 0;
1455    string::size_type Pos = Host.rfind(':');
1456    if (Pos == string::npos || Pos < PortEnd)
1457       return;
1458
1459    Port = atoi(string(Host,Pos+1).c_str());
1460    Host.assign(Host,0,Pos);
1461 }
1462                                                                         /*}}}*/
1463 // URI::operator string - Convert the URI to a string                   /*{{{*/
1464 // ---------------------------------------------------------------------
1465 /* */
1466 URI::operator string()
1467 {
1468    string Res;
1469
1470    if (Access.empty() == false)
1471       Res = Access + ':';
1472
1473    if (Host.empty() == false)
1474    {
1475       if (Access.empty() == false)
1476          Res += "//";
1477
1478       if (User.empty() == false)
1479       {
1480          Res +=  User;
1481          if (Password.empty() == false)
1482             Res += ":" + Password;
1483          Res += "@";
1484       }
1485
1486       // Add RFC 2732 escaping characters
1487       if (Access.empty() == false &&
1488           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1489          Res += '[' + Host + ']';
1490       else
1491          Res += Host;
1492
1493       if (Port != 0)
1494       {
1495          char S[30];
1496          sprintf(S,":%u",Port);
1497          Res += S;
1498       }
1499    }
1500
1501    if (Path.empty() == false)
1502    {
1503       if (Path[0] != '/')
1504          Res += "/" + Path;
1505       else
1506          Res += Path;
1507    }
1508
1509    return Res;
1510 }
1511                                                                         /*}}}*/
1512 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1513 // ---------------------------------------------------------------------
1514 /* */
1515 string URI::SiteOnly(const string &URI)
1516 {
1517    ::URI U(URI);
1518    U.User.clear();
1519    U.Password.clear();
1520    U.Path.clear();
1521    U.Port = 0;
1522    return U;
1523 }
1524                                                                         /*}}}*/
1525 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1526 // ---------------------------------------------------------------------
1527 /* */
1528 string URI::NoUserPassword(const string &URI)
1529 {
1530    ::URI U(URI);
1531    U.User.clear();
1532    U.Password.clear();
1533    U.Port = 0;
1534    return U;
1535 }
1536                                                                         /*}}}*/