apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <sstream>
  27 #include <stdio.h>
  28 #include <algorithm>
  29 #include <unistd.h>
  30 #include <regex.h>
  31 #include <errno.h>
  32 #include <stdarg.h>
  33 #include <iconv.h>
  34
  35 #include <apti18n.h>
  36
  37 using namespace std;
  38                                                                         /*}}}*/
  39
  40 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  41 // ---------------------------------------------------------------------
  42 /* This is handy to use before display some information for enduser  */
  43 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  44 {
  45   iconv_t cd;
  46   const char *inbuf;
  47   char *inptr, *outbuf;
  48   size_t insize, bufsize;
  49   dest->clear();
  50
  51   cd = iconv_open(codeset, "UTF-8");
  52   if (cd == (iconv_t)(-1)) {
  53      // Something went wrong
  54      if (errno == EINVAL)
  55         _error->Error("conversion from 'UTF-8' to '%s' not available",
  56                codeset);
  57      else
  58         perror("iconv_open");
  59
  60      return false;
  61   }
  62
  63   insize = bufsize = orig.size();
  64   inbuf = orig.data();
  65   inptr = (char *)inbuf;
  66   outbuf = new char[bufsize];
  67   size_t lastError = -1;
  68
  69   while (insize != 0)
  70   {
  71      char *outptr = outbuf;
  72      size_t outsize = bufsize;
  73      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  74      dest->append(outbuf, outptr - outbuf);
  75      if (err == (size_t)(-1))
  76      {
  77         switch (errno)
  78         {
  79         case EILSEQ:
  80            insize--;
  81            inptr++;
  82            // replace a series of unknown multibytes with a single "?"
  83            if (lastError != insize) {
  84               lastError = insize - 1;
  85               dest->append("?");
  86            }
  87            break;
  88         case EINVAL:
  89            insize = 0;
  90            break;
  91         case E2BIG:
  92            if (outptr == outbuf)
  93            {
  94               bufsize *= 2;
  95               delete[] outbuf;
  96               outbuf = new char[bufsize];
  97            }
  98            break;
  99         }
 100      }
 101   }
 102
 103   delete[] outbuf;
 104
 105   iconv_close(cd);
 106
 107   return true;
 108 }
 109                                                                         /*}}}*/
 110 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 111 // ---------------------------------------------------------------------
 112 /* This is handy to use when parsing a file. It also removes \n's left
 113    over from fgets and company */
 114 char *_strstrip(char *String)
 115 {
 116    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 117
 118    if (*String == 0)
 119       return String;
 120    return _strrstrip(String);
 121 }
 122                                                                         /*}}}*/
 123 // strrstrip - Remove white space from the back of a string     /*{{{*/
 124 // ---------------------------------------------------------------------
 125 char *_strrstrip(char *String)
 126 {
 127    char *End = String + strlen(String) - 1;
 128    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 129                                *End == '\r'); End--);
 130    End++;
 131    *End = 0;
 132    return String;
 133 };
 134                                                                         /*}}}*/
 135 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 136 // ---------------------------------------------------------------------
 137 /* */
 138 char *_strtabexpand(char *String,size_t Len)
 139 {
 140    for (char *I = String; I != I + Len && *I != 0; I++)
 141    {
 142       if (*I != '\t')
 143          continue;
 144       if (I + 8 > String + Len)
 145       {
 146          *I = 0;
 147          return String;
 148       }
 149
 150       /* Assume the start of the string is 0 and find the next 8 char
 151          division */
 152       int Len;
 153       if (String == I)
 154          Len = 1;
 155       else
 156          Len = 8 - ((String - I) % 8);
 157       Len -= 2;
 158       if (Len <= 0)
 159       {
 160          *I = ' ';
 161          continue;
 162       }
 163
 164       memmove(I + Len,I + 1,strlen(I) + 1);
 165       for (char *J = I; J + Len != I; *I = ' ', I++);
 166    }
 167    return String;
 168 }
 169                                                                         /*}}}*/
 170 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 171 // ---------------------------------------------------------------------
 172 /* This grabs a single word, converts any % escaped characters to their
 173    proper values and advances the pointer. Double quotes are understood
 174    and striped out as well. This is for URI/URL parsing. It also can
 175    understand [] brackets.*/
 176 bool ParseQuoteWord(const char *&String,string &Res)
 177 {
 178    // Skip leading whitespace
 179    const char *C = String;
 180    for (;*C != 0 && *C == ' '; C++);
 181    if (*C == 0)
 182       return false;
 183
 184    // Jump to the next word
 185    for (;*C != 0 && isspace(*C) == 0; C++)
 186    {
 187       if (*C == '"')
 188       {
 189          C = strchr(C + 1, '"');
 190          if (C == NULL)
 191             return false;
 192       }
 193       if (*C == '[')
 194       {
 195          C = strchr(C + 1, ']');
 196          if (C == NULL)
 197             return false;
 198       }
 199    }
 200
 201    // Now de-quote characters
 202    char Buffer[1024];
 203    char Tmp[3];
 204    const char *Start = String;
 205    char *I;
 206    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 207    {
 208       if (*Start == '%' && Start + 2 < C &&
 209           isxdigit(Start[1]) && isxdigit(Start[2]))
 210       {
 211          Tmp[0] = Start[1];
 212          Tmp[1] = Start[2];
 213          Tmp[2] = 0;
 214          *I = (char)strtol(Tmp,0,16);
 215          Start += 3;
 216          continue;
 217       }
 218       if (*Start != '"')
 219          *I = *Start;
 220       else
 221          I--;
 222       Start++;
 223    }
 224    *I = 0;
 225    Res = Buffer;
 226
 227    // Skip ending white space
 228    for (;*C != 0 && isspace(*C) != 0; C++);
 229    String = C;
 230    return true;
 231 }
 232                                                                         /*}}}*/
 233 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 234 // ---------------------------------------------------------------------
 235 /* This expects a series of space separated strings enclosed in ""'s.
 236    It concatenates the ""'s into a single string. */
 237 bool ParseCWord(const char *&String,string &Res)
 238 {
 239    // Skip leading whitespace
 240    const char *C = String;
 241    for (;*C != 0 && *C == ' '; C++);
 242    if (*C == 0)
 243       return false;
 244
 245    char Buffer[1024];
 246    char *Buf = Buffer;
 247    if (strlen(String) >= sizeof(Buffer))
 248        return false;
 249
 250    for (; *C != 0; C++)
 251    {
 252       if (*C == '"')
 253       {
 254          for (C++; *C != 0 && *C != '"'; C++)
 255             *Buf++ = *C;
 256
 257          if (*C == 0)
 258             return false;
 259
 260          continue;
 261       }
 262
 263       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 264          continue;
 265       if (isspace(*C) == 0)
 266          return false;
 267       *Buf++ = ' ';
 268    }
 269    *Buf = 0;
 270    Res = Buffer;
 271    String = C;
 272    return true;
 273 }
 274                                                                         /*}}}*/
 275 // QuoteString - Convert a string into quoted from                      /*{{{*/
 276 // ---------------------------------------------------------------------
 277 /* */
 278 string QuoteString(const string &Str, const char *Bad)
 279 {
 280    string Res;
 281    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 282    {
 283       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 284           *I == 0x25 || // percent '%' char
 285           *I <= 0x20 || *I >= 0x7F) // control chars
 286       {
 287          char Buf[10];
 288          sprintf(Buf,"%%%02x",(int)*I);
 289          Res += Buf;
 290       }
 291       else
 292          Res += *I;
 293    }
 294    return Res;
 295 }
 296                                                                         /*}}}*/
 297 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 298 // ---------------------------------------------------------------------
 299 /* This undoes QuoteString */
 300 string DeQuoteString(const string &Str)
 301 {
 302    return DeQuoteString(Str.begin(),Str.end());
 303 }
 304 string DeQuoteString(string::const_iterator const &begin,
 305                         string::const_iterator const &end)
 306 {
 307    string Res;
 308    for (string::const_iterator I = begin; I != end; ++I)
 309    {
 310       if (*I == '%' && I + 2 < end &&
 311           isxdigit(I[1]) && isxdigit(I[2]))
 312       {
 313          char Tmp[3];
 314          Tmp[0] = I[1];
 315          Tmp[1] = I[2];
 316          Tmp[2] = 0;
 317          Res += (char)strtol(Tmp,0,16);
 318          I += 2;
 319          continue;
 320       }
 321       else
 322          Res += *I;
 323    }
 324    return Res;
 325 }
 326
 327                                                                         /*}}}*/
 328 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 329 // ---------------------------------------------------------------------
 330 /* A max of 4 digits are shown before conversion to the next highest unit.
 331    The max length of the string will be 5 chars unless the size is > 10
 332    YottaBytes (E24) */
 333 string SizeToStr(double Size)
 334 {
 335    char S[300];
 336    double ASize;
 337    if (Size >= 0)
 338       ASize = Size;
 339    else
 340       ASize = -1*Size;
 341
 342    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 343       ExaBytes, ZettaBytes, YottaBytes */
 344    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 345    int I = 0;
 346    while (I <= 8)
 347    {
 348       if (ASize < 100 && I != 0)
 349       {
 350          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 351          break;
 352       }
 353
 354       if (ASize < 10000)
 355       {
 356          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 357          break;
 358       }
 359       ASize /= 1000.0;
 360       I++;
 361    }
 362
 363    return S;
 364 }
 365                                                                         /*}}}*/
 366 // TimeToStr - Convert the time into a string                           /*{{{*/
 367 // ---------------------------------------------------------------------
 368 /* Converts a number of seconds to a hms format */
 369 string TimeToStr(unsigned long Sec)
 370 {
 371    char S[300];
 372
 373    while (1)
 374    {
 375       if (Sec > 60*60*24)
 376       {
 377          //d means days, h means hours, min means minutes, s means seconds
 378          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 379          break;
 380       }
 381
 382       if (Sec > 60*60)
 383       {
 384          //h means hours, min means minutes, s means seconds
 385          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 386          break;
 387       }
 388
 389       if (Sec > 60)
 390       {
 391          //min means minutes, s means seconds
 392          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 393          break;
 394       }
 395
 396       //s means seconds
 397       sprintf(S,_("%lis"),Sec);
 398       break;
 399    }
 400
 401    return S;
 402 }
 403                                                                         /*}}}*/
 404 // SubstVar - Substitute a string for another string                    /*{{{*/
 405 // ---------------------------------------------------------------------
 406 /* This replaces all occurances of Subst with Contents in Str. */
 407 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 408 {
 409    string::size_type Pos = 0;
 410    string::size_type OldPos = 0;
 411    string Temp;
 412
 413    while (OldPos < Str.length() &&
 414           (Pos = Str.find(Subst,OldPos)) != string::npos)
 415    {
 416       Temp += string(Str,OldPos,Pos) + Contents;
 417       OldPos = Pos + Subst.length();
 418    }
 419
 420    if (OldPos == 0)
 421       return Str;
 422
 423    return Temp + string(Str,OldPos);
 424 }
 425
 426 string SubstVar(string Str,const struct SubstVar *Vars)
 427 {
 428    for (; Vars->Subst != 0; Vars++)
 429       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 430    return Str;
 431 }
 432                                                                         /*}}}*/
 433 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 434 // ---------------------------------------------------------------------
 435 /* Returns a string with the supplied separator depth + 1 times in it */
 436 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 437 {
 438    std::string output = "";
 439    for(unsigned long d=Depth+1; d > 0; d--)
 440       output.append(Separator);
 441    return output;
 442 }
 443                                                                         /*}}}*/
 444 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 445 // ---------------------------------------------------------------------
 446 /* This converts a URI into a safe filename. It quotes all unsafe characters
 447    and converts / to _ and removes the scheme identifier. The resulting
 448    file name should be unique and never occur again for a different file */
 449 string URItoFileName(const string &URI)
 450 {
 451    // Nuke 'sensitive' items
 452    ::URI U(URI);
 453    U.User.clear();
 454    U.Password.clear();
 455    U.Access.clear();
 456
 457    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 458    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 459    replace(NewURI.begin(),NewURI.end(),'/','_');
 460    return NewURI;
 461 }
 462                                                                         /*}}}*/
 463 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 464 // ---------------------------------------------------------------------
 465 /* This routine performs a base64 transformation on a string. It was ripped
 466    from wget and then patched and bug fixed.
 467
 468    This spec can be found in rfc2045 */
 469 string Base64Encode(const string &S)
 470 {
 471    // Conversion table.
 472    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 473                           'I','J','K','L','M','N','O','P',
 474                           'Q','R','S','T','U','V','W','X',
 475                           'Y','Z','a','b','c','d','e','f',
 476                           'g','h','i','j','k','l','m','n',
 477                           'o','p','q','r','s','t','u','v',
 478                           'w','x','y','z','0','1','2','3',
 479                           '4','5','6','7','8','9','+','/'};
 480
 481    // Pre-allocate some space
 482    string Final;
 483    Final.reserve((4*S.length() + 2)/3 + 2);
 484
 485    /* Transform the 3x8 bits to 4x6 bits, as required by
 486       base64.  */
 487    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 488    {
 489       char Bits[3] = {0,0,0};
 490       Bits[0] = I[0];
 491       if (I + 1 < S.end())
 492          Bits[1] = I[1];
 493       if (I + 2 < S.end())
 494          Bits[2] = I[2];
 495
 496       Final += tbl[Bits[0] >> 2];
 497       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 498
 499       if (I + 1 >= S.end())
 500          break;
 501
 502       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 503
 504       if (I + 2 >= S.end())
 505          break;
 506
 507       Final += tbl[Bits[2] & 0x3f];
 508    }
 509
 510    /* Apply the padding elements, this tells how many bytes the remote
 511       end should discard */
 512    if (S.length() % 3 == 2)
 513       Final += '=';
 514    if (S.length() % 3 == 1)
 515       Final += "==";
 516
 517    return Final;
 518 }
 519                                                                         /*}}}*/
 520 // stringcmp - Arbitrary string compare                                 /*{{{*/
 521 // ---------------------------------------------------------------------
 522 /* This safely compares two non-null terminated strings of arbitrary
 523    length */
 524 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 525 {
 526    for (; A != AEnd && B != BEnd; A++, B++)
 527       if (*A != *B)
 528          break;
 529
 530    if (A == AEnd && B == BEnd)
 531       return 0;
 532    if (A == AEnd)
 533       return 1;
 534    if (B == BEnd)
 535       return -1;
 536    if (*A < *B)
 537       return -1;
 538    return 1;
 539 }
 540
 541 #if __GNUC__ >= 3
 542 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 543               const char *B,const char *BEnd)
 544 {
 545    for (; A != AEnd && B != BEnd; A++, B++)
 546       if (*A != *B)
 547          break;
 548
 549    if (A == AEnd && B == BEnd)
 550       return 0;
 551    if (A == AEnd)
 552       return 1;
 553    if (B == BEnd)
 554       return -1;
 555    if (*A < *B)
 556       return -1;
 557    return 1;
 558 }
 559 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 560               string::const_iterator B,string::const_iterator BEnd)
 561 {
 562    for (; A != AEnd && B != BEnd; A++, B++)
 563       if (*A != *B)
 564          break;
 565
 566    if (A == AEnd && B == BEnd)
 567       return 0;
 568    if (A == AEnd)
 569       return 1;
 570    if (B == BEnd)
 571       return -1;
 572    if (*A < *B)
 573       return -1;
 574    return 1;
 575 }
 576 #endif
 577                                                                         /*}}}*/
 578 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 579 // ---------------------------------------------------------------------
 580 /* */
 581 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 582 {
 583    for (; A != AEnd && B != BEnd; A++, B++)
 584       if (tolower_ascii(*A) != tolower_ascii(*B))
 585          break;
 586
 587    if (A == AEnd && B == BEnd)
 588       return 0;
 589    if (A == AEnd)
 590       return 1;
 591    if (B == BEnd)
 592       return -1;
 593    if (tolower_ascii(*A) < tolower_ascii(*B))
 594       return -1;
 595    return 1;
 596 }
 597 #if __GNUC__ >= 3
 598 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 599                   const char *B,const char *BEnd)
 600 {
 601    for (; A != AEnd && B != BEnd; A++, B++)
 602       if (tolower_ascii(*A) != tolower_ascii(*B))
 603          break;
 604
 605    if (A == AEnd && B == BEnd)
 606       return 0;
 607    if (A == AEnd)
 608       return 1;
 609    if (B == BEnd)
 610       return -1;
 611    if (tolower_ascii(*A) < tolower_ascii(*B))
 612       return -1;
 613    return 1;
 614 }
 615 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 616                   string::const_iterator B,string::const_iterator BEnd)
 617 {
 618    for (; A != AEnd && B != BEnd; A++, B++)
 619       if (tolower_ascii(*A) != tolower_ascii(*B))
 620          break;
 621
 622    if (A == AEnd && B == BEnd)
 623       return 0;
 624    if (A == AEnd)
 625       return 1;
 626    if (B == BEnd)
 627       return -1;
 628    if (tolower_ascii(*A) < tolower_ascii(*B))
 629       return -1;
 630    return 1;
 631 }
 632 #endif
 633                                                                         /*}}}*/
 634 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 635 // ---------------------------------------------------------------------
 636 /* The format is like those used in package files and the method
 637    communication system */
 638 string LookupTag(const string &Message,const char *Tag,const char *Default)
 639 {
 640    // Look for a matching tag.
 641    int Length = strlen(Tag);
 642    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 643    {
 644       // Found the tag
 645       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 646       {
 647          // Find the end of line and strip the leading/trailing spaces
 648          string::const_iterator J;
 649          I += Length + 1;
 650          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 651          for (J = I; *J != '\n' && J < Message.end(); ++J);
 652          for (; J > I && isspace(J[-1]) != 0; --J);
 653
 654          return string(I,J);
 655       }
 656
 657       for (; *I != '\n' && I < Message.end(); ++I);
 658    }
 659
 660    // Failed to find a match
 661    if (Default == 0)
 662       return string();
 663    return Default;
 664 }
 665                                                                         /*}}}*/
 666 // StringToBool - Converts a string into a boolean                      /*{{{*/
 667 // ---------------------------------------------------------------------
 668 /* This inspects the string to see if it is true or if it is false and
 669    then returns the result. Several varients on true/false are checked. */
 670 int StringToBool(const string &Text,int Default)
 671 {
 672    char *End;
 673    int Res = strtol(Text.c_str(),&End,0);
 674    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 675       return Res;
 676
 677    // Check for positives
 678    if (strcasecmp(Text.c_str(),"no") == 0 ||
 679        strcasecmp(Text.c_str(),"false") == 0 ||
 680        strcasecmp(Text.c_str(),"without") == 0 ||
 681        strcasecmp(Text.c_str(),"off") == 0 ||
 682        strcasecmp(Text.c_str(),"disable") == 0)
 683       return 0;
 684
 685    // Check for negatives
 686    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 687        strcasecmp(Text.c_str(),"true") == 0 ||
 688        strcasecmp(Text.c_str(),"with") == 0 ||
 689        strcasecmp(Text.c_str(),"on") == 0 ||
 690        strcasecmp(Text.c_str(),"enable") == 0)
 691       return 1;
 692
 693    return Default;
 694 }
 695                                                                         /*}}}*/
 696 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 697 // ---------------------------------------------------------------------
 698 /* This converts a time_t into a string time representation that is
 699    year 2000 complient and timezone neutral */
 700 string TimeRFC1123(time_t Date)
 701 {
 702    struct tm Conv;
 703    if (gmtime_r(&Date, &Conv) == NULL)
 704       return "";
 705
 706    char Buf[300];
 707    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 708    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 709                           "Aug","Sep","Oct","Nov","Dec"};
 710
 711    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 712            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 713            Conv.tm_min,Conv.tm_sec);
 714    return Buf;
 715 }
 716                                                                         /*}}}*/
 717 // ReadMessages - Read messages from the FD                             /*{{{*/
 718 // ---------------------------------------------------------------------
 719 /* This pulls full messages from the input FD into the message buffer.
 720    It assumes that messages will not pause during transit so no
 721    fancy buffering is used.
 722
 723    In particular: this reads blocks from the input until it believes
 724    that it's run out of input text.  Each block is terminated by a
 725    double newline ('\n' followed by '\n').  As noted below, there is a
 726    bug in this code: it assumes that all the blocks have been read if
 727    it doesn't see additional text in the buffer after the last one is
 728    parsed, which will cause it to lose blocks if the last block
 729    coincides with the end of the buffer.
 730  */
 731 bool ReadMessages(int Fd, vector<string> &List)
 732 {
 733    char Buffer[64000];
 734    char *End = Buffer;
 735    // Represents any left-over from the previous iteration of the
 736    // parse loop.  (i.e., if a message is split across the end
 737    // of the buffer, it goes here)
 738    string PartialMessage;
 739
 740    while (1)
 741    {
 742       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 743       if (Res < 0 && errno == EINTR)
 744          continue;
 745
 746       // Process is dead, this is kind of bad..
 747       if (Res == 0)
 748          return false;
 749
 750       // No data
 751       if (Res < 0 && errno == EAGAIN)
 752          return true;
 753       if (Res < 0)
 754          return false;
 755
 756       End += Res;
 757
 758       // Look for the end of the message
 759       for (char *I = Buffer; I + 1 < End; I++)
 760       {
 761          if (I[1] != '\n' ||
 762                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 763             continue;
 764
 765          // Pull the message out
 766          string Message(Buffer,I-Buffer);
 767          PartialMessage += Message;
 768
 769          // Fix up the buffer
 770          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 771          End -= I-Buffer;
 772          memmove(Buffer,I,End-Buffer);
 773          I = Buffer;
 774
 775          List.push_back(PartialMessage);
 776          PartialMessage.clear();
 777       }
 778       if (End != Buffer)
 779         {
 780           // If there's text left in the buffer, store it
 781           // in PartialMessage and throw the rest of the buffer
 782           // away.  This allows us to handle messages that
 783           // are longer than the static buffer size.
 784           PartialMessage += string(Buffer, End);
 785           End = Buffer;
 786         }
 787       else
 788         {
 789           // BUG ALERT: if a message block happens to end at a
 790           // multiple of 64000 characters, this will cause it to
 791           // terminate early, leading to a badly formed block and
 792           // probably crashing the method.  However, this is the only
 793           // way we have to find the end of the message block.  I have
 794           // an idea of how to fix this, but it will require changes
 795           // to the protocol (essentially to mark the beginning and
 796           // end of the block).
 797           //
 798           //  -- dburrows 2008-04-02
 799           return true;
 800         }
 801
 802       if (WaitFd(Fd) == false)
 803          return false;
 804    }
 805 }
 806                                                                         /*}}}*/
 807 // MonthConv - Converts a month string into a number                    /*{{{*/
 808 // ---------------------------------------------------------------------
 809 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 810    Made it a bit more robust with a few tolower_ascii though. */
 811 static int MonthConv(char *Month)
 812 {
 813    switch (tolower_ascii(*Month))
 814    {
 815       case 'a':
 816       return tolower_ascii(Month[1]) == 'p'?3:7;
 817       case 'd':
 818       return 11;
 819       case 'f':
 820       return 1;
 821       case 'j':
 822       if (tolower_ascii(Month[1]) == 'a')
 823          return 0;
 824       return tolower_ascii(Month[2]) == 'n'?5:6;
 825       case 'm':
 826       return tolower_ascii(Month[2]) == 'r'?2:4;
 827       case 'n':
 828       return 10;
 829       case 'o':
 830       return 9;
 831       case 's':
 832       return 8;
 833
 834       // Pretend it is January..
 835       default:
 836       return 0;
 837    }
 838 }
 839                                                                         /*}}}*/
 840 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 841 // ---------------------------------------------------------------------
 842 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 843    than local timezone (mktime assumes the latter).
 844
 845    This function is a nonstandard GNU extension that is also present on
 846    the BSDs and maybe other systems. For others we follow the advice of
 847    the manpage of timegm and use his portable replacement. */
 848 #ifndef HAVE_TIMEGM
 849 static time_t timegm(struct tm *t)
 850 {
 851    char *tz = getenv("TZ");
 852    setenv("TZ", "", 1);
 853    tzset();
 854    time_t ret = mktime(t);
 855    if (tz)
 856       setenv("TZ", tz, 1);
 857    else
 858       unsetenv("TZ");
 859    tzset();
 860    return ret;
 861 }
 862 #endif
 863                                                                         /*}}}*/
 864 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 865 // ---------------------------------------------------------------------
 866 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
 867    with one exception: All timezones (%Z) are accepted but the protocol
 868    says that it MUST be GMT, but this one is equal to UTC which we will
 869    encounter from time to time (e.g. in Release files) so we accept all
 870    here and just assume it is GMT (or UTC) later on */
 871 bool RFC1123StrToTime(const char* const str,time_t &time)
 872 {
 873    struct tm Tm;
 874    setlocale (LC_ALL,"C");
 875    bool const invalid =
 876    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 877       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 878    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 879        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 880    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 881        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 882    setlocale (LC_ALL,"");
 883    if (invalid == true)
 884       return false;
 885
 886    time = timegm(&Tm);
 887    return true;
 888 }
 889                                                                         /*}}}*/
 890 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 891 // ---------------------------------------------------------------------
 892 /* */
 893 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 894 {
 895    struct tm Tm;
 896    // MDTM includes no whitespaces but recommend and ignored by strptime
 897    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 898       return false;
 899
 900    time = timegm(&Tm);
 901    return true;
 902 }
 903                                                                         /*}}}*/
// StrToTime - Converts a string into a time_t                          /*{{{*/
// ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format */
 911 bool StrToTime(const string &Val,time_t &Result)
 912 {
 913    struct tm Tm;
 914    char Month[10];
 915
 916    // Skip the day of the week
 917    const char *I = strchr(Val.c_str(), ' ');
 918
 919    // Handle RFC 1123 time
 920    Month[0] = 0;
 921    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 922               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 923    {
 924       // Handle RFC 1036 time
 925       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 926                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 927          Tm.tm_year += 1900;
 928       else
 929       {
 930          // asctime format
 931          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 932                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 933          {
 934             // 'ftp' time
 935             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 936                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 937                return false;
 938             Tm.tm_mon--;
 939          }
 940       }
 941    }
 942
 943    Tm.tm_isdst = 0;
 944    if (Month[0] != 0)
 945       Tm.tm_mon = MonthConv(Month);
 946    else
 947       Tm.tm_mon = 0; // we don't have a month, so pick something
 948    Tm.tm_year -= 1900;
 949
 950    // Convert to local time and then to GMT
 951    Result = timegm(&Tm);
 952    return true;
 953 }
 954                                                                         /*}}}*/
 955 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 956 // ---------------------------------------------------------------------
 957 /* This is used in decoding the crazy fixed length string headers in
 958    tar and ar files. */
 959 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 960 {
 961    char S[30];
 962    if (Len >= sizeof(S))
 963       return false;
 964    memcpy(S,Str,Len);
 965    S[Len] = 0;
 966
 967    // All spaces is a zero
 968    Res = 0;
 969    unsigned I;
 970    for (I = 0; S[I] == ' '; I++);
 971    if (S[I] == 0)
 972       return true;
 973
 974    char *End;
 975    Res = strtoul(S,&End,Base);
 976    if (End == S)
 977       return false;
 978
 979    return true;
 980 }
 981                                                                         /*}}}*/
 982 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 983 // ---------------------------------------------------------------------
 984 /* This is used in decoding the crazy fixed length string headers in
 985    tar and ar files. */
 986 bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
 987 {
 988    char S[30];
 989    if (Len >= sizeof(S))
 990       return false;
 991    memcpy(S,Str,Len);
 992    S[Len] = 0;
 993
 994    // All spaces is a zero
 995    Res = 0;
 996    unsigned I;
 997    for (I = 0; S[I] == ' '; I++);
 998    if (S[I] == 0)
 999       return true;
1000
1001    char *End;
1002    Res = strtoull(S,&End,Base);
1003    if (End == S)
1004       return false;
1005
1006    return true;
1007 }
1008                                                                         /*}}}*/
1009
1010 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1011 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   /* Decodes a big-endian binary field of Len bytes into Res. The field is
      only accepted if the top bit of its first byte is set; that bit is a
      marker and not part of the value. Returns false for an empty field or
      one without the marker. */
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   // Plain char may be signed: go through unsigned char so that bytes of
   // 0x80 and above contribute 128..255 instead of a sign-extended value
   Res = static_cast<unsigned char>(Str[0]) & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) | static_cast<unsigned char>(Str[i]);
   return true;
}
1026                                                                         /*}}}*/
1027 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1028 // ---------------------------------------------------------------------
1029 /* Helper for Hex2Num */
static int HexDigit(int c)
{
   // decimal digits map straight onto 0..9
   if ('0' <= c && c <= '9')
      return c - '0';

   // fold the upper-case letters onto their lower-case twins
   int const folded = ('A' <= c && c <= 'F') ? c - 'A' + 'a' : c;
   if ('a' <= folded && folded <= 'f')
      return 10 + (folded - 'a');

   // anything that is not a hex digit counts as zero
   return 0;
}
1040                                                                         /*}}}*/
1041 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1042 // ---------------------------------------------------------------------
1043 /* The length of the buffer must be exactly 1/2 the length of the string. */
1044 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1045 {
1046    if (Str.length() != Length*2)
1047       return false;
1048
1049    // Convert each digit. We store it in the same order as the string
1050    int J = 0;
1051    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1052    {
1053       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1054          return false;
1055
1056       Num[J] = HexDigit(I[0]) << 4;
1057       Num[J] += HexDigit(I[1]);
1058    }
1059
1060    return true;
1061 }
1062                                                                         /*}}}*/
1063 // TokSplitString - Split a string up by a given token                  /*{{{*/
1064 // ---------------------------------------------------------------------
1065 /* This is intended to be a faster splitter, it does not use dynamic
1066    memories. Input is changed to insert nulls at each token location. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   /* Splits Input in place at every Tok. Surrounding whitespace is trimmed
      from each piece, NULs are written over the separators and pointers to
      the pieces are stored in List, followed by a terminating 0 entry.
      List has room for ListMax entries including that terminator; if the
      pieces do not fit, List is terminated early and false is returned. */

   // without room for even the terminator nothing can be stored
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. isspace() is only defined for values that
   // are representable as unsigned char, hence the casts in this function.
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // the slot just used is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over separators, blanks and the NULs written above
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1102                                                                         /*}}}*/
1103 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1104 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get a vector we can iterate over */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   /* Returns the pieces of haystack between occurrences of split. Empty
      pieces are kept, with one exception: a separator that is the very last
      character does not produce a trailing empty piece. An empty haystack
      yields a single empty string. */
   vector<string> exploded;
   string::size_type start = 0;
   while (true)
   {
      string::size_type const end = haystack.find(split, start);
      if (end == string::npos)
      {
         // no further separator: the remainder is the last piece
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));

      // Working on offsets keeps us inside the string; stop here if the
      // separator just consumed was the final character
      start = end + 1;
      if (start == haystack.length())
         break;
   }
   return exploded;
}
1120                                                                         /*}}}*/
1121 // StringSplit - split a string into a string vector by token           /*{{{*/
1122 // ---------------------------------------------------------------------
/* This can be used to split a given string up at a given separator string
 * into a vector of strings. An optional "maxsplit" argument can be used
 * to limit the splitting; in this case the last element of the vector
 * holds the unsplit remainder of the string.
 */
vector<string> StringSplit(string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   /* Cuts s at every occurrence of sep. A maxsplit greater than zero caps
      the number of elements returned, the final one then carrying the rest
      of s uncut. An empty sep yields an empty vector. */
   vector<string> pieces;

   // no seperator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   while (true)
   {
      size_t const hit = s.find(sep, from);

      // the element about to be added is the last one if either nothing is
      // left to cut at or the requested number of elements is reached
      bool const capped = (maxsplit > 0 && pieces.size() + 1 >= maxsplit);
      if (hit == string::npos || capped == true)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1153                                                                         /*}}}*/
1154 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1155 // ---------------------------------------------------------------------
1156 /* */
1157 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1158                       const char **ListEnd)
1159 {
1160    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1161       R->Hit = false;
1162
1163    unsigned long Hits = 0;
1164    for (; ListBegin != ListEnd; ListBegin++)
1165    {
1166       // Check if the name is a regex
1167       const char *I;
1168       bool Regex = true;
1169       for (I = *ListBegin; *I != 0; I++)
1170          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1171             break;
1172       if (*I == 0)
1173          Regex = false;
1174
1175       // Compile the regex pattern
1176       regex_t Pattern;
1177       if (Regex == true)
1178          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1179                      REG_NOSUB) != 0)
1180             Regex = false;
1181
1182       // Search the list
1183       bool Done = false;
1184       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1185       {
1186          if (R->Str[0] == 0)
1187             continue;
1188
1189          if (strcasecmp(R->Str,*ListBegin) != 0)
1190          {
1191             if (Regex == false)
1192                continue;
1193             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1194                continue;
1195          }
1196          Done = true;
1197
1198          if (R->Hit == false)
1199             Hits++;
1200
1201          R->Hit = true;
1202       }
1203
1204       if (Regex == true)
1205          regfree(&Pattern);
1206
1207       if (Done == false)
1208          _error->Warning(_("Selection %s not found"),*ListBegin);
1209    }
1210
1211    return Hits;
1212 }
1213                                                                         /*}}}*/
1214 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1215 // ---------------------------------------------------------------------
1216 /* This is used to make the internationalization strings easier to translate
1217    and to allow reordering of parameters */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   /* Formats into a scratch buffer of 'size' bytes. If the result fits it
      is written to 'out' and true is returned. Otherwise nothing is
      written, 'size' is raised to what the next attempt should use and
      false is returned; the caller has to restart its va_list before
      trying again as vsnprintf() has consumed it. */

   // The buffer is owned by a vector: a failed allocation is reported by
   // an exception rather than a NULL pointer handed to vsnprintf(), and the
   // memory is released even if the stream insertion throws
   std::vector<char> buffer(size > 0 ? size : 1);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;   // the exact amount needed, including the terminator
   else
      size *= 2;      // no length reported, just try something bigger
   return false;
}
1235 void ioprintf(ostream &out,const char *format,...)
1236 {
1237    va_list args;
1238    ssize_t size = 400;
1239    while (true) {
1240       va_start(args,format);
1241       if (iovprintf(out, format, args, size) == true)
1242          return;
1243       va_end(args);
1244    }
1245 }
1246 void strprintf(string &out,const char *format,...)
1247 {
1248    va_list args;
1249    ssize_t size = 400;
1250    std::ostringstream outstr;
1251    while (true) {
1252       va_start(args,format);
1253       if (iovprintf(outstr, format, args, size) == true)
1254          break;
1255       va_end(args);
1256    }
1257    out = outstr.str();
1258 }
1259                                                                         /*}}}*/
1260 // safe_snprintf - Safer snprintf                                       /*{{{*/
1261 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   /* Formats into [Buffer,End) and returns the position of the terminating
      NUL, ready to be used as Buffer of a follow-up call. End is returned
      if there was no room at all, the output got cut short or formatting
      failed. */

   // an empty (or inverted) range cannot take anything
   if (Buffer >= End)
      return End;

   va_list Params;
   va_start(Params,Format);
   int const Written = vsnprintf(Buffer,End - Buffer,Format,Params);
   va_end(Params);

   // vsnprintf() reports the length the full text would have had
   bool const Fits = (Written >= 0 && Buffer + Written <= End);
   return Fits ? Buffer + Written : End;
}
1281                                                                         /*}}}*/
1282 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1283 // ---------------------------------------------------------------------
string StripEpoch(const string &VerStr)
{
   // Everything up to and including the first colon is the epoch; a
   // version without a colon is handed back as it is
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1291                                                                         /*}}}*/
1292 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1293 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is noticeably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
int tolower_ascii(int const c)
{
   // only the 26 ASCII capitals are touched, everything else passes through
   bool const capital = ('A' <= c && c <= 'Z');
   return capital ? c + ('a' - 'A') : c;
}
1304                                                                         /*}}}*/
1305
// CheckDomainList - See if Host is in a , separated list               /*{{{*/
// ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1310 bool CheckDomainList(const string &Host,const string &List)
1311 {
1312    string::const_iterator Start = List.begin();
1313    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1314    {
1315       if (Cur < List.end() && *Cur != ',')
1316          continue;
1317
1318       // Match the end of the string..
1319       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1320           Cur - Start != 0 &&
1321           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1322          return true;
1323
1324       Start = Cur + 1;
1325    }
1326    return false;
1327 }
1328                                                                         /*}}}*/
1329 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1330 // ---------------------------------------------------------------------
1331 /* */
size_t strv_length(const char **str_array)
{
   // walk up to the NULL entry that ends the array, it is not counted
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return cursor - str_array;
}

1340
1341 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1342 // ---------------------------------------------------------------------
1343 /* */
string DeEscapeString(const string &input)
{
   /* Returns a copy of input with the escapes \\ (a single backslash),
      \0XX (two octal digits) and \xXX (two hex digits) replaced by the
      character they stand for. A backslash at the very end, an unknown
      escape and an escape lacking its two digits are dropped. */
   string output;
   string::size_type const length = input.length();
   for (string::size_type pos = 0; pos < length; ++pos)
   {
      // just copy non-escape chars
      if (input[pos] != '\\')
      {
         output += input[pos];
         continue;
      }

      // ensure we have a char to read
      if (pos + 1 == length)
         break;

      // read the character selecting the kind of escape
      int base;
      switch (input[++pos])
      {
         case '\\':
            // deal with double escape
            output += '\\';
            continue;
         case '0':
            base = 8;
            break;
         case 'x':
            base = 16;
            break;
         default:
            // FIXME: raise exception here?
            continue;
      }

      // Both digits have to be inside the string. Testing for only one of
      // them would read the string's end and afterwards step past it, at
      // which point the loop would no longer terminate properly.
      if (length - pos <= 2)
         continue;

      char const digits[3] = { input[pos + 1], input[pos + 2], 0 };
      output += (char)strtol(digits, 0, base);
      pos += 2;
   }
   return output;
}
1402                                                                         /*}}}*/
1403 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1404 // ---------------------------------------------------------------------
1405 /* This parses the URI into all of its components */
1406 void URI::CopyFrom(const string &U)
1407 {
1408    string::const_iterator I = U.begin();
1409
1410    // Locate the first colon, this separates the scheme
1411    for (; I < U.end() && *I != ':' ; ++I);
1412    string::const_iterator FirstColon = I;
1413
1414    /* Determine if this is a host type URI with a leading double //
1415       and then search for the first single / */
1416    string::const_iterator SingleSlash = I;
1417    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1418       SingleSlash += 3;
1419
1420    /* Find the / indicating the end of the hostname, ignoring /'s in the
1421       square brackets */
1422    bool InBracket = false;
1423    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1424    {
1425       if (*SingleSlash == '[')
1426          InBracket = true;
1427       if (InBracket == true && *SingleSlash == ']')
1428          InBracket = false;
1429    }
1430
1431    if (SingleSlash > U.end())
1432       SingleSlash = U.end();
1433
1434    // We can now write the access and path specifiers
1435    Access.assign(U.begin(),FirstColon);
1436    if (SingleSlash != U.end())
1437       Path.assign(SingleSlash,U.end());
1438    if (Path.empty() == true)
1439       Path = "/";
1440
1441    // Now we attempt to locate a user:pass@host fragment
1442    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1443       FirstColon += 3;
1444    else
1445       FirstColon += 1;
1446    if (FirstColon >= U.end())
1447       return;
1448
1449    if (FirstColon > SingleSlash)
1450       FirstColon = SingleSlash;
1451
1452    // Find the colon...
1453    I = FirstColon + 1;
1454    if (I > SingleSlash)
1455       I = SingleSlash;
1456    for (; I < SingleSlash && *I != ':'; ++I);
1457    string::const_iterator SecondColon = I;
1458
1459    // Search for the @ after the colon
1460    for (; I < SingleSlash && *I != '@'; ++I);
1461    string::const_iterator At = I;
1462
1463    // Now write the host and user/pass
1464    if (At == SingleSlash)
1465    {
1466       if (FirstColon < SingleSlash)
1467          Host.assign(FirstColon,SingleSlash);
1468    }
1469    else
1470    {
1471       Host.assign(At+1,SingleSlash);
1472       // username and password must be encoded (RFC 3986)
1473       User.assign(DeQuoteString(FirstColon,SecondColon));
1474       if (SecondColon < At)
1475          Password.assign(DeQuoteString(SecondColon+1,At));
1476    }
1477
1478    // Now we parse the RFC 2732 [] hostnames.
1479    unsigned long PortEnd = 0;
1480    InBracket = false;
1481    for (unsigned I = 0; I != Host.length();)
1482    {
1483       if (Host[I] == '[')
1484       {
1485          InBracket = true;
1486          Host.erase(I,1);
1487          continue;
1488       }
1489
1490       if (InBracket == true && Host[I] == ']')
1491       {
1492          InBracket = false;
1493          Host.erase(I,1);
1494          PortEnd = I;
1495          continue;
1496       }
1497       I++;
1498    }
1499
1500    // Tsk, weird.
1501    if (InBracket == true)
1502    {
1503       Host.clear();
1504       return;
1505    }
1506
1507    // Now we parse off a port number from the hostname
1508    Port = 0;
1509    string::size_type Pos = Host.rfind(':');
1510    if (Pos == string::npos || Pos < PortEnd)
1511       return;
1512
1513    Port = atoi(string(Host,Pos+1).c_str());
1514    Host.assign(Host,0,Pos);
1515 }
1516                                                                         /*}}}*/
1517 // URI::operator string - Convert the URI to a string                   /*{{{*/
1518 // ---------------------------------------------------------------------
1519 /* */
1520 URI::operator string()
1521 {
1522    string Res;
1523
1524    if (Access.empty() == false)
1525       Res = Access + ':';
1526
1527    if (Host.empty() == false)
1528    {
1529       if (Access.empty() == false)
1530          Res += "//";
1531
1532       if (User.empty() == false)
1533       {
1534          // FIXME: Technically userinfo is permitted even less
1535          // characters than these, but this is not conveniently
1536          // expressed with a blacklist.
1537          Res += QuoteString(User, ":/?#[]@");
1538          if (Password.empty() == false)
1539             Res += ":" + QuoteString(Password, ":/?#[]@");
1540          Res += "@";
1541       }
1542
1543       // Add RFC 2732 escaping characters
1544       if (Access.empty() == false &&
1545           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1546          Res += '[' + Host + ']';
1547       else
1548          Res += Host;
1549
1550       if (Port != 0)
1551       {
1552          char S[30];
1553          sprintf(S,":%u",Port);
1554          Res += S;
1555       }
1556    }
1557
1558    if (Path.empty() == false)
1559    {
1560       if (Path[0] != '/')
1561          Res += "/" + Path;
1562       else
1563          Res += Path;
1564    }
1565
1566    return Res;
1567 }
1568                                                                         /*}}}*/
1569 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1570 // ---------------------------------------------------------------------
1571 /* */
1572 string URI::SiteOnly(const string &URI)
1573 {
1574    ::URI U(URI);
1575    U.User.clear();
1576    U.Password.clear();
1577    U.Path.clear();
1578    return U;
1579 }
1580                                                                         /*}}}*/
1581 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1582 // ---------------------------------------------------------------------
1583 /* */
1584 string URI::NoUserPassword(const string &URI)
1585 {
1586    ::URI U(URI);
1587    U.User.clear();
1588    U.Password.clear();
1589    return U;
1590 }
1591                                                                         /*}}}*/