apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <sstream>
  27 #include <stdio.h>
  28 #include <algorithm>
  29 #include <unistd.h>
  30 #include <regex.h>
  31 #include <errno.h>
  32 #include <stdarg.h>
  33 #include <iconv.h>
  34
  35 #include <apti18n.h>
  36
  37 using namespace std;
  38                                                                         /*}}}*/
  39 // Strip - Remove white space from the front and back of a string       /*{{{*/
  40 // ---------------------------------------------------------------------
  41 namespace APT {
  42    namespace String {
  43 std::string Strip(const std::string &s)
  44 {
  45    size_t start = s.find_first_not_of(" \t\n");
  46    // only whitespace
  47    if (start == string::npos)
  48       return "";
  49    size_t end = s.find_last_not_of(" \t\n");
  50    return s.substr(start, end-start+1);
  51 }
  52
  53 bool Endswith(const std::string &s, const std::string &end)
  54 {
  55    if (end.size() > s.size())
  56       return false;
  57    return (s.substr(s.size() - end.size(), s.size()) == end);
  58 }
  59
  60 }
  61 }
  62                                                                         /*}}}*/
  63 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  64 // ---------------------------------------------------------------------
/* This is handy to use before displaying some information to the end user */
  66 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  67 {
  68   iconv_t cd;
  69   const char *inbuf;
  70   char *inptr, *outbuf;
  71   size_t insize, bufsize;
  72   dest->clear();
  73
  74   cd = iconv_open(codeset, "UTF-8");
  75   if (cd == (iconv_t)(-1)) {
  76      // Something went wrong
  77      if (errno == EINVAL)
  78         _error->Error("conversion from 'UTF-8' to '%s' not available",
  79                codeset);
  80      else
  81         perror("iconv_open");
  82
  83      return false;
  84   }
  85
  86   insize = bufsize = orig.size();
  87   inbuf = orig.data();
  88   inptr = (char *)inbuf;
  89   outbuf = new char[bufsize];
  90   size_t lastError = -1;
  91
  92   while (insize != 0)
  93   {
  94      char *outptr = outbuf;
  95      size_t outsize = bufsize;
  96      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  97      dest->append(outbuf, outptr - outbuf);
  98      if (err == (size_t)(-1))
  99      {
 100         switch (errno)
 101         {
 102         case EILSEQ:
 103            insize--;
 104            inptr++;
 105            // replace a series of unknown multibytes with a single "?"
 106            if (lastError != insize) {
 107               lastError = insize - 1;
 108               dest->append("?");
 109            }
 110            break;
 111         case EINVAL:
 112            insize = 0;
 113            break;
 114         case E2BIG:
 115            if (outptr == outbuf)
 116            {
 117               bufsize *= 2;
 118               delete[] outbuf;
 119               outbuf = new char[bufsize];
 120            }
 121            break;
 122         }
 123      }
 124   }
 125
 126   delete[] outbuf;
 127
 128   iconv_close(cd);
 129
 130   return true;
 131 }
 132                                                                         /*}}}*/
 133 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 134 // ---------------------------------------------------------------------
 135 /* This is handy to use when parsing a file. It also removes \n's left
 136    over from fgets and company */
 137 char *_strstrip(char *String)
 138 {
 139    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 140
 141    if (*String == 0)
 142       return String;
 143    return _strrstrip(String);
 144 }
 145                                                                         /*}}}*/
 146 // strrstrip - Remove white space from the back of a string     /*{{{*/
 147 // ---------------------------------------------------------------------
 148 char *_strrstrip(char *String)
 149 {
 150    char *End = String + strlen(String) - 1;
 151    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 152                                *End == '\r'); End--);
 153    End++;
 154    *End = 0;
 155    return String;
 156 };
 157                                                                         /*}}}*/
 158 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 159 // ---------------------------------------------------------------------
 160 /* */
 161 char *_strtabexpand(char *String,size_t Len)
 162 {
 163    for (char *I = String; I != I + Len && *I != 0; I++)
 164    {
 165       if (*I != '\t')
 166          continue;
 167       if (I + 8 > String + Len)
 168       {
 169          *I = 0;
 170          return String;
 171       }
 172
 173       /* Assume the start of the string is 0 and find the next 8 char
 174          division */
 175       int Len;
 176       if (String == I)
 177          Len = 1;
 178       else
 179          Len = 8 - ((String - I) % 8);
 180       Len -= 2;
 181       if (Len <= 0)
 182       {
 183          *I = ' ';
 184          continue;
 185       }
 186
 187       memmove(I + Len,I + 1,strlen(I) + 1);
 188       for (char *J = I; J + Len != I; *I = ' ', I++);
 189    }
 190    return String;
 191 }
 192                                                                         /*}}}*/
 193 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 194 // ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It can also
   understand [] brackets. */
 199 bool ParseQuoteWord(const char *&String,string &Res)
 200 {
 201    // Skip leading whitespace
 202    const char *C = String;
 203    for (;*C != 0 && *C == ' '; C++);
 204    if (*C == 0)
 205       return false;
 206
 207    // Jump to the next word
 208    for (;*C != 0 && isspace(*C) == 0; C++)
 209    {
 210       if (*C == '"')
 211       {
 212          C = strchr(C + 1, '"');
 213          if (C == NULL)
 214             return false;
 215       }
 216       if (*C == '[')
 217       {
 218          C = strchr(C + 1, ']');
 219          if (C == NULL)
 220             return false;
 221       }
 222    }
 223
 224    // Now de-quote characters
 225    char Buffer[1024];
 226    char Tmp[3];
 227    const char *Start = String;
 228    char *I;
 229    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 230    {
 231       if (*Start == '%' && Start + 2 < C &&
 232           isxdigit(Start[1]) && isxdigit(Start[2]))
 233       {
 234          Tmp[0] = Start[1];
 235          Tmp[1] = Start[2];
 236          Tmp[2] = 0;
 237          *I = (char)strtol(Tmp,0,16);
 238          Start += 3;
 239          continue;
 240       }
 241       if (*Start != '"')
 242          *I = *Start;
 243       else
 244          I--;
 245       Start++;
 246    }
 247    *I = 0;
 248    Res = Buffer;
 249
 250    // Skip ending white space
 251    for (;*C != 0 && isspace(*C) != 0; C++);
 252    String = C;
 253    return true;
 254 }
 255                                                                         /*}}}*/
 256 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 257 // ---------------------------------------------------------------------
 258 /* This expects a series of space separated strings enclosed in ""'s.
 259    It concatenates the ""'s into a single string. */
 260 bool ParseCWord(const char *&String,string &Res)
 261 {
 262    // Skip leading whitespace
 263    const char *C = String;
 264    for (;*C != 0 && *C == ' '; C++);
 265    if (*C == 0)
 266       return false;
 267
 268    char Buffer[1024];
 269    char *Buf = Buffer;
 270    if (strlen(String) >= sizeof(Buffer))
 271        return false;
 272
 273    for (; *C != 0; C++)
 274    {
 275       if (*C == '"')
 276       {
 277          for (C++; *C != 0 && *C != '"'; C++)
 278             *Buf++ = *C;
 279
 280          if (*C == 0)
 281             return false;
 282
 283          continue;
 284       }
 285
 286       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 287          continue;
 288       if (isspace(*C) == 0)
 289          return false;
 290       *Buf++ = ' ';
 291    }
 292    *Buf = 0;
 293    Res = Buffer;
 294    String = C;
 295    return true;
 296 }
 297                                                                         /*}}}*/
// QuoteString - Convert a string into quoted form                      /*{{{*/
 299 // ---------------------------------------------------------------------
 300 /* */
 301 string QuoteString(const string &Str, const char *Bad)
 302 {
 303    string Res;
 304    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 305    {
 306       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 307           *I == 0x25 || // percent '%' char
 308           *I <= 0x20 || *I >= 0x7F) // control chars
 309       {
 310          char Buf[10];
 311          sprintf(Buf,"%%%02x",(int)*I);
 312          Res += Buf;
 313       }
 314       else
 315          Res += *I;
 316    }
 317    return Res;
 318 }
 319                                                                         /*}}}*/
// DeQuoteString - Convert a string from quoted form                    /*{{{*/
 321 // ---------------------------------------------------------------------
 322 /* This undoes QuoteString */
 323 string DeQuoteString(const string &Str)
 324 {
 325    return DeQuoteString(Str.begin(),Str.end());
 326 }
 327 string DeQuoteString(string::const_iterator const &begin,
 328                         string::const_iterator const &end)
 329 {
 330    string Res;
 331    for (string::const_iterator I = begin; I != end; ++I)
 332    {
 333       if (*I == '%' && I + 2 < end &&
 334           isxdigit(I[1]) && isxdigit(I[2]))
 335       {
 336          char Tmp[3];
 337          Tmp[0] = I[1];
 338          Tmp[1] = I[2];
 339          Tmp[2] = 0;
 340          Res += (char)strtol(Tmp,0,16);
 341          I += 2;
 342          continue;
 343       }
 344       else
 345          Res += *I;
 346    }
 347    return Res;
 348 }
 349
 350                                                                         /*}}}*/
 351 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 352 // ---------------------------------------------------------------------
 353 /* A max of 4 digits are shown before conversion to the next highest unit.
 354    The max length of the string will be 5 chars unless the size is > 10
 355    YottaBytes (E24) */
 356 string SizeToStr(double Size)
 357 {
 358    char S[300];
 359    double ASize;
 360    if (Size >= 0)
 361       ASize = Size;
 362    else
 363       ASize = -1*Size;
 364
 365    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 366       ExaBytes, ZettaBytes, YottaBytes */
 367    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 368    int I = 0;
 369    while (I <= 8)
 370    {
 371       if (ASize < 100 && I != 0)
 372       {
 373          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 374          break;
 375       }
 376
 377       if (ASize < 10000)
 378       {
 379          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 380          break;
 381       }
 382       ASize /= 1000.0;
 383       I++;
 384    }
 385
 386    return S;
 387 }
 388                                                                         /*}}}*/
 389 // TimeToStr - Convert the time into a string                           /*{{{*/
 390 // ---------------------------------------------------------------------
 391 /* Converts a number of seconds to a hms format */
 392 string TimeToStr(unsigned long Sec)
 393 {
 394    char S[300];
 395
 396    while (1)
 397    {
 398       if (Sec > 60*60*24)
 399       {
 400          //d means days, h means hours, min means minutes, s means seconds
 401          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 402          break;
 403       }
 404
 405       if (Sec > 60*60)
 406       {
 407          //h means hours, min means minutes, s means seconds
 408          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 409          break;
 410       }
 411
 412       if (Sec > 60)
 413       {
 414          //min means minutes, s means seconds
 415          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 416          break;
 417       }
 418
 419       //s means seconds
 420       sprintf(S,_("%lis"),Sec);
 421       break;
 422    }
 423
 424    return S;
 425 }
 426                                                                         /*}}}*/
 427 // SubstVar - Substitute a string for another string                    /*{{{*/
 428 // ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str. */
 430 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 431 {
 432    string::size_type Pos = 0;
 433    string::size_type OldPos = 0;
 434    string Temp;
 435
 436    while (OldPos < Str.length() &&
 437           (Pos = Str.find(Subst,OldPos)) != string::npos)
 438    {
 439       Temp += string(Str,OldPos,Pos) + Contents;
 440       OldPos = Pos + Subst.length();
 441    }
 442
 443    if (OldPos == 0)
 444       return Str;
 445
 446    return Temp + string(Str,OldPos);
 447 }
 448
 449 string SubstVar(string Str,const struct SubstVar *Vars)
 450 {
 451    for (; Vars->Subst != 0; Vars++)
 452       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 453    return Str;
 454 }
 455                                                                         /*}}}*/
 456 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 457 // ---------------------------------------------------------------------
 458 /* Returns a string with the supplied separator depth + 1 times in it */
 459 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 460 {
 461    std::string output = "";
 462    for(unsigned long d=Depth+1; d > 0; d--)
 463       output.append(Separator);
 464    return output;
 465 }
 466                                                                         /*}}}*/
 467 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 468 // ---------------------------------------------------------------------
 469 /* This converts a URI into a safe filename. It quotes all unsafe characters
 470    and converts / to _ and removes the scheme identifier. The resulting
 471    file name should be unique and never occur again for a different file */
 472 string URItoFileName(const string &URI)
 473 {
 474    // Nuke 'sensitive' items
 475    ::URI U(URI);
 476    U.User.clear();
 477    U.Password.clear();
 478    U.Access.clear();
 479
 480    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 481    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 482    replace(NewURI.begin(),NewURI.end(),'/','_');
 483    return NewURI;
 484 }
 485                                                                         /*}}}*/
 486 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 487 // ---------------------------------------------------------------------
 488 /* This routine performs a base64 transformation on a string. It was ripped
 489    from wget and then patched and bug fixed.
 490
 491    This spec can be found in rfc2045 */
 492 string Base64Encode(const string &S)
 493 {
 494    // Conversion table.
 495    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 496                           'I','J','K','L','M','N','O','P',
 497                           'Q','R','S','T','U','V','W','X',
 498                           'Y','Z','a','b','c','d','e','f',
 499                           'g','h','i','j','k','l','m','n',
 500                           'o','p','q','r','s','t','u','v',
 501                           'w','x','y','z','0','1','2','3',
 502                           '4','5','6','7','8','9','+','/'};
 503
 504    // Pre-allocate some space
 505    string Final;
 506    Final.reserve((4*S.length() + 2)/3 + 2);
 507
 508    /* Transform the 3x8 bits to 4x6 bits, as required by
 509       base64.  */
 510    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 511    {
 512       char Bits[3] = {0,0,0};
 513       Bits[0] = I[0];
 514       if (I + 1 < S.end())
 515          Bits[1] = I[1];
 516       if (I + 2 < S.end())
 517          Bits[2] = I[2];
 518
 519       Final += tbl[Bits[0] >> 2];
 520       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 521
 522       if (I + 1 >= S.end())
 523          break;
 524
 525       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 526
 527       if (I + 2 >= S.end())
 528          break;
 529
 530       Final += tbl[Bits[2] & 0x3f];
 531    }
 532
 533    /* Apply the padding elements, this tells how many bytes the remote
 534       end should discard */
 535    if (S.length() % 3 == 2)
 536       Final += '=';
 537    if (S.length() % 3 == 1)
 538       Final += "==";
 539
 540    return Final;
 541 }
 542                                                                         /*}}}*/
 543 // stringcmp - Arbitrary string compare                                 /*{{{*/
 544 // ---------------------------------------------------------------------
 545 /* This safely compares two non-null terminated strings of arbitrary
 546    length */
 547 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 548 {
 549    for (; A != AEnd && B != BEnd; A++, B++)
 550       if (*A != *B)
 551          break;
 552
 553    if (A == AEnd && B == BEnd)
 554       return 0;
 555    if (A == AEnd)
 556       return 1;
 557    if (B == BEnd)
 558       return -1;
 559    if (*A < *B)
 560       return -1;
 561    return 1;
 562 }
 563
 564 #if __GNUC__ >= 3
 565 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 566               const char *B,const char *BEnd)
 567 {
 568    for (; A != AEnd && B != BEnd; A++, B++)
 569       if (*A != *B)
 570          break;
 571
 572    if (A == AEnd && B == BEnd)
 573       return 0;
 574    if (A == AEnd)
 575       return 1;
 576    if (B == BEnd)
 577       return -1;
 578    if (*A < *B)
 579       return -1;
 580    return 1;
 581 }
 582 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 583               string::const_iterator B,string::const_iterator BEnd)
 584 {
 585    for (; A != AEnd && B != BEnd; A++, B++)
 586       if (*A != *B)
 587          break;
 588
 589    if (A == AEnd && B == BEnd)
 590       return 0;
 591    if (A == AEnd)
 592       return 1;
 593    if (B == BEnd)
 594       return -1;
 595    if (*A < *B)
 596       return -1;
 597    return 1;
 598 }
 599 #endif
 600                                                                         /*}}}*/
 601 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 602 // ---------------------------------------------------------------------
 603 /* */
 604 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 605 {
 606    for (; A != AEnd && B != BEnd; A++, B++)
 607       if (tolower_ascii(*A) != tolower_ascii(*B))
 608          break;
 609
 610    if (A == AEnd && B == BEnd)
 611       return 0;
 612    if (A == AEnd)
 613       return 1;
 614    if (B == BEnd)
 615       return -1;
 616    if (tolower_ascii(*A) < tolower_ascii(*B))
 617       return -1;
 618    return 1;
 619 }
 620 #if __GNUC__ >= 3
 621 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 622                   const char *B,const char *BEnd)
 623 {
 624    for (; A != AEnd && B != BEnd; A++, B++)
 625       if (tolower_ascii(*A) != tolower_ascii(*B))
 626          break;
 627
 628    if (A == AEnd && B == BEnd)
 629       return 0;
 630    if (A == AEnd)
 631       return 1;
 632    if (B == BEnd)
 633       return -1;
 634    if (tolower_ascii(*A) < tolower_ascii(*B))
 635       return -1;
 636    return 1;
 637 }
 638 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 639                   string::const_iterator B,string::const_iterator BEnd)
 640 {
 641    for (; A != AEnd && B != BEnd; A++, B++)
 642       if (tolower_ascii(*A) != tolower_ascii(*B))
 643          break;
 644
 645    if (A == AEnd && B == BEnd)
 646       return 0;
 647    if (A == AEnd)
 648       return 1;
 649    if (B == BEnd)
 650       return -1;
 651    if (tolower_ascii(*A) < tolower_ascii(*B))
 652       return -1;
 653    return 1;
 654 }
 655 #endif
 656                                                                         /*}}}*/
 657 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 658 // ---------------------------------------------------------------------
 659 /* The format is like those used in package files and the method
 660    communication system */
 661 string LookupTag(const string &Message,const char *Tag,const char *Default)
 662 {
 663    // Look for a matching tag.
 664    int Length = strlen(Tag);
 665    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 666    {
 667       // Found the tag
 668       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 669       {
 670          // Find the end of line and strip the leading/trailing spaces
 671          string::const_iterator J;
 672          I += Length + 1;
 673          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 674          for (J = I; *J != '\n' && J < Message.end(); ++J);
 675          for (; J > I && isspace(J[-1]) != 0; --J);
 676
 677          return string(I,J);
 678       }
 679
 680       for (; *I != '\n' && I < Message.end(); ++I);
 681    }
 682
 683    // Failed to find a match
 684    if (Default == 0)
 685       return string();
 686    return Default;
 687 }
 688                                                                         /*}}}*/
 689 // StringToBool - Converts a string into a boolean                      /*{{{*/
 690 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
 693 int StringToBool(const string &Text,int Default)
 694 {
 695    char *End;
 696    int Res = strtol(Text.c_str(),&End,0);
 697    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 698       return Res;
 699
 700    // Check for positives
 701    if (strcasecmp(Text.c_str(),"no") == 0 ||
 702        strcasecmp(Text.c_str(),"false") == 0 ||
 703        strcasecmp(Text.c_str(),"without") == 0 ||
 704        strcasecmp(Text.c_str(),"off") == 0 ||
 705        strcasecmp(Text.c_str(),"disable") == 0)
 706       return 0;
 707
 708    // Check for negatives
 709    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 710        strcasecmp(Text.c_str(),"true") == 0 ||
 711        strcasecmp(Text.c_str(),"with") == 0 ||
 712        strcasecmp(Text.c_str(),"on") == 0 ||
 713        strcasecmp(Text.c_str(),"enable") == 0)
 714       return 1;
 715
 716    return Default;
 717 }
 718                                                                         /*}}}*/
 719 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 720 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
 723 string TimeRFC1123(time_t Date)
 724 {
 725    struct tm Conv;
 726    if (gmtime_r(&Date, &Conv) == NULL)
 727       return "";
 728
 729    char Buf[300];
 730    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 731    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 732                           "Aug","Sep","Oct","Nov","Dec"};
 733
 734    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 735            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 736            Conv.tm_min,Conv.tm_sec);
 737    return Buf;
 738 }
 739                                                                         /*}}}*/
 740 // ReadMessages - Read messages from the FD                             /*{{{*/
 741 // ---------------------------------------------------------------------
 742 /* This pulls full messages from the input FD into the message buffer.
 743    It assumes that messages will not pause during transit so no
 744    fancy buffering is used.
 745
 746    In particular: this reads blocks from the input until it believes
 747    that it's run out of input text.  Each block is terminated by a
 748    double newline ('\n' followed by '\n').  As noted below, there is a
 749    bug in this code: it assumes that all the blocks have been read if
 750    it doesn't see additional text in the buffer after the last one is
 751    parsed, which will cause it to lose blocks if the last block
 752    coincides with the end of the buffer.
 753  */
 754 bool ReadMessages(int Fd, vector<string> &List)
 755 {
 756    char Buffer[64000];
 757    char *End = Buffer;
 758    // Represents any left-over from the previous iteration of the
 759    // parse loop.  (i.e., if a message is split across the end
 760    // of the buffer, it goes here)
 761    string PartialMessage;
 762
 763    while (1)
 764    {
 765       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 766       if (Res < 0 && errno == EINTR)
 767          continue;
 768
 769       // Process is dead, this is kind of bad..
 770       if (Res == 0)
 771          return false;
 772
 773       // No data
 774       if (Res < 0 && errno == EAGAIN)
 775          return true;
 776       if (Res < 0)
 777          return false;
 778
 779       End += Res;
 780
 781       // Look for the end of the message
 782       for (char *I = Buffer; I + 1 < End; I++)
 783       {
 784          if (I[1] != '\n' ||
 785                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 786             continue;
 787
 788          // Pull the message out
 789          string Message(Buffer,I-Buffer);
 790          PartialMessage += Message;
 791
 792          // Fix up the buffer
 793          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 794          End -= I-Buffer;
 795          memmove(Buffer,I,End-Buffer);
 796          I = Buffer;
 797
 798          List.push_back(PartialMessage);
 799          PartialMessage.clear();
 800       }
 801       if (End != Buffer)
 802         {
 803           // If there's text left in the buffer, store it
 804           // in PartialMessage and throw the rest of the buffer
 805           // away.  This allows us to handle messages that
 806           // are longer than the static buffer size.
 807           PartialMessage += string(Buffer, End);
 808           End = Buffer;
 809         }
 810       else
 811         {
 812           // BUG ALERT: if a message block happens to end at a
 813           // multiple of 64000 characters, this will cause it to
 814           // terminate early, leading to a badly formed block and
 815           // probably crashing the method.  However, this is the only
 816           // way we have to find the end of the message block.  I have
 817           // an idea of how to fix this, but it will require changes
 818           // to the protocol (essentially to mark the beginning and
 819           // end of the block).
 820           //
 821           //  -- dburrows 2008-04-02
 822           return true;
 823         }
 824
 825       if (WaitFd(Fd) == false)
 826          return false;
 827    }
 828 }
 829                                                                         /*}}}*/
 830 // MonthConv - Converts a month string into a number                    /*{{{*/
 831 // ---------------------------------------------------------------------
 832 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 833    Made it a bit more robust with a few tolower_ascii though. */
 834 static int MonthConv(char *Month)
 835 {
 836    switch (tolower_ascii(*Month))
 837    {
 838       case 'a':
 839       return tolower_ascii(Month[1]) == 'p'?3:7;
 840       case 'd':
 841       return 11;
 842       case 'f':
 843       return 1;
 844       case 'j':
 845       if (tolower_ascii(Month[1]) == 'a')
 846          return 0;
 847       return tolower_ascii(Month[2]) == 'n'?5:6;
 848       case 'm':
 849       return tolower_ascii(Month[2]) == 'r'?2:4;
 850       case 'n':
 851       return 10;
 852       case 'o':
 853       return 9;
 854       case 's':
 855       return 8;
 856
 857       // Pretend it is January..
 858       default:
 859       return 0;
 860    }
 861 }
 862                                                                         /*}}}*/
 863 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 864 // ---------------------------------------------------------------------
 865 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 866    than local timezone (mktime assumes the latter).
 867
 868    This function is a nonstandard GNU extension that is also present on
 869    the BSDs and maybe other systems. For others we follow the advice of
 870    the manpage of timegm and use his portable replacement. */
 871 #ifndef HAVE_TIMEGM
 872 static time_t timegm(struct tm *t)
 873 {
 874    char *tz = getenv("TZ");
 875    setenv("TZ", "", 1);
 876    tzset();
 877    time_t ret = mktime(t);
 878    if (tz)
 879       setenv("TZ", tz, 1);
 880    else
 881       unsetenv("TZ");
 882    tzset();
 883    return ret;
 884 }
 885 #endif
 886                                                                         /*}}}*/
 887 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 888 // ---------------------------------------------------------------------
 889 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
 890    with one exception: All timezones (%Z) are accepted but the protocol
 891    says that it MUST be GMT, but this one is equal to UTC which we will
 892    encounter from time to time (e.g. in Release files) so we accept all
 893    here and just assume it is GMT (or UTC) later on */
 894 bool RFC1123StrToTime(const char* const str,time_t &time)
 895 {
 896    struct tm Tm;
 897    setlocale (LC_ALL,"C");
 898    bool const invalid =
 899    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 900       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 901    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 902        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 903    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 904        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 905    setlocale (LC_ALL,"");
 906    if (invalid == true)
 907       return false;
 908
 909    time = timegm(&Tm);
 910    return true;
 911 }
 912                                                                         /*}}}*/
 913 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 914 // ---------------------------------------------------------------------
 915 /* */
 916 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 917 {
 918    struct tm Tm;
 919    // MDTM includes no whitespaces but recommend and ignored by strptime
 920    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 921       return false;
 922
 923    time = timegm(&Tm);
 924    return true;
 925 }
 926                                                                         /*}}}*/
 927 // StrToTime - Converts a string into a time_t                          /*{{{*/
 928 // ---------------------------------------------------------------------
 929 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
 930    and the C library asctime format. It requires the GNU library function
 931    'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
 932    reason the C library does not provide any such function :< This also
 933    handles the weird, but unambiguous FTP time format. */
 934 bool StrToTime(const string &Val,time_t &Result)
 935 {
 936    struct tm Tm;
 937    char Month[10];
 938
 939    // Skip the day of the week
 940    const char *I = strchr(Val.c_str(), ' ');
 941
 942    // Handle RFC 1123 time
 943    Month[0] = 0;
 944    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 945               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 946    {
 947       // Handle RFC 1036 time
 948       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 949                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 950          Tm.tm_year += 1900;
 951       else
 952       {
 953          // asctime format
 954          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 955                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 956          {
 957             // 'ftp' time
 958             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 959                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 960                return false;
 961             Tm.tm_mon--;
 962          }
 963       }
 964    }
 965
 966    Tm.tm_isdst = 0;
 967    if (Month[0] != 0)
 968       Tm.tm_mon = MonthConv(Month);
 969    else
 970       Tm.tm_mon = 0; // we don't have a month, so pick something
 971    Tm.tm_year -= 1900;
 972
 973    // Convert to local time and then to GMT
 974    Result = timegm(&Tm);
 975    return true;
 976 }
 977                                                                         /*}}}*/
 978 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 979 // ---------------------------------------------------------------------
 980 /* This is used in decoding the crazy fixed length string headers in
 981    tar and ar files. */
 982 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 983 {
 984    char S[30];
 985    if (Len >= sizeof(S))
 986       return false;
 987    memcpy(S,Str,Len);
 988    S[Len] = 0;
 989
 990    // All spaces is a zero
 991    Res = 0;
 992    unsigned I;
 993    for (I = 0; S[I] == ' '; I++);
 994    if (S[I] == 0)
 995       return true;
 996
 997    char *End;
 998    Res = strtoul(S,&End,Base);
 999    if (End == S)
1000       return false;
1001
1002    return true;
1003 }
1004                                                                         /*}}}*/
1005 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
1006 // ---------------------------------------------------------------------
1007 /* This is used in decoding the crazy fixed length string headers in
1008    tar and ar files. */
/* Converts the first Len bytes of Str (which need not be null terminated)
   to a number in the given Base. A field made only of spaces is zero.
   Returns false - with Res set to 0 - if the field is 30 bytes or longer,
   holds no number, is negative or does not fit into an unsigned long long. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I = 0;
   while (S[I] == ' ')
      ++I;
   if (S[I] == 0)
      return true;

   // strtoull() would silently wrap a negative value around
   if (S[I] == '-')
      return false;

   // errno is the only way strtoull() reports an out of range value
   char *End;
   errno = 0;
   unsigned long long const Value = strtoull(S,&End,Base);
   if (End == S || errno != 0)
      return false;

   Res = Value;
   return true;
}
1031                                                                         /*}}}*/
1032
1033 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1034 // ---------------------------------------------------------------------
1035 /* This is used in decoding the base-256 encoded fixed length fields in
1036    tar files */
/* Decodes a big-endian base-256 field of Len bytes. Such a field is marked
   by the top bit of its first byte; the remaining seven bits of that byte
   are the most significant digits. Returns false - leaving Res untouched -
   if the marker bit is missing. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Plain char may be signed: go through unsigned char so that bytes of
   // 0x80 and above are added as 128..255 and not sign extended.
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1049                                                                         /*}}}*/
1050 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1051 // ---------------------------------------------------------------------
1052 /* Helper for Hex2Num */
/* Maps one hexadecimal digit character (either case) to its value 0..15.
   Everything that is not a hex digit yields 0. */
static int HexDigit(int c)
{
   int Value = 0;
   if (c >= 'a' && c <= 'f')
      Value = 10 + (c - 'a');
   else if (c >= 'A' && c <= 'F')
      Value = 10 + (c - 'A');
   else if (c >= '0' && c <= '9')
      Value = c - '0';
   return Value;
}
1063                                                                         /*}}}*/
1064 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1065 // ---------------------------------------------------------------------
1066 /* The length of the buffer must be exactly 1/2 the length of the string. */
1067 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1068 {
1069    if (Str.length() != Length*2)
1070       return false;
1071
1072    // Convert each digit. We store it in the same order as the string
1073    int J = 0;
1074    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1075    {
1076       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1077          return false;
1078
1079       Num[J] = HexDigit(I[0]) << 4;
1080       Num[J] += HexDigit(I[1]);
1081    }
1082
1083    return true;
1084 }
1085                                                                         /*}}}*/
1086 // TokSplitString - Split a string up by a given token                  /*{{{*/
1087 // ---------------------------------------------------------------------
1088 /* This is intended to be a faster splitter, it does not use dynamic
1089    memories. Input is changed to insert nulls at each token location. */
/* Splits Input in place at every Tok: surrounding white space is trimmed,
   a null byte is written behind each piece and pointers to the pieces are
   stored in List, followed by a terminating null pointer. Runs of tokens
   do not produce empty pieces. ListMax is the capacity of List including
   that terminator; false is returned if the pieces do not fit (List is
   still null terminated then, provided ListMax is not 0). */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without room for even the terminating entry nothing can be stored
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. The ctype functions are only defined for
   // unsigned char values, hence the casts throughout.
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   while (*Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0)
      ++Start;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Back remove spaces
      char *End = Pos;
      while (End > Start &&
             (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0))
         --End;
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over tokens, spaces and the null bytes written so far
      while (Pos != Stop &&
             (*Pos == Tok || *Pos == 0 ||
              isspace(static_cast<unsigned char>(*Pos)) != 0))
         ++Pos;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1125                                                                         /*}}}*/
1126 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1127 // ---------------------------------------------------------------------
1128 /* This can be used to split a given string up into a vector, so the
1129    purpose is the same as in the method above. This one is a bit slower,
1130    but the advantage is that we get an iterable vector */
/* Returns the pieces of haystack found between occurrences of split.
   Empty pieces are kept with one exception: a separator at the very end of
   the string does not add a trailing empty piece. The result always has at
   least one element, so an empty haystack yields one empty string. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   string::size_type start = 0;
   while (true)
   {
      string::size_type const pos = haystack.find(split, start);
      if (pos == string::npos)
      {
         // No further separator: the rest is the last piece
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, pos - start));

      // Working on indexes keeps us from ever stepping beyond the end
      start = pos + 1;
      if (start == haystack.size())
         break;
   }
   return exploded;
}
1143                                                                         /*}}}*/
1144 // StringSplit - split a string into a string vector by token           /*{{{*/
1145 // ---------------------------------------------------------------------
1146 /* See header for details.
1147  */
/* Cuts s at every occurrence of sep and returns the pieces in order. At
   most maxsplit elements are produced: once that limit is reached the
   final element holds the unsplit remainder. An empty sep yields an empty
   vector. */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> pieces;

   // splitting on nothing makes no sense
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   while (true)
   {
      // the element added now is the last one allowed: take all that is left
      if (pieces.size() + 1 >= maxsplit)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      size_t const hit = s.find(sep, from);
      if (hit == string::npos)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1174                                                                         /*}}}*/
1175 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1176 // ---------------------------------------------------------------------
1177 /* */
1178 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1179                       const char **ListEnd)
1180 {
1181    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1182       R->Hit = false;
1183
1184    unsigned long Hits = 0;
1185    for (; ListBegin != ListEnd; ListBegin++)
1186    {
1187       // Check if the name is a regex
1188       const char *I;
1189       bool Regex = true;
1190       for (I = *ListBegin; *I != 0; I++)
1191          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1192             break;
1193       if (*I == 0)
1194          Regex = false;
1195
1196       // Compile the regex pattern
1197       regex_t Pattern;
1198       if (Regex == true)
1199          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1200                      REG_NOSUB) != 0)
1201             Regex = false;
1202
1203       // Search the list
1204       bool Done = false;
1205       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1206       {
1207          if (R->Str[0] == 0)
1208             continue;
1209
1210          if (strcasecmp(R->Str,*ListBegin) != 0)
1211          {
1212             if (Regex == false)
1213                continue;
1214             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1215                continue;
1216          }
1217          Done = true;
1218
1219          if (R->Hit == false)
1220             Hits++;
1221
1222          R->Hit = true;
1223       }
1224
1225       if (Regex == true)
1226          regfree(&Pattern);
1227
1228       if (Done == false)
1229          _error->Warning(_("Selection %s not found"),*ListBegin);
1230    }
1231
1232    return Hits;
1233 }
1234                                                                         /*}}}*/
1235 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1236 // ---------------------------------------------------------------------
1237 /* This is used to make the internationalization strings easier to translate
1238    and to allow reordering of parameters */
/* One formatting attempt with a buffer of size bytes. If the output fits
   it is written to out and true is returned. Otherwise nothing is written,
   size is raised (to the exact need if vsnprintf reported it, doubled if
   not) and false is returned so that the caller can retry with a freshly
   started va_list. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // A vector reports allocation failure by exception instead of handing a
   // null pointer to vsnprintf, and it cleans up after itself on every path
   size_t const capacity = (size > 0) ? size : 1;
   vector<char> buffer(capacity);

   ssize_t const n = vsnprintf(&buffer[0], capacity, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
1256 void ioprintf(ostream &out,const char *format,...)
1257 {
1258    va_list args;
1259    ssize_t size = 400;
1260    while (true) {
1261       va_start(args,format);
1262       if (iovprintf(out, format, args, size) == true)
1263          return;
1264       va_end(args);
1265    }
1266 }
1267 void strprintf(string &out,const char *format,...)
1268 {
1269    va_list args;
1270    ssize_t size = 400;
1271    std::ostringstream outstr;
1272    while (true) {
1273       va_start(args,format);
1274       if (iovprintf(outstr, format, args, size) == true)
1275          break;
1276       va_end(args);
1277    }
1278    out = outstr.str();
1279 }
1280                                                                         /*}}}*/
1281 // safe_snprintf - Safer snprintf                                       /*{{{*/
1282 // ---------------------------------------------------------------------
1283 /* This is a snprintf that will never (ever) go past 'End' and returns a
1284    pointer to the end of the new string. The returned string is always null
1285    terminated unless Buffer == End. This is a better alternative to using
1286    consecutive snprintfs. */
/* Formats into the range [Buffer,End). Returns a pointer to the null byte
   ending the new string so that it can serve as Buffer of the next call,
   or End if there was no room, the output had to be truncated or
   formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths rather than pointers: Buffer + Did may lie far beyond
   // the buffer if the output was truncated
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1302                                                                         /*}}}*/
1303 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1304 // ---------------------------------------------------------------------
/* Returns what follows the first ':' of VerStr, or the whole string if it
   contains no ':' at all. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1312                                                                         /*}}}*/
1313 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1314 // ---------------------------------------------------------------------
1315 /* This little function is the most called method we have and therefore
1316    tries to do the absolute minimum - it is notably faster than the
1317    standard tolower/toupper and as a bonus avoids problems with different
1318    locales - we only operate on ascii chars anyway. */
/* Returns the lower case counterpart of the ASCII capitals 'A'..'Z' and
   every other value unchanged. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1325                                                                         /*}}}*/
1326
1327 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1328 // ---------------------------------------------------------------------
1329 /* The domain list is a comma separated list of domains that are suffix
1330    matched against the argument */
1331 bool CheckDomainList(const string &Host,const string &List)
1332 {
1333    string::const_iterator Start = List.begin();
1334    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1335    {
1336       if (Cur < List.end() && *Cur != ',')
1337          continue;
1338
1339       // Match the end of the string..
1340       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1341           Cur - Start != 0 &&
1342           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1343          return true;
1344
1345       Start = Cur + 1;
1346    }
1347    return false;
1348 }
1349                                                                         /*}}}*/
1350 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1351 // ---------------------------------------------------------------------
1352 /* */
/* Counts the entries of str_array in front of its terminating NULL. */
size_t strv_length(const char **str_array)
{
   const char **Cur = str_array;
   while (*Cur != NULL)
      ++Cur;
   return Cur - str_array;
}
1361
1362 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1363 // ---------------------------------------------------------------------
1364 /* */
/* Returns input with its backslash escapes resolved: "\\" becomes a single
   backslash, "\0" followed by two octal digits and "\x" followed by two hex
   digits become the byte with that value. A backslash at the very end is
   dropped, as is the marker of any other (or incomplete) escape. */
string DeEscapeString(const string &input)
{
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read behind the backslash
      if (input.end() - it < 2)
         continue;

      // deal with double escape
      if (it[1] == '\\')
      {
         output += '\\';
         // advance iterator one step further
         ++it;
         continue;
      }

      // read the escape marker
      ++it;
      switch (*it)
      {
         case '0':
         case 'x':
            /* Both digits have to exist behind the marker. Otherwise we
               would read the end of the string and, worse, step over it so
               that the loop condition never becomes false. */
            if (input.end() - it > 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += static_cast<char>(strtol(tmp, 0, (*it == '0') ? 8 : 16));
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1423                                                                         /*}}}*/
1424 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1425 // ---------------------------------------------------------------------
1426 /* This parses the URI into all of its components */
1427 void URI::CopyFrom(const string &U)
1428 {
1429    string::const_iterator I = U.begin();
1430
1431    // Locate the first colon, this separates the scheme
1432    for (; I < U.end() && *I != ':' ; ++I);
1433    string::const_iterator FirstColon = I;
1434
1435    /* Determine if this is a host type URI with a leading double //
1436       and then search for the first single / */
1437    string::const_iterator SingleSlash = I;
1438    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1439       SingleSlash += 3;
1440
1441    /* Find the / indicating the end of the hostname, ignoring /'s in the
1442       square brackets */
1443    bool InBracket = false;
1444    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1445    {
1446       if (*SingleSlash == '[')
1447          InBracket = true;
1448       if (InBracket == true && *SingleSlash == ']')
1449          InBracket = false;
1450    }
1451
1452    if (SingleSlash > U.end())
1453       SingleSlash = U.end();
1454
1455    // We can now write the access and path specifiers
1456    Access.assign(U.begin(),FirstColon);
1457    if (SingleSlash != U.end())
1458       Path.assign(SingleSlash,U.end());
1459    if (Path.empty() == true)
1460       Path = "/";
1461
1462    // Now we attempt to locate a user:pass@host fragment
1463    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1464       FirstColon += 3;
1465    else
1466       FirstColon += 1;
1467    if (FirstColon >= U.end())
1468       return;
1469
1470    if (FirstColon > SingleSlash)
1471       FirstColon = SingleSlash;
1472
1473    // Find the colon...
1474    I = FirstColon + 1;
1475    if (I > SingleSlash)
1476       I = SingleSlash;
1477    for (; I < SingleSlash && *I != ':'; ++I);
1478    string::const_iterator SecondColon = I;
1479
1480    // Search for the @ after the colon
1481    for (; I < SingleSlash && *I != '@'; ++I);
1482    string::const_iterator At = I;
1483
1484    // Now write the host and user/pass
1485    if (At == SingleSlash)
1486    {
1487       if (FirstColon < SingleSlash)
1488          Host.assign(FirstColon,SingleSlash);
1489    }
1490    else
1491    {
1492       Host.assign(At+1,SingleSlash);
1493       // username and password must be encoded (RFC 3986)
1494       User.assign(DeQuoteString(FirstColon,SecondColon));
1495       if (SecondColon < At)
1496          Password.assign(DeQuoteString(SecondColon+1,At));
1497    }
1498
1499    // Now we parse the RFC 2732 [] hostnames.
1500    unsigned long PortEnd = 0;
1501    InBracket = false;
1502    for (unsigned I = 0; I != Host.length();)
1503    {
1504       if (Host[I] == '[')
1505       {
1506          InBracket = true;
1507          Host.erase(I,1);
1508          continue;
1509       }
1510
1511       if (InBracket == true && Host[I] == ']')
1512       {
1513          InBracket = false;
1514          Host.erase(I,1);
1515          PortEnd = I;
1516          continue;
1517       }
1518       I++;
1519    }
1520
1521    // Tsk, weird.
1522    if (InBracket == true)
1523    {
1524       Host.clear();
1525       return;
1526    }
1527
1528    // Now we parse off a port number from the hostname
1529    Port = 0;
1530    string::size_type Pos = Host.rfind(':');
1531    if (Pos == string::npos || Pos < PortEnd)
1532       return;
1533
1534    Port = atoi(string(Host,Pos+1).c_str());
1535    Host.assign(Host,0,Pos);
1536 }
1537                                                                         /*}}}*/
1538 // URI::operator string - Convert the URI to a string                   /*{{{*/
1539 // ---------------------------------------------------------------------
1540 /* */
1541 URI::operator string()
1542 {
1543    string Res;
1544
1545    if (Access.empty() == false)
1546       Res = Access + ':';
1547
1548    if (Host.empty() == false)
1549    {
1550       if (Access.empty() == false)
1551          Res += "//";
1552
1553       if (User.empty() == false)
1554       {
1555          // FIXME: Technically userinfo is permitted even less
1556          // characters than these, but this is not conveniently
1557          // expressed with a blacklist.
1558          Res += QuoteString(User, ":/?#[]@");
1559          if (Password.empty() == false)
1560             Res += ":" + QuoteString(Password, ":/?#[]@");
1561          Res += "@";
1562       }
1563
1564       // Add RFC 2732 escaping characters
1565       if (Access.empty() == false &&
1566           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1567          Res += '[' + Host + ']';
1568       else
1569          Res += Host;
1570
1571       if (Port != 0)
1572       {
1573          char S[30];
1574          sprintf(S,":%u",Port);
1575          Res += S;
1576       }
1577    }
1578
1579    if (Path.empty() == false)
1580    {
1581       if (Path[0] != '/')
1582          Res += "/" + Path;
1583       else
1584          Res += Path;
1585    }
1586
1587    return Res;
1588 }
1589                                                                         /*}}}*/
1590 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1591 // ---------------------------------------------------------------------
1592 /* */
1593 string URI::SiteOnly(const string &URI)
1594 {
1595    ::URI U(URI);
1596    U.User.clear();
1597    U.Password.clear();
1598    U.Path.clear();
1599    return U;
1600 }
1601                                                                         /*}}}*/
1602 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1603 // ---------------------------------------------------------------------
1604 /* */
1605 string URI::NoUserPassword(const string &URI)
1606 {
1607    ::URI U(URI);
1608    U.User.clear();
1609    U.Password.clear();
1610    return U;
1611 }
1612                                                                         /*}}}*/