apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <sstream>
  27 #include <stdio.h>
  28 #include <algorithm>
  29 #include <unistd.h>
  30 #include <regex.h>
  31 #include <errno.h>
  32 #include <stdarg.h>
  33 #include <iconv.h>
  34
  35 #include <apti18n.h>
  36
  37 using namespace std;
  38                                                                         /*}}}*/
  39 // Strip - Remove white space from the front and back of a string       /*{{{*/
  40 // ---------------------------------------------------------------------
  41 namespace APT {
  42    namespace String {
  43 std::string Strip(const std::string &s)
  44 {
  45    size_t start = s.find_first_not_of(" \t\n");
  46    // only whitespace
  47    if (start == string::npos)
  48       return "";
  49    size_t end = s.find_last_not_of(" \t\n");
  50    return s.substr(start, end-start+1);
  51 }
  52
  53 bool Endswith(const std::string &s, const std::string &end)
  54 {
  55    if (end.size() > s.size())
  56       return false;
  57    return (s.substr(s.size() - end.size(), s.size()) == end);
  58 }
  59
  60 }
  61 }
  62                                                                         /*}}}*/
  63 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  64 // ---------------------------------------------------------------------
  65 /* This is handy to use before display some information for enduser  */
  66 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  67 {
  68   iconv_t cd;
  69   const char *inbuf;
  70   char *inptr, *outbuf;
  71   size_t insize, bufsize;
  72   dest->clear();
  73
  74   cd = iconv_open(codeset, "UTF-8");
  75   if (cd == (iconv_t)(-1)) {
  76      // Something went wrong
  77      if (errno == EINVAL)
  78         _error->Error("conversion from 'UTF-8' to '%s' not available",
  79                codeset);
  80      else
  81         perror("iconv_open");
  82
  83      return false;
  84   }
  85
  86   insize = bufsize = orig.size();
  87   inbuf = orig.data();
  88   inptr = (char *)inbuf;
  89   outbuf = new char[bufsize];
  90   size_t lastError = -1;
  91
  92   while (insize != 0)
  93   {
  94      char *outptr = outbuf;
  95      size_t outsize = bufsize;
  96      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  97      dest->append(outbuf, outptr - outbuf);
  98      if (err == (size_t)(-1))
  99      {
 100         switch (errno)
 101         {
 102         case EILSEQ:
 103            insize--;
 104            inptr++;
 105            // replace a series of unknown multibytes with a single "?"
 106            if (lastError != insize) {
 107               lastError = insize - 1;
 108               dest->append("?");
 109            }
 110            break;
 111         case EINVAL:
 112            insize = 0;
 113            break;
 114         case E2BIG:
 115            if (outptr == outbuf)
 116            {
 117               bufsize *= 2;
 118               delete[] outbuf;
 119               outbuf = new char[bufsize];
 120            }
 121            break;
 122         }
 123      }
 124   }
 125
 126   delete[] outbuf;
 127
 128   iconv_close(cd);
 129
 130   return true;
 131 }
 132                                                                         /*}}}*/
 133 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 134 // ---------------------------------------------------------------------
 135 /* This is handy to use when parsing a file. It also removes \n's left
 136    over from fgets and company */
 137 char *_strstrip(char *String)
 138 {
 139    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 140
 141    if (*String == 0)
 142       return String;
 143    return _strrstrip(String);
 144 }
 145                                                                         /*}}}*/
 146 // strrstrip - Remove white space from the back of a string     /*{{{*/
 147 // ---------------------------------------------------------------------
 148 char *_strrstrip(char *String)
 149 {
 150    char *End = String + strlen(String) - 1;
 151    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 152                                *End == '\r'); End--);
 153    End++;
 154    *End = 0;
 155    return String;
 156 }
 157                                                                         /*}}}*/
 158 // strtabexpand - Expands tabs into spaces, in place                    /*{{{*/
 159 // ---------------------------------------------------------------------
 160 /* Scans the NUL terminated String and rewrites the tabs it visits as
        spaces inside the same buffer; returns String.  Len is only used in
        bounds arithmetic against String + Len, so it is presumably the
        size of the buffer String lives in - TODO confirm with the callers.

        A tab with fewer than 8 bytes between it and String + Len cuts the
        string off at that tab.  A tab in the very first position becomes a
        single space.  Any other tab is replaced by a run of spaces and the
        rest of the string is shifted to the right with memmove.

        NOTE(review): the widths computed below do not look like classic
        8 column tab stops and several statements look unintended (see the
        notes in the body) - confirm the intent before relying on a
        particular output. */
 161 char *_strtabexpand(char *String,size_t Len)
 162 {
        // NOTE(review): "I != I + Len" compares I with itself plus Len, so it
        // is only false for Len == 0 and the scan is in effect ended by the
        // terminator alone; "I != String + Len" may have been intended.
 163    for (char *I = String; I != I + Len && *I != 0; I++)
 164    {
 165       if (*I != '\t')
 166          continue;
           // Less than 8 bytes left up to String + Len: truncate at the tab
 167       if (I + 8 > String + Len)
 168       {
 169          *I = 0;
 170          return String;
 171       }
 172
 173       /* Assume the start of the string is 0 and find the next 8 char
 174          division */
           // This local Len hides the Len parameter up to the end of the loop
           // body; it is the number of spaces to put in place of the tab.
 175       int Len;
 176       if (String == I)
 177          Len = 1;
 178       else
              // String - I is negative here, so with a truncating % the
              // remainder is zero or negative and this gives 8 plus the
              // offset of the tab modulo 8.
 179          Len = 8 - ((String - I) % 8);
 180       Len -= 2;
           // Only reachable for a tab in the first position (1 - 2)
 181       if (Len <= 0)
 182       {
 183          *I = ' ';
 184          continue;
 185       }
 186
           // Shift what follows the tab so that it starts Len bytes after I.
           // NOTE(review): strlen(I) + 1 bytes are copied starting at I + 1,
           // which is the rest of the string, its terminator and one more
           // byte behind the terminator.
 187       memmove(I + Len,I + 1,strlen(I) + 1);
           // Blank the Len bytes that now stand for the tab.  I is left on the
           // first shifted character, which the I++ of the outer loop then
           // steps over without looking at it.
 188       for (char *J = I; J + Len != I; *I = ' ', I++);
 189    }
 190    return String;
 191 }
 192                                                                         /*}}}*/
 193 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 194 // ---------------------------------------------------------------------
 195 /* This grabs a single word, converts any % escaped characters to their
 196    proper values and advances the pointer. Double quotes are understood
 197    and striped out as well. This is for URI/URL parsing. It also can
 198    understand [] brackets.*/
 199 bool ParseQuoteWord(const char *&String,string &Res)
 200 {
 201    // Skip leading whitespace
 202    const char *C = String;
 203    for (;*C != 0 && *C == ' '; C++);
 204    if (*C == 0)
 205       return false;
 206
 207    // Jump to the next word
 208    for (;*C != 0 && isspace(*C) == 0; C++)
 209    {
 210       if (*C == '"')
 211       {
 212          C = strchr(C + 1, '"');
 213          if (C == NULL)
 214             return false;
 215       }
 216       if (*C == '[')
 217       {
 218          C = strchr(C + 1, ']');
 219          if (C == NULL)
 220             return false;
 221       }
 222    }
 223
 224    // Now de-quote characters
 225    char Buffer[1024];
 226    char Tmp[3];
 227    const char *Start = String;
 228    char *I;
 229    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 230    {
 231       if (*Start == '%' && Start + 2 < C &&
 232           isxdigit(Start[1]) && isxdigit(Start[2]))
 233       {
 234          Tmp[0] = Start[1];
 235          Tmp[1] = Start[2];
 236          Tmp[2] = 0;
 237          *I = (char)strtol(Tmp,0,16);
 238          Start += 3;
 239          continue;
 240       }
 241       if (*Start != '"')
 242          *I = *Start;
 243       else
 244          I--;
 245       Start++;
 246    }
 247    *I = 0;
 248    Res = Buffer;
 249
 250    // Skip ending white space
 251    for (;*C != 0 && isspace(*C) != 0; C++);
 252    String = C;
 253    return true;
 254 }
 255                                                                         /*}}}*/
 256 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 257 // ---------------------------------------------------------------------
 258 /* This expects a series of space separated strings enclosed in ""'s.
 259    It concatenates the ""'s into a single string. */
 260 bool ParseCWord(const char *&String,string &Res)
 261 {
 262    // Skip leading whitespace
 263    const char *C = String;
 264    for (;*C != 0 && *C == ' '; C++);
 265    if (*C == 0)
 266       return false;
 267
 268    char Buffer[1024];
 269    char *Buf = Buffer;
 270    if (strlen(String) >= sizeof(Buffer))
 271        return false;
 272
 273    for (; *C != 0; C++)
 274    {
 275       if (*C == '"')
 276       {
 277          for (C++; *C != 0 && *C != '"'; C++)
 278             *Buf++ = *C;
 279
 280          if (*C == 0)
 281             return false;
 282
 283          continue;
 284       }
 285
 286       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 287          continue;
 288       if (isspace(*C) == 0)
 289          return false;
 290       *Buf++ = ' ';
 291    }
 292    *Buf = 0;
 293    Res = Buffer;
 294    String = C;
 295    return true;
 296 }
 297                                                                         /*}}}*/
 298 // QuoteString - Convert a string into quoted from                      /*{{{*/
 299 // ---------------------------------------------------------------------
 300 /* */
 301 string QuoteString(const string &Str, const char *Bad)
 302 {
 303    string Res;
 304    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 305    {
 306       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 307           *I == 0x25 || // percent '%' char
 308           *I <= 0x20 || *I >= 0x7F) // control chars
 309       {
 310          char Buf[10];
 311          sprintf(Buf,"%%%02x",(int)*I);
 312          Res += Buf;
 313       }
 314       else
 315          Res += *I;
 316    }
 317    return Res;
 318 }
 319                                                                         /*}}}*/
 320 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 321 // ---------------------------------------------------------------------
 322 /* This undoes QuoteString */
 323 string DeQuoteString(const string &Str)
 324 {
 325    return DeQuoteString(Str.begin(),Str.end());
 326 }
 327 string DeQuoteString(string::const_iterator const &begin,
 328                         string::const_iterator const &end)
 329 {
 330    string Res;
 331    for (string::const_iterator I = begin; I != end; ++I)
 332    {
 333       if (*I == '%' && I + 2 < end &&
 334           isxdigit(I[1]) && isxdigit(I[2]))
 335       {
 336          char Tmp[3];
 337          Tmp[0] = I[1];
 338          Tmp[1] = I[2];
 339          Tmp[2] = 0;
 340          Res += (char)strtol(Tmp,0,16);
 341          I += 2;
 342          continue;
 343       }
 344       else
 345          Res += *I;
 346    }
 347    return Res;
 348 }
 349
 350                                                                         /*}}}*/
 351 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 352 // ---------------------------------------------------------------------
 353 /* A max of 4 digits are shown before conversion to the next highest unit.
 354    The max length of the string will be 5 chars unless the size is > 10
 355    YottaBytes (E24) */
 356 string SizeToStr(double Size)
 357 {
 358    char S[300];
 359    double ASize;
 360    if (Size >= 0)
 361       ASize = Size;
 362    else
 363       ASize = -1*Size;
 364
 365    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 366       ExaBytes, ZettaBytes, YottaBytes */
 367    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 368    int I = 0;
 369    while (I <= 8)
 370    {
 371       if (ASize < 100 && I != 0)
 372       {
 373          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 374          break;
 375       }
 376
 377       if (ASize < 10000)
 378       {
 379          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 380          break;
 381       }
 382       ASize /= 1000.0;
 383       I++;
 384    }
 385
 386    return S;
 387 }
 388                                                                         /*}}}*/
 389 // TimeToStr - Convert the time into a string                           /*{{{*/
 390 // ---------------------------------------------------------------------
 391 /* Converts a number of seconds to a hms format */
 392 string TimeToStr(unsigned long Sec)
 393 {
 394    char S[300];
 395
 396    while (1)
 397    {
 398       if (Sec > 60*60*24)
 399       {
 400          //d means days, h means hours, min means minutes, s means seconds
 401          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 402          break;
 403       }
 404
 405       if (Sec > 60*60)
 406       {
 407          //h means hours, min means minutes, s means seconds
 408          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 409          break;
 410       }
 411
 412       if (Sec > 60)
 413       {
 414          //min means minutes, s means seconds
 415          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 416          break;
 417       }
 418
 419       //s means seconds
 420       sprintf(S,_("%lis"),Sec);
 421       break;
 422    }
 423
 424    return S;
 425 }
 426                                                                         /*}}}*/
 427 // SubstVar - Substitute a string for another string                    /*{{{*/
 428 // ---------------------------------------------------------------------
 429 /* This replaces all occurrences of Subst with Contents in Str. */
 430 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 431 {
 432    string::size_type Pos = 0;
 433    string::size_type OldPos = 0;
 434    string Temp;
 435
 436    while (OldPos < Str.length() &&
 437           (Pos = Str.find(Subst,OldPos)) != string::npos)
 438    {
 439       Temp += string(Str,OldPos,Pos) + Contents;
 440       OldPos = Pos + Subst.length();
 441    }
 442
 443    if (OldPos == 0)
 444       return Str;
 445
 446    return Temp + string(Str,OldPos);
 447 }
 448
 449 string SubstVar(string Str,const struct SubstVar *Vars)
 450 {
 451    for (; Vars->Subst != 0; Vars++)
 452       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 453    return Str;
 454 }
 455                                                                         /*}}}*/
 456 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 457 // ---------------------------------------------------------------------
 458 /* Returns a string with the supplied separator depth + 1 times in it */
 459 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 460 {
 461    std::string output = "";
 462    for(unsigned long d=Depth+1; d > 0; d--)
 463       output.append(Separator);
 464    return output;
 465 }
 466                                                                         /*}}}*/
 467 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 468 // ---------------------------------------------------------------------
 469 /* This converts a URI into a safe filename. It quotes all unsafe characters
 470    and converts / to _ and removes the scheme identifier. The resulting
 471    file name should be unique and never occur again for a different file */
 472 string URItoFileName(const string &URI)
 473 {
 474    // Nuke 'sensitive' items
 475    ::URI U(URI);
 476    U.User.clear();
 477    U.Password.clear();
 478    U.Access.clear();
 479
 480    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 481    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 482    replace(NewURI.begin(),NewURI.end(),'/','_');
 483    return NewURI;
 484 }
 485                                                                         /*}}}*/
 486 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 487 // ---------------------------------------------------------------------
 488 /* This routine performs a base64 transformation on a string. It was ripped
 489    from wget and then patched and bug fixed.
 490
 491    This spec can be found in rfc2045 */
 492 string Base64Encode(const string &S)
 493 {
 494    // Conversion table.
 495    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 496                           'I','J','K','L','M','N','O','P',
 497                           'Q','R','S','T','U','V','W','X',
 498                           'Y','Z','a','b','c','d','e','f',
 499                           'g','h','i','j','k','l','m','n',
 500                           'o','p','q','r','s','t','u','v',
 501                           'w','x','y','z','0','1','2','3',
 502                           '4','5','6','7','8','9','+','/'};
 503
 504    // Pre-allocate some space
 505    string Final;
 506    Final.reserve((4*S.length() + 2)/3 + 2);
 507
 508    /* Transform the 3x8 bits to 4x6 bits, as required by
 509       base64.  */
 510    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 511    {
 512       char Bits[3] = {0,0,0};
 513       Bits[0] = I[0];
 514       if (I + 1 < S.end())
 515          Bits[1] = I[1];
 516       if (I + 2 < S.end())
 517          Bits[2] = I[2];
 518
 519       Final += tbl[Bits[0] >> 2];
 520       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 521
 522       if (I + 1 >= S.end())
 523          break;
 524
 525       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 526
 527       if (I + 2 >= S.end())
 528          break;
 529
 530       Final += tbl[Bits[2] & 0x3f];
 531    }
 532
 533    /* Apply the padding elements, this tells how many bytes the remote
 534       end should discard */
 535    if (S.length() % 3 == 2)
 536       Final += '=';
 537    if (S.length() % 3 == 1)
 538       Final += "==";
 539
 540    return Final;
 541 }
 542                                                                         /*}}}*/
 543 // stringcmp - Arbitrary string compare                                 /*{{{*/
 544 // ---------------------------------------------------------------------
 545 /* This safely compares two non-null terminated strings of arbitrary
 546    length */
 547 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 548 {
 549    for (; A != AEnd && B != BEnd; A++, B++)
 550       if (*A != *B)
 551          break;
 552
 553    if (A == AEnd && B == BEnd)
 554       return 0;
 555    if (A == AEnd)
 556       return 1;
 557    if (B == BEnd)
 558       return -1;
 559    if (*A < *B)
 560       return -1;
 561    return 1;
 562 }
 563
 564 #if __GNUC__ >= 3
 565 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 566               const char *B,const char *BEnd)
 567 {
 568    for (; A != AEnd && B != BEnd; A++, B++)
 569       if (*A != *B)
 570          break;
 571
 572    if (A == AEnd && B == BEnd)
 573       return 0;
 574    if (A == AEnd)
 575       return 1;
 576    if (B == BEnd)
 577       return -1;
 578    if (*A < *B)
 579       return -1;
 580    return 1;
 581 }
 582 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 583               string::const_iterator B,string::const_iterator BEnd)
 584 {
 585    for (; A != AEnd && B != BEnd; A++, B++)
 586       if (*A != *B)
 587          break;
 588
 589    if (A == AEnd && B == BEnd)
 590       return 0;
 591    if (A == AEnd)
 592       return 1;
 593    if (B == BEnd)
 594       return -1;
 595    if (*A < *B)
 596       return -1;
 597    return 1;
 598 }
 599 #endif
 600                                                                         /*}}}*/
 601 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 602 // ---------------------------------------------------------------------
 603 /* */
 604 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 605 {
 606    for (; A != AEnd && B != BEnd; A++, B++)
 607       if (tolower_ascii(*A) != tolower_ascii(*B))
 608          break;
 609
 610    if (A == AEnd && B == BEnd)
 611       return 0;
 612    if (A == AEnd)
 613       return 1;
 614    if (B == BEnd)
 615       return -1;
 616    if (tolower_ascii(*A) < tolower_ascii(*B))
 617       return -1;
 618    return 1;
 619 }
 620 #if __GNUC__ >= 3
 621 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 622                   const char *B,const char *BEnd)
 623 {
 624    for (; A != AEnd && B != BEnd; A++, B++)
 625       if (tolower_ascii(*A) != tolower_ascii(*B))
 626          break;
 627
 628    if (A == AEnd && B == BEnd)
 629       return 0;
 630    if (A == AEnd)
 631       return 1;
 632    if (B == BEnd)
 633       return -1;
 634    if (tolower_ascii(*A) < tolower_ascii(*B))
 635       return -1;
 636    return 1;
 637 }
 638 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 639                   string::const_iterator B,string::const_iterator BEnd)
 640 {
 641    for (; A != AEnd && B != BEnd; A++, B++)
 642       if (tolower_ascii(*A) != tolower_ascii(*B))
 643          break;
 644
 645    if (A == AEnd && B == BEnd)
 646       return 0;
 647    if (A == AEnd)
 648       return 1;
 649    if (B == BEnd)
 650       return -1;
 651    if (tolower_ascii(*A) < tolower_ascii(*B))
 652       return -1;
 653    return 1;
 654 }
 655 #endif
 656                                                                         /*}}}*/
 657 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 658 // ---------------------------------------------------------------------
 659 /* The format is like those used in package files and the method
 660    communication system */
 661 string LookupTag(const string &Message,const char *Tag,const char *Default)
 662 {
 663    // Look for a matching tag.
 664    int Length = strlen(Tag);
 665    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 666    {
 667       // Found the tag
 668       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 669       {
 670          // Find the end of line and strip the leading/trailing spaces
 671          string::const_iterator J;
 672          I += Length + 1;
 673          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 674          for (J = I; *J != '\n' && J < Message.end(); ++J);
 675          for (; J > I && isspace(J[-1]) != 0; --J);
 676
 677          return string(I,J);
 678       }
 679
 680       for (; *I != '\n' && I < Message.end(); ++I);
 681    }
 682
 683    // Failed to find a match
 684    if (Default == 0)
 685       return string();
 686    return Default;
 687 }
 688                                                                         /*}}}*/
 689 // StringToBool - Converts a string into a boolean                      /*{{{*/
 690 // ---------------------------------------------------------------------
 691 /* This inspects the string to see if it is true or if it is false and
 692    then returns the result. Several varients on true/false are checked. */
 693 int StringToBool(const string &Text,int Default)
 694 {
 695    char *End;
 696    int Res = strtol(Text.c_str(),&End,0);
 697    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 698       return Res;
 699
 700    // Check for positives
 701    if (strcasecmp(Text.c_str(),"no") == 0 ||
 702        strcasecmp(Text.c_str(),"false") == 0 ||
 703        strcasecmp(Text.c_str(),"without") == 0 ||
 704        strcasecmp(Text.c_str(),"off") == 0 ||
 705        strcasecmp(Text.c_str(),"disable") == 0)
 706       return 0;
 707
 708    // Check for negatives
 709    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 710        strcasecmp(Text.c_str(),"true") == 0 ||
 711        strcasecmp(Text.c_str(),"with") == 0 ||
 712        strcasecmp(Text.c_str(),"on") == 0 ||
 713        strcasecmp(Text.c_str(),"enable") == 0)
 714       return 1;
 715
 716    return Default;
 717 }
 718                                                                         /*}}}*/
 719 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 720 // ---------------------------------------------------------------------
 721 /* This converts a time_t into a string time representation that is
 722    year 2000 complient and timezone neutral */
 723 string TimeRFC1123(time_t Date)
 724 {
 725    struct tm Conv;
 726    if (gmtime_r(&Date, &Conv) == NULL)
 727       return "";
 728
 729    char Buf[300];
 730    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 731    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 732                           "Aug","Sep","Oct","Nov","Dec"};
 733
 734    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 735            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 736            Conv.tm_min,Conv.tm_sec);
 737    return Buf;
 738 }
 739                                                                         /*}}}*/
 740 // ReadMessages - Read messages from the FD                             /*{{{*/
 741 // ---------------------------------------------------------------------
 742 /* This pulls full messages from the input FD into the message buffer.
 743    It assumes that messages will not pause during transit so no
 744    fancy buffering is used.
 745
 746    In particular: this reads blocks from the input until it believes
 747    that it's run out of input text.  Each block is terminated by a
 748    double newline ('\n' followed by '\n').  As noted below, there is a
 749    bug in this code: it assumes that all the blocks have been read if
 750    it doesn't see additional text in the buffer after the last one is
 751    parsed, which will cause it to lose blocks if the last block
 752    coincides with the end of the buffer.

        The sequence "\r\n\r\n" is accepted as the end of a block as well.
        Every complete block is appended to List without the newlines that
        end it.

        Returns true once everything that was read has been handed out as
        complete blocks, or when read() reports EAGAIN.  Returns false if
        read() returns 0, if it fails with anything but EINTR or EAGAIN,
        or if WaitFd() fails.
 753  */
 754 bool ReadMessages(int Fd, vector<string> &List)
 755 {
 756    char Buffer[64000];
        // One past the last byte of Buffer that holds unprocessed input
 757    char *End = Buffer;
 758    // Represents any left-over from the previous iteration of the
 759    // parse loop.  (i.e., if a message is split across the end
 760    // of the buffer, it goes here)
 761    string PartialMessage;
 762
 763    while (1)
 764    {
           // Fill the part of Buffer behind what it already holds
 765       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 766       if (Res < 0 && errno == EINTR)
 767          continue;
 768
 769       // Process is dead, this is kind of bad..
 770       if (Res == 0)
 771          return false;
 772
 773       // No data
 774       if (Res < 0 && errno == EAGAIN)
 775          return true;
 776       if (Res < 0)
 777          return false;
 778
 779       End += Res;
 780
 781       // Look for the end of the message
 782       for (char *I = Buffer; I + 1 < End; I++)
 783       {
              // Go on unless I is at the start of "\n\n" or "\r\n\r\n".
              // NOTE(review): strncmp may compare up to 4 bytes starting at
              // I while the loop only guarantees I + 1 < End, so it can look
              // at bytes behind End - confirm whether that matters.
 784          if (I[1] != '\n' ||
 785                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 786             continue;
 787
 788          // Pull the message out
 789          string Message(Buffer,I-Buffer);
 790          PartialMessage += Message;
 791
 792          // Fix up the buffer
              // Skip all line ending characters and move the rest of the input
              // to the front of Buffer
 793          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 794          End -= I-Buffer;
 795          memmove(Buffer,I,End-Buffer);
              // The I++ of the loop follows, so the scan resumes at Buffer + 1
 796          I = Buffer;
 797
 798          List.push_back(PartialMessage);
 799          PartialMessage.clear();
 800       }
 801       if (End != Buffer)
 802         {
 803           // If there's text left in the buffer, store it
 804           // in PartialMessage and throw the rest of the buffer
 805           // away.  This allows us to handle messages that
 806           // are longer than the static buffer size.
 807           PartialMessage += string(Buffer, End);
 808           End = Buffer;
 809         }
 810       else
 811         {
 812           // BUG ALERT: if a message block happens to end at a
 813           // multiple of 64000 characters, this will cause it to
 814           // terminate early, leading to a badly formed block and
 815           // probably crashing the method.  However, this is the only
 816           // way we have to find the end of the message block.  I have
 817           // an idea of how to fix this, but it will require changes
 818           // to the protocol (essentially to mark the beginning and
 819           // end of the block).
 820           //
 821           //  -- dburrows 2008-04-02
 822           return true;
 823         }
 824
           // An unfinished message is pending, wait for the rest of it
 825       if (WaitFd(Fd) == false)
 826          return false;
 827    }
 828 }
 829                                                                         /*}}}*/
 830 // MonthConv - Converts a month string into a number                    /*{{{*/
 831 // ---------------------------------------------------------------------
 832 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 833    Made it a bit more robust with a few tolower_ascii though. */
 834 static int MonthConv(char *Month)
 835 {
 836    switch (tolower_ascii(*Month))
 837    {
 838       case 'a':
 839       return tolower_ascii(Month[1]) == 'p'?3:7;
 840       case 'd':
 841       return 11;
 842       case 'f':
 843       return 1;
 844       case 'j':
 845       if (tolower_ascii(Month[1]) == 'a')
 846          return 0;
 847       return tolower_ascii(Month[2]) == 'n'?5:6;
 848       case 'm':
 849       return tolower_ascii(Month[2]) == 'r'?2:4;
 850       case 'n':
 851       return 10;
 852       case 'o':
 853       return 9;
 854       case 's':
 855       return 8;
 856
 857       // Pretend it is January..
 858       default:
 859       return 0;
 860    }
 861 }
 862                                                                         /*}}}*/
 863 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 864 // ---------------------------------------------------------------------
 865 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 866    than local timezone (mktime assumes the latter).
 867
 868    This function is a nonstandard GNU extension that is also present on
 869    the BSDs and maybe other systems. For others we follow the advice of
 870    the manpage of timegm and use his portable replacement. */
 871 #ifndef HAVE_TIMEGM
 872 static time_t timegm(struct tm *t)
 873 {
 874    char *tz = getenv("TZ");
 875    setenv("TZ", "", 1);
 876    tzset();
 877    time_t ret = mktime(t);
 878    if (tz)
 879       setenv("TZ", tz, 1);
 880    else
 881       unsetenv("TZ");
 882    tzset();
 883    return ret;
 884 }
 885 #endif
 886                                                                         /*}}}*/
 887 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 888 // ---------------------------------------------------------------------
 889 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
 890    with one exception: All timezones (%Z) are accepted but the protocol
 891    says that it MUST be GMT, but this one is equal to UTC which we will
 892    encounter from time to time (e.g. in Release files) so we accept all
 893    here and just assume it is GMT (or UTC) later on */
 894 bool RFC1123StrToTime(const char* const str,time_t &time)
 895 {
 896    struct tm Tm;
 897    setlocale (LC_ALL,"C");
 898    bool const invalid =
 899    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 900       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 901    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 902        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 903    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 904        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 905    setlocale (LC_ALL,"");
 906    if (invalid == true)
 907       return false;
 908
 909    time = timegm(&Tm);
 910    return true;
 911 }
 912                                                                         /*}}}*/
 913 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 914 // ---------------------------------------------------------------------
 915 /* */
 916 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 917 {
 918    struct tm Tm;
 919    // MDTM includes no whitespaces but recommend and ignored by strptime
 920    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 921       return false;
 922
 923    time = timegm(&Tm);
 924    return true;
 925 }
 926                                                                         /*}}}*/
 927 // StrToTime - Converts a string into a time_t                          /*{{{*/
 928 // ---------------------------------------------------------------------
 929 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
 930    and the C library asctime format. It requires the GNU library function
 931    'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
 932    reason the C library does not provide any such function :< This also
 933    handles the weird, but unambiguous FTP time format*/
 934 bool StrToTime(const string &Val,time_t &Result)
 935 {
 936    struct tm Tm;
 937    char Month[10];
 938
 939    // Skip the day of the week
 940    const char *I = strchr(Val.c_str(), ' ');
 941
 942    // Handle RFC 1123 time
 943    Month[0] = 0;
 944    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 945               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 946    {
 947       // Handle RFC 1036 time
 948       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 949                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 950          Tm.tm_year += 1900;
 951       else
 952       {
 953          // asctime format
 954          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 955                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 956          {
 957             // 'ftp' time
 958             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 959                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 960                return false;
 961             Tm.tm_mon--;
 962          }
 963       }
 964    }
 965
 966    Tm.tm_isdst = 0;
 967    if (Month[0] != 0)
 968       Tm.tm_mon = MonthConv(Month);
 969    else
 970       Tm.tm_mon = 0; // we don't have a month, so pick something
 971    Tm.tm_year -= 1900;
 972
 973    // Convert to local time and then to GMT
 974    Result = timegm(&Tm);
 975    return true;
 976 }
 977                                                                         /*}}}*/
 978 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 979 // ---------------------------------------------------------------------
 980 /* This is used in decoding the crazy fixed length string headers in
 981    tar and ar files. */
 982 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 983 {
 984    char S[30];
 985    if (Len >= sizeof(S))
 986       return false;
 987    memcpy(S,Str,Len);
 988    S[Len] = 0;
 989
 990    // All spaces is a zero
 991    Res = 0;
 992    unsigned I;
 993    for (I = 0; S[I] == ' '; I++);
 994    if (S[I] == 0)
 995       return true;
 996
 997    char *End;
 998    Res = strtoul(S,&End,Base);
 999    if (End == S)
1000       return false;
1001
1002    return true;
1003 }
1004                                                                         /*}}}*/
1005 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
1006 // ---------------------------------------------------------------------
1007 /* This is used in decoding the crazy fixed length string headers in
1008    tar and ar files. */
/* Parses the first Len bytes of Str (which need not be NUL terminated)
   as an unsigned number in the given Base, 64 bit flavour.

   Returns false if Len is 30 or more, or if strtoull() could not consume
   a single character. A field consisting only of blanks (or of length
   zero) counts as 0. Res is always written once the length check passed. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // Work on a terminated private copy of the fixed width field
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   Res = 0;

   // Nothing but blanks up to the terminator means zero
   const char *Cursor = Field;
   while (*Cursor == ' ')
      ++Cursor;
   if (*Cursor == '\0')
      return true;

   char *Tail = NULL;
   Res = strtoull(Field,&Tail,Base);
   return Tail != Field;
}
1031                                                                         /*}}}*/
1032
1033 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1034 // ---------------------------------------------------------------------
1035 /* This is used in decoding the base-256 encoded fixed length fields in
1036    tar files */
/* Decodes a big-endian base-256 number of Len bytes.

   The top bit of the first byte is the marker for this encoding: when it
   is clear the function returns false and leaves Res alone. Otherwise the
   low seven bits of the first byte start the value and every following
   byte contributes eight more bits.

   Str must point to at least one readable byte, even for Len == 0. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
   {
      // plain char may be signed: go through unsigned char so that bytes
      // of 0x80 and above are added as 128..255 rather than sign-extended
      Res = (Res << 8) | static_cast<unsigned char>(Str[i]);
   }
   return true;
}
1049                                                                         /*}}}*/
1050 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1051 // ---------------------------------------------------------------------
1052 /* Helper for Hex2Num */
/* Maps one hexadecimal digit (either case) to its value 0..15.
   Anything that is not a hex digit yields 0, so callers have to
   validate their input beforehand if they need to tell the two apart. */
static int HexDigit(int c)
{
   int Value = 0;
   if ('0' <= c && c <= '9')
      Value = c - '0';
   else if ('a' <= c && c <= 'f')
      Value = 10 + (c - 'a');
   else if ('A' <= c && c <= 'F')
      Value = 10 + (c - 'A');
   return Value;
}
1063                                                                         /*}}}*/
1064 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1065 // ---------------------------------------------------------------------
1066 /* The length of the buffer must be exactly 1/2 the length of the string. */
1067 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1068 {
1069    if (Str.length() != Length*2)
1070       return false;
1071
1072    // Convert each digit. We store it in the same order as the string
1073    int J = 0;
1074    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1075    {
1076       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1077          return false;
1078
1079       Num[J] = HexDigit(I[0]) << 4;
1080       Num[J] += HexDigit(I[1]);
1081    }
1082
1083    return true;
1084 }
1085                                                                         /*}}}*/
1086 // TokSplitString - Split a string up by a given token                  /*{{{*/
1087 // ---------------------------------------------------------------------
1088 /* This is intended to be a faster splitter, it does not use dynamic
1089    memories. Input is changed to insert nulls at each token location. */
/* Splits Input in place at every Tok character.

   Each piece has surrounding white space removed; runs of separators and
   white space between two pieces are collapsed, so no empty piece is
   produced between them. NUL bytes are written into Input to terminate
   the pieces and List receives pointers into Input followed by a null
   pointer.

   Tok      - separator character
   Input    - NUL terminated buffer, modified
   List     - receives the pieces, must have room for ListMax pointers
   ListMax  - capacity of List including the terminating null pointer
   Returns false if List is too small; the pieces that fit are still
   stored and terminated, unless ListMax is 0 in which case nothing is
   written at all. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without room for even the terminator we must not touch List
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. The <ctype.h> classifiers need values
   // representable as unsigned char, hence the casts on every call.
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   while (*Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0)
      ++Start;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Walk back over trailing separators and spaces, then terminate
      char *End = Pos;
      while (End > Start &&
             (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0))
         --End;
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance over separators, spaces and the terminators written above
      while (Pos != Stop &&
             (*Pos == Tok || isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0))
         ++Pos;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1125                                                                         /*}}}*/
1126 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1127 // ---------------------------------------------------------------------
1128 /* This can be used to split a given string up into a vector, so the
1129    purpose is the same as in the method above and this one is a bit slower
1130    also, but the advantage is that we have an iterable vector */
/* Splits haystack at every occurrence of the split character.

   Empty pieces at the start or in the middle are kept ("a,,b" gives
   "a", "", "b"), but a separator at the very end does not produce a
   trailing empty piece ("a,b," gives "a", "b"). An empty haystack gives
   an empty vector. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;

   // Index based so that no iterator is ever moved beyond end()
   string::size_type start = 0;
   while (start < haystack.size())
   {
      string::size_type const stop = haystack.find(split, start);
      if (stop == string::npos)
      {
         // last piece, runs to the end of the string
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, stop - start));
      start = stop + 1;
   }
   return exploded;
}
1145                                                                         /*}}}*/
1146 // StringSplit - split a string into a string vector by token           /*{{{*/
1147 // ---------------------------------------------------------------------
1148 /* See header for details.
1149  */
/* Splits s at every occurrence of the separator string sep.

   At most maxsplit pieces are produced; once that limit is reached the
   unsplit remainder of s becomes the last piece. A maxsplit of 0 behaves
   like 1. An empty separator is rejected by returning an empty vector,
   while an empty s yields a single empty piece. */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);

      // Either nothing is left to split or the next piece is the last one
      // we are allowed to emit: the rest of the string goes in verbatim
      if (hit == string::npos || pieces.size() + 1 >= maxsplit)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1176                                                                         /*}}}*/
1177 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1178 // ---------------------------------------------------------------------
1179 /* */
1180 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1181                       const char **ListEnd)
1182 {
1183    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1184       R->Hit = false;
1185
1186    unsigned long Hits = 0;
1187    for (; ListBegin != ListEnd; ListBegin++)
1188    {
1189       // Check if the name is a regex
1190       const char *I;
1191       bool Regex = true;
1192       for (I = *ListBegin; *I != 0; I++)
1193          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1194             break;
1195       if (*I == 0)
1196          Regex = false;
1197
1198       // Compile the regex pattern
1199       regex_t Pattern;
1200       if (Regex == true)
1201          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1202                      REG_NOSUB) != 0)
1203             Regex = false;
1204
1205       // Search the list
1206       bool Done = false;
1207       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1208       {
1209          if (R->Str[0] == 0)
1210             continue;
1211
1212          if (strcasecmp(R->Str,*ListBegin) != 0)
1213          {
1214             if (Regex == false)
1215                continue;
1216             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1217                continue;
1218          }
1219          Done = true;
1220
1221          if (R->Hit == false)
1222             Hits++;
1223
1224          R->Hit = true;
1225       }
1226
1227       if (Regex == true)
1228          regfree(&Pattern);
1229
1230       if (Done == false)
1231          _error->Warning(_("Selection %s not found"),*ListBegin);
1232    }
1233
1234    return Hits;
1235 }
1236                                                                         /*}}}*/
1237 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1238 // ---------------------------------------------------------------------
1239 /* This is used to make the internationalization strings easier to translate
1240    and to allow reordering of parameters */
/* One formatting attempt with a scratch buffer of 'size' bytes.

   out     - stream receiving the formatted text on success
   format  - printf style format string
   args    - argument list; it is consumed by this call, so the caller has
             to va_end/va_start it again before retrying
   size    - buffer size to try; on failure it is updated to the size the
             next attempt should use (the exact need if vsnprintf reported
             it, twice the old size otherwise)
   Returns true if the text fitted and was written to out. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // The vector releases the buffer on every path and reports allocation
   // failure by throwing instead of handing a null pointer to vsnprintf
   std::vector<char> buffer(size > 0 ? static_cast<size_t>(size) : 1);

   ssize_t const n = vsnprintf(&buffer[0], buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
1258 void ioprintf(ostream &out,const char *format,...)
1259 {
1260    va_list args;
1261    ssize_t size = 400;
1262    while (true) {
1263       va_start(args,format);
1264       if (iovprintf(out, format, args, size) == true)
1265          return;
1266       va_end(args);
1267    }
1268 }
1269 void strprintf(string &out,const char *format,...)
1270 {
1271    va_list args;
1272    ssize_t size = 400;
1273    std::ostringstream outstr;
1274    while (true) {
1275       va_start(args,format);
1276       if (iovprintf(outstr, format, args, size) == true)
1277          break;
1278       va_end(args);
1279    }
1280    out = outstr.str();
1281 }
1282                                                                         /*}}}*/
1283 // safe_snprintf - Safer snprintf                                       /*{{{*/
1284 // ---------------------------------------------------------------------
1285 /* This is a snprintf that will never (ever) go past 'End' and returns a
1286    pointer to the end of the new string. The returned string is always null
1287    terminated unless Buffer == end. This is a better alternative to using
1288    consecutive snprintfs. */
/* snprintf into the range [Buffer,End) and return where to continue.

   Returns a pointer to the terminating NUL of the text just written, so
   that calls can be chained by feeding the result back in as Buffer.
   If the text did not fit (it is truncated and still NUL terminated),
   if formatting failed, or if there was no room to begin with
   (End <= Buffer, nothing is written), End is returned. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Did is the length the full text would have had. Compare lengths
   // rather than pointers: Buffer + Did may lie far beyond End, and merely
   // forming such a pointer is undefined.
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1304                                                                         /*}}}*/
1305 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1306 // ---------------------------------------------------------------------
/* Returns the version string without its epoch, i.e. everything after
   the first ':'. A version without a colon is returned unchanged. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1314                                                                         /*}}}*/
1315 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1316 // ---------------------------------------------------------------------
1317 /* This little function is the most called method we have and tries
1318    therefore to do the absolute minimum - and is notably faster than
1319    standard tolower/toupper and as a bonus avoids problems with different
1320    locales - we only operate on ascii chars anyway. */
/* Lower-cases the ASCII letters 'A'..'Z'; every other value, including
   anything outside of ASCII, is handed back untouched. */
int tolower_ascii(int const c)
{
   bool const isUpper = ('A' <= c && c <= 'Z');
   return isUpper ? c + ('a' - 'A') : c;
}
1327                                                                         /*}}}*/
1328
1329 // CheckDomainList - See if Host is in a , separate list                /*{{{*/
1330 // ---------------------------------------------------------------------
1331 /* The domain list is a comma separated list of domains that are suffix
1332    matched against the argument */
1333 bool CheckDomainList(const string &Host,const string &List)
1334 {
1335    string::const_iterator Start = List.begin();
1336    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1337    {
1338       if (Cur < List.end() && *Cur != ',')
1339          continue;
1340
1341       // Match the end of the string..
1342       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1343           Cur - Start != 0 &&
1344           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1345          return true;
1346
1347       Start = Cur + 1;
1348    }
1349    return false;
1350 }
1351                                                                         /*}}}*/
1352 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1353 // ---------------------------------------------------------------------
1354 /* */
/* Counts the entries of str_array in front of its terminating null
   pointer. The array itself must not be null. */
size_t strv_length(const char **str_array)
{
   const char **Walk = str_array;
   while (*Walk != NULL)
      ++Walk;
   return Walk - str_array;
}
1363
1364 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1365 // ---------------------------------------------------------------------
1366 /* */
/* Resolves backslash escapes in input and returns the result.

     \\     a single backslash
     \0NN   the byte with the two digit octal value NN
     \xNN   the byte with the two digit hexadecimal value NN

   Everything else is copied as is. An unknown escape is dropped together
   with its backslash, and so is a backslash at the very end. A \0 or \x
   that is not followed by two more characters is dropped as well, while
   whatever does follow it is copied as ordinary text. */
string DeEscapeString(const string &input)
{
   string output;
   string::size_type const len = input.size();

   for (string::size_type i = 0; i < len; ++i)
   {
      // just copy non-escape chars
      if (input[i] != '\\')
      {
         output += input[i];
         continue;
      }

      // a lone backslash at the end has nothing to escape
      if (i + 1 >= len)
         break;

      // step onto the character that selects the kind of escape
      char const kind = input[++i];
      if (kind == '\\')
      {
         output += '\\';
         continue;
      }

      int base = 0;
      if (kind == '0')
         base = 8;
      else if (kind == 'x')
         base = 16;

      // FIXME: raise exception for unknown escapes?
      if (base == 0)
         continue;

      // Both digits have to be inside the string. Consuming them when
      // only one is left would read the end of the string and then step
      // the position past it.
      if (len - i <= 2)
         continue;

      char const digits[3] = { input[i + 1], input[i + 2], 0 };
      output += (char)strtol(digits, 0, base);
      i += 2;
   }
   return output;
}
1425                                                                         /*}}}*/
1426 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1427 // ---------------------------------------------------------------------
1428 /* This parses the URI into all of its components */
1429 void URI::CopyFrom(const string &U)
1430 {
1431    string::const_iterator I = U.begin();
1432
1433    // Locate the first colon, this separates the scheme
1434    for (; I < U.end() && *I != ':' ; ++I);
1435    string::const_iterator FirstColon = I;
1436
1437    /* Determine if this is a host type URI with a leading double //
1438       and then search for the first single / */
1439    string::const_iterator SingleSlash = I;
1440    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1441       SingleSlash += 3;
1442
1443    /* Find the / indicating the end of the hostname, ignoring /'s in the
1444       square brackets */
1445    bool InBracket = false;
1446    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1447    {
1448       if (*SingleSlash == '[')
1449          InBracket = true;
1450       if (InBracket == true && *SingleSlash == ']')
1451          InBracket = false;
1452    }
1453
1454    if (SingleSlash > U.end())
1455       SingleSlash = U.end();
1456
1457    // We can now write the access and path specifiers
1458    Access.assign(U.begin(),FirstColon);
1459    if (SingleSlash != U.end())
1460       Path.assign(SingleSlash,U.end());
1461    if (Path.empty() == true)
1462       Path = "/";
1463
1464    // Now we attempt to locate a user:pass@host fragment
1465    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1466       FirstColon += 3;
1467    else
1468       FirstColon += 1;
1469    if (FirstColon >= U.end())
1470       return;
1471
1472    if (FirstColon > SingleSlash)
1473       FirstColon = SingleSlash;
1474
1475    // Find the colon...
1476    I = FirstColon + 1;
1477    if (I > SingleSlash)
1478       I = SingleSlash;
1479    for (; I < SingleSlash && *I != ':'; ++I);
1480    string::const_iterator SecondColon = I;
1481
1482    // Search for the @ after the colon
1483    for (; I < SingleSlash && *I != '@'; ++I);
1484    string::const_iterator At = I;
1485
1486    // Now write the host and user/pass
1487    if (At == SingleSlash)
1488    {
1489       if (FirstColon < SingleSlash)
1490          Host.assign(FirstColon,SingleSlash);
1491    }
1492    else
1493    {
1494       Host.assign(At+1,SingleSlash);
1495       // username and password must be encoded (RFC 3986)
1496       User.assign(DeQuoteString(FirstColon,SecondColon));
1497       if (SecondColon < At)
1498          Password.assign(DeQuoteString(SecondColon+1,At));
1499    }
1500
1501    // Now we parse the RFC 2732 [] hostnames.
1502    unsigned long PortEnd = 0;
1503    InBracket = false;
1504    for (unsigned I = 0; I != Host.length();)
1505    {
1506       if (Host[I] == '[')
1507       {
1508          InBracket = true;
1509          Host.erase(I,1);
1510          continue;
1511       }
1512
1513       if (InBracket == true && Host[I] == ']')
1514       {
1515          InBracket = false;
1516          Host.erase(I,1);
1517          PortEnd = I;
1518          continue;
1519       }
1520       I++;
1521    }
1522
1523    // Tsk, weird.
1524    if (InBracket == true)
1525    {
1526       Host.clear();
1527       return;
1528    }
1529
1530    // Now we parse off a port number from the hostname
1531    Port = 0;
1532    string::size_type Pos = Host.rfind(':');
1533    if (Pos == string::npos || Pos < PortEnd)
1534       return;
1535
1536    Port = atoi(string(Host,Pos+1).c_str());
1537    Host.assign(Host,0,Pos);
1538 }
1539                                                                         /*}}}*/
1540 // URI::operator string - Convert the URI to a string                   /*{{{*/
1541 // ---------------------------------------------------------------------
1542 /* */
1543 URI::operator string()
1544 {
1545    string Res;
1546
1547    if (Access.empty() == false)
1548       Res = Access + ':';
1549
1550    if (Host.empty() == false)
1551    {
1552       if (Access.empty() == false)
1553          Res += "//";
1554
1555       if (User.empty() == false)
1556       {
1557          // FIXME: Technically userinfo is permitted even less
1558          // characters than these, but this is not conveniently
1559          // expressed with a blacklist.
1560          Res += QuoteString(User, ":/?#[]@");
1561          if (Password.empty() == false)
1562             Res += ":" + QuoteString(Password, ":/?#[]@");
1563          Res += "@";
1564       }
1565
1566       // Add RFC 2732 escaping characters
1567       if (Access.empty() == false &&
1568           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1569          Res += '[' + Host + ']';
1570       else
1571          Res += Host;
1572
1573       if (Port != 0)
1574       {
1575          char S[30];
1576          sprintf(S,":%u",Port);
1577          Res += S;
1578       }
1579    }
1580
1581    if (Path.empty() == false)
1582    {
1583       if (Path[0] != '/')
1584          Res += "/" + Path;
1585       else
1586          Res += Path;
1587    }
1588
1589    return Res;
1590 }
1591                                                                         /*}}}*/
1592 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1593 // ---------------------------------------------------------------------
1594 /* */
1595 string URI::SiteOnly(const string &URI)
1596 {
1597    ::URI U(URI);
1598    U.User.clear();
1599    U.Password.clear();
1600    U.Path.clear();
1601    return U;
1602 }
1603                                                                         /*}}}*/
1604 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1605 // ---------------------------------------------------------------------
1606 /* */
1607 string URI::NoUserPassword(const string &URI)
1608 {
1609    ::URI U(URI);
1610    U.User.clear();
1611    U.Password.clear();
1612    return U;
1613 }
1614                                                                         /*}}}*/