apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <stddef.h>
  25 #include <stdlib.h>
  26 #include <time.h>
  27 #include <string>
  28 #include <vector>
  29 #include <ctype.h>
  30 #include <string.h>
  31 #include <sstream>
  32 #include <stdio.h>
  33 #include <algorithm>
  34 #include <unistd.h>
  35 #include <regex.h>
  36 #include <errno.h>
  37 #include <stdarg.h>
  38 #include <iconv.h>
  39
  40 #include <apti18n.h>
  41                                                                         /*}}}*/
  42 using namespace std;
  43
  44 // Strip - Remove white space from the front and back of a string       /*{{{*/
  45 // ---------------------------------------------------------------------
  46 namespace APT {
  47    namespace String {
  48 std::string Strip(const std::string &s)
  49 {
  50    size_t start = s.find_first_not_of(" \t\n");
  51    // only whitespace
  52    if (start == string::npos)
  53       return "";
  54    size_t end = s.find_last_not_of(" \t\n");
  55    return s.substr(start, end-start+1);
  56 }
  57
  58 bool Endswith(const std::string &s, const std::string &end)
  59 {
  60    if (end.size() > s.size())
  61       return false;
  62    return (s.substr(s.size() - end.size(), s.size()) == end);
  63 }
  64
  65 }
  66 }
  67                                                                         /*}}}*/
  68 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  69 // ---------------------------------------------------------------------
/* This is handy to use before displaying some information to the end user */
  71 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  72 {
  73   iconv_t cd;
  74   const char *inbuf;
  75   char *inptr, *outbuf;
  76   size_t insize, bufsize;
  77   dest->clear();
  78
  79   cd = iconv_open(codeset, "UTF-8");
  80   if (cd == (iconv_t)(-1)) {
  81      // Something went wrong
  82      if (errno == EINVAL)
  83         _error->Error("conversion from 'UTF-8' to '%s' not available",
  84                codeset);
  85      else
  86         perror("iconv_open");
  87
  88      return false;
  89   }
  90
  91   insize = bufsize = orig.size();
  92   inbuf = orig.data();
  93   inptr = (char *)inbuf;
  94   outbuf = new char[bufsize];
  95   size_t lastError = -1;
  96
  97   while (insize != 0)
  98   {
  99      char *outptr = outbuf;
 100      size_t outsize = bufsize;
 101      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
 102      dest->append(outbuf, outptr - outbuf);
 103      if (err == (size_t)(-1))
 104      {
 105         switch (errno)
 106         {
 107         case EILSEQ:
 108            insize--;
 109            inptr++;
 110            // replace a series of unknown multibytes with a single "?"
 111            if (lastError != insize) {
 112               lastError = insize - 1;
 113               dest->append("?");
 114            }
 115            break;
 116         case EINVAL:
 117            insize = 0;
 118            break;
 119         case E2BIG:
 120            if (outptr == outbuf)
 121            {
 122               bufsize *= 2;
 123               delete[] outbuf;
 124               outbuf = new char[bufsize];
 125            }
 126            break;
 127         }
 128      }
 129   }
 130
 131   delete[] outbuf;
 132
 133   iconv_close(cd);
 134
 135   return true;
 136 }
 137                                                                         /*}}}*/
 138 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 139 // ---------------------------------------------------------------------
 140 /* This is handy to use when parsing a file. It also removes \n's left
 141    over from fgets and company */
 142 char *_strstrip(char *String)
 143 {
 144    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 145
 146    if (*String == 0)
 147       return String;
 148    return _strrstrip(String);
 149 }
 150                                                                         /*}}}*/
 151 // strrstrip - Remove white space from the back of a string     /*{{{*/
 152 // ---------------------------------------------------------------------
 153 char *_strrstrip(char *String)
 154 {
 155    char *End = String + strlen(String) - 1;
 156    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 157                                *End == '\r'); End--);
 158    End++;
 159    *End = 0;
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 164 // ---------------------------------------------------------------------
 165 /* */
 166 char *_strtabexpand(char *String,size_t Len)
 167 {
 168    for (char *I = String; I != I + Len && *I != 0; I++)
 169    {
 170       if (*I != '\t')
 171          continue;
 172       if (I + 8 > String + Len)
 173       {
 174          *I = 0;
 175          return String;
 176       }
 177
 178       /* Assume the start of the string is 0 and find the next 8 char
 179          division */
 180       int Len;
 181       if (String == I)
 182          Len = 1;
 183       else
 184          Len = 8 - ((String - I) % 8);
 185       Len -= 2;
 186       if (Len <= 0)
 187       {
 188          *I = ' ';
 189          continue;
 190       }
 191
 192       memmove(I + Len,I + 1,strlen(I) + 1);
 193       for (char *J = I; J + Len != I; *I = ' ', I++);
 194    }
 195    return String;
 196 }
 197                                                                         /*}}}*/
 198 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 199 // ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It can also
   understand [] brackets. */
 204 bool ParseQuoteWord(const char *&String,string &Res)
 205 {
 206    // Skip leading whitespace
 207    const char *C = String;
 208    for (;*C != 0 && *C == ' '; C++);
 209    if (*C == 0)
 210       return false;
 211
 212    // Jump to the next word
 213    for (;*C != 0 && isspace(*C) == 0; C++)
 214    {
 215       if (*C == '"')
 216       {
 217          C = strchr(C + 1, '"');
 218          if (C == NULL)
 219             return false;
 220       }
 221       if (*C == '[')
 222       {
 223          C = strchr(C + 1, ']');
 224          if (C == NULL)
 225             return false;
 226       }
 227    }
 228
 229    // Now de-quote characters
 230    char Buffer[1024];
 231    char Tmp[3];
 232    const char *Start = String;
 233    char *I;
 234    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 235    {
 236       if (*Start == '%' && Start + 2 < C &&
 237           isxdigit(Start[1]) && isxdigit(Start[2]))
 238       {
 239          Tmp[0] = Start[1];
 240          Tmp[1] = Start[2];
 241          Tmp[2] = 0;
 242          *I = (char)strtol(Tmp,0,16);
 243          Start += 3;
 244          continue;
 245       }
 246       if (*Start != '"')
 247          *I = *Start;
 248       else
 249          I--;
 250       Start++;
 251    }
 252    *I = 0;
 253    Res = Buffer;
 254
 255    // Skip ending white space
 256    for (;*C != 0 && isspace(*C) != 0; C++);
 257    String = C;
 258    return true;
 259 }
 260                                                                         /*}}}*/
 261 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 262 // ---------------------------------------------------------------------
 263 /* This expects a series of space separated strings enclosed in ""'s.
 264    It concatenates the ""'s into a single string. */
 265 bool ParseCWord(const char *&String,string &Res)
 266 {
 267    // Skip leading whitespace
 268    const char *C = String;
 269    for (;*C != 0 && *C == ' '; C++);
 270    if (*C == 0)
 271       return false;
 272
 273    char Buffer[1024];
 274    char *Buf = Buffer;
 275    if (strlen(String) >= sizeof(Buffer))
 276        return false;
 277
 278    for (; *C != 0; C++)
 279    {
 280       if (*C == '"')
 281       {
 282          for (C++; *C != 0 && *C != '"'; C++)
 283             *Buf++ = *C;
 284
 285          if (*C == 0)
 286             return false;
 287
 288          continue;
 289       }
 290
 291       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 292          continue;
 293       if (isspace(*C) == 0)
 294          return false;
 295       *Buf++ = ' ';
 296    }
 297    *Buf = 0;
 298    Res = Buffer;
 299    String = C;
 300    return true;
 301 }
 302                                                                         /*}}}*/
// QuoteString - Convert a string into quoted form                      /*{{{*/
 304 // ---------------------------------------------------------------------
 305 /* */
 306 string QuoteString(const string &Str, const char *Bad)
 307 {
 308    string Res;
 309    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 310    {
 311       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 312           *I == 0x25 || // percent '%' char
 313           *I <= 0x20 || *I >= 0x7F) // control chars
 314       {
 315          char Buf[10];
 316          sprintf(Buf,"%%%02x",(int)*I);
 317          Res += Buf;
 318       }
 319       else
 320          Res += *I;
 321    }
 322    return Res;
 323 }
 324                                                                         /*}}}*/
// DeQuoteString - Convert a string from quoted form                    /*{{{*/
 326 // ---------------------------------------------------------------------
 327 /* This undoes QuoteString */
 328 string DeQuoteString(const string &Str)
 329 {
 330    return DeQuoteString(Str.begin(),Str.end());
 331 }
 332 string DeQuoteString(string::const_iterator const &begin,
 333                         string::const_iterator const &end)
 334 {
 335    string Res;
 336    for (string::const_iterator I = begin; I != end; ++I)
 337    {
 338       if (*I == '%' && I + 2 < end &&
 339           isxdigit(I[1]) && isxdigit(I[2]))
 340       {
 341          char Tmp[3];
 342          Tmp[0] = I[1];
 343          Tmp[1] = I[2];
 344          Tmp[2] = 0;
 345          Res += (char)strtol(Tmp,0,16);
 346          I += 2;
 347          continue;
 348       }
 349       else
 350          Res += *I;
 351    }
 352    return Res;
 353 }
 354
 355                                                                         /*}}}*/
 356 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 357 // ---------------------------------------------------------------------
 358 /* A max of 4 digits are shown before conversion to the next highest unit.
 359    The max length of the string will be 5 chars unless the size is > 10
 360    YottaBytes (E24) */
 361 string SizeToStr(double Size)
 362 {
 363    char S[300];
 364    double ASize;
 365    if (Size >= 0)
 366       ASize = Size;
 367    else
 368       ASize = -1*Size;
 369
 370    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 371       ExaBytes, ZettaBytes, YottaBytes */
 372    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 373    int I = 0;
 374    while (I <= 8)
 375    {
 376       if (ASize < 100 && I != 0)
 377       {
 378          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 379          break;
 380       }
 381
 382       if (ASize < 10000)
 383       {
 384          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 385          break;
 386       }
 387       ASize /= 1000.0;
 388       I++;
 389    }
 390
 391    return S;
 392 }
 393                                                                         /*}}}*/
 394 // TimeToStr - Convert the time into a string                           /*{{{*/
 395 // ---------------------------------------------------------------------
 396 /* Converts a number of seconds to a hms format */
 397 string TimeToStr(unsigned long Sec)
 398 {
 399    char S[300];
 400
 401    while (1)
 402    {
 403       if (Sec > 60*60*24)
 404       {
 405          //d means days, h means hours, min means minutes, s means seconds
 406          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 407          break;
 408       }
 409
 410       if (Sec > 60*60)
 411       {
 412          //h means hours, min means minutes, s means seconds
 413          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 414          break;
 415       }
 416
 417       if (Sec > 60)
 418       {
 419          //min means minutes, s means seconds
 420          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 421          break;
 422       }
 423
 424       //s means seconds
 425       sprintf(S,_("%lis"),Sec);
 426       break;
 427    }
 428
 429    return S;
 430 }
 431                                                                         /*}}}*/
 432 // SubstVar - Substitute a string for another string                    /*{{{*/
 433 // ---------------------------------------------------------------------
 434 /* This replaces all occurrences of Subst with Contents in Str. */
 435 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 436 {
 437    if (Subst.empty() == true)
 438       return Str;
 439
 440    string::size_type Pos = 0;
 441    string::size_type OldPos = 0;
 442    string Temp;
 443
 444    while (OldPos < Str.length() &&
 445           (Pos = Str.find(Subst,OldPos)) != string::npos)
 446    {
 447       if (OldPos != Pos)
 448          Temp.append(Str, OldPos, Pos - OldPos);
 449       if (Contents.empty() == false)
 450          Temp.append(Contents);
 451       OldPos = Pos + Subst.length();
 452    }
 453
 454    if (OldPos == 0)
 455       return Str;
 456
 457    if (OldPos >= Str.length())
 458       return Temp;
 459    return Temp + string(Str,OldPos);
 460 }
 461 string SubstVar(string Str,const struct SubstVar *Vars)
 462 {
 463    for (; Vars->Subst != 0; Vars++)
 464       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 465    return Str;
 466 }
 467                                                                         /*}}}*/
 468 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 469 // ---------------------------------------------------------------------
 470 /* Returns a string with the supplied separator depth + 1 times in it */
 471 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 472 {
 473    std::string output = "";
 474    for(unsigned long d=Depth+1; d > 0; d--)
 475       output.append(Separator);
 476    return output;
 477 }
 478                                                                         /*}}}*/
 479 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 480 // ---------------------------------------------------------------------
 481 /* This converts a URI into a safe filename. It quotes all unsafe characters
 482    and converts / to _ and removes the scheme identifier. The resulting
 483    file name should be unique and never occur again for a different file */
 484 string URItoFileName(const string &URI)
 485 {
 486    // Nuke 'sensitive' items
 487    ::URI U(URI);
 488    U.User.clear();
 489    U.Password.clear();
 490    U.Access.clear();
 491
 492    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 493    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 494    replace(NewURI.begin(),NewURI.end(),'/','_');
 495    return NewURI;
 496 }
 497                                                                         /*}}}*/
 498 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 499 // ---------------------------------------------------------------------
 500 /* This routine performs a base64 transformation on a string. It was ripped
 501    from wget and then patched and bug fixed.
 502
 503    This spec can be found in rfc2045 */
 504 string Base64Encode(const string &S)
 505 {
 506    // Conversion table.
 507    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 508                           'I','J','K','L','M','N','O','P',
 509                           'Q','R','S','T','U','V','W','X',
 510                           'Y','Z','a','b','c','d','e','f',
 511                           'g','h','i','j','k','l','m','n',
 512                           'o','p','q','r','s','t','u','v',
 513                           'w','x','y','z','0','1','2','3',
 514                           '4','5','6','7','8','9','+','/'};
 515
 516    // Pre-allocate some space
 517    string Final;
 518    Final.reserve((4*S.length() + 2)/3 + 2);
 519
 520    /* Transform the 3x8 bits to 4x6 bits, as required by
 521       base64.  */
 522    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 523    {
 524       char Bits[3] = {0,0,0};
 525       Bits[0] = I[0];
 526       if (I + 1 < S.end())
 527          Bits[1] = I[1];
 528       if (I + 2 < S.end())
 529          Bits[2] = I[2];
 530
 531       Final += tbl[Bits[0] >> 2];
 532       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 533
 534       if (I + 1 >= S.end())
 535          break;
 536
 537       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 538
 539       if (I + 2 >= S.end())
 540          break;
 541
 542       Final += tbl[Bits[2] & 0x3f];
 543    }
 544
 545    /* Apply the padding elements, this tells how many bytes the remote
 546       end should discard */
 547    if (S.length() % 3 == 2)
 548       Final += '=';
 549    if (S.length() % 3 == 1)
 550       Final += "==";
 551
 552    return Final;
 553 }
 554                                                                         /*}}}*/
 555 // stringcmp - Arbitrary string compare                                 /*{{{*/
 556 // ---------------------------------------------------------------------
 557 /* This safely compares two non-null terminated strings of arbitrary
 558    length */
 559 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 560 {
 561    for (; A != AEnd && B != BEnd; A++, B++)
 562       if (*A != *B)
 563          break;
 564
 565    if (A == AEnd && B == BEnd)
 566       return 0;
 567    if (A == AEnd)
 568       return 1;
 569    if (B == BEnd)
 570       return -1;
 571    if (*A < *B)
 572       return -1;
 573    return 1;
 574 }
 575
 576 #if __GNUC__ >= 3
 577 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 578               const char *B,const char *BEnd)
 579 {
 580    for (; A != AEnd && B != BEnd; A++, B++)
 581       if (*A != *B)
 582          break;
 583
 584    if (A == AEnd && B == BEnd)
 585       return 0;
 586    if (A == AEnd)
 587       return 1;
 588    if (B == BEnd)
 589       return -1;
 590    if (*A < *B)
 591       return -1;
 592    return 1;
 593 }
 594 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 595               string::const_iterator B,string::const_iterator BEnd)
 596 {
 597    for (; A != AEnd && B != BEnd; A++, B++)
 598       if (*A != *B)
 599          break;
 600
 601    if (A == AEnd && B == BEnd)
 602       return 0;
 603    if (A == AEnd)
 604       return 1;
 605    if (B == BEnd)
 606       return -1;
 607    if (*A < *B)
 608       return -1;
 609    return 1;
 610 }
 611 #endif
 612                                                                         /*}}}*/
 613 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 614 // ---------------------------------------------------------------------
 615 /* */
 616 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 617 {
 618    for (; A != AEnd && B != BEnd; A++, B++)
 619       if (tolower_ascii(*A) != tolower_ascii(*B))
 620          break;
 621
 622    if (A == AEnd && B == BEnd)
 623       return 0;
 624    if (A == AEnd)
 625       return 1;
 626    if (B == BEnd)
 627       return -1;
 628    if (tolower_ascii(*A) < tolower_ascii(*B))
 629       return -1;
 630    return 1;
 631 }
 632 #if __GNUC__ >= 3
 633 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 634                   const char *B,const char *BEnd)
 635 {
 636    for (; A != AEnd && B != BEnd; A++, B++)
 637       if (tolower_ascii(*A) != tolower_ascii(*B))
 638          break;
 639
 640    if (A == AEnd && B == BEnd)
 641       return 0;
 642    if (A == AEnd)
 643       return 1;
 644    if (B == BEnd)
 645       return -1;
 646    if (tolower_ascii(*A) < tolower_ascii(*B))
 647       return -1;
 648    return 1;
 649 }
 650 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 651                   string::const_iterator B,string::const_iterator BEnd)
 652 {
 653    for (; A != AEnd && B != BEnd; A++, B++)
 654       if (tolower_ascii(*A) != tolower_ascii(*B))
 655          break;
 656
 657    if (A == AEnd && B == BEnd)
 658       return 0;
 659    if (A == AEnd)
 660       return 1;
 661    if (B == BEnd)
 662       return -1;
 663    if (tolower_ascii(*A) < tolower_ascii(*B))
 664       return -1;
 665    return 1;
 666 }
 667 #endif
 668                                                                         /*}}}*/
// LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 670 // ---------------------------------------------------------------------
 671 /* The format is like those used in package files and the method
 672    communication system */
 673 string LookupTag(const string &Message,const char *Tag,const char *Default)
 674 {
 675    // Look for a matching tag.
 676    int Length = strlen(Tag);
 677    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 678    {
 679       // Found the tag
 680       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 681       {
 682          // Find the end of line and strip the leading/trailing spaces
 683          string::const_iterator J;
 684          I += Length + 1;
 685          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 686          for (J = I; *J != '\n' && J < Message.end(); ++J);
 687          for (; J > I && isspace(J[-1]) != 0; --J);
 688
 689          return string(I,J);
 690       }
 691
 692       for (; *I != '\n' && I < Message.end(); ++I);
 693    }
 694
 695    // Failed to find a match
 696    if (Default == 0)
 697       return string();
 698    return Default;
 699 }
 700                                                                         /*}}}*/
 701 // StringToBool - Converts a string into a boolean                      /*{{{*/
 702 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
 705 int StringToBool(const string &Text,int Default)
 706 {
 707    char *End;
 708    int Res = strtol(Text.c_str(),&End,0);
 709    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 710       return Res;
 711
 712    // Check for positives
 713    if (strcasecmp(Text.c_str(),"no") == 0 ||
 714        strcasecmp(Text.c_str(),"false") == 0 ||
 715        strcasecmp(Text.c_str(),"without") == 0 ||
 716        strcasecmp(Text.c_str(),"off") == 0 ||
 717        strcasecmp(Text.c_str(),"disable") == 0)
 718       return 0;
 719
 720    // Check for negatives
 721    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 722        strcasecmp(Text.c_str(),"true") == 0 ||
 723        strcasecmp(Text.c_str(),"with") == 0 ||
 724        strcasecmp(Text.c_str(),"on") == 0 ||
 725        strcasecmp(Text.c_str(),"enable") == 0)
 726       return 1;
 727
 728    return Default;
 729 }
 730                                                                         /*}}}*/
 731 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 732 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
 735 string TimeRFC1123(time_t Date)
 736 {
 737    struct tm Conv;
 738    if (gmtime_r(&Date, &Conv) == NULL)
 739       return "";
 740
 741    char Buf[300];
 742    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 743    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 744                           "Aug","Sep","Oct","Nov","Dec"};
 745
 746    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 747            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 748            Conv.tm_min,Conv.tm_sec);
 749    return Buf;
 750 }
 751                                                                         /*}}}*/
 752 // ReadMessages - Read messages from the FD                             /*{{{*/
 753 // ---------------------------------------------------------------------
 754 /* This pulls full messages from the input FD into the message buffer.
 755    It assumes that messages will not pause during transit so no
 756    fancy buffering is used.
 757
 758    In particular: this reads blocks from the input until it believes
 759    that it's run out of input text.  Each block is terminated by a
 760    double newline ('\n' followed by '\n').  As noted below, there is a
 761    bug in this code: it assumes that all the blocks have been read if
 762    it doesn't see additional text in the buffer after the last one is
 763    parsed, which will cause it to lose blocks if the last block
 764    coincides with the end of the buffer.
 765  */
 766 bool ReadMessages(int Fd, vector<string> &List)
 767 {
 768    char Buffer[64000];
 769    char *End = Buffer;
 770    // Represents any left-over from the previous iteration of the
 771    // parse loop.  (i.e., if a message is split across the end
 772    // of the buffer, it goes here)
 773    string PartialMessage;
 774
 775    while (1)
 776    {
 777       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 778       if (Res < 0 && errno == EINTR)
 779          continue;
 780
 781       // Process is dead, this is kind of bad..
 782       if (Res == 0)
 783          return false;
 784
 785       // No data
 786       if (Res < 0 && errno == EAGAIN)
 787          return true;
 788       if (Res < 0)
 789          return false;
 790
 791       End += Res;
 792
 793       // Look for the end of the message
 794       for (char *I = Buffer; I + 1 < End; I++)
 795       {
 796          if (I[1] != '\n' ||
 797                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 798             continue;
 799
 800          // Pull the message out
 801          string Message(Buffer,I-Buffer);
 802          PartialMessage += Message;
 803
 804          // Fix up the buffer
 805          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 806          End -= I-Buffer;
 807          memmove(Buffer,I,End-Buffer);
 808          I = Buffer;
 809
 810          List.push_back(PartialMessage);
 811          PartialMessage.clear();
 812       }
 813       if (End != Buffer)
 814         {
 815           // If there's text left in the buffer, store it
 816           // in PartialMessage and throw the rest of the buffer
 817           // away.  This allows us to handle messages that
 818           // are longer than the static buffer size.
 819           PartialMessage += string(Buffer, End);
 820           End = Buffer;
 821         }
 822       else
 823         {
 824           // BUG ALERT: if a message block happens to end at a
 825           // multiple of 64000 characters, this will cause it to
 826           // terminate early, leading to a badly formed block and
 827           // probably crashing the method.  However, this is the only
 828           // way we have to find the end of the message block.  I have
 829           // an idea of how to fix this, but it will require changes
 830           // to the protocol (essentially to mark the beginning and
 831           // end of the block).
 832           //
 833           //  -- dburrows 2008-04-02
 834           return true;
 835         }
 836
 837       if (WaitFd(Fd) == false)
 838          return false;
 839    }
 840 }
 841                                                                         /*}}}*/
 842 // MonthConv - Converts a month string into a number                    /*{{{*/
 843 // ---------------------------------------------------------------------
 844 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 845    Made it a bit more robust with a few tolower_ascii though. */
 846 static int MonthConv(char *Month)
 847 {
 848    switch (tolower_ascii(*Month))
 849    {
 850       case 'a':
 851       return tolower_ascii(Month[1]) == 'p'?3:7;
 852       case 'd':
 853       return 11;
 854       case 'f':
 855       return 1;
 856       case 'j':
 857       if (tolower_ascii(Month[1]) == 'a')
 858          return 0;
 859       return tolower_ascii(Month[2]) == 'n'?5:6;
 860       case 'm':
 861       return tolower_ascii(Month[2]) == 'r'?2:4;
 862       case 'n':
 863       return 10;
 864       case 'o':
 865       return 9;
 866       case 's':
 867       return 8;
 868
 869       // Pretend it is January..
 870       default:
 871       return 0;
 872    }
 873 }
 874                                                                         /*}}}*/
 875 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 876 // ---------------------------------------------------------------------
 877 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 878    than local timezone (mktime assumes the latter).
 879
 880    This function is a nonstandard GNU extension that is also present on
 881    the BSDs and maybe other systems. For others we follow the advice of
 882    the manpage of timegm and use his portable replacement. */
 883 #ifndef HAVE_TIMEGM
 884 static time_t timegm(struct tm *t)
 885 {
 886    char *tz = getenv("TZ");
 887    setenv("TZ", "", 1);
 888    tzset();
 889    time_t ret = mktime(t);
 890    if (tz)
 891       setenv("TZ", tz, 1);
 892    else
 893       unsetenv("TZ");
 894    tzset();
 895    return ret;
 896 }
 897 #endif
 898                                                                         /*}}}*/
 899 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 900 // ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC2616 Section 3.3.1
   with one exception: all timezones (%Z) are accepted. The protocol
   says that it MUST be GMT, but this is equal to UTC, which we will
   encounter from time to time (e.g. in Release files), so we accept all
   here and just assume it is GMT (or UTC) later on. */
 906 bool RFC1123StrToTime(const char* const str,time_t &time)
 907 {
 908    struct tm Tm;
 909    setlocale (LC_ALL,"C");
 910    bool const invalid =
 911    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 912       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 913    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 914        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 915    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 916        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 917    setlocale (LC_ALL,"");
 918    if (invalid == true)
 919       return false;
 920
 921    time = timegm(&Tm);
 922    return true;
 923 }
 924                                                                         /*}}}*/
 925 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 926 // ---------------------------------------------------------------------
 927 /* */
 928 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 929 {
 930    struct tm Tm;
 931    // MDTM includes no whitespaces but recommend and ignored by strptime
 932    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 933       return false;
 934
 935    time = timegm(&Tm);
 936    return true;
 937 }
 938                                                                         /*}}}*/
 939 // StrToTime - Converts a string into a time_t                          /*{{{*/
 940 // ---------------------------------------------------------------------
 941 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
 942    and the C library asctime format. It requires the GNU library function
 943    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 944    reason the C library does not provide any such function :< This also
 945    handles the weird, but unambiguous FTP time format*/
 946 bool StrToTime(const string &Val,time_t &Result)
 947 {
 948    struct tm Tm;
 949    char Month[10];
 950
 951    // Skip the day of the week
 952    const char *I = strchr(Val.c_str(), ' ');
 953
 954    // Handle RFC 1123 time
 955    Month[0] = 0;
 956    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 957               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 958    {
 959       // Handle RFC 1036 time
 960       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 961                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 962          Tm.tm_year += 1900;
 963       else
 964       {
 965          // asctime format
 966          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 967                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 968          {
 969             // 'ftp' time
 970             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 971                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 972                return false;
 973             Tm.tm_mon--;
 974          }
 975       }
 976    }
 977
 978    Tm.tm_isdst = 0;
 979    if (Month[0] != 0)
 980       Tm.tm_mon = MonthConv(Month);
 981    else
 982       Tm.tm_mon = 0; // we don't have a month, so pick something
 983    Tm.tm_year -= 1900;
 984
 985    // Convert to local time and then to GMT
 986    Result = timegm(&Tm);
 987    return true;
 988 }
 989                                                                         /*}}}*/
 990 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 991 // ---------------------------------------------------------------------
 992 /* This is used in decoding the crazy fixed length string headers in
 993    tar and ar files. */
 994 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 995 {
 996    char S[30];
 997    if (Len >= sizeof(S))
 998       return false;
 999    memcpy(S,Str,Len);
1000    S[Len] = 0;
1001
1002    // All spaces is a zero
1003    Res = 0;
1004    unsigned I;
1005    for (I = 0; S[I] == ' '; I++);
1006    if (S[I] == 0)
1007       return true;
1008
1009    char *End;
1010    Res = strtoul(S,&End,Base);
1011    if (End == S)
1012       return false;
1013
1014    return true;
1015 }
1016                                                                         /*}}}*/
1017 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
1018 // ---------------------------------------------------------------------
/* Decodes the fixed length (not NUL terminated) number fields found in
   the headers of tar and ar files, this time for values which need an
   unsigned long long. The first Len bytes of Str are interpreted in the
   given Base; a field consisting only of blanks counts as zero. Returns
   false if the field is too long for the internal buffer or does not
   start with a number. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (sizeof(Field) <= Len)
      return false;

   // work on a terminated copy of the field
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   const char *Cur = Field;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == '\0')
      return true;

   char *Stop = NULL;
   Res = strtoull(Field,&Stop,Base);
   // strtoull leaves Stop at the start if nothing could be converted
   return Stop != Field;
}
1043                                                                         /*}}}*/
1044
1045 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1046 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files. Such a field is marked by the highest bit of its first
   byte; the remaining 7 bits of that byte and all following bytes are
   the big-endian digits of the number. Returns false (leaving Res
   untouched) if the field is empty or does not carry the marker bit. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   // an empty field has no marker byte we could look at
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   /* char may be signed: go through unsigned char so that digits >= 0x80
      are added as 128..255 instead of being sign-extended */
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1061                                                                         /*}}}*/
1062 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1063 // ---------------------------------------------------------------------
1064 /* This is used in decoding the 256bit encoded fixed length fields in
1065    tar files */
1066 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1067 {
1068    unsigned long long Num;
1069    bool rc;
1070
1071    rc = Base256ToNum(Str, Num, Len);
1072    Res = Num;
1073    if (Res != Num)
1074       return false;
1075
1076    return rc;
1077 }
1078                                                                         /*}}}*/
1079 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1080 // ---------------------------------------------------------------------
/* Helper for Hex2Num: returns the value (0-15) of the hexadecimal digit c
   in either case. Everything which is not a hex digit yields 0, so
   callers have to validate their input beforehand. */
static int HexDigit(int c)
{
   bool const Decimal = ('0' <= c && c <= '9');
   bool const Lower = ('a' <= c && c <= 'f');
   bool const Upper = ('A' <= c && c <= 'F');

   if (Decimal == true)
      return c - '0';
   if (Lower == true)
      return 10 + (c - 'a');
   if (Upper == true)
      return 10 + (c - 'A');
   return 0;
}
1092                                                                         /*}}}*/
1093 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1094 // ---------------------------------------------------------------------
1095 /* The length of the buffer must be exactly 1/2 the length of the string. */
1096 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1097 {
1098    if (Str.length() != Length*2)
1099       return false;
1100
1101    // Convert each digit. We store it in the same order as the string
1102    int J = 0;
1103    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1104    {
1105       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1106          return false;
1107
1108       Num[J] = HexDigit(I[0]) << 4;
1109       Num[J] += HexDigit(I[1]);
1110    }
1111
1112    return true;
1113 }
1114                                                                         /*}}}*/
1115 // TokSplitString - Split a string up by a given token                  /*{{{*/
1116 // ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is modified in place: a NUL is written behind each item
   and List is filled with pointers into Input, followed by a terminating
   null pointer. Whitespace and repeated tokens around the items are
   skipped. ListMax is the number of pointers List has room for
   (including the terminator). Returns false if that is not enough; the
   entries stored so far are still terminated by a null pointer unless
   ListMax is 0, in which case List is not touched at all. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without room for even the terminating entry nothing can be stored
   if (ListMax == 0)
      return false;

   /* Strip any leading spaces. The <ctype.h> functions must not be called
      with a negative value, hence the detour via unsigned char. */
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // no room left for the terminator: sacrifice the last item for it
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over the separator(s) and the NUL we have just written
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1154                                                                         /*}}}*/
1155 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1156 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. It is a bit slower, but
   the advantage is that we get an iterable vector of strings.
   Empty items between two separators are kept, but a separator at the
   very end of the string does not create a trailing empty item. An
   empty string results in an empty vector. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   if (haystack.empty() == true)
      return exploded;

   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (true)
   {
      string::const_iterator const end = std::find(start, last, split);
      exploded.push_back(string(start, end));

      // only step over a separator which is really there, so that no
      // iterator beyond the end of the string is ever formed
      if (end == last)
         break;
      start = end + 1;

      // a separator as very last character opens no further item
      if (start == last)
         break;
   }
   return exploded;
}
1174                                                                         /*}}}*/
1175 // StringSplit - split a string into a string vector by token           /*{{{*/
1176 // ---------------------------------------------------------------------
/* See header for details.
   In short: s is cut at every occurrence of sep. As soon as maxsplit
   items were collected the last item takes the complete remainder of
   the string, separators included. An empty sep yields an empty vector.
 */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> parts;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return parts;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);
      // for the last item hit is npos and substr hands out the remainder
      parts.push_back(s.substr(from, hit - from));

      // if maxsplit is reached, the remaining string is the last item
      if (parts.size() >= maxsplit)
      {
         parts.back() = s.substr(from);
         break;
      }

      if (hit == string::npos)
         break;
      from = hit + sep.size();
   }
   return parts;
}
1205                                                                         /*}}}*/
1206 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1207 // ---------------------------------------------------------------------
1208 /* */
1209 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1210                       const char **ListEnd)
1211 {
1212    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1213       R->Hit = false;
1214
1215    unsigned long Hits = 0;
1216    for (; ListBegin < ListEnd; ++ListBegin)
1217    {
1218       // Check if the name is a regex
1219       const char *I;
1220       bool Regex = true;
1221       for (I = *ListBegin; *I != 0; I++)
1222          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1223             break;
1224       if (*I == 0)
1225          Regex = false;
1226
1227       // Compile the regex pattern
1228       regex_t Pattern;
1229       if (Regex == true)
1230          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1231                      REG_NOSUB) != 0)
1232             Regex = false;
1233
1234       // Search the list
1235       bool Done = false;
1236       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1237       {
1238          if (R->Str[0] == 0)
1239             continue;
1240
1241          if (strcasecmp(R->Str,*ListBegin) != 0)
1242          {
1243             if (Regex == false)
1244                continue;
1245             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1246                continue;
1247          }
1248          Done = true;
1249
1250          if (R->Hit == false)
1251             Hits++;
1252
1253          R->Hit = true;
1254       }
1255
1256       if (Regex == true)
1257          regfree(&Pattern);
1258
1259       if (Done == false)
1260          _error->Warning(_("Selection %s not found"),*ListBegin);
1261    }
1262
1263    return Hits;
1264 }
1265                                                                         /*}}}*/
1266 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1267 // ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters.

   iovprintf does a single formatting attempt with a buffer of 'size'
   bytes. On success the text is written to out and true is returned. If
   the buffer was too small nothing is written, size is raised to the
   needed size (or doubled if vsnprintf did not report one) and false is
   returned, so that the caller can retry with a freshly started va_list. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // we need at least room for the terminating NUL
   if (size < 1)
      size = 1;

   /* The vector frees the scratch buffer on every way out and reports a
      failed allocation by exception instead of handing a null pointer
      to vsnprintf */
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   size = (n > -1) ? n + 1 : size * 2;
   return false;
}
1287 void ioprintf(ostream &out,const char *format,...)
1288 {
1289    va_list args;
1290    ssize_t size = 400;
1291    while (true) {
1292       va_start(args,format);
1293       if (iovprintf(out, format, args, size) == true)
1294          return;
1295       va_end(args);
1296    }
1297 }
1298 void strprintf(string &out,const char *format,...)
1299 {
1300    va_list args;
1301    ssize_t size = 400;
1302    std::ostringstream outstr;
1303    while (true) {
1304       va_start(args,format);
1305       if (iovprintf(outstr, format, args, size) == true)
1306          break;
1307       va_end(args);
1308    }
1309    out = outstr.str();
1310 }
1311                                                                         /*}}}*/
1312 // safe_snprintf - Safer snprintf                                       /*{{{*/
1313 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. If the text had to be truncated or an
   error occurred End is returned. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;
   ptrdiff_t const Room = End - Buffer;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,Room,Format,args);
   va_end(args);

   /* Did is the length the complete text would have had. Compare lengths
      instead of pointers as Buffer + Did could point far behind the
      buffer, which is not a valid pointer to form. */
   if (Did < 0 || Did > Room)
      return End;
   return Buffer + Did;
}
1333                                                                         /*}}}*/
1334 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1335 // ---------------------------------------------------------------------
// Returns what follows the first ':' of VerStr, or VerStr itself if there is none
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1343                                                                         /*}}}*/
1344 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1345 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. Everything except
   'A' to 'Z' is returned unchanged. */
int tolower_ascii(int const c)
{
   bool const Upper = ('A' <= c && c <= 'Z');
   // upper and lower case letters are 32 positions apart in ascii
   return Upper ? c + 32 : c;
}
1356                                                                         /*}}}*/
1357
// CheckDomainList - See if Host is in a comma separated list           /*{{{*/
1359 // ---------------------------------------------------------------------
1360 /* The domain list is a comma separate list of domains that are suffix
1361    matched against the argument */
1362 bool CheckDomainList(const string &Host,const string &List)
1363 {
1364    string::const_iterator Start = List.begin();
1365    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1366    {
1367       if (Cur < List.end() && *Cur != ',')
1368          continue;
1369
1370       // Match the end of the string..
1371       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1372           Cur - Start != 0 &&
1373           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1374          return true;
1375
1376       Start = Cur + 1;
1377    }
1378    return false;
1379 }
1380                                                                         /*}}}*/
1381 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1382 // ---------------------------------------------------------------------
/* Counts the entries of str_array up to, but not including, the
   terminating NULL entry. */
size_t strv_length(const char **str_array)
{
   const char **Cur = str_array;
   while (*Cur != NULL)
      ++Cur;
   return static_cast<size_t>(Cur - str_array);
}
1392
1393 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1394 // ---------------------------------------------------------------------
/* Returns a copy of input in which the escape sequences are resolved:
   a double backslash becomes a single backslash, a backslash followed
   by '0' and two octal digits or by 'x' and two hex digits becomes the
   character with that value. A backslash at the very end of the string,
   an escape without two following characters and unknown escapes
   are dropped (together with their type character). */
string DeEscapeString(const string &input)
{
   string output;
   string::size_type const len = input.size();
   for (string::size_type i = 0; i < len; ++i)
   {
      // just copy non-escape chars
      if (input[i] != '\\')
      {
         output += input[i];
         continue;
      }

      // ensure we have a char to read
      if (i + 1 >= len)
         continue;

      // read the type of the escape
      char const kind = input[++i];

      // deal with double escape
      if (kind == '\\')
      {
         output += '\\';
         continue;
      }

      // FIXME: raise exception for unknown escapes here?
      if (kind != '0' && kind != 'x')
         continue;

      /* Both digits have to be inside the string. Otherwise we would read
         the end of the string as a digit and step beyond it afterwards. */
      if (i + 2 >= len)
         continue;

      char const digits[3] = { input[i + 1], input[i + 2], 0 };
      output += static_cast<char>(strtol(digits, 0, (kind == '0') ? 8 : 16));
      i += 2;
   }
   return output;
}
1454                                                                         /*}}}*/
1455 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1456 // ---------------------------------------------------------------------
/* This parses the URI string U into its components and stores them in
   the members Access, Path, Host, User, Password and Port.

   Access is always assigned and Path falls back to "/" if it is still
   empty after parsing. The other members are only written if the
   respective component is found.
   NOTE(review): Host, User, Password and Port are not reset up front and
   Port is only set to 0 after the host has been parsed, so the early
   returns below leave them at their previous values - confirm that
   CopyFrom is only used on freshly constructed objects. */
void URI::CopyFrom(const string &U)
{
   string::const_iterator I = U.begin();

   // Locate the first colon, this separates the scheme
   // (without any colon FirstColon is U.end() and all of U ends up in Access)
   for (; I < U.end() && *I != ':' ; ++I);
   string::const_iterator FirstColon = I;

   /* Determine if this is a host type URI with a leading double //
      and then search for the first single / */
   string::const_iterator SingleSlash = I;
   if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
      SingleSlash += 3;

   /* Find the / indicating the end of the hostname, ignoring /'s in the
      square brackets */
   bool InBracket = false;
   for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
   {
      if (*SingleSlash == '[')
         InBracket = true;
      if (InBracket == true && *SingleSlash == ']')
         InBracket = false;
   }

   if (SingleSlash > U.end())
      SingleSlash = U.end();

   // We can now write the access and path specifiers
   Access.assign(U.begin(),FirstColon);
   if (SingleSlash != U.end())
      Path.assign(SingleSlash,U.end());
   if (Path.empty() == true)
      Path = "/";

   // Now we attempt to locate a user:pass@host fragment
   // FirstColon is moved behind the "://" (or ":") and marks the start of
   // that fragment from here on
   if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
      FirstColon += 3;
   else
      FirstColon += 1;
   // nothing follows the scheme, so there is no host part to parse
   if (FirstColon >= U.end())
      return;

   if (FirstColon > SingleSlash)
      FirstColon = SingleSlash;

   // Find the colon...
   // (the one which would separate user and password, searched only up to
   // the end of the host part)
   I = FirstColon + 1;
   if (I > SingleSlash)
      I = SingleSlash;
   for (; I < SingleSlash && *I != ':'; ++I);
   string::const_iterator SecondColon = I;

   // Search for the @ after the colon
   for (; I < SingleSlash && *I != '@'; ++I);
   string::const_iterator At = I;

   // Now write the host and user/pass
   if (At == SingleSlash)
   {
      // no '@' found: everything up to the path is the host (and port)
      if (FirstColon < SingleSlash)
         Host.assign(FirstColon,SingleSlash);
   }
   else
   {
      Host.assign(At+1,SingleSlash);
      // username and password must be encoded (RFC 3986)
      User.assign(DeQuoteString(FirstColon,SecondColon));
      if (SecondColon < At)
         Password.assign(DeQuoteString(SecondColon+1,At));
   }

   // Now we parse the RFC 2732 [] hostnames.
   // The brackets are removed from Host; PortEnd remembers the position
   // right behind the closing bracket, so that a colon inside of the
   // brackets is not mistaken for the port separator below
   unsigned long PortEnd = 0;
   InBracket = false;
   for (unsigned I = 0; I != Host.length();)
   {
      if (Host[I] == '[')
      {
         InBracket = true;
         Host.erase(I,1);
         continue;
      }

      if (InBracket == true && Host[I] == ']')
      {
         InBracket = false;
         Host.erase(I,1);
         PortEnd = I;
         continue;
      }
      I++;
   }

   // Tsk, weird.
   // (an opening bracket without its closing counterpart: drop the host)
   if (InBracket == true)
   {
      Host.clear();
      return;
   }

   // Now we parse off a port number from the hostname
   Port = 0;
   string::size_type Pos = Host.rfind(':');
   if (Pos == string::npos || Pos < PortEnd)
      return;

   // everything behind the last colon is the port, the rest stays the host
   Port = atoi(string(Host,Pos+1).c_str());
   Host.assign(Host,0,Pos);
}
1568                                                                         /*}}}*/
1569 // URI::operator string - Convert the URI to a string                   /*{{{*/
1570 // ---------------------------------------------------------------------
1571 /* */
1572 URI::operator string()
1573 {
1574    string Res;
1575
1576    if (Access.empty() == false)
1577       Res = Access + ':';
1578
1579    if (Host.empty() == false)
1580    {
1581       if (Access.empty() == false)
1582          Res += "//";
1583
1584       if (User.empty() == false)
1585       {
1586          // FIXME: Technically userinfo is permitted even less
1587          // characters than these, but this is not conveniently
1588          // expressed with a blacklist.
1589          Res += QuoteString(User, ":/?#[]@");
1590          if (Password.empty() == false)
1591             Res += ":" + QuoteString(Password, ":/?#[]@");
1592          Res += "@";
1593       }
1594
1595       // Add RFC 2732 escaping characters
1596       if (Access.empty() == false &&
1597           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1598          Res += '[' + Host + ']';
1599       else
1600          Res += Host;
1601
1602       if (Port != 0)
1603       {
1604          char S[30];
1605          sprintf(S,":%u",Port);
1606          Res += S;
1607       }
1608    }
1609
1610    if (Path.empty() == false)
1611    {
1612       if (Path[0] != '/')
1613          Res += "/" + Path;
1614       else
1615          Res += Path;
1616    }
1617
1618    return Res;
1619 }
1620                                                                         /*}}}*/
1621 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1622 // ---------------------------------------------------------------------
1623 /* */
1624 string URI::SiteOnly(const string &URI)
1625 {
1626    ::URI U(URI);
1627    U.User.clear();
1628    U.Password.clear();
1629    U.Path.clear();
1630    return U;
1631 }
1632                                                                         /*}}}*/
1633 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1634 // ---------------------------------------------------------------------
1635 /* */
1636 string URI::NoUserPassword(const string &URI)
1637 {
1638    ::URI U(URI);
1639    U.User.clear();
1640    U.Password.clear();
1641    return U;
1642 }
1643                                                                         /*}}}*/