apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <stddef.h>
  25 #include <stdlib.h>
  26 #include <time.h>
  27 #include <string>
  28 #include <vector>
  29 #include <ctype.h>
  30 #include <string.h>
  31 #include <sstream>
  32 #include <stdio.h>
  33 #include <algorithm>
  34 #include <unistd.h>
  35 #include <regex.h>
  36 #include <errno.h>
  37 #include <stdarg.h>
  38 #include <iconv.h>
  39
  40 #include <apti18n.h>
  41                                                                         /*}}}*/
  42 using namespace std;
  43
  44 // Strip - Remove white space from the front and back of a string       /*{{{*/
  45 // ---------------------------------------------------------------------
  46 namespace APT {
  47    namespace String {
  48 std::string Strip(const std::string &s)
  49 {
  50    size_t start = s.find_first_not_of(" \t\n");
  51    // only whitespace
  52    if (start == string::npos)
  53       return "";
  54    size_t end = s.find_last_not_of(" \t\n");
  55    return s.substr(start, end-start+1);
  56 }
  57
  58 bool Endswith(const std::string &s, const std::string &end)
  59 {
  60    if (end.size() > s.size())
  61       return false;
  62    return (s.substr(s.size() - end.size(), s.size()) == end);
  63 }
  64
  65 }
  66 }
  67                                                                         /*}}}*/
  68 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  69 // ---------------------------------------------------------------------
  70 /* This is handy to use before displaying text to the end user */
  71 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  72 {
  73   iconv_t cd;
  74   const char *inbuf;
  75   char *inptr, *outbuf;
  76   size_t insize, bufsize;
  77   dest->clear();
  78
  79   cd = iconv_open(codeset, "UTF-8");
  80   if (cd == (iconv_t)(-1)) {
  81      // Something went wrong
  82      if (errno == EINVAL)
  83         _error->Error("conversion from 'UTF-8' to '%s' not available",
  84                codeset);
  85      else
  86         perror("iconv_open");
  87
  88      return false;
  89   }
  90
  91   insize = bufsize = orig.size();
  92   inbuf = orig.data();
  93   inptr = (char *)inbuf;
  94   outbuf = new char[bufsize];
  95   size_t lastError = -1;
  96
  97   while (insize != 0)
  98   {
  99      char *outptr = outbuf;
 100      size_t outsize = bufsize;
 101      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
 102      dest->append(outbuf, outptr - outbuf);
 103      if (err == (size_t)(-1))
 104      {
 105         switch (errno)
 106         {
 107         case EILSEQ:
 108            insize--;
 109            inptr++;
 110            // replace a series of unknown multibytes with a single "?"
 111            if (lastError != insize) {
 112               lastError = insize - 1;
 113               dest->append("?");
 114            }
 115            break;
 116         case EINVAL:
 117            insize = 0;
 118            break;
 119         case E2BIG:
 120            if (outptr == outbuf)
 121            {
 122               bufsize *= 2;
 123               delete[] outbuf;
 124               outbuf = new char[bufsize];
 125            }
 126            break;
 127         }
 128      }
 129   }
 130
 131   delete[] outbuf;
 132
 133   iconv_close(cd);
 134
 135   return true;
 136 }
 137                                                                         /*}}}*/
 138 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 139 // ---------------------------------------------------------------------
 140 /* This is handy to use when parsing a file. It also removes \n's left
 141    over from fgets and company */
 142 char *_strstrip(char *String)
 143 {
 144    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 145
 146    if (*String == 0)
 147       return String;
 148    return _strrstrip(String);
 149 }
 150                                                                         /*}}}*/
 151 // strrstrip - Remove white space from the back of a string     /*{{{*/
 152 // ---------------------------------------------------------------------
 153 char *_strrstrip(char *String)
 154 {
 155    char *End = String + strlen(String) - 1;
 156    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 157                                *End == '\r'); End--);
 158    End++;
 159    *End = 0;
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 164 // ---------------------------------------------------------------------
 165 /* */
 166 char *_strtabexpand(char *String,size_t Len)
 167 {
 168    for (char *I = String; I != I + Len && *I != 0; I++)
 169    {
 170       if (*I != '\t')
 171          continue;
 172       if (I + 8 > String + Len)
 173       {
 174          *I = 0;
 175          return String;
 176       }
 177
 178       /* Assume the start of the string is 0 and find the next 8 char
 179          division */
 180       int Len;
 181       if (String == I)
 182          Len = 1;
 183       else
 184          Len = 8 - ((String - I) % 8);
 185       Len -= 2;
 186       if (Len <= 0)
 187       {
 188          *I = ' ';
 189          continue;
 190       }
 191
 192       memmove(I + Len,I + 1,strlen(I) + 1);
 193       for (char *J = I; J + Len != I; *I = ' ', I++);
 194    }
 195    return String;
 196 }
 197                                                                         /*}}}*/
 198 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 199 // ---------------------------------------------------------------------
 200 /* This grabs a single word, converts any % escaped characters to their
 201    proper values and advances the pointer. Double quotes are understood
 202    and stripped out as well. This is for URI/URL parsing. It can also
 203    understand [] brackets.*/
 204 bool ParseQuoteWord(const char *&String,string &Res)
 205 {
 206    // Skip leading whitespace
 207    const char *C = String;
 208    for (;*C != 0 && *C == ' '; C++);
 209    if (*C == 0)
 210       return false;
 211
 212    // Jump to the next word
 213    for (;*C != 0 && isspace(*C) == 0; C++)
 214    {
 215       if (*C == '"')
 216       {
 217          C = strchr(C + 1, '"');
 218          if (C == NULL)
 219             return false;
 220       }
 221       if (*C == '[')
 222       {
 223          C = strchr(C + 1, ']');
 224          if (C == NULL)
 225             return false;
 226       }
 227    }
 228
 229    // Now de-quote characters
 230    char Buffer[1024];
 231    char Tmp[3];
 232    const char *Start = String;
 233    char *I;
 234    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 235    {
 236       if (*Start == '%' && Start + 2 < C &&
 237           isxdigit(Start[1]) && isxdigit(Start[2]))
 238       {
 239          Tmp[0] = Start[1];
 240          Tmp[1] = Start[2];
 241          Tmp[2] = 0;
 242          *I = (char)strtol(Tmp,0,16);
 243          Start += 3;
 244          continue;
 245       }
 246       if (*Start != '"')
 247          *I = *Start;
 248       else
 249          I--;
 250       Start++;
 251    }
 252    *I = 0;
 253    Res = Buffer;
 254
 255    // Skip ending white space
 256    for (;*C != 0 && isspace(*C) != 0; C++);
 257    String = C;
 258    return true;
 259 }
 260                                                                         /*}}}*/
 261 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 262 // ---------------------------------------------------------------------
 263 /* This expects a series of space separated strings enclosed in ""'s.
 264    It concatenates the ""'s into a single string. */
 265 bool ParseCWord(const char *&String,string &Res)
 266 {
 267    // Skip leading whitespace
 268    const char *C = String;
 269    for (;*C != 0 && *C == ' '; C++);
 270    if (*C == 0)
 271       return false;
 272
 273    char Buffer[1024];
 274    char *Buf = Buffer;
 275    if (strlen(String) >= sizeof(Buffer))
 276        return false;
 277
 278    for (; *C != 0; C++)
 279    {
 280       if (*C == '"')
 281       {
 282          for (C++; *C != 0 && *C != '"'; C++)
 283             *Buf++ = *C;
 284
 285          if (*C == 0)
 286             return false;
 287
 288          continue;
 289       }
 290
 291       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 292          continue;
 293       if (isspace(*C) == 0)
 294          return false;
 295       *Buf++ = ' ';
 296    }
 297    *Buf = 0;
 298    Res = Buffer;
 299    String = C;
 300    return true;
 301 }
 302                                                                         /*}}}*/
 303 // QuoteString - Convert a string into quoted form                      /*{{{*/
 304 // ---------------------------------------------------------------------
 305 /* */
 306 string QuoteString(const string &Str, const char *Bad)
 307 {
 308    string Res;
 309    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 310    {
 311       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 312           *I == 0x25 || // percent '%' char
 313           *I <= 0x20 || *I >= 0x7F) // control chars
 314       {
 315          char Buf[10];
 316          sprintf(Buf,"%%%02x",(int)*I);
 317          Res += Buf;
 318       }
 319       else
 320          Res += *I;
 321    }
 322    return Res;
 323 }
 324                                                                         /*}}}*/
 325 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 326 // ---------------------------------------------------------------------
 327 /* This undoes QuoteString */
 328 string DeQuoteString(const string &Str)
 329 {
 330    return DeQuoteString(Str.begin(),Str.end());
 331 }
 332 string DeQuoteString(string::const_iterator const &begin,
 333                         string::const_iterator const &end)
 334 {
 335    string Res;
 336    for (string::const_iterator I = begin; I != end; ++I)
 337    {
 338       if (*I == '%' && I + 2 < end &&
 339           isxdigit(I[1]) && isxdigit(I[2]))
 340       {
 341          char Tmp[3];
 342          Tmp[0] = I[1];
 343          Tmp[1] = I[2];
 344          Tmp[2] = 0;
 345          Res += (char)strtol(Tmp,0,16);
 346          I += 2;
 347          continue;
 348       }
 349       else
 350          Res += *I;
 351    }
 352    return Res;
 353 }
 354
 355                                                                         /*}}}*/
 356 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 357 // ---------------------------------------------------------------------
 358 /* A max of 4 digits are shown before conversion to the next highest unit.
 359    The max length of the string will be 5 chars unless the size is > 10
 360    YottaBytes (E24) */
 361 string SizeToStr(double Size)
 362 {
 363    char S[300];
 364    double ASize;
 365    if (Size >= 0)
 366       ASize = Size;
 367    else
 368       ASize = -1*Size;
 369
 370    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 371       ExaBytes, ZettaBytes, YottaBytes */
 372    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 373    int I = 0;
 374    while (I <= 8)
 375    {
 376       if (ASize < 100 && I != 0)
 377       {
 378          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 379          break;
 380       }
 381
 382       if (ASize < 10000)
 383       {
 384          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 385          break;
 386       }
 387       ASize /= 1000.0;
 388       I++;
 389    }
 390
 391    return S;
 392 }
 393                                                                         /*}}}*/
 394 // TimeToStr - Convert the time into a string                           /*{{{*/
 395 // ---------------------------------------------------------------------
 396 /* Converts a number of seconds to a hms format */
 397 string TimeToStr(unsigned long Sec)
 398 {
 399    char S[300];
 400
 401    while (1)
 402    {
 403       if (Sec > 60*60*24)
 404       {
 405          //d means days, h means hours, min means minutes, s means seconds
 406          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 407          break;
 408       }
 409
 410       if (Sec > 60*60)
 411       {
 412          //h means hours, min means minutes, s means seconds
 413          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 414          break;
 415       }
 416
 417       if (Sec > 60)
 418       {
 419          //min means minutes, s means seconds
 420          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 421          break;
 422       }
 423
 424       //s means seconds
 425       sprintf(S,_("%lis"),Sec);
 426       break;
 427    }
 428
 429    return S;
 430 }
 431                                                                         /*}}}*/
 432 // SubstVar - Substitute a string for another string                    /*{{{*/
 433 // ---------------------------------------------------------------------
 434 /* This replaces all occurrences of Subst with Contents in Str. */
 435 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 436 {
 437    string::size_type Pos = 0;
 438    string::size_type OldPos = 0;
 439    string Temp;
 440
 441    while (OldPos < Str.length() &&
 442           (Pos = Str.find(Subst,OldPos)) != string::npos)
 443    {
 444       Temp += string(Str,OldPos,Pos) + Contents;
 445       OldPos = Pos + Subst.length();
 446    }
 447
 448    if (OldPos == 0)
 449       return Str;
 450
 451    return Temp + string(Str,OldPos);
 452 }
 453
 454 string SubstVar(string Str,const struct SubstVar *Vars)
 455 {
 456    for (; Vars->Subst != 0; Vars++)
 457       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 458    return Str;
 459 }
 460                                                                         /*}}}*/
 461 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 462 // ---------------------------------------------------------------------
 463 /* Returns a string with the supplied separator depth + 1 times in it */
 464 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 465 {
 466    std::string output = "";
 467    for(unsigned long d=Depth+1; d > 0; d--)
 468       output.append(Separator);
 469    return output;
 470 }
 471                                                                         /*}}}*/
 472 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 473 // ---------------------------------------------------------------------
 474 /* This converts a URI into a safe filename. It quotes all unsafe characters
 475    and converts / to _ and removes the scheme identifier. The resulting
 476    file name should be unique and never occur again for a different file */
 477 string URItoFileName(const string &URI)
 478 {
 479    // Nuke 'sensitive' items
 480    ::URI U(URI);
 481    U.User.clear();
 482    U.Password.clear();
 483    U.Access.clear();
 484
 485    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 486    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 487    replace(NewURI.begin(),NewURI.end(),'/','_');
 488    return NewURI;
 489 }
 490                                                                         /*}}}*/
 491 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 492 // ---------------------------------------------------------------------
 493 /* This routine performs a base64 transformation on a string. It was ripped
 494    from wget and then patched and bug fixed.
 495
 496    This spec can be found in rfc2045 */
 497 string Base64Encode(const string &S)
 498 {
 499    // Conversion table.
 500    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 501                           'I','J','K','L','M','N','O','P',
 502                           'Q','R','S','T','U','V','W','X',
 503                           'Y','Z','a','b','c','d','e','f',
 504                           'g','h','i','j','k','l','m','n',
 505                           'o','p','q','r','s','t','u','v',
 506                           'w','x','y','z','0','1','2','3',
 507                           '4','5','6','7','8','9','+','/'};
 508
 509    // Pre-allocate some space
 510    string Final;
 511    Final.reserve((4*S.length() + 2)/3 + 2);
 512
 513    /* Transform the 3x8 bits to 4x6 bits, as required by
 514       base64.  */
 515    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 516    {
 517       char Bits[3] = {0,0,0};
 518       Bits[0] = I[0];
 519       if (I + 1 < S.end())
 520          Bits[1] = I[1];
 521       if (I + 2 < S.end())
 522          Bits[2] = I[2];
 523
 524       Final += tbl[Bits[0] >> 2];
 525       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 526
 527       if (I + 1 >= S.end())
 528          break;
 529
 530       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 531
 532       if (I + 2 >= S.end())
 533          break;
 534
 535       Final += tbl[Bits[2] & 0x3f];
 536    }
 537
 538    /* Apply the padding elements, this tells how many bytes the remote
 539       end should discard */
 540    if (S.length() % 3 == 2)
 541       Final += '=';
 542    if (S.length() % 3 == 1)
 543       Final += "==";
 544
 545    return Final;
 546 }
 547                                                                         /*}}}*/
 548 // stringcmp - Arbitrary string compare                                 /*{{{*/
 549 // ---------------------------------------------------------------------
 550 /* This safely compares two non-null terminated strings of arbitrary
 551    length */
 552 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 553 {
 554    for (; A != AEnd && B != BEnd; A++, B++)
 555       if (*A != *B)
 556          break;
 557
 558    if (A == AEnd && B == BEnd)
 559       return 0;
 560    if (A == AEnd)
 561       return 1;
 562    if (B == BEnd)
 563       return -1;
 564    if (*A < *B)
 565       return -1;
 566    return 1;
 567 }
 568
 569 #if __GNUC__ >= 3
 570 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 571               const char *B,const char *BEnd)
 572 {
 573    for (; A != AEnd && B != BEnd; A++, B++)
 574       if (*A != *B)
 575          break;
 576
 577    if (A == AEnd && B == BEnd)
 578       return 0;
 579    if (A == AEnd)
 580       return 1;
 581    if (B == BEnd)
 582       return -1;
 583    if (*A < *B)
 584       return -1;
 585    return 1;
 586 }
 587 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 588               string::const_iterator B,string::const_iterator BEnd)
 589 {
 590    for (; A != AEnd && B != BEnd; A++, B++)
 591       if (*A != *B)
 592          break;
 593
 594    if (A == AEnd && B == BEnd)
 595       return 0;
 596    if (A == AEnd)
 597       return 1;
 598    if (B == BEnd)
 599       return -1;
 600    if (*A < *B)
 601       return -1;
 602    return 1;
 603 }
 604 #endif
 605                                                                         /*}}}*/
 606 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 607 // ---------------------------------------------------------------------
 608 /* */
 609 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 610 {
 611    for (; A != AEnd && B != BEnd; A++, B++)
 612       if (tolower_ascii(*A) != tolower_ascii(*B))
 613          break;
 614
 615    if (A == AEnd && B == BEnd)
 616       return 0;
 617    if (A == AEnd)
 618       return 1;
 619    if (B == BEnd)
 620       return -1;
 621    if (tolower_ascii(*A) < tolower_ascii(*B))
 622       return -1;
 623    return 1;
 624 }
 625 #if __GNUC__ >= 3
 626 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 627                   const char *B,const char *BEnd)
 628 {
 629    for (; A != AEnd && B != BEnd; A++, B++)
 630       if (tolower_ascii(*A) != tolower_ascii(*B))
 631          break;
 632
 633    if (A == AEnd && B == BEnd)
 634       return 0;
 635    if (A == AEnd)
 636       return 1;
 637    if (B == BEnd)
 638       return -1;
 639    if (tolower_ascii(*A) < tolower_ascii(*B))
 640       return -1;
 641    return 1;
 642 }
 643 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 644                   string::const_iterator B,string::const_iterator BEnd)
 645 {
 646    for (; A != AEnd && B != BEnd; A++, B++)
 647       if (tolower_ascii(*A) != tolower_ascii(*B))
 648          break;
 649
 650    if (A == AEnd && B == BEnd)
 651       return 0;
 652    if (A == AEnd)
 653       return 1;
 654    if (B == BEnd)
 655       return -1;
 656    if (tolower_ascii(*A) < tolower_ascii(*B))
 657       return -1;
 658    return 1;
 659 }
 660 #endif
 661                                                                         /*}}}*/
 662 // LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 663 // ---------------------------------------------------------------------
 664 /* The format is like those used in package files and the method
 665    communication system */
 666 string LookupTag(const string &Message,const char *Tag,const char *Default)
 667 {
 668    // Look for a matching tag.
 669    int Length = strlen(Tag);
 670    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 671    {
 672       // Found the tag
 673       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 674       {
 675          // Find the end of line and strip the leading/trailing spaces
 676          string::const_iterator J;
 677          I += Length + 1;
 678          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 679          for (J = I; *J != '\n' && J < Message.end(); ++J);
 680          for (; J > I && isspace(J[-1]) != 0; --J);
 681
 682          return string(I,J);
 683       }
 684
 685       for (; *I != '\n' && I < Message.end(); ++I);
 686    }
 687
 688    // Failed to find a match
 689    if (Default == 0)
 690       return string();
 691    return Default;
 692 }
 693                                                                         /*}}}*/
 694 // StringToBool - Converts a string into a boolean                      /*{{{*/
 695 // ---------------------------------------------------------------------
 696 /* This inspects the string to see if it is true or if it is false and
 697    then returns the result. Several variants of true/false are checked. */
 698 int StringToBool(const string &Text,int Default)
 699 {
 700    char *End;
 701    int Res = strtol(Text.c_str(),&End,0);
 702    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 703       return Res;
 704
 705    // Check for positives
 706    if (strcasecmp(Text.c_str(),"no") == 0 ||
 707        strcasecmp(Text.c_str(),"false") == 0 ||
 708        strcasecmp(Text.c_str(),"without") == 0 ||
 709        strcasecmp(Text.c_str(),"off") == 0 ||
 710        strcasecmp(Text.c_str(),"disable") == 0)
 711       return 0;
 712
 713    // Check for negatives
 714    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 715        strcasecmp(Text.c_str(),"true") == 0 ||
 716        strcasecmp(Text.c_str(),"with") == 0 ||
 717        strcasecmp(Text.c_str(),"on") == 0 ||
 718        strcasecmp(Text.c_str(),"enable") == 0)
 719       return 1;
 720
 721    return Default;
 722 }
 723                                                                         /*}}}*/
 724 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 725 // ---------------------------------------------------------------------
 726 /* This converts a time_t into a string time representation that is
 727    year 2000 compliant and timezone neutral */
 728 string TimeRFC1123(time_t Date)
 729 {
 730    struct tm Conv;
 731    if (gmtime_r(&Date, &Conv) == NULL)
 732       return "";
 733
 734    char Buf[300];
 735    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 736    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 737                           "Aug","Sep","Oct","Nov","Dec"};
 738
 739    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 740            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 741            Conv.tm_min,Conv.tm_sec);
 742    return Buf;
 743 }
 744                                                                         /*}}}*/
 745 // ReadMessages - Read messages from the FD                             /*{{{*/
 746 // ---------------------------------------------------------------------
 747 /* This pulls full messages from the input FD into the message buffer.
 748    It assumes that messages will not pause during transit so no
 749    fancy buffering is used.
 750
 751    In particular: this reads blocks from the input until it believes
 752    that it's run out of input text.  Each block is terminated by a
 753    double newline ('\n' followed by '\n').  As noted below, there is a
 754    bug in this code: it assumes that all the blocks have been read if
 755    it doesn't see additional text in the buffer after the last one is
 756    parsed, which will cause it to lose blocks if the last block
 757    coincides with the end of the buffer.
 758  */
 759 bool ReadMessages(int Fd, vector<string> &List)
 760 {
      /* Reads from Fd and appends every complete message to List. A message
         is the text in front of a blank line, i.e. "\n\n" or "\r\n\r\n".
         Returns true when read() reports EAGAIN or when nothing is left in
         the buffer after parsing. Returns false when read() returns 0,
         fails with any error other than EINTR/EAGAIN, or WaitFd() fails.
         Text collected for an unfinished message is dropped on return. */
 761    char Buffer[64000];
      // End marks one past the last valid byte in Buffer
 762    char *End = Buffer;
 763    // Represents any left-over from the previous iteration of the
 764    // parse loop.  (i.e., if a message is split across the end
 765    // of the buffer, it goes here)
 766    string PartialMessage;
 767
 768    while (1)
 769    {
         // Append new data behind what is already buffered
 770       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 771       if (Res < 0 && errno == EINTR)
 772          continue;
 773
 774       // Process is dead, this is kind of bad..
 775       if (Res == 0)
 776          return false;
 777
 778       // No data
 779       if (Res < 0 && errno == EAGAIN)
 780          return true;
 781       if (Res < 0)
 782          return false;
 783
 784       End += Res;
 785
 786       // Look for the end of the message
 787       for (char *I = Buffer; I + 1 < End; I++)
 788       {
            // Only "\n\n" or "\r\n\r\n" starting at I ends a message.
            // NOTE(review): strncmp may look at up to 4 bytes from I while
            // the loop only guarantees I + 1 < End, so bytes behind End can
            // be inspected -- confirm this is harmless.
 789          if (I[1] != '\n' ||
 790                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 791             continue;
 792
 793          // Pull the message out
 794          string Message(Buffer,I-Buffer);
 795          PartialMessage += Message;
 796
 797          // Fix up the buffer
            // Skip the separator and move the unparsed rest to the front
 798          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 799          End -= I-Buffer;
 800          memmove(Buffer,I,End-Buffer);
            // The loop increment makes the scan resume at Buffer + 1
 801          I = Buffer;
 802
 803          List.push_back(PartialMessage);
 804          PartialMessage.clear();
 805       }
 806       if (End != Buffer)
 807         {
 808           // If there's text left in the buffer, store it
 809           // in PartialMessage and throw the rest of the buffer
 810           // away.  This allows us to handle messages that
 811           // are longer than the static buffer size.
 812           PartialMessage += string(Buffer, End);
 813           End = Buffer;
 814         }
 815       else
 816         {
 817           // BUG ALERT: if a message block happens to end at a
 818           // multiple of 64000 characters, this will cause it to
 819           // terminate early, leading to a badly formed block and
 820           // probably crashing the method.  However, this is the only
 821           // way we have to find the end of the message block.  I have
 822           // an idea of how to fix this, but it will require changes
 823           // to the protocol (essentially to mark the beginning and
 824           // end of the block).
 825           //
 826           //  -- dburrows 2008-04-02
 827           return true;
 828         }
 829
         // An unfinished message is pending: call WaitFd() before reading on
 830       if (WaitFd(Fd) == false)
 831          return false;
 832    }
 833 }
 834                                                                         /*}}}*/
 835 // MonthConv - Converts a month string into a number                    /*{{{*/
 836 // ---------------------------------------------------------------------
 837 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 838    Made it a bit more robust with a few tolower_ascii though. */
 839 static int MonthConv(char *Month)
 840 {
 841    switch (tolower_ascii(*Month))
 842    {
 843       case 'a':
 844       return tolower_ascii(Month[1]) == 'p'?3:7;
 845       case 'd':
 846       return 11;
 847       case 'f':
 848       return 1;
 849       case 'j':
 850       if (tolower_ascii(Month[1]) == 'a')
 851          return 0;
 852       return tolower_ascii(Month[2]) == 'n'?5:6;
 853       case 'm':
 854       return tolower_ascii(Month[2]) == 'r'?2:4;
 855       case 'n':
 856       return 10;
 857       case 'o':
 858       return 9;
 859       case 's':
 860       return 8;
 861
 862       // Pretend it is January..
 863       default:
 864       return 0;
 865    }
 866 }
 867                                                                         /*}}}*/
 868 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 869 // ---------------------------------------------------------------------
 870 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 871    than local timezone (mktime assumes the latter).
 872
 873    This function is a nonstandard GNU extension that is also present on
 874    the BSDs and maybe other systems. For others we follow the advice of
 875    the manpage of timegm and use his portable replacement. */
 876 #ifndef HAVE_TIMEGM
 877 static time_t timegm(struct tm *t)
 878 {
 879    char *tz = getenv("TZ");
 880    setenv("TZ", "", 1);
 881    tzset();
 882    time_t ret = mktime(t);
 883    if (tz)
 884       setenv("TZ", tz, 1);
 885    else
 886       unsetenv("TZ");
 887    tzset();
 888    return ret;
 889 }
 890 #endif
 891                                                                         /*}}}*/
 892 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 893 // ---------------------------------------------------------------------
 894 /* Tries to parse a full date as specified in RFC2616 Section 3.3.1
 895    with one exception: All timezones (%Z) are accepted but the protocol
 896    says that it MUST be GMT, but this one is equal to UTC which we will
 897    encounter from time to time (e.g. in Release files) so we accept all
 898    here and just assume it is GMT (or UTC) later on */
 899 bool RFC1123StrToTime(const char* const str,time_t &time)
 900 {
 901    struct tm Tm;
 902    setlocale (LC_ALL,"C");
 903    bool const invalid =
 904    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 905       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 906    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 907        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 908    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 909        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 910    setlocale (LC_ALL,"");
 911    if (invalid == true)
 912       return false;
 913
 914    time = timegm(&Tm);
 915    return true;
 916 }
 917                                                                         /*}}}*/
 918 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 919 // ---------------------------------------------------------------------
 920 /* */
 921 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 922 {
 923    struct tm Tm;
 924    // MDTM includes no whitespaces but recommend and ignored by strptime
 925    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 926       return false;
 927
 928    time = timegm(&Tm);
 929    return true;
 930 }
 931                                                                         /*}}}*/
 932 // StrToTime - Converts a string into a time_t                          /*{{{*/
 933 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format. */
 939 bool StrToTime(const string &Val,time_t &Result)
 940 {
 941    struct tm Tm;
 942    char Month[10];
 943
 944    // Skip the day of the week
 945    const char *I = strchr(Val.c_str(), ' ');
 946
 947    // Handle RFC 1123 time
 948    Month[0] = 0;
 949    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 950               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 951    {
 952       // Handle RFC 1036 time
 953       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 954                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 955          Tm.tm_year += 1900;
 956       else
 957       {
 958          // asctime format
 959          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 960                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 961          {
 962             // 'ftp' time
 963             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 964                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 965                return false;
 966             Tm.tm_mon--;
 967          }
 968       }
 969    }
 970
 971    Tm.tm_isdst = 0;
 972    if (Month[0] != 0)
 973       Tm.tm_mon = MonthConv(Month);
 974    else
 975       Tm.tm_mon = 0; // we don't have a month, so pick something
 976    Tm.tm_year -= 1900;
 977
 978    // Convert to local time and then to GMT
 979    Result = timegm(&Tm);
 980    return true;
 981 }
 982                                                                         /*}}}*/
 983 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 984 // ---------------------------------------------------------------------
 985 /* This is used in decoding the crazy fixed length string headers in
 986    tar and ar files. */
 987 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 988 {
 989    char S[30];
 990    if (Len >= sizeof(S))
 991       return false;
 992    memcpy(S,Str,Len);
 993    S[Len] = 0;
 994
 995    // All spaces is a zero
 996    Res = 0;
 997    unsigned I;
 998    for (I = 0; S[I] == ' '; I++);
 999    if (S[I] == 0)
1000       return true;
1001
1002    char *End;
1003    Res = strtoul(S,&End,Base);
1004    if (End == S)
1005       return false;
1006
1007    return true;
1008 }
1009                                                                         /*}}}*/
1010 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
1011 // ---------------------------------------------------------------------
1012 /* This is used in decoding the crazy fixed length string headers in
1013    tar and ar files. */
/* Convert the first Len bytes of Str (which need not be NUL terminated)
   to an unsigned long long in the given Base.
   Returns false if Len is 30 or more, if no digits could be converted, if
   the value is negative or if it does not fit into Res. A field made up
   of spaces only yields Res = 0 and true. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I;
   for (I = 0; S[I] == ' '; ++I);
   if (S[I] == 0)
      return true;

   // strtoull() happily accepts "-N" and returns the negated value; a
   // negative number makes no sense in an unsigned field. It skips any
   // kind of leading white space, so look behind all of it for the sign.
   unsigned J = I;
   while (isspace(static_cast<unsigned char>(S[J])) != 0)
      ++J;
   if (S[J] == '-')
      return false;

   // errno is the only way to notice a value that is out of range
   char *End;
   errno = 0;
   Res = strtoull(S,&End,Base);
   if (End == S || errno != 0)
      return false;

   return true;
}
1036                                                                         /*}}}*/
1037
1038 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1039 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files. */
/* Decode a big-endian base-256 field of Len bytes into Res.
   Returns false (leaving Res untouched) unless the top bit of the first
   byte is set. The remaining seven bits of that byte are the most
   significant part of the value. No overflow check is made for fields
   wider than Res. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Go through unsigned char: where plain char is signed, a byte of 0x80
   // or above would otherwise be sign-extended and corrupt the result
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) | static_cast<unsigned char>(Str[i]);
   return true;
}
1054                                                                         /*}}}*/
1055 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1056 // ---------------------------------------------------------------------
1057 /* Helper for Hex2Num */
/* Value (0-15) of the ASCII hex digit c; anything else yields 0. */
static int HexDigit(int c)
{
   // Setting bit 5 folds 'A'..'F' onto 'a'..'f'
   int const Folded = c | 0x20;

   if (c >= '0' && c <= '9')
      return c - '0';
   if (Folded >= 'a' && Folded <= 'f')
      return Folded - 'a' + 10;
   return 0;
}
1068                                                                         /*}}}*/
1069 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1070 // ---------------------------------------------------------------------
1071 /* The length of the buffer must be exactly 1/2 the length of the string. */
1072 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1073 {
1074    if (Str.length() != Length*2)
1075       return false;
1076
1077    // Convert each digit. We store it in the same order as the string
1078    int J = 0;
1079    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1080    {
1081       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1082          return false;
1083
1084       Num[J] = HexDigit(I[0]) << 4;
1085       Num[J] += HexDigit(I[1]);
1086    }
1087
1088    return true;
1089 }
1090                                                                         /*}}}*/
1091 // TokSplitString - Split a string up by a given token                  /*{{{*/
1092 // ---------------------------------------------------------------------
/* This is intended to be a faster splitter; it does not allocate any
   dynamic memory. The input is modified: nulls are written at each token
   location. */
/* Split Input in place at every occurrence of Tok.
     Tok     - separator character
     Input   - NUL terminated buffer; NULs are written over the ends of the
               pieces
     List    - receives pointers into Input, one per piece, followed by a
               null pointer; white space around the pieces is trimmed and
               empty pieces are skipped
     ListMax - number of slots in List, including the one for the terminator
   Returns false if List is too small. In that case the pieces found so far
   are still stored and terminated. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without a single slot there is not even room for the terminator
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. isspace() is only defined for unsigned char
   // values, so plain (possibly negative) chars are converted first.
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator, give up here
         List[Count-1] = 0;
         return false;
      }

      // Advance pos. The NUL test steps over the terminator written above
      // when it landed on the separator itself.
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1130                                                                         /*}}}*/
1131 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1132 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get a vector we can iterate over. */
/* Split haystack at every occurrence of split.
   Empty pieces are kept, except that a separator at the very end of the
   string does not produce a trailing empty piece. An empty haystack yields
   an empty vector. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;

   // Work with offsets: every position used stays within [0, size()], no
   // iterator beyond end() is ever formed
   std::string::size_type start = 0;
   while (start < haystack.size())
   {
      std::string::size_type const end = haystack.find(split, start);
      if (end == std::string::npos)
      {
         // no further separator, the remainder is the last piece
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));
      start = end + 1;
   }
   return exploded;
}
1150                                                                         /*}}}*/
1151 // StringSplit - split a string into a string vector by token           /*{{{*/
1152 // ---------------------------------------------------------------------
1153 /* See header for details.
1154  */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> Parts;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return Parts;

   size_t Begin = 0;
   for (;;)
   {
      size_t const Hit = s.find(sep, Begin);
      bool const Last = (Hit == string::npos);

      // once this piece would reach maxsplit it swallows the remaining string
      if (Parts.size() + 1 >= maxsplit)
      {
         Parts.push_back(s.substr(Begin));
         break;
      }

      if (Last == true)
      {
         Parts.push_back(s.substr(Begin));
         break;
      }

      Parts.push_back(s.substr(Begin, Hit - Begin));
      Begin = Hit + sep.size();
   }
   return Parts;
}
1181                                                                         /*}}}*/
1182 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1183 // ---------------------------------------------------------------------
1184 /* */
1185 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1186                       const char **ListEnd)
1187 {
1188    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1189       R->Hit = false;
1190
1191    unsigned long Hits = 0;
1192    for (; ListBegin < ListEnd; ++ListBegin)
1193    {
1194       // Check if the name is a regex
1195       const char *I;
1196       bool Regex = true;
1197       for (I = *ListBegin; *I != 0; I++)
1198          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1199             break;
1200       if (*I == 0)
1201          Regex = false;
1202
1203       // Compile the regex pattern
1204       regex_t Pattern;
1205       if (Regex == true)
1206          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1207                      REG_NOSUB) != 0)
1208             Regex = false;
1209
1210       // Search the list
1211       bool Done = false;
1212       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1213       {
1214          if (R->Str[0] == 0)
1215             continue;
1216
1217          if (strcasecmp(R->Str,*ListBegin) != 0)
1218          {
1219             if (Regex == false)
1220                continue;
1221             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1222                continue;
1223          }
1224          Done = true;
1225
1226          if (R->Hit == false)
1227             Hits++;
1228
1229          R->Hit = true;
1230       }
1231
1232       if (Regex == true)
1233          regfree(&Pattern);
1234
1235       if (Done == false)
1236          _error->Warning(_("Selection %s not found"),*ListBegin);
1237    }
1238
1239    return Hits;
1240 }
1241                                                                         /*}}}*/
1242 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1243 // ---------------------------------------------------------------------
1244 /* This is used to make the internationalization strings easier to translate
1245    and to allow reordering of parameters */
/* Single formatting attempt shared by ioprintf and strprintf.
     out    - stream receiving the text on success
     format - printf style format string
     args   - started argument list; it is consumed by this call, so the
              caller has to restart it before trying again
     size   - buffer size to try; on failure it is raised to the size the
              next attempt should use
   Returns true once the formatted text has been written to out. */
static bool iovprintf(std::ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // The vector releases the buffer on every path, including a throwing
   // stream, and reports allocation failure instead of handing a null
   // pointer to vsnprintf
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;   // vsnprintf told us exactly how much room is needed
   else
      size *= 2;      // no length reported, simply try a bigger buffer
   return false;
}
/* printf into a C++ stream. */
void ioprintf(std::ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      // every va_start needs its va_end before the function returns, so
      // close the list on the successful attempt as well
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}
/* printf into a string; the previous content of out is replaced. */
void strprintf(std::string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1287                                                                         /*}}}*/
1288 // safe_snprintf - Safer snprintf                                       /*{{{*/
1289 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
/* Format into [Buffer,End).
   Returns a pointer to the terminating NUL of the written text, suitable
   as Buffer of a follow-up call, or End if there was no room at all, the
   output had to be truncated or formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // vsnprintf reports the length the complete text would have had, which
   // can be far more than the buffer holds. Compare lengths instead of
   // forming Buffer + Did, a pointer that may lie well beyond End.
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1309                                                                         /*}}}*/
1310 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1311 // ---------------------------------------------------------------------
/* Returns VerStr without everything up to and including its first colon;
   a version without a colon is returned unchanged. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(":");
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1319                                                                         /*}}}*/
1320 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1321 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ASCII chars anyway. */
/* Lower-case variant of c if it is one of 'A'..'Z', c itself otherwise. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   // upper and lower case letters are 32 apart in ASCII
   return IsUpper ? c + 32 : c;
}
1332                                                                         /*}}}*/
1333
1334 // CheckDomainList - See if Host is in a , separate list                /*{{{*/
1335 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1338 bool CheckDomainList(const string &Host,const string &List)
1339 {
1340    string::const_iterator Start = List.begin();
1341    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1342    {
1343       if (Cur < List.end() && *Cur != ',')
1344          continue;
1345
1346       // Match the end of the string..
1347       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1348           Cur - Start != 0 &&
1349           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1350          return true;
1351
1352       Start = Cur + 1;
1353    }
1354    return false;
1355 }
1356                                                                         /*}}}*/
1357 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1358 // ---------------------------------------------------------------------
1359 /* */
/* Number of entries in str_array in front of its terminating NULL. */
size_t strv_length(const char **str_array)
{
   const char **Cursor = str_array;
   while (*Cursor != NULL)
      ++Cursor;
   return Cursor - str_array;
}
1368
1369 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1370 // ---------------------------------------------------------------------
1371 /* */
/* Returns input with its backslash escapes resolved:
     \\    becomes a single backslash
     \0XX  becomes the character with octal code XX
     \xXX  becomes the character with hex code XX
   A backslash in front of any other character is dropped together with
   that character, a backslash at the very end is dropped as well. If \0 or
   \x is not followed by two more characters the escape introducer is
   dropped and the remaining characters are copied as they are. */
std::string DeEscapeString(const std::string &input)
{
   char tmp[3];
   std::string output;
   for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read after the backslash
      if (input.end() - it < 2)
         continue;

      // deal with double escape
      if (it[1] == '\\')
      {
         output += *it;
         // advance iterator one step further
         ++it;
         continue;
      }

      // read it
      ++it;
      switch (*it)
      {
         case '0':
         case 'x':
            // Both digits must lie in front of end(): requiring more than
            // two remaining characters (the type character plus two
            // digits) keeps it[2] inside the string and stops the
            // iterator from being moved past the end.
            if (input.end() - it > 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += (char)strtol(tmp, 0, (*it == '0') ? 8 : 16);
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1430                                                                         /*}}}*/
1431 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1432 // ---------------------------------------------------------------------
1433 /* This parses the URI into all of its components */
1434 void URI::CopyFrom(const string &U)
1435 {
1436    string::const_iterator I = U.begin();
1437
1438    // Locate the first colon, this separates the scheme
1439    for (; I < U.end() && *I != ':' ; ++I);
1440    string::const_iterator FirstColon = I;
1441
1442    /* Determine if this is a host type URI with a leading double //
1443       and then search for the first single / */
1444    string::const_iterator SingleSlash = I;
1445    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1446       SingleSlash += 3;
1447
1448    /* Find the / indicating the end of the hostname, ignoring /'s in the
1449       square brackets */
1450    bool InBracket = false;
1451    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1452    {
1453       if (*SingleSlash == '[')
1454          InBracket = true;
1455       if (InBracket == true && *SingleSlash == ']')
1456          InBracket = false;
1457    }
1458
1459    if (SingleSlash > U.end())
1460       SingleSlash = U.end();
1461
1462    // We can now write the access and path specifiers
1463    Access.assign(U.begin(),FirstColon);
1464    if (SingleSlash != U.end())
1465       Path.assign(SingleSlash,U.end());
1466    if (Path.empty() == true)
1467       Path = "/";
1468
1469    // Now we attempt to locate a user:pass@host fragment
1470    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1471       FirstColon += 3;
1472    else
1473       FirstColon += 1;
1474    if (FirstColon >= U.end())
1475       return;
1476
1477    if (FirstColon > SingleSlash)
1478       FirstColon = SingleSlash;
1479
1480    // Find the colon...
1481    I = FirstColon + 1;
1482    if (I > SingleSlash)
1483       I = SingleSlash;
1484    for (; I < SingleSlash && *I != ':'; ++I);
1485    string::const_iterator SecondColon = I;
1486
1487    // Search for the @ after the colon
1488    for (; I < SingleSlash && *I != '@'; ++I);
1489    string::const_iterator At = I;
1490
1491    // Now write the host and user/pass
1492    if (At == SingleSlash)
1493    {
1494       if (FirstColon < SingleSlash)
1495          Host.assign(FirstColon,SingleSlash);
1496    }
1497    else
1498    {
1499       Host.assign(At+1,SingleSlash);
1500       // username and password must be encoded (RFC 3986)
1501       User.assign(DeQuoteString(FirstColon,SecondColon));
1502       if (SecondColon < At)
1503          Password.assign(DeQuoteString(SecondColon+1,At));
1504    }
1505
1506    // Now we parse the RFC 2732 [] hostnames.
1507    unsigned long PortEnd = 0;
1508    InBracket = false;
1509    for (unsigned I = 0; I != Host.length();)
1510    {
1511       if (Host[I] == '[')
1512       {
1513          InBracket = true;
1514          Host.erase(I,1);
1515          continue;
1516       }
1517
1518       if (InBracket == true && Host[I] == ']')
1519       {
1520          InBracket = false;
1521          Host.erase(I,1);
1522          PortEnd = I;
1523          continue;
1524       }
1525       I++;
1526    }
1527
1528    // Tsk, weird.
1529    if (InBracket == true)
1530    {
1531       Host.clear();
1532       return;
1533    }
1534
1535    // Now we parse off a port number from the hostname
1536    Port = 0;
1537    string::size_type Pos = Host.rfind(':');
1538    if (Pos == string::npos || Pos < PortEnd)
1539       return;
1540
1541    Port = atoi(string(Host,Pos+1).c_str());
1542    Host.assign(Host,0,Pos);
1543 }
1544                                                                         /*}}}*/
1545 // URI::operator string - Convert the URI to a string                   /*{{{*/
1546 // ---------------------------------------------------------------------
1547 /* */
1548 URI::operator string()
1549 {
1550    string Res;
1551
1552    if (Access.empty() == false)
1553       Res = Access + ':';
1554
1555    if (Host.empty() == false)
1556    {
1557       if (Access.empty() == false)
1558          Res += "//";
1559
1560       if (User.empty() == false)
1561       {
1562          // FIXME: Technically userinfo is permitted even less
1563          // characters than these, but this is not conveniently
1564          // expressed with a blacklist.
1565          Res += QuoteString(User, ":/?#[]@");
1566          if (Password.empty() == false)
1567             Res += ":" + QuoteString(Password, ":/?#[]@");
1568          Res += "@";
1569       }
1570
1571       // Add RFC 2732 escaping characters
1572       if (Access.empty() == false &&
1573           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1574          Res += '[' + Host + ']';
1575       else
1576          Res += Host;
1577
1578       if (Port != 0)
1579       {
1580          char S[30];
1581          sprintf(S,":%u",Port);
1582          Res += S;
1583       }
1584    }
1585
1586    if (Path.empty() == false)
1587    {
1588       if (Path[0] != '/')
1589          Res += "/" + Path;
1590       else
1591          Res += Path;
1592    }
1593
1594    return Res;
1595 }
1596                                                                         /*}}}*/
1597 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1598 // ---------------------------------------------------------------------
1599 /* */
1600 string URI::SiteOnly(const string &URI)
1601 {
1602    ::URI U(URI);
1603    U.User.clear();
1604    U.Password.clear();
1605    U.Path.clear();
1606    return U;
1607 }
1608                                                                         /*}}}*/
1609 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1610 // ---------------------------------------------------------------------
1611 /* */
1612 string URI::NoUserPassword(const string &URI)
1613 {
1614    ::URI U(URI);
1615    U.User.clear();
1616    U.Password.clear();
1617    return U;
1618 }
1619                                                                         /*}}}*/