apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <sstream>
  27 #include <stdio.h>
  28 #include <algorithm>
  29 #include <unistd.h>
  30 #include <regex.h>
  31 #include <errno.h>
  32 #include <stdarg.h>
  33 #include <iconv.h>
  34
  35 #include <apti18n.h>
  36
  37 using namespace std;
  38                                                                         /*}}}*/
  39 // Strip - Remove white space from the front and back of a string       /*{{{*/
  40 // ---------------------------------------------------------------------
  41 namespace APT {
  42    namespace String {
  43 std::string Strip(const std::string &s)
  44 {
  45    size_t start = s.find_first_not_of(" \t\n");
  46    // only whitespace
  47    if (start == string::npos)
  48       return "";
  49    size_t end = s.find_last_not_of(" \t\n");
  50    return s.substr(start, end-start+1);
  51 }
  52 }
  53 }
  54                                                                         /*}}}*/
  55 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  56 // ---------------------------------------------------------------------
/* This is handy to use before displaying some information to the end user */
  58 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  59 {
  60   iconv_t cd;
  61   const char *inbuf;
  62   char *inptr, *outbuf;
  63   size_t insize, bufsize;
  64   dest->clear();
  65
  66   cd = iconv_open(codeset, "UTF-8");
  67   if (cd == (iconv_t)(-1)) {
  68      // Something went wrong
  69      if (errno == EINVAL)
  70         _error->Error("conversion from 'UTF-8' to '%s' not available",
  71                codeset);
  72      else
  73         perror("iconv_open");
  74
  75      return false;
  76   }
  77
  78   insize = bufsize = orig.size();
  79   inbuf = orig.data();
  80   inptr = (char *)inbuf;
  81   outbuf = new char[bufsize];
  82   size_t lastError = -1;
  83
  84   while (insize != 0)
  85   {
  86      char *outptr = outbuf;
  87      size_t outsize = bufsize;
  88      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  89      dest->append(outbuf, outptr - outbuf);
  90      if (err == (size_t)(-1))
  91      {
  92         switch (errno)
  93         {
  94         case EILSEQ:
  95            insize--;
  96            inptr++;
  97            // replace a series of unknown multibytes with a single "?"
  98            if (lastError != insize) {
  99               lastError = insize - 1;
 100               dest->append("?");
 101            }
 102            break;
 103         case EINVAL:
 104            insize = 0;
 105            break;
 106         case E2BIG:
 107            if (outptr == outbuf)
 108            {
 109               bufsize *= 2;
 110               delete[] outbuf;
 111               outbuf = new char[bufsize];
 112            }
 113            break;
 114         }
 115      }
 116   }
 117
 118   delete[] outbuf;
 119
 120   iconv_close(cd);
 121
 122   return true;
 123 }
 124                                                                         /*}}}*/
 125 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 126 // ---------------------------------------------------------------------
 127 /* This is handy to use when parsing a file. It also removes \n's left
 128    over from fgets and company */
 129 char *_strstrip(char *String)
 130 {
 131    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 132
 133    if (*String == 0)
 134       return String;
 135    return _strrstrip(String);
 136 }
 137                                                                         /*}}}*/
 138 // strrstrip - Remove white space from the back of a string     /*{{{*/
 139 // ---------------------------------------------------------------------
 140 char *_strrstrip(char *String)
 141 {
 142    char *End = String + strlen(String) - 1;
 143    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 144                                *End == '\r'); End--);
 145    End++;
 146    *End = 0;
 147    return String;
 148 };
 149                                                                         /*}}}*/
 150 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 151 // ---------------------------------------------------------------------
 152 /* */
 153 char *_strtabexpand(char *String,size_t Len)
 154 {
 155    for (char *I = String; I != I + Len && *I != 0; I++)
 156    {
 157       if (*I != '\t')
 158          continue;
 159       if (I + 8 > String + Len)
 160       {
 161          *I = 0;
 162          return String;
 163       }
 164
 165       /* Assume the start of the string is 0 and find the next 8 char
 166          division */
 167       int Len;
 168       if (String == I)
 169          Len = 1;
 170       else
 171          Len = 8 - ((String - I) % 8);
 172       Len -= 2;
 173       if (Len <= 0)
 174       {
 175          *I = ' ';
 176          continue;
 177       }
 178
 179       memmove(I + Len,I + 1,strlen(I) + 1);
 180       for (char *J = I; J + Len != I; *I = ' ', I++);
 181    }
 182    return String;
 183 }
 184                                                                         /*}}}*/
 185 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 186 // ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It can also
   understand [] brackets. */
 191 bool ParseQuoteWord(const char *&String,string &Res)
 192 {
 193    // Skip leading whitespace
 194    const char *C = String;
 195    for (;*C != 0 && *C == ' '; C++);
 196    if (*C == 0)
 197       return false;
 198
 199    // Jump to the next word
 200    for (;*C != 0 && isspace(*C) == 0; C++)
 201    {
 202       if (*C == '"')
 203       {
 204          C = strchr(C + 1, '"');
 205          if (C == NULL)
 206             return false;
 207       }
 208       if (*C == '[')
 209       {
 210          C = strchr(C + 1, ']');
 211          if (C == NULL)
 212             return false;
 213       }
 214    }
 215
 216    // Now de-quote characters
 217    char Buffer[1024];
 218    char Tmp[3];
 219    const char *Start = String;
 220    char *I;
 221    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 222    {
 223       if (*Start == '%' && Start + 2 < C &&
 224           isxdigit(Start[1]) && isxdigit(Start[2]))
 225       {
 226          Tmp[0] = Start[1];
 227          Tmp[1] = Start[2];
 228          Tmp[2] = 0;
 229          *I = (char)strtol(Tmp,0,16);
 230          Start += 3;
 231          continue;
 232       }
 233       if (*Start != '"')
 234          *I = *Start;
 235       else
 236          I--;
 237       Start++;
 238    }
 239    *I = 0;
 240    Res = Buffer;
 241
 242    // Skip ending white space
 243    for (;*C != 0 && isspace(*C) != 0; C++);
 244    String = C;
 245    return true;
 246 }
 247                                                                         /*}}}*/
 248 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 249 // ---------------------------------------------------------------------
 250 /* This expects a series of space separated strings enclosed in ""'s.
 251    It concatenates the ""'s into a single string. */
 252 bool ParseCWord(const char *&String,string &Res)
 253 {
 254    // Skip leading whitespace
 255    const char *C = String;
 256    for (;*C != 0 && *C == ' '; C++);
 257    if (*C == 0)
 258       return false;
 259
 260    char Buffer[1024];
 261    char *Buf = Buffer;
 262    if (strlen(String) >= sizeof(Buffer))
 263        return false;
 264
 265    for (; *C != 0; C++)
 266    {
 267       if (*C == '"')
 268       {
 269          for (C++; *C != 0 && *C != '"'; C++)
 270             *Buf++ = *C;
 271
 272          if (*C == 0)
 273             return false;
 274
 275          continue;
 276       }
 277
 278       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 279          continue;
 280       if (isspace(*C) == 0)
 281          return false;
 282       *Buf++ = ' ';
 283    }
 284    *Buf = 0;
 285    Res = Buffer;
 286    String = C;
 287    return true;
 288 }
 289                                                                         /*}}}*/
// QuoteString - Convert a string into quoted form                      /*{{{*/
 291 // ---------------------------------------------------------------------
 292 /* */
 293 string QuoteString(const string &Str, const char *Bad)
 294 {
 295    string Res;
 296    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 297    {
 298       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 299           *I == 0x25 || // percent '%' char
 300           *I <= 0x20 || *I >= 0x7F) // control chars
 301       {
 302          char Buf[10];
 303          sprintf(Buf,"%%%02x",(int)*I);
 304          Res += Buf;
 305       }
 306       else
 307          Res += *I;
 308    }
 309    return Res;
 310 }
 311                                                                         /*}}}*/
// DeQuoteString - Convert a string from quoted form                    /*{{{*/
 313 // ---------------------------------------------------------------------
 314 /* This undoes QuoteString */
 315 string DeQuoteString(const string &Str)
 316 {
 317    return DeQuoteString(Str.begin(),Str.end());
 318 }
 319 string DeQuoteString(string::const_iterator const &begin,
 320                         string::const_iterator const &end)
 321 {
 322    string Res;
 323    for (string::const_iterator I = begin; I != end; ++I)
 324    {
 325       if (*I == '%' && I + 2 < end &&
 326           isxdigit(I[1]) && isxdigit(I[2]))
 327       {
 328          char Tmp[3];
 329          Tmp[0] = I[1];
 330          Tmp[1] = I[2];
 331          Tmp[2] = 0;
 332          Res += (char)strtol(Tmp,0,16);
 333          I += 2;
 334          continue;
 335       }
 336       else
 337          Res += *I;
 338    }
 339    return Res;
 340 }
 341
 342                                                                         /*}}}*/
 343 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 344 // ---------------------------------------------------------------------
 345 /* A max of 4 digits are shown before conversion to the next highest unit.
 346    The max length of the string will be 5 chars unless the size is > 10
 347    YottaBytes (E24) */
 348 string SizeToStr(double Size)
 349 {
 350    char S[300];
 351    double ASize;
 352    if (Size >= 0)
 353       ASize = Size;
 354    else
 355       ASize = -1*Size;
 356
 357    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 358       ExaBytes, ZettaBytes, YottaBytes */
 359    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 360    int I = 0;
 361    while (I <= 8)
 362    {
 363       if (ASize < 100 && I != 0)
 364       {
 365          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 366          break;
 367       }
 368
 369       if (ASize < 10000)
 370       {
 371          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 372          break;
 373       }
 374       ASize /= 1000.0;
 375       I++;
 376    }
 377
 378    return S;
 379 }
 380                                                                         /*}}}*/
 381 // TimeToStr - Convert the time into a string                           /*{{{*/
 382 // ---------------------------------------------------------------------
 383 /* Converts a number of seconds to a hms format */
 384 string TimeToStr(unsigned long Sec)
 385 {
 386    char S[300];
 387
 388    while (1)
 389    {
 390       if (Sec > 60*60*24)
 391       {
 392          //d means days, h means hours, min means minutes, s means seconds
 393          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 394          break;
 395       }
 396
 397       if (Sec > 60*60)
 398       {
 399          //h means hours, min means minutes, s means seconds
 400          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 401          break;
 402       }
 403
 404       if (Sec > 60)
 405       {
 406          //min means minutes, s means seconds
 407          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 408          break;
 409       }
 410
 411       //s means seconds
 412       sprintf(S,_("%lis"),Sec);
 413       break;
 414    }
 415
 416    return S;
 417 }
 418                                                                         /*}}}*/
 419 // SubstVar - Substitute a string for another string                    /*{{{*/
 420 // ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str. */
 422 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 423 {
 424    string::size_type Pos = 0;
 425    string::size_type OldPos = 0;
 426    string Temp;
 427
 428    while (OldPos < Str.length() &&
 429           (Pos = Str.find(Subst,OldPos)) != string::npos)
 430    {
 431       Temp += string(Str,OldPos,Pos) + Contents;
 432       OldPos = Pos + Subst.length();
 433    }
 434
 435    if (OldPos == 0)
 436       return Str;
 437
 438    return Temp + string(Str,OldPos);
 439 }
 440
 441 string SubstVar(string Str,const struct SubstVar *Vars)
 442 {
 443    for (; Vars->Subst != 0; Vars++)
 444       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 445    return Str;
 446 }
 447                                                                         /*}}}*/
 448 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 449 // ---------------------------------------------------------------------
 450 /* Returns a string with the supplied separator depth + 1 times in it */
 451 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 452 {
 453    std::string output = "";
 454    for(unsigned long d=Depth+1; d > 0; d--)
 455       output.append(Separator);
 456    return output;
 457 }
 458                                                                         /*}}}*/
 459 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 460 // ---------------------------------------------------------------------
 461 /* This converts a URI into a safe filename. It quotes all unsafe characters
 462    and converts / to _ and removes the scheme identifier. The resulting
 463    file name should be unique and never occur again for a different file */
 464 string URItoFileName(const string &URI)
 465 {
 466    // Nuke 'sensitive' items
 467    ::URI U(URI);
 468    U.User.clear();
 469    U.Password.clear();
 470    U.Access.clear();
 471
 472    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 473    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 474    replace(NewURI.begin(),NewURI.end(),'/','_');
 475    return NewURI;
 476 }
 477                                                                         /*}}}*/
 478 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 479 // ---------------------------------------------------------------------
 480 /* This routine performs a base64 transformation on a string. It was ripped
 481    from wget and then patched and bug fixed.
 482
 483    This spec can be found in rfc2045 */
 484 string Base64Encode(const string &S)
 485 {
 486    // Conversion table.
 487    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 488                           'I','J','K','L','M','N','O','P',
 489                           'Q','R','S','T','U','V','W','X',
 490                           'Y','Z','a','b','c','d','e','f',
 491                           'g','h','i','j','k','l','m','n',
 492                           'o','p','q','r','s','t','u','v',
 493                           'w','x','y','z','0','1','2','3',
 494                           '4','5','6','7','8','9','+','/'};
 495
 496    // Pre-allocate some space
 497    string Final;
 498    Final.reserve((4*S.length() + 2)/3 + 2);
 499
 500    /* Transform the 3x8 bits to 4x6 bits, as required by
 501       base64.  */
 502    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 503    {
 504       char Bits[3] = {0,0,0};
 505       Bits[0] = I[0];
 506       if (I + 1 < S.end())
 507          Bits[1] = I[1];
 508       if (I + 2 < S.end())
 509          Bits[2] = I[2];
 510
 511       Final += tbl[Bits[0] >> 2];
 512       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 513
 514       if (I + 1 >= S.end())
 515          break;
 516
 517       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 518
 519       if (I + 2 >= S.end())
 520          break;
 521
 522       Final += tbl[Bits[2] & 0x3f];
 523    }
 524
 525    /* Apply the padding elements, this tells how many bytes the remote
 526       end should discard */
 527    if (S.length() % 3 == 2)
 528       Final += '=';
 529    if (S.length() % 3 == 1)
 530       Final += "==";
 531
 532    return Final;
 533 }
 534                                                                         /*}}}*/
 535 // stringcmp - Arbitrary string compare                                 /*{{{*/
 536 // ---------------------------------------------------------------------
 537 /* This safely compares two non-null terminated strings of arbitrary
 538    length */
 539 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 540 {
 541    for (; A != AEnd && B != BEnd; A++, B++)
 542       if (*A != *B)
 543          break;
 544
 545    if (A == AEnd && B == BEnd)
 546       return 0;
 547    if (A == AEnd)
 548       return 1;
 549    if (B == BEnd)
 550       return -1;
 551    if (*A < *B)
 552       return -1;
 553    return 1;
 554 }
 555
 556 #if __GNUC__ >= 3
 557 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 558               const char *B,const char *BEnd)
 559 {
 560    for (; A != AEnd && B != BEnd; A++, B++)
 561       if (*A != *B)
 562          break;
 563
 564    if (A == AEnd && B == BEnd)
 565       return 0;
 566    if (A == AEnd)
 567       return 1;
 568    if (B == BEnd)
 569       return -1;
 570    if (*A < *B)
 571       return -1;
 572    return 1;
 573 }
 574 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 575               string::const_iterator B,string::const_iterator BEnd)
 576 {
 577    for (; A != AEnd && B != BEnd; A++, B++)
 578       if (*A != *B)
 579          break;
 580
 581    if (A == AEnd && B == BEnd)
 582       return 0;
 583    if (A == AEnd)
 584       return 1;
 585    if (B == BEnd)
 586       return -1;
 587    if (*A < *B)
 588       return -1;
 589    return 1;
 590 }
 591 #endif
 592                                                                         /*}}}*/
 593 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 594 // ---------------------------------------------------------------------
 595 /* */
 596 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 597 {
 598    for (; A != AEnd && B != BEnd; A++, B++)
 599       if (tolower_ascii(*A) != tolower_ascii(*B))
 600          break;
 601
 602    if (A == AEnd && B == BEnd)
 603       return 0;
 604    if (A == AEnd)
 605       return 1;
 606    if (B == BEnd)
 607       return -1;
 608    if (tolower_ascii(*A) < tolower_ascii(*B))
 609       return -1;
 610    return 1;
 611 }
 612 #if __GNUC__ >= 3
 613 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 614                   const char *B,const char *BEnd)
 615 {
 616    for (; A != AEnd && B != BEnd; A++, B++)
 617       if (tolower_ascii(*A) != tolower_ascii(*B))
 618          break;
 619
 620    if (A == AEnd && B == BEnd)
 621       return 0;
 622    if (A == AEnd)
 623       return 1;
 624    if (B == BEnd)
 625       return -1;
 626    if (tolower_ascii(*A) < tolower_ascii(*B))
 627       return -1;
 628    return 1;
 629 }
 630 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 631                   string::const_iterator B,string::const_iterator BEnd)
 632 {
 633    for (; A != AEnd && B != BEnd; A++, B++)
 634       if (tolower_ascii(*A) != tolower_ascii(*B))
 635          break;
 636
 637    if (A == AEnd && B == BEnd)
 638       return 0;
 639    if (A == AEnd)
 640       return 1;
 641    if (B == BEnd)
 642       return -1;
 643    if (tolower_ascii(*A) < tolower_ascii(*B))
 644       return -1;
 645    return 1;
 646 }
 647 #endif
 648                                                                         /*}}}*/
// LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 650 // ---------------------------------------------------------------------
 651 /* The format is like those used in package files and the method
 652    communication system */
 653 string LookupTag(const string &Message,const char *Tag,const char *Default)
 654 {
 655    // Look for a matching tag.
 656    int Length = strlen(Tag);
 657    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 658    {
 659       // Found the tag
 660       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 661       {
 662          // Find the end of line and strip the leading/trailing spaces
 663          string::const_iterator J;
 664          I += Length + 1;
 665          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 666          for (J = I; *J != '\n' && J < Message.end(); ++J);
 667          for (; J > I && isspace(J[-1]) != 0; --J);
 668
 669          return string(I,J);
 670       }
 671
 672       for (; *I != '\n' && I < Message.end(); ++I);
 673    }
 674
 675    // Failed to find a match
 676    if (Default == 0)
 677       return string();
 678    return Default;
 679 }
 680                                                                         /*}}}*/
 681 // StringToBool - Converts a string into a boolean                      /*{{{*/
 682 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
 685 int StringToBool(const string &Text,int Default)
 686 {
 687    char *End;
 688    int Res = strtol(Text.c_str(),&End,0);
 689    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 690       return Res;
 691
 692    // Check for positives
 693    if (strcasecmp(Text.c_str(),"no") == 0 ||
 694        strcasecmp(Text.c_str(),"false") == 0 ||
 695        strcasecmp(Text.c_str(),"without") == 0 ||
 696        strcasecmp(Text.c_str(),"off") == 0 ||
 697        strcasecmp(Text.c_str(),"disable") == 0)
 698       return 0;
 699
 700    // Check for negatives
 701    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 702        strcasecmp(Text.c_str(),"true") == 0 ||
 703        strcasecmp(Text.c_str(),"with") == 0 ||
 704        strcasecmp(Text.c_str(),"on") == 0 ||
 705        strcasecmp(Text.c_str(),"enable") == 0)
 706       return 1;
 707
 708    return Default;
 709 }
 710                                                                         /*}}}*/
 711 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 712 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
 715 string TimeRFC1123(time_t Date)
 716 {
 717    struct tm Conv;
 718    if (gmtime_r(&Date, &Conv) == NULL)
 719       return "";
 720
 721    char Buf[300];
 722    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 723    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 724                           "Aug","Sep","Oct","Nov","Dec"};
 725
 726    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 727            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 728            Conv.tm_min,Conv.tm_sec);
 729    return Buf;
 730 }
 731                                                                         /*}}}*/
 732 // ReadMessages - Read messages from the FD                             /*{{{*/
 733 // ---------------------------------------------------------------------
 734 /* This pulls full messages from the input FD into the message buffer.
 735    It assumes that messages will not pause during transit so no
 736    fancy buffering is used.
 737
 738    In particular: this reads blocks from the input until it believes
 739    that it's run out of input text.  Each block is terminated by a
 740    double newline ('\n' followed by '\n').  As noted below, there is a
 741    bug in this code: it assumes that all the blocks have been read if
 742    it doesn't see additional text in the buffer after the last one is
 743    parsed, which will cause it to lose blocks if the last block
 744    coincides with the end of the buffer.
 745  */
 746 bool ReadMessages(int Fd, vector<string> &List)
 747 {
 748    char Buffer[64000];
 749    char *End = Buffer;
 750    // Represents any left-over from the previous iteration of the
 751    // parse loop.  (i.e., if a message is split across the end
 752    // of the buffer, it goes here)
 753    string PartialMessage;
 754
 755    while (1)
 756    {
 757       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 758       if (Res < 0 && errno == EINTR)
 759          continue;
 760
 761       // Process is dead, this is kind of bad..
 762       if (Res == 0)
 763          return false;
 764
 765       // No data
 766       if (Res < 0 && errno == EAGAIN)
 767          return true;
 768       if (Res < 0)
 769          return false;
 770
 771       End += Res;
 772
 773       // Look for the end of the message
 774       for (char *I = Buffer; I + 1 < End; I++)
 775       {
 776          if (I[1] != '\n' ||
 777                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 778             continue;
 779
 780          // Pull the message out
 781          string Message(Buffer,I-Buffer);
 782          PartialMessage += Message;
 783
 784          // Fix up the buffer
 785          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 786          End -= I-Buffer;
 787          memmove(Buffer,I,End-Buffer);
 788          I = Buffer;
 789
 790          List.push_back(PartialMessage);
 791          PartialMessage.clear();
 792       }
 793       if (End != Buffer)
 794         {
 795           // If there's text left in the buffer, store it
 796           // in PartialMessage and throw the rest of the buffer
 797           // away.  This allows us to handle messages that
 798           // are longer than the static buffer size.
 799           PartialMessage += string(Buffer, End);
 800           End = Buffer;
 801         }
 802       else
 803         {
 804           // BUG ALERT: if a message block happens to end at a
 805           // multiple of 64000 characters, this will cause it to
 806           // terminate early, leading to a badly formed block and
 807           // probably crashing the method.  However, this is the only
 808           // way we have to find the end of the message block.  I have
 809           // an idea of how to fix this, but it will require changes
 810           // to the protocol (essentially to mark the beginning and
 811           // end of the block).
 812           //
 813           //  -- dburrows 2008-04-02
 814           return true;
 815         }
 816
 817       if (WaitFd(Fd) == false)
 818          return false;
 819    }
 820 }
 821                                                                         /*}}}*/
 822 // MonthConv - Converts a month string into a number                    /*{{{*/
 823 // ---------------------------------------------------------------------
 824 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 825    Made it a bit more robust with a few tolower_ascii though. */
 826 static int MonthConv(char *Month)
 827 {
 828    switch (tolower_ascii(*Month))
 829    {
 830       case 'a':
 831       return tolower_ascii(Month[1]) == 'p'?3:7;
 832       case 'd':
 833       return 11;
 834       case 'f':
 835       return 1;
 836       case 'j':
 837       if (tolower_ascii(Month[1]) == 'a')
 838          return 0;
 839       return tolower_ascii(Month[2]) == 'n'?5:6;
 840       case 'm':
 841       return tolower_ascii(Month[2]) == 'r'?2:4;
 842       case 'n':
 843       return 10;
 844       case 'o':
 845       return 9;
 846       case 's':
 847       return 8;
 848
 849       // Pretend it is January..
 850       default:
 851       return 0;
 852    }
 853 }
 854                                                                         /*}}}*/
 855 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 856 // ---------------------------------------------------------------------
 857 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 858    than local timezone (mktime assumes the latter).
 859
 860    This function is a nonstandard GNU extension that is also present on
 861    the BSDs and maybe other systems. For others we follow the advice of
 862    the manpage of timegm and use his portable replacement. */
 863 #ifndef HAVE_TIMEGM
 864 static time_t timegm(struct tm *t)
 865 {
 866    char *tz = getenv("TZ");
 867    setenv("TZ", "", 1);
 868    tzset();
 869    time_t ret = mktime(t);
 870    if (tz)
 871       setenv("TZ", tz, 1);
 872    else
 873       unsetenv("TZ");
 874    tzset();
 875    return ret;
 876 }
 877 #endif
 878                                                                         /*}}}*/
 879 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 880 // ---------------------------------------------------------------------
 881 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
 882    with one exception: All timezones (%Z) are accepted but the protocol
 883    says that it MUST be GMT, but this one is equal to UTC which we will
 884    encounter from time to time (e.g. in Release files) so we accept all
 885    here and just assume it is GMT (or UTC) later on */
 886 bool RFC1123StrToTime(const char* const str,time_t &time)
 887 {
 888    struct tm Tm;
 889    setlocale (LC_ALL,"C");
 890    bool const invalid =
 891    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 892       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 893    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 894        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 895    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 896        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 897    setlocale (LC_ALL,"");
 898    if (invalid == true)
 899       return false;
 900
 901    time = timegm(&Tm);
 902    return true;
 903 }
 904                                                                         /*}}}*/
 905 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 906 // ---------------------------------------------------------------------
 907 /* */
 908 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 909 {
 910    struct tm Tm;
 911    // MDTM includes no whitespaces but recommend and ignored by strptime
 912    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 913       return false;
 914
 915    time = timegm(&Tm);
 916    return true;
 917 }
 918                                                                         /*}}}*/
 919 // StrToTime - Converts a string into a time_t                          /*{{{*/
 920 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format */
 926 bool StrToTime(const string &Val,time_t &Result)
 927 {
 928    struct tm Tm;
 929    char Month[10];
 930
 931    // Skip the day of the week
 932    const char *I = strchr(Val.c_str(), ' ');
 933
 934    // Handle RFC 1123 time
 935    Month[0] = 0;
 936    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 937               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 938    {
 939       // Handle RFC 1036 time
 940       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 941                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 942          Tm.tm_year += 1900;
 943       else
 944       {
 945          // asctime format
 946          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 947                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 948          {
 949             // 'ftp' time
 950             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 951                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 952                return false;
 953             Tm.tm_mon--;
 954          }
 955       }
 956    }
 957
 958    Tm.tm_isdst = 0;
 959    if (Month[0] != 0)
 960       Tm.tm_mon = MonthConv(Month);
 961    else
 962       Tm.tm_mon = 0; // we don't have a month, so pick something
 963    Tm.tm_year -= 1900;
 964
 965    // Convert to local time and then to GMT
 966    Result = timegm(&Tm);
 967    return true;
 968 }
 969                                                                         /*}}}*/
 970 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 971 // ---------------------------------------------------------------------
 972 /* This is used in decoding the crazy fixed length string headers in
 973    tar and ar files. */
 974 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 975 {
 976    char S[30];
 977    if (Len >= sizeof(S))
 978       return false;
 979    memcpy(S,Str,Len);
 980    S[Len] = 0;
 981
 982    // All spaces is a zero
 983    Res = 0;
 984    unsigned I;
 985    for (I = 0; S[I] == ' '; I++);
 986    if (S[I] == 0)
 987       return true;
 988
 989    char *End;
 990    Res = strtoul(S,&End,Base);
 991    if (End == S)
 992       return false;
 993
 994    return true;
 995 }
 996                                                                         /*}}}*/
 997 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 998 // ---------------------------------------------------------------------
 999 /* This is used in decoding the crazy fixed length string headers in
1000    tar and ar files. */
/* Converts the first Len bytes of Str (which need not be terminated) to
   a number in the given Base and stores it in Res. A field consisting
   only of spaces yields zero. Returns false - with Res set to zero - if
   the field is too long for the internal buffer, holds no digits, is
   negative or does not fit into an unsigned long long. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I;
   for (I = 0; S[I] == ' '; I++);
   if (S[I] == 0)
      return true;

   // strtoull() accepts a sign and would silently negate the value, which
   // makes no sense for the unsigned header fields we decode here
   const char *First = S + I;
   while (isspace(static_cast<unsigned char>(*First)) != 0)
      ++First;
   if (*First == '-')
      return false;

   // errno is the only way to detect that the value was out of range
   char *End;
   errno = 0;
   unsigned long long const Value = strtoull(S,&End,Base);
   if (End == S || errno != 0)
      return false;

   Res = Value;
   return true;
}
1023                                                                         /*}}}*/
1024
1025 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1026 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
/* Decodes a big-endian base-256 field of Len bytes into Res. The top bit
   of the first byte marks the encoding and is not part of the value.
   Returns false, leaving Res untouched, if the field is empty, the marker
   bit is missing or the value does not fit into an unsigned long. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   unsigned long Value = Str[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
   {
      // the next shift would push set bits out of the top byte
      if ((Value >> (sizeof(Value) * 8 - 8)) != 0)
         return false;
      // go through unsigned char: plain char may be signed and bytes
      // >= 0x80 would otherwise be sign-extended into the result
      Value = (Value << 8) | static_cast<unsigned char>(Str[i]);
   }

   Res = Value;
   return true;
}
1041                                                                         /*}}}*/
1042 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1043 // ---------------------------------------------------------------------
1044 /* Helper for Hex2Num */
/* Maps one hexadecimal digit character (either case) to its value 0-15.
   Anything that is not a hex digit maps to 0. */
static int HexDigit(int c)
{
   int value = 0;
   if ('0' <= c && c <= '9')
      value = c - '0';
   else if ('a' <= c && c <= 'f')
      value = 10 + (c - 'a');
   else if ('A' <= c && c <= 'F')
      value = 10 + (c - 'A');
   return value;
}
1055                                                                         /*}}}*/
1056 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1057 // ---------------------------------------------------------------------
1058 /* The length of the buffer must be exactly 1/2 the length of the string. */
1059 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1060 {
1061    if (Str.length() != Length*2)
1062       return false;
1063
1064    // Convert each digit. We store it in the same order as the string
1065    int J = 0;
1066    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1067    {
1068       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1069          return false;
1070
1071       Num[J] = HexDigit(I[0]) << 4;
1072       Num[J] += HexDigit(I[1]);
1073    }
1074
1075    return true;
1076 }
1077                                                                         /*}}}*/
1078 // TokSplitString - Split a string up by a given token                  /*{{{*/
1079 // ---------------------------------------------------------------------
1080 /* This is intended to be a faster splitter, it does not use dynamic
1081    memories. Input is changed to insert nulls at each token location. */
/* Splits Input in place at every Tok. Pointers to the pieces, with the
   surrounding whitespace removed, are stored in List, followed by a null
   pointer. ListMax is the number of slots in List including the one for
   that terminator. Returns false if the pieces do not fit; List then
   holds the first ListMax-1 pieces and the terminator (or is left
   untouched if ListMax is zero). */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without a single slot there is not even room for the terminator
   if (ListMax == 0)
      return false;

   // Strip any leading spaces
   // (isspace() needs unsigned char values, plain char may be negative)
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // the slot just used is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos past the token, the following whitespace and any
      // terminator written above
      for (; Pos != Stop && (*Pos == Tok ||
             isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1117                                                                         /*}}}*/
1118 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1119 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector */
/* Splits haystack at every occurrence of split and returns the pieces in
   order. Empty pieces between two separators are kept, a single trailing
   separator does not produce a final empty piece, and an empty haystack
   yields one empty string. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   string::const_iterator start = haystack.begin();
   while (true)
   {
      // find the end of the current piece
      string::const_iterator end = start;
      for (; end != haystack.end() && *end != split; ++end);
      exploded.push_back(string(start, end));

      // Only step over a separator that is really there, so no iterator
      // is ever moved beyond haystack.end()
      if (end == haystack.end())
         break;
      start = end + 1;
      if (start == haystack.end())
         break;
   }
   return exploded;
}
1135                                                                         /*}}}*/
1136 // StringSplit - split a string into a string vector by token           /*{{{*/
1137 // ---------------------------------------------------------------------
1138 /* See header for details.
1139  */
/* Cuts s at every occurrence of sep and returns the pieces. At most
   maxsplit pieces are produced; once that limit is reached the last piece
   holds the whole remainder including any further separators. An empty
   sep gives an empty result. */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> parts;

   // splitting by nothing is bogus
   if (sep.empty() == true)
      return parts;

   size_t const sepLength = sep.size();
   size_t begin = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, begin);

      // the piece about to be added is the last one allowed, or there is
      // no further separator: either way the remainder is the final item
      if (parts.size() + 1 >= maxsplit || hit == string::npos)
      {
         parts.push_back(s.substr(begin));
         break;
      }

      parts.push_back(s.substr(begin, hit - begin));
      begin = hit + sepLength;
   }
   return parts;
}
1166                                                                         /*}}}*/
1167 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1168 // ---------------------------------------------------------------------
1169 /* */
1170 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1171                       const char **ListEnd)
1172 {
1173    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1174       R->Hit = false;
1175
1176    unsigned long Hits = 0;
1177    for (; ListBegin != ListEnd; ListBegin++)
1178    {
1179       // Check if the name is a regex
1180       const char *I;
1181       bool Regex = true;
1182       for (I = *ListBegin; *I != 0; I++)
1183          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1184             break;
1185       if (*I == 0)
1186          Regex = false;
1187
1188       // Compile the regex pattern
1189       regex_t Pattern;
1190       if (Regex == true)
1191          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1192                      REG_NOSUB) != 0)
1193             Regex = false;
1194
1195       // Search the list
1196       bool Done = false;
1197       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1198       {
1199          if (R->Str[0] == 0)
1200             continue;
1201
1202          if (strcasecmp(R->Str,*ListBegin) != 0)
1203          {
1204             if (Regex == false)
1205                continue;
1206             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1207                continue;
1208          }
1209          Done = true;
1210
1211          if (R->Hit == false)
1212             Hits++;
1213
1214          R->Hit = true;
1215       }
1216
1217       if (Regex == true)
1218          regfree(&Pattern);
1219
1220       if (Done == false)
1221          _error->Warning(_("Selection %s not found"),*ListBegin);
1222    }
1223
1224    return Hits;
1225 }
1226                                                                         /*}}}*/
1227 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1228 // ---------------------------------------------------------------------
1229 /* This is used to make the internationalization strings easier to translate
1230    and to allow reordering of parameters */
/* Formats one attempt into a temporary buffer of size bytes. On success
   the text is written to out and true is returned. Otherwise nothing is
   written, size is updated to the buffer size the next attempt should
   use and false is returned. args is consumed by the call, so the caller
   has to restart it before trying again. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // the vector releases the buffer on every path and reports a failed
   // allocation instead of passing a null pointer to vsnprintf()
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;  // we were told how much is needed (plus terminator)
   else
      size *= 2;     // no hint available, simply try a bigger buffer
   return false;
}
/* printf-style output to a stream; retries with a larger buffer until the
   formatted text fits. */
void ioprintf(ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      // every va_start() is paired with a va_end(), also for the attempt
      // that finally succeeds
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}
/* printf-style formatting into a string; out is replaced by the result. */
void strprintf(string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1272                                                                         /*}}}*/
1273 // safe_snprintf - Safer snprintf                                       /*{{{*/
1274 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
/* Formats into [Buffer,End) and returns a pointer to the terminator of
   the written text, so the result can be passed as Buffer of a following
   call. End is returned if there is no room at all, if the output had to
   be truncated or if formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // vsnprintf() reports the length the full text would have had; compare
   // lengths instead of forming a pointer which may lie beyond the buffer
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1294                                                                         /*}}}*/
1295 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1296 // ---------------------------------------------------------------------
/* Returns VerStr without everything up to and including its first colon;
   a version without a colon is returned unchanged. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1304                                                                         /*}}}*/
1305 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1306 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is noticeably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
/* Returns the lowercase counterpart of an ASCII uppercase letter and
   every other value unchanged. */
int tolower_ascii(int const c)
{
   bool const isUpper = ('A' <= c && c <= 'Z');
   return isUpper ? c + ('a' - 'A') : c;
}
1317                                                                         /*}}}*/
1318
1319 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1320 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1323 bool CheckDomainList(const string &Host,const string &List)
1324 {
1325    string::const_iterator Start = List.begin();
1326    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1327    {
1328       if (Cur < List.end() && *Cur != ',')
1329          continue;
1330
1331       // Match the end of the string..
1332       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1333           Cur - Start != 0 &&
1334           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1335          return true;
1336
1337       Start = Cur + 1;
1338    }
1339    return false;
1340 }
1341                                                                         /*}}}*/
1342 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1343 // ---------------------------------------------------------------------
1344 /* */
/* Counts the entries of str_array in front of its terminating NULL. */
size_t strv_length(const char **str_array)
{
   size_t count = 0;
   while (str_array[count] != NULL)
      ++count;
   return count;
}
1353
1354 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1355 // ---------------------------------------------------------------------
1356 /* */
/* Returns input with its backslash escapes resolved:
     \\    becomes a single backslash
     \0XX  becomes the byte with the octal value XX
     \xXX  becomes the byte with the hexadecimal value XX
   Every other escaped character is dropped together with its backslash,
   as is a lone backslash at the very end. A \0 or \x which is not
   followed by two more characters is dropped and whatever follows it is
   copied as ordinary text. */
string DeEscapeString(const string &input)
{
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (it + 1 == input.end())
         continue;

      // read it
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
         output += '\\';
         continue;
      }

      int base;
      switch (*it)
      {
         case '0':
            base = 8;
            break;
         case 'x':
            base = 16;
            break;
         default:
            // FIXME: raise exception here?
            base = 0;
            break;
      }
      if (base == 0)
         continue;

      // Both digits have to lie inside the string: reading it[2] needs
      // more than two characters left, and the iterator must not end up
      // on input.end() before the loop increments it again
      if (input.end() - it > 2)
      {
         tmp[0] = it[1];
         tmp[1] = it[2];
         tmp[2] = 0;
         output += (char)strtol(tmp, 0, base);
         it += 2;
      }
   }
   return output;
}
1415                                                                         /*}}}*/
1416 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1417 // ---------------------------------------------------------------------
1418 /* This parses the URI into all of its components */
1419 void URI::CopyFrom(const string &U)
1420 {
1421    string::const_iterator I = U.begin();
1422
1423    // Locate the first colon, this separates the scheme
1424    for (; I < U.end() && *I != ':' ; ++I);
1425    string::const_iterator FirstColon = I;
1426
1427    /* Determine if this is a host type URI with a leading double //
1428       and then search for the first single / */
1429    string::const_iterator SingleSlash = I;
1430    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1431       SingleSlash += 3;
1432
1433    /* Find the / indicating the end of the hostname, ignoring /'s in the
1434       square brackets */
1435    bool InBracket = false;
1436    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1437    {
1438       if (*SingleSlash == '[')
1439          InBracket = true;
1440       if (InBracket == true && *SingleSlash == ']')
1441          InBracket = false;
1442    }
1443
1444    if (SingleSlash > U.end())
1445       SingleSlash = U.end();
1446
1447    // We can now write the access and path specifiers
1448    Access.assign(U.begin(),FirstColon);
1449    if (SingleSlash != U.end())
1450       Path.assign(SingleSlash,U.end());
1451    if (Path.empty() == true)
1452       Path = "/";
1453
1454    // Now we attempt to locate a user:pass@host fragment
1455    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1456       FirstColon += 3;
1457    else
1458       FirstColon += 1;
1459    if (FirstColon >= U.end())
1460       return;
1461
1462    if (FirstColon > SingleSlash)
1463       FirstColon = SingleSlash;
1464
1465    // Find the colon...
1466    I = FirstColon + 1;
1467    if (I > SingleSlash)
1468       I = SingleSlash;
1469    for (; I < SingleSlash && *I != ':'; ++I);
1470    string::const_iterator SecondColon = I;
1471
1472    // Search for the @ after the colon
1473    for (; I < SingleSlash && *I != '@'; ++I);
1474    string::const_iterator At = I;
1475
1476    // Now write the host and user/pass
1477    if (At == SingleSlash)
1478    {
1479       if (FirstColon < SingleSlash)
1480          Host.assign(FirstColon,SingleSlash);
1481    }
1482    else
1483    {
1484       Host.assign(At+1,SingleSlash);
1485       // username and password must be encoded (RFC 3986)
1486       User.assign(DeQuoteString(FirstColon,SecondColon));
1487       if (SecondColon < At)
1488          Password.assign(DeQuoteString(SecondColon+1,At));
1489    }
1490
1491    // Now we parse the RFC 2732 [] hostnames.
1492    unsigned long PortEnd = 0;
1493    InBracket = false;
1494    for (unsigned I = 0; I != Host.length();)
1495    {
1496       if (Host[I] == '[')
1497       {
1498          InBracket = true;
1499          Host.erase(I,1);
1500          continue;
1501       }
1502
1503       if (InBracket == true && Host[I] == ']')
1504       {
1505          InBracket = false;
1506          Host.erase(I,1);
1507          PortEnd = I;
1508          continue;
1509       }
1510       I++;
1511    }
1512
1513    // Tsk, weird.
1514    if (InBracket == true)
1515    {
1516       Host.clear();
1517       return;
1518    }
1519
1520    // Now we parse off a port number from the hostname
1521    Port = 0;
1522    string::size_type Pos = Host.rfind(':');
1523    if (Pos == string::npos || Pos < PortEnd)
1524       return;
1525
1526    Port = atoi(string(Host,Pos+1).c_str());
1527    Host.assign(Host,0,Pos);
1528 }
1529                                                                         /*}}}*/
1530 // URI::operator string - Convert the URI to a string                   /*{{{*/
1531 // ---------------------------------------------------------------------
1532 /* */
1533 URI::operator string()
1534 {
1535    string Res;
1536
1537    if (Access.empty() == false)
1538       Res = Access + ':';
1539
1540    if (Host.empty() == false)
1541    {
1542       if (Access.empty() == false)
1543          Res += "//";
1544
1545       if (User.empty() == false)
1546       {
1547          // FIXME: Technically userinfo is permitted even less
1548          // characters than these, but this is not conveniently
1549          // expressed with a blacklist.
1550          Res += QuoteString(User, ":/?#[]@");
1551          if (Password.empty() == false)
1552             Res += ":" + QuoteString(Password, ":/?#[]@");
1553          Res += "@";
1554       }
1555
1556       // Add RFC 2732 escaping characters
1557       if (Access.empty() == false &&
1558           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1559          Res += '[' + Host + ']';
1560       else
1561          Res += Host;
1562
1563       if (Port != 0)
1564       {
1565          char S[30];
1566          sprintf(S,":%u",Port);
1567          Res += S;
1568       }
1569    }
1570
1571    if (Path.empty() == false)
1572    {
1573       if (Path[0] != '/')
1574          Res += "/" + Path;
1575       else
1576          Res += Path;
1577    }
1578
1579    return Res;
1580 }
1581                                                                         /*}}}*/
1582 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1583 // ---------------------------------------------------------------------
1584 /* */
1585 string URI::SiteOnly(const string &URI)
1586 {
1587    ::URI U(URI);
1588    U.User.clear();
1589    U.Password.clear();
1590    U.Path.clear();
1591    return U;
1592 }
1593                                                                         /*}}}*/
1594 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1595 // ---------------------------------------------------------------------
1596 /* */
1597 string URI::NoUserPassword(const string &URI)
1598 {
1599    ::URI U(URI);
1600    U.User.clear();
1601    U.Password.clear();
1602    return U;
1603 }
1604                                                                         /*}}}*/