apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <config.h>
  19
  20 #include <apt-pkg/strutl.h>
  21 #include <apt-pkg/fileutl.h>
  22 #include <apt-pkg/error.h>
  23
  24 #include <stddef.h>
  25 #include <stdlib.h>
  26 #include <time.h>
  27 #include <string>
  28 #include <vector>
  29 #include <ctype.h>
  30 #include <string.h>
  31 #include <sstream>
  32 #include <stdio.h>
  33 #include <algorithm>
  34 #include <unistd.h>
  35 #include <regex.h>
  36 #include <errno.h>
  37 #include <stdarg.h>
  38 #include <iconv.h>
  39
  40 #include <apti18n.h>
  41                                                                         /*}}}*/
  42 using namespace std;
  43
  44 // Strip - Remove white space from the front and back of a string       /*{{{*/
  45 // ---------------------------------------------------------------------
  46 namespace APT {
  47    namespace String {
  48 std::string Strip(const std::string &s)
  49 {
  50    size_t start = s.find_first_not_of(" \t\n");
  51    // only whitespace
  52    if (start == string::npos)
  53       return "";
  54    size_t end = s.find_last_not_of(" \t\n");
  55    return s.substr(start, end-start+1);
  56 }
  57
  58 bool Endswith(const std::string &s, const std::string &end)
  59 {
  60    if (end.size() > s.size())
  61       return false;
  62    return (s.substr(s.size() - end.size(), s.size()) == end);
  63 }
  64
  65 bool Startswith(const std::string &s, const std::string &start)
  66 {
  67    if (start.size() > s.size())
  68       return false;
  69    return (s.substr(0, start.size()) == start);
  70 }
  71
  72 }
  73 }
  74                                                                         /*}}}*/
  75 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  76 // ---------------------------------------------------------------------
  77 /* This is handy to use before display some information for enduser  */
  78 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  79 {
  80   iconv_t cd;
  81   const char *inbuf;
  82   char *inptr, *outbuf;
  83   size_t insize, bufsize;
  84   dest->clear();
  85
  86   cd = iconv_open(codeset, "UTF-8");
  87   if (cd == (iconv_t)(-1)) {
  88      // Something went wrong
  89      if (errno == EINVAL)
  90         _error->Error("conversion from 'UTF-8' to '%s' not available",
  91                codeset);
  92      else
  93         perror("iconv_open");
  94
  95      return false;
  96   }
  97
  98   insize = bufsize = orig.size();
  99   inbuf = orig.data();
 100   inptr = (char *)inbuf;
 101   outbuf = new char[bufsize];
 102   size_t lastError = -1;
 103
 104   while (insize != 0)
 105   {
 106      char *outptr = outbuf;
 107      size_t outsize = bufsize;
 108      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
 109      dest->append(outbuf, outptr - outbuf);
 110      if (err == (size_t)(-1))
 111      {
 112         switch (errno)
 113         {
 114         case EILSEQ:
 115            insize--;
 116            inptr++;
 117            // replace a series of unknown multibytes with a single "?"
 118            if (lastError != insize) {
 119               lastError = insize - 1;
 120               dest->append("?");
 121            }
 122            break;
 123         case EINVAL:
 124            insize = 0;
 125            break;
 126         case E2BIG:
 127            if (outptr == outbuf)
 128            {
 129               bufsize *= 2;
 130               delete[] outbuf;
 131               outbuf = new char[bufsize];
 132            }
 133            break;
 134         }
 135      }
 136   }
 137
 138   delete[] outbuf;
 139
 140   iconv_close(cd);
 141
 142   return true;
 143 }
 144                                                                         /*}}}*/
 145 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 146 // ---------------------------------------------------------------------
 147 /* This is handy to use when parsing a file. It also removes \n's left
 148    over from fgets and company */
 149 char *_strstrip(char *String)
 150 {
 151    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 152
 153    if (*String == 0)
 154       return String;
 155    return _strrstrip(String);
 156 }
 157                                                                         /*}}}*/
 158 // strrstrip - Remove white space from the back of a string     /*{{{*/
 159 // ---------------------------------------------------------------------
 160 char *_strrstrip(char *String)
 161 {
 162    char *End = String + strlen(String) - 1;
 163    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 164                                *End == '\r'); End--);
 165    End++;
 166    *End = 0;
 167    return String;
 168 }
 169                                                                         /*}}}*/
 170 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 171 // ---------------------------------------------------------------------
 172 /* */
 173 char *_strtabexpand(char *String,size_t Len)
 174 {
 175    for (char *I = String; I != I + Len && *I != 0; I++)
 176    {
 177       if (*I != '\t')
 178          continue;
 179       if (I + 8 > String + Len)
 180       {
 181          *I = 0;
 182          return String;
 183       }
 184
 185       /* Assume the start of the string is 0 and find the next 8 char
 186          division */
 187       int Len;
 188       if (String == I)
 189          Len = 1;
 190       else
 191          Len = 8 - ((String - I) % 8);
 192       Len -= 2;
 193       if (Len <= 0)
 194       {
 195          *I = ' ';
 196          continue;
 197       }
 198
 199       memmove(I + Len,I + 1,strlen(I) + 1);
 200       for (char *J = I; J + Len != I; *I = ' ', I++);
 201    }
 202    return String;
 203 }
 204                                                                         /*}}}*/
 205 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 206 // ---------------------------------------------------------------------
 207 /* This grabs a single word, converts any % escaped characters to their
 208    proper values and advances the pointer. Double quotes are understood
 209    and striped out as well. This is for URI/URL parsing. It also can
 210    understand [] brackets.*/
 211 bool ParseQuoteWord(const char *&String,string &Res)
 212 {
 213    // Skip leading whitespace
 214    const char *C = String;
 215    for (;*C != 0 && *C == ' '; C++);
 216    if (*C == 0)
 217       return false;
 218
 219    // Jump to the next word
 220    for (;*C != 0 && isspace(*C) == 0; C++)
 221    {
 222       if (*C == '"')
 223       {
 224          C = strchr(C + 1, '"');
 225          if (C == NULL)
 226             return false;
 227       }
 228       if (*C == '[')
 229       {
 230          C = strchr(C + 1, ']');
 231          if (C == NULL)
 232             return false;
 233       }
 234    }
 235
 236    // Now de-quote characters
 237    char Buffer[1024];
 238    char Tmp[3];
 239    const char *Start = String;
 240    char *I;
 241    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 242    {
 243       if (*Start == '%' && Start + 2 < C &&
 244           isxdigit(Start[1]) && isxdigit(Start[2]))
 245       {
 246          Tmp[0] = Start[1];
 247          Tmp[1] = Start[2];
 248          Tmp[2] = 0;
 249          *I = (char)strtol(Tmp,0,16);
 250          Start += 3;
 251          continue;
 252       }
 253       if (*Start != '"')
 254          *I = *Start;
 255       else
 256          I--;
 257       Start++;
 258    }
 259    *I = 0;
 260    Res = Buffer;
 261
 262    // Skip ending white space
 263    for (;*C != 0 && isspace(*C) != 0; C++);
 264    String = C;
 265    return true;
 266 }
 267                                                                         /*}}}*/
 268 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 269 // ---------------------------------------------------------------------
 270 /* This expects a series of space separated strings enclosed in ""'s.
 271    It concatenates the ""'s into a single string. */
 272 bool ParseCWord(const char *&String,string &Res)
 273 {
 274    // Skip leading whitespace
 275    const char *C = String;
 276    for (;*C != 0 && *C == ' '; C++);
 277    if (*C == 0)
 278       return false;
 279
 280    char Buffer[1024];
 281    char *Buf = Buffer;
 282    if (strlen(String) >= sizeof(Buffer))
 283        return false;
 284
 285    for (; *C != 0; C++)
 286    {
 287       if (*C == '"')
 288       {
 289          for (C++; *C != 0 && *C != '"'; C++)
 290             *Buf++ = *C;
 291
 292          if (*C == 0)
 293             return false;
 294
 295          continue;
 296       }
 297
 298       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 299          continue;
 300       if (isspace(*C) == 0)
 301          return false;
 302       *Buf++ = ' ';
 303    }
 304    *Buf = 0;
 305    Res = Buffer;
 306    String = C;
 307    return true;
 308 }
 309                                                                         /*}}}*/
 310 // QuoteString - Convert a string into quoted from                      /*{{{*/
 311 // ---------------------------------------------------------------------
 312 /* */
 313 string QuoteString(const string &Str, const char *Bad)
 314 {
 315    string Res;
 316    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 317    {
 318       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 319           *I == 0x25 || // percent '%' char
 320           *I <= 0x20 || *I >= 0x7F) // control chars
 321       {
 322          char Buf[10];
 323          sprintf(Buf,"%%%02x",(int)*I);
 324          Res += Buf;
 325       }
 326       else
 327          Res += *I;
 328    }
 329    return Res;
 330 }
 331                                                                         /*}}}*/
 332 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 333 // ---------------------------------------------------------------------
 334 /* This undoes QuoteString */
 335 string DeQuoteString(const string &Str)
 336 {
 337    return DeQuoteString(Str.begin(),Str.end());
 338 }
 339 string DeQuoteString(string::const_iterator const &begin,
 340                         string::const_iterator const &end)
 341 {
 342    string Res;
 343    for (string::const_iterator I = begin; I != end; ++I)
 344    {
 345       if (*I == '%' && I + 2 < end &&
 346           isxdigit(I[1]) && isxdigit(I[2]))
 347       {
 348          char Tmp[3];
 349          Tmp[0] = I[1];
 350          Tmp[1] = I[2];
 351          Tmp[2] = 0;
 352          Res += (char)strtol(Tmp,0,16);
 353          I += 2;
 354          continue;
 355       }
 356       else
 357          Res += *I;
 358    }
 359    return Res;
 360 }
 361
 362                                                                         /*}}}*/
 363 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 364 // ---------------------------------------------------------------------
 365 /* A max of 4 digits are shown before conversion to the next highest unit.
 366    The max length of the string will be 5 chars unless the size is > 10
 367    YottaBytes (E24) */
 368 string SizeToStr(double Size)
 369 {
 370    char S[300];
 371    double ASize;
 372    if (Size >= 0)
 373       ASize = Size;
 374    else
 375       ASize = -1*Size;
 376
 377    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 378       ExaBytes, ZettaBytes, YottaBytes */
 379    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 380    int I = 0;
 381    while (I <= 8)
 382    {
 383       if (ASize < 100 && I != 0)
 384       {
 385          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 386          break;
 387       }
 388
 389       if (ASize < 10000)
 390       {
 391          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 392          break;
 393       }
 394       ASize /= 1000.0;
 395       I++;
 396    }
 397
 398    return S;
 399 }
 400                                                                         /*}}}*/
 401 // TimeToStr - Convert the time into a string                           /*{{{*/
 402 // ---------------------------------------------------------------------
 403 /* Converts a number of seconds to a hms format */
 404 string TimeToStr(unsigned long Sec)
 405 {
 406    char S[300];
 407
 408    while (1)
 409    {
 410       if (Sec > 60*60*24)
 411       {
 412          //d means days, h means hours, min means minutes, s means seconds
 413          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 414          break;
 415       }
 416
 417       if (Sec > 60*60)
 418       {
 419          //h means hours, min means minutes, s means seconds
 420          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 421          break;
 422       }
 423
 424       if (Sec > 60)
 425       {
 426          //min means minutes, s means seconds
 427          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 428          break;
 429       }
 430
 431       //s means seconds
 432       sprintf(S,_("%lis"),Sec);
 433       break;
 434    }
 435
 436    return S;
 437 }
 438                                                                         /*}}}*/
 439 // SubstVar - Substitute a string for another string                    /*{{{*/
 440 // ---------------------------------------------------------------------
 441 /* This replaces all occurrences of Subst with Contents in Str. */
 442 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 443 {
 444    if (Subst.empty() == true)
 445       return Str;
 446
 447    string::size_type Pos = 0;
 448    string::size_type OldPos = 0;
 449    string Temp;
 450
 451    while (OldPos < Str.length() &&
 452           (Pos = Str.find(Subst,OldPos)) != string::npos)
 453    {
 454       if (OldPos != Pos)
 455          Temp.append(Str, OldPos, Pos - OldPos);
 456       if (Contents.empty() == false)
 457          Temp.append(Contents);
 458       OldPos = Pos + Subst.length();
 459    }
 460
 461    if (OldPos == 0)
 462       return Str;
 463
 464    if (OldPos >= Str.length())
 465       return Temp;
 466    return Temp + string(Str,OldPos);
 467 }
 468 string SubstVar(string Str,const struct SubstVar *Vars)
 469 {
 470    for (; Vars->Subst != 0; Vars++)
 471       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 472    return Str;
 473 }
 474                                                                         /*}}}*/
 475 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 476 // ---------------------------------------------------------------------
 477 /* Returns a string with the supplied separator depth + 1 times in it */
 478 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 479 {
 480    std::string output = "";
 481    for(unsigned long d=Depth+1; d > 0; d--)
 482       output.append(Separator);
 483    return output;
 484 }
 485                                                                         /*}}}*/
 486 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 487 // ---------------------------------------------------------------------
 488 /* This converts a URI into a safe filename. It quotes all unsafe characters
 489    and converts / to _ and removes the scheme identifier. The resulting
 490    file name should be unique and never occur again for a different file */
 491 string URItoFileName(const string &URI)
 492 {
 493    // Nuke 'sensitive' items
 494    ::URI U(URI);
 495    U.User.clear();
 496    U.Password.clear();
 497    U.Access.clear();
 498
 499    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 500    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 501    replace(NewURI.begin(),NewURI.end(),'/','_');
 502    return NewURI;
 503 }
 504                                                                         /*}}}*/
 505 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 506 // ---------------------------------------------------------------------
 507 /* This routine performs a base64 transformation on a string. It was ripped
 508    from wget and then patched and bug fixed.
 509
 510    This spec can be found in rfc2045 */
 511 string Base64Encode(const string &S)
 512 {
 513    // Conversion table.
 514    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 515                           'I','J','K','L','M','N','O','P',
 516                           'Q','R','S','T','U','V','W','X',
 517                           'Y','Z','a','b','c','d','e','f',
 518                           'g','h','i','j','k','l','m','n',
 519                           'o','p','q','r','s','t','u','v',
 520                           'w','x','y','z','0','1','2','3',
 521                           '4','5','6','7','8','9','+','/'};
 522
 523    // Pre-allocate some space
 524    string Final;
 525    Final.reserve((4*S.length() + 2)/3 + 2);
 526
 527    /* Transform the 3x8 bits to 4x6 bits, as required by
 528       base64.  */
 529    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 530    {
 531       char Bits[3] = {0,0,0};
 532       Bits[0] = I[0];
 533       if (I + 1 < S.end())
 534          Bits[1] = I[1];
 535       if (I + 2 < S.end())
 536          Bits[2] = I[2];
 537
 538       Final += tbl[Bits[0] >> 2];
 539       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 540
 541       if (I + 1 >= S.end())
 542          break;
 543
 544       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 545
 546       if (I + 2 >= S.end())
 547          break;
 548
 549       Final += tbl[Bits[2] & 0x3f];
 550    }
 551
 552    /* Apply the padding elements, this tells how many bytes the remote
 553       end should discard */
 554    if (S.length() % 3 == 2)
 555       Final += '=';
 556    if (S.length() % 3 == 1)
 557       Final += "==";
 558
 559    return Final;
 560 }
 561                                                                         /*}}}*/
 562 // stringcmp - Arbitrary string compare                                 /*{{{*/
 563 // ---------------------------------------------------------------------
 564 /* This safely compares two non-null terminated strings of arbitrary
 565    length */
 566 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 567 {
 568    for (; A != AEnd && B != BEnd; A++, B++)
 569       if (*A != *B)
 570          break;
 571
 572    if (A == AEnd && B == BEnd)
 573       return 0;
 574    if (A == AEnd)
 575       return 1;
 576    if (B == BEnd)
 577       return -1;
 578    if (*A < *B)
 579       return -1;
 580    return 1;
 581 }
 582
 583 #if __GNUC__ >= 3
 584 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 585               const char *B,const char *BEnd)
 586 {
 587    for (; A != AEnd && B != BEnd; A++, B++)
 588       if (*A != *B)
 589          break;
 590
 591    if (A == AEnd && B == BEnd)
 592       return 0;
 593    if (A == AEnd)
 594       return 1;
 595    if (B == BEnd)
 596       return -1;
 597    if (*A < *B)
 598       return -1;
 599    return 1;
 600 }
 601 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 602               string::const_iterator B,string::const_iterator BEnd)
 603 {
 604    for (; A != AEnd && B != BEnd; A++, B++)
 605       if (*A != *B)
 606          break;
 607
 608    if (A == AEnd && B == BEnd)
 609       return 0;
 610    if (A == AEnd)
 611       return 1;
 612    if (B == BEnd)
 613       return -1;
 614    if (*A < *B)
 615       return -1;
 616    return 1;
 617 }
 618 #endif
 619                                                                         /*}}}*/
 620 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 621 // ---------------------------------------------------------------------
 622 /* */
 623 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 624 {
 625    for (; A != AEnd && B != BEnd; A++, B++)
 626       if (tolower_ascii(*A) != tolower_ascii(*B))
 627          break;
 628
 629    if (A == AEnd && B == BEnd)
 630       return 0;
 631    if (A == AEnd)
 632       return 1;
 633    if (B == BEnd)
 634       return -1;
 635    if (tolower_ascii(*A) < tolower_ascii(*B))
 636       return -1;
 637    return 1;
 638 }
 639 #if __GNUC__ >= 3
 640 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 641                   const char *B,const char *BEnd)
 642 {
 643    for (; A != AEnd && B != BEnd; A++, B++)
 644       if (tolower_ascii(*A) != tolower_ascii(*B))
 645          break;
 646
 647    if (A == AEnd && B == BEnd)
 648       return 0;
 649    if (A == AEnd)
 650       return 1;
 651    if (B == BEnd)
 652       return -1;
 653    if (tolower_ascii(*A) < tolower_ascii(*B))
 654       return -1;
 655    return 1;
 656 }
 657 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 658                   string::const_iterator B,string::const_iterator BEnd)
 659 {
 660    for (; A != AEnd && B != BEnd; A++, B++)
 661       if (tolower_ascii(*A) != tolower_ascii(*B))
 662          break;
 663
 664    if (A == AEnd && B == BEnd)
 665       return 0;
 666    if (A == AEnd)
 667       return 1;
 668    if (B == BEnd)
 669       return -1;
 670    if (tolower_ascii(*A) < tolower_ascii(*B))
 671       return -1;
 672    return 1;
 673 }
 674 #endif
 675                                                                         /*}}}*/
 676 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 677 // ---------------------------------------------------------------------
 678 /* The format is like those used in package files and the method
 679    communication system */
 680 string LookupTag(const string &Message,const char *Tag,const char *Default)
 681 {
 682    // Look for a matching tag.
 683    int Length = strlen(Tag);
 684    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 685    {
 686       // Found the tag
 687       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 688       {
 689          // Find the end of line and strip the leading/trailing spaces
 690          string::const_iterator J;
 691          I += Length + 1;
 692          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 693          for (J = I; *J != '\n' && J < Message.end(); ++J);
 694          for (; J > I && isspace(J[-1]) != 0; --J);
 695
 696          return string(I,J);
 697       }
 698
 699       for (; *I != '\n' && I < Message.end(); ++I);
 700    }
 701
 702    // Failed to find a match
 703    if (Default == 0)
 704       return string();
 705    return Default;
 706 }
 707                                                                         /*}}}*/
 708 // StringToBool - Converts a string into a boolean                      /*{{{*/
 709 // ---------------------------------------------------------------------
 710 /* This inspects the string to see if it is true or if it is false and
 711    then returns the result. Several varients on true/false are checked. */
 712 int StringToBool(const string &Text,int Default)
 713 {
 714    char *ParseEnd;
 715    int Res = strtol(Text.c_str(),&ParseEnd,0);
 716    // ensure that the entire string was converted by strtol to avoid
 717    // failures on "apt-cache show -a 0ad" where the "0" is converted
 718    const char *TextEnd = Text.c_str()+Text.size();
 719    if (ParseEnd == TextEnd && Res >= 0 && Res <= 1)
 720       return Res;
 721
 722    // Check for positives
 723    if (strcasecmp(Text.c_str(),"no") == 0 ||
 724        strcasecmp(Text.c_str(),"false") == 0 ||
 725        strcasecmp(Text.c_str(),"without") == 0 ||
 726        strcasecmp(Text.c_str(),"off") == 0 ||
 727        strcasecmp(Text.c_str(),"disable") == 0)
 728       return 0;
 729
 730    // Check for negatives
 731    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 732        strcasecmp(Text.c_str(),"true") == 0 ||
 733        strcasecmp(Text.c_str(),"with") == 0 ||
 734        strcasecmp(Text.c_str(),"on") == 0 ||
 735        strcasecmp(Text.c_str(),"enable") == 0)
 736       return 1;
 737
 738    return Default;
 739 }
 740                                                                         /*}}}*/
 741 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 742 // ---------------------------------------------------------------------
 743 /* This converts a time_t into a string time representation that is
 744    year 2000 complient and timezone neutral */
 745 string TimeRFC1123(time_t Date)
 746 {
 747    struct tm Conv;
 748    if (gmtime_r(&Date, &Conv) == NULL)
 749       return "";
 750
 751    char Buf[300];
 752    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 753    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 754                           "Aug","Sep","Oct","Nov","Dec"};
 755
 756    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 757            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 758            Conv.tm_min,Conv.tm_sec);
 759    return Buf;
 760 }
 761                                                                         /*}}}*/
 762 // ReadMessages - Read messages from the FD                             /*{{{*/
 763 // ---------------------------------------------------------------------
 764 /* This pulls full messages from the input FD into the message buffer.
 765    It assumes that messages will not pause during transit so no
 766    fancy buffering is used.
 767
 768    In particular: this reads blocks from the input until it believes
 769    that it's run out of input text.  Each block is terminated by a
 770    double newline ('\n' followed by '\n').  As noted below, there is a
 771    bug in this code: it assumes that all the blocks have been read if
 772    it doesn't see additional text in the buffer after the last one is
 773    parsed, which will cause it to lose blocks if the last block
 774    coincides with the end of the buffer.
 775  */
 776 bool ReadMessages(int Fd, vector<string> &List)
 777 {
 778    char Buffer[64000];
 779    char *End = Buffer;
 780    // Represents any left-over from the previous iteration of the
 781    // parse loop.  (i.e., if a message is split across the end
 782    // of the buffer, it goes here)
 783    string PartialMessage;
 784
 785    while (1)
 786    {
 787       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 788       if (Res < 0 && errno == EINTR)
 789          continue;
 790
 791       // Process is dead, this is kind of bad..
 792       if (Res == 0)
 793          return false;
 794
 795       // No data
 796       if (Res < 0 && errno == EAGAIN)
 797          return true;
 798       if (Res < 0)
 799          return false;
 800
 801       End += Res;
 802
 803       // Look for the end of the message
 804       for (char *I = Buffer; I + 1 < End; I++)
 805       {
 806          if (I[1] != '\n' ||
 807                (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
 808             continue;
 809
 810          // Pull the message out
 811          string Message(Buffer,I-Buffer);
 812          PartialMessage += Message;
 813
 814          // Fix up the buffer
 815          for (; I < End && (*I == '\n' || *I == '\r'); ++I);
 816          End -= I-Buffer;
 817          memmove(Buffer,I,End-Buffer);
 818          I = Buffer;
 819
 820          List.push_back(PartialMessage);
 821          PartialMessage.clear();
 822       }
 823       if (End != Buffer)
 824         {
 825           // If there's text left in the buffer, store it
 826           // in PartialMessage and throw the rest of the buffer
 827           // away.  This allows us to handle messages that
 828           // are longer than the static buffer size.
 829           PartialMessage += string(Buffer, End);
 830           End = Buffer;
 831         }
 832       else
 833         {
 834           // BUG ALERT: if a message block happens to end at a
 835           // multiple of 64000 characters, this will cause it to
 836           // terminate early, leading to a badly formed block and
 837           // probably crashing the method.  However, this is the only
 838           // way we have to find the end of the message block.  I have
 839           // an idea of how to fix this, but it will require changes
 840           // to the protocol (essentially to mark the beginning and
 841           // end of the block).
 842           //
 843           //  -- dburrows 2008-04-02
 844           return true;
 845         }
 846
 847       if (WaitFd(Fd) == false)
 848          return false;
 849    }
 850 }
 851                                                                         /*}}}*/
 852 // MonthConv - Converts a month string into a number                    /*{{{*/
 853 // ---------------------------------------------------------------------
 854 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 855    Made it a bit more robust with a few tolower_ascii though. */
 856 static int MonthConv(char *Month)
 857 {
 858    switch (tolower_ascii(*Month))
 859    {
 860       case 'a':
 861       return tolower_ascii(Month[1]) == 'p'?3:7;
 862       case 'd':
 863       return 11;
 864       case 'f':
 865       return 1;
 866       case 'j':
 867       if (tolower_ascii(Month[1]) == 'a')
 868          return 0;
 869       return tolower_ascii(Month[2]) == 'n'?5:6;
 870       case 'm':
 871       return tolower_ascii(Month[2]) == 'r'?2:4;
 872       case 'n':
 873       return 10;
 874       case 'o':
 875       return 9;
 876       case 's':
 877       return 8;
 878
 879       // Pretend it is January..
 880       default:
 881       return 0;
 882    }
 883 }
 884                                                                         /*}}}*/
 885 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 886 // ---------------------------------------------------------------------
 887 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 888    than local timezone (mktime assumes the latter).
 889
 890    This function is a nonstandard GNU extension that is also present on
 891    the BSDs and maybe other systems. For others we follow the advice of
 892    the manpage of timegm and use his portable replacement. */
 893 #ifndef HAVE_TIMEGM
 894 static time_t timegm(struct tm *t)
 895 {
 896    char *tz = getenv("TZ");
 897    setenv("TZ", "", 1);
 898    tzset();
 899    time_t ret = mktime(t);
 900    if (tz)
 901       setenv("TZ", tz, 1);
 902    else
 903       unsetenv("TZ");
 904    tzset();
 905    return ret;
 906 }
 907 #endif
 908                                                                         /*}}}*/
 909 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 910 // ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC2616 Section 3.3.1
   with one exception: all timezones (%Z) are accepted. The protocol
   says that it MUST be GMT, but GMT is equal to UTC, which we will
   encounter from time to time (e.g. in Release files), so we accept all
   here and just assume it is GMT (or UTC) later on */
 916 bool RFC1123StrToTime(const char* const str,time_t &time)
 917 {
 918    struct tm Tm;
 919    setlocale (LC_ALL,"C");
 920    bool const invalid =
 921    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 922       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 923    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 924        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 925    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 926        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 927    setlocale (LC_ALL,"");
 928    if (invalid == true)
 929       return false;
 930
 931    time = timegm(&Tm);
 932    return true;
 933 }
 934                                                                         /*}}}*/
 935 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 936 // ---------------------------------------------------------------------
 937 /* */
 938 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 939 {
 940    struct tm Tm;
 941    // MDTM includes no whitespaces but recommend and ignored by strptime
 942    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 943       return false;
 944
 945    time = timegm(&Tm);
 946    return true;
 947 }
 948                                                                         /*}}}*/
 949 // StrToTime - Converts a string into a time_t                          /*{{{*/
 950 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format. */
 956 bool StrToTime(const string &Val,time_t &Result)
 957 {
 958    struct tm Tm;
 959    char Month[10];
 960
 961    // Skip the day of the week
 962    const char *I = strchr(Val.c_str(), ' ');
 963
 964    // Handle RFC 1123 time
 965    Month[0] = 0;
 966    if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 967               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 968    {
 969       // Handle RFC 1036 time
 970       if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
 971                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 972          Tm.tm_year += 1900;
 973       else
 974       {
 975          // asctime format
 976          if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
 977                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 978          {
 979             // 'ftp' time
 980             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 981                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 982                return false;
 983             Tm.tm_mon--;
 984          }
 985       }
 986    }
 987
 988    Tm.tm_isdst = 0;
 989    if (Month[0] != 0)
 990       Tm.tm_mon = MonthConv(Month);
 991    else
 992       Tm.tm_mon = 0; // we don't have a month, so pick something
 993    Tm.tm_year -= 1900;
 994
 995    // Convert to local time and then to GMT
 996    Result = timegm(&Tm);
 997    return true;
 998 }
 999                                                                         /*}}}*/
1000 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
1001 // ---------------------------------------------------------------------
1002 /* This is used in decoding the crazy fixed length string headers in
1003    tar and ar files. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   /* Converts the first Len bytes of Str (which need not be terminated)
      to a number in the given Base. A field consisting only of spaces
      counts as zero. Returns false, with Res set to 0, if the field is
      30 bytes or longer, holds no digits, is negative or does not fit
      into an unsigned long. */
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I = 0;
   while (S[I] == ' ')
      ++I;
   if (S[I] == 0)
      return true;

   char *End;
   errno = 0;
   unsigned long const Value = strtoul(S,&End,Base);
   // nothing converted, or the value is out of range
   if (End == S || errno != 0)
      return false;
   // strtoul happily negates "-N" into a huge value; refuse signs
   if (memchr(S,'-',End - S) != NULL)
      return false;

   Res = Value;
   return true;
}
1026                                                                         /*}}}*/
1027 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
1028 // ---------------------------------------------------------------------
1029 /* This is used in decoding the crazy fixed length string headers in
1030    tar and ar files. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   /* Converts the first Len bytes of Str (which need not be terminated)
      to a number in the given Base. A field consisting only of spaces
      counts as zero. Returns false, with Res set to 0, if the field is
      30 bytes or longer, holds no digits, is negative or does not fit
      into an unsigned long long. */
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I = 0;
   while (S[I] == ' ')
      ++I;
   if (S[I] == 0)
      return true;

   char *End;
   errno = 0;
   unsigned long long const Value = strtoull(S,&End,Base);
   // nothing converted, or the value is out of range
   if (End == S || errno != 0)
      return false;
   // strtoull happily negates "-N" into a huge value; refuse signs
   if (memchr(S,'-',End - S) != NULL)
      return false;

   Res = Value;
   return true;
}
1053                                                                         /*}}}*/
1054
1055 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1056 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   /* Decodes Len bytes as a big endian number. The top bit of the first
      byte is the marker for this encoding and is not part of the value.
      Returns false, leaving Res alone, if the field is empty, the marker
      is missing or the value does not fit into an unsigned long long. */
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   unsigned long long Value = Str[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
   {
      // The next shift would push significant bits out of the top byte
      if ((Value >> (sizeof(Value) * 8 - 8)) != 0)
         return false;
      // plain char may be signed: go through unsigned char so that bytes
      // >= 0x80 are not sign extended into the result
      Value = (Value << 8) | static_cast<unsigned char>(Str[i]);
   }

   Res = Value;
   return true;
}
1071                                                                         /*}}}*/
1072 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
1073 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
1076 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1077 {
1078    unsigned long long Num;
1079    bool rc;
1080
1081    rc = Base256ToNum(Str, Num, Len);
1082    Res = Num;
1083    if (Res != Num)
1084       return false;
1085
1086    return rc;
1087 }
1088                                                                         /*}}}*/
1089 // HexDigit - Convert a hex character into an integer                   /*{{{*/
1090 // ---------------------------------------------------------------------
1091 /* Helper for Hex2Num */
static int HexDigit(int c)
{
   // Value of a single hexadecimal digit; anything else counts as zero
   int Value = 0;
   if ('0' <= c && c <= '9')
      Value = c - '0';
   else if ('a' <= c && c <= 'f')
      Value = 10 + (c - 'a');
   else if ('A' <= c && c <= 'F')
      Value = 10 + (c - 'A');
   return Value;
}
1102                                                                         /*}}}*/
1103 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1104 // ---------------------------------------------------------------------
1105 /* The length of the buffer must be exactly 1/2 the length of the string. */
1106 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1107 {
1108    if (Str.length() != Length*2)
1109       return false;
1110
1111    // Convert each digit. We store it in the same order as the string
1112    int J = 0;
1113    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1114    {
1115       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1116          return false;
1117
1118       Num[J] = HexDigit(I[0]) << 4;
1119       Num[J] += HexDigit(I[1]);
1120    }
1121
1122    return true;
1123 }
1124                                                                         /*}}}*/
1125 // TokSplitString - Split a string up by a given token                  /*{{{*/
1126 // ---------------------------------------------------------------------
/* This is intended to be a faster splitter; it does not allocate any
   dynamic memory. Input is changed to insert nulls at each token location. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   /* Splits Input in place at every Tok, trimming white space around the
      pieces, and stores pointers to the pieces in List followed by a
      null entry. ListMax is the number of entries List has room for,
      including that null entry. Returns false if List is too small; the
      pieces found so far are then still terminated by a null entry. */

   // Without room for even the terminating entry nothing may be written
   if (ListMax == 0)
      return false;

   // Strip any leading spaces
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   // (the ctype functions must be given unsigned char values)
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over separators, blanks and the nulls written above
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1164                                                                         /*}}}*/
1165 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1166 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get a vector we can iterate over */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   /* Cuts haystack at every occurrence of split and returns the pieces
      in order. Empty pieces are kept, except that a separator at the very
      end of haystack does not produce a trailing empty piece. An empty
      haystack gives an empty vector. */
   vector<string> exploded;

   // Work on offsets so that we never have to step beyond the end
   string::size_type start = 0;
   while (start < haystack.length())
   {
      string::size_type const end = haystack.find(split, start);
      if (end == string::npos)
      {
         // no further separator: the rest is the last piece
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));
      start = end + 1;
   }
   return exploded;
}
1184                                                                         /*}}}*/
1185 // StringSplit - split a string into a string vector by token           /*{{{*/
1186 // ---------------------------------------------------------------------
1187 /* See header for details.
1188  */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   /* Cuts s at every occurrence of sep. Once the result would reach
      maxsplit elements the uncut rest of s becomes the final element.
      An empty sep yields an empty vector. */
   vector<string> parts;

   // without a separator there is nothing sensible to do
   if (sep.empty() == true)
      return parts;

   size_t begin = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, begin);

      // Either this element exhausts the allowed count or there is no
      // further separator: in both cases the rest of s is the last item
      if (parts.size() + 1 >= maxsplit || hit == string::npos)
      {
         parts.push_back(s.substr(begin));
         break;
      }

      parts.push_back(s.substr(begin, hit - begin));
      begin = hit + sep.size();
   }
   return parts;
}
1215                                                                         /*}}}*/
1216 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1217 // ---------------------------------------------------------------------
1218 /* */
1219 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1220                       const char **ListEnd)
1221 {
1222    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1223       R->Hit = false;
1224
1225    unsigned long Hits = 0;
1226    for (; ListBegin < ListEnd; ++ListBegin)
1227    {
1228       // Check if the name is a regex
1229       const char *I;
1230       bool Regex = true;
1231       for (I = *ListBegin; *I != 0; I++)
1232          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1233             break;
1234       if (*I == 0)
1235          Regex = false;
1236
1237       // Compile the regex pattern
1238       regex_t Pattern;
1239       if (Regex == true)
1240          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1241                      REG_NOSUB) != 0)
1242             Regex = false;
1243
1244       // Search the list
1245       bool Done = false;
1246       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1247       {
1248          if (R->Str[0] == 0)
1249             continue;
1250
1251          if (strcasecmp(R->Str,*ListBegin) != 0)
1252          {
1253             if (Regex == false)
1254                continue;
1255             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1256                continue;
1257          }
1258          Done = true;
1259
1260          if (R->Hit == false)
1261             Hits++;
1262
1263          R->Hit = true;
1264       }
1265
1266       if (Regex == true)
1267          regfree(&Pattern);
1268
1269       if (Done == false)
1270          _error->Warning(_("Selection %s not found"),*ListBegin);
1271    }
1272
1273    return Hits;
1274 }
1275                                                                         /*}}}*/
1276 // {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
1277 // ---------------------------------------------------------------------
1278 /* This is used to make the internationalization strings easier to translate
1279    and to allow reordering of parameters */
/* One formatting attempt into a scratch buffer of 'size' bytes. On success
   the text is written to out and true is returned. Otherwise false is
   returned and size is updated to what the next attempt should use: the
   exact amount if vsnprintf reported it, twice the current size if not.
   args is consumed either way, the caller has to restart it for a retry. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // the vector releases the scratch space on every path and reports
   // allocation failure instead of handing a null pointer to vsnprintf
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
/* Formats like printf and writes the result to out */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   while (true) {
      // every va_start needs its va_end, also for the attempt that succeeds
      va_start(args,format);
      bool const done = iovprintf(out, format, args, size);
      va_end(args);
      if (done == true)
         return;
   }
}
/* Formats like printf and replaces the content of out with the result */
void strprintf(string &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   std::ostringstream outstr;
   while (true) {
      // every va_start needs its va_end, also for the attempt that succeeds
      va_start(args,format);
      bool const done = iovprintf(outstr, format, args, size);
      va_end(args);
      if (done == true)
         break;
   }
   out = outstr.str();
}
1321                                                                         /*}}}*/
1322 // safe_snprintf - Safer snprintf                                       /*{{{*/
1323 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   /* Formats into [Buffer,End). Returns the position of the terminating
      null of the text just written so that calls can be chained, or End
      if there is no room at all, the text had to be truncated or
      formatting failed. */
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // vsnprintf reports the length the complete text would have had, which
   // can be far beyond the buffer. Compare lengths rather than building a
   // pointer that would lie outside of it.
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1343                                                                         /*}}}*/
1344 // StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
1345 // ---------------------------------------------------------------------
string StripEpoch(const string &VerStr)
{
   // Everything up to and including the first colon is the epoch;
   // a version without a colon is handed back unchanged
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1353                                                                         /*}}}*/
1354 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1355 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
int tolower_ascii(int const c)
{
   // Only the 26 upper case ASCII letters are shifted to their lower
   // case counterpart, every other value passes through untouched
   bool const isUpper = ('A' <= c && c <= 'Z');
   return isUpper ? c + ('a' - 'A') : c;
}
1366                                                                         /*}}}*/
1367
1368 // CheckDomainList - See if Host is in a , separate list                /*{{{*/
1369 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1372 bool CheckDomainList(const string &Host,const string &List)
1373 {
1374    string::const_iterator Start = List.begin();
1375    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1376    {
1377       if (Cur < List.end() && *Cur != ',')
1378          continue;
1379
1380       // Match the end of the string..
1381       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1382           Cur - Start != 0 &&
1383           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1384          return true;
1385
1386       Start = Cur + 1;
1387    }
1388    return false;
1389 }
1390                                                                         /*}}}*/
1391 // strv_length - Return the length of a NULL-terminated string array    /*{{{*/
1392 // ---------------------------------------------------------------------
1393 /* */
size_t strv_length(const char **str_array)
{
   // Walk forward to the terminating NULL entry; the distance covered
   // is the number of strings in the array
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return cursor - str_array;
}
1402
1403 // DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
1404 // ---------------------------------------------------------------------
1405 /* */
string DeEscapeString(const string &input)
{
   /* Returns input with the escapes resolved: a doubled backslash becomes
      a single one, \0XX is replaced by the character with octal code XX
      and \xXX by the one with hexadecimal code XX. A backslash followed
      by anything else is dropped together with that character, as is a
      backslash at the very end. Of a \0 or \x escape which is cut short
      by the end of the input only the escape itself is dropped. */
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read after the backslash
      if (input.end() - it < 2)
         continue;

      // read it
      ++it;
      switch (*it)
      {
         // deal with double escape
         case '\\':
            output += '\\';
            break;
         case '0':
         case 'x':
            // Both digits have to exist. Accepting a shorter rest would
            // read the end of the string and move the loop beyond it.
            if (input.end() - it > 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += (char)strtol(tmp, 0, (*it == '0') ? 8 : 16);
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1464                                                                         /*}}}*/
1465 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1466 // ---------------------------------------------------------------------
1467 /* This parses the URI into all of its components */
1468 void URI::CopyFrom(const string &U)
1469 {
1470    string::const_iterator I = U.begin();
1471
1472    // Locate the first colon, this separates the scheme
1473    for (; I < U.end() && *I != ':' ; ++I);
1474    string::const_iterator FirstColon = I;
1475
1476    /* Determine if this is a host type URI with a leading double //
1477       and then search for the first single / */
1478    string::const_iterator SingleSlash = I;
1479    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1480       SingleSlash += 3;
1481
1482    /* Find the / indicating the end of the hostname, ignoring /'s in the
1483       square brackets */
1484    bool InBracket = false;
1485    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1486    {
1487       if (*SingleSlash == '[')
1488          InBracket = true;
1489       if (InBracket == true && *SingleSlash == ']')
1490          InBracket = false;
1491    }
1492
1493    if (SingleSlash > U.end())
1494       SingleSlash = U.end();
1495
1496    // We can now write the access and path specifiers
1497    Access.assign(U.begin(),FirstColon);
1498    if (SingleSlash != U.end())
1499       Path.assign(SingleSlash,U.end());
1500    if (Path.empty() == true)
1501       Path = "/";
1502
1503    // Now we attempt to locate a user:pass@host fragment
1504    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1505       FirstColon += 3;
1506    else
1507       FirstColon += 1;
1508    if (FirstColon >= U.end())
1509       return;
1510
1511    if (FirstColon > SingleSlash)
1512       FirstColon = SingleSlash;
1513
1514    // Find the colon...
1515    I = FirstColon + 1;
1516    if (I > SingleSlash)
1517       I = SingleSlash;
1518    for (; I < SingleSlash && *I != ':'; ++I);
1519    string::const_iterator SecondColon = I;
1520
1521    // Search for the @ after the colon
1522    for (; I < SingleSlash && *I != '@'; ++I);
1523    string::const_iterator At = I;
1524
1525    // Now write the host and user/pass
1526    if (At == SingleSlash)
1527    {
1528       if (FirstColon < SingleSlash)
1529          Host.assign(FirstColon,SingleSlash);
1530    }
1531    else
1532    {
1533       Host.assign(At+1,SingleSlash);
1534       // username and password must be encoded (RFC 3986)
1535       User.assign(DeQuoteString(FirstColon,SecondColon));
1536       if (SecondColon < At)
1537          Password.assign(DeQuoteString(SecondColon+1,At));
1538    }
1539
1540    // Now we parse the RFC 2732 [] hostnames.
1541    unsigned long PortEnd = 0;
1542    InBracket = false;
1543    for (unsigned I = 0; I != Host.length();)
1544    {
1545       if (Host[I] == '[')
1546       {
1547          InBracket = true;
1548          Host.erase(I,1);
1549          continue;
1550       }
1551
1552       if (InBracket == true && Host[I] == ']')
1553       {
1554          InBracket = false;
1555          Host.erase(I,1);
1556          PortEnd = I;
1557          continue;
1558       }
1559       I++;
1560    }
1561
1562    // Tsk, weird.
1563    if (InBracket == true)
1564    {
1565       Host.clear();
1566       return;
1567    }
1568
1569    // Now we parse off a port number from the hostname
1570    Port = 0;
1571    string::size_type Pos = Host.rfind(':');
1572    if (Pos == string::npos || Pos < PortEnd)
1573       return;
1574
1575    Port = atoi(string(Host,Pos+1).c_str());
1576    Host.assign(Host,0,Pos);
1577 }
1578                                                                         /*}}}*/
1579 // URI::operator string - Convert the URI to a string                   /*{{{*/
1580 // ---------------------------------------------------------------------
1581 /* */
1582 URI::operator string()
1583 {
1584    string Res;
1585
1586    if (Access.empty() == false)
1587       Res = Access + ':';
1588
1589    if (Host.empty() == false)
1590    {
1591       if (Access.empty() == false)
1592          Res += "//";
1593
1594       if (User.empty() == false)
1595       {
1596          // FIXME: Technically userinfo is permitted even less
1597          // characters than these, but this is not conveniently
1598          // expressed with a blacklist.
1599          Res += QuoteString(User, ":/?#[]@");
1600          if (Password.empty() == false)
1601             Res += ":" + QuoteString(Password, ":/?#[]@");
1602          Res += "@";
1603       }
1604
1605       // Add RFC 2732 escaping characters
1606       if (Access.empty() == false &&
1607           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1608          Res += '[' + Host + ']';
1609       else
1610          Res += Host;
1611
1612       if (Port != 0)
1613       {
1614          char S[30];
1615          sprintf(S,":%u",Port);
1616          Res += S;
1617       }
1618    }
1619
1620    if (Path.empty() == false)
1621    {
1622       if (Path[0] != '/')
1623          Res += "/" + Path;
1624       else
1625          Res += Path;
1626    }
1627
1628    return Res;
1629 }
1630                                                                         /*}}}*/
1631 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1632 // ---------------------------------------------------------------------
1633 /* */
1634 string URI::SiteOnly(const string &URI)
1635 {
1636    ::URI U(URI);
1637    U.User.clear();
1638    U.Password.clear();
1639    U.Path.clear();
1640    return U;
1641 }
1642                                                                         /*}}}*/
1643 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1644 // ---------------------------------------------------------------------
1645 /* */
1646 string URI::NoUserPassword(const string &URI)
1647 {
1648    ::URI U(URI);
1649    U.User.clear();
1650    U.Password.clear();
1651    return U;
1652 }
1653                                                                         /*}}}*/