apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before displaying some information to the end user */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf, *outptr;
  47   size_t insize, outsize;
  48
  49   cd = iconv_open(codeset, "UTF-8");
  50   if (cd == (iconv_t)(-1)) {
  51      // Something went wrong
  52      if (errno == EINVAL)
  53         _error->Error("conversion from 'UTF-8' to '%s' not available",
  54                codeset);
  55      else
  56         perror("iconv_open");
  57
  58      // Clean the destination string
  59      *dest = "";
  60
  61      return false;
  62   }
  63
  64   insize = outsize = orig.size();
  65   inbuf = orig.data();
  66   inptr = (char *)inbuf;
  67   outbuf = new char[insize+1];
  68   outptr = outbuf;
  69
  70   iconv(cd, &inptr, &insize, &outptr, &outsize);
  71   *outptr = '\0';
  72
  73   *dest = outbuf;
  74   delete[] outbuf;
  75
  76   iconv_close(cd);
  77
  78   return true;
  79 }
  80                                                                         /*}}}*/
  81 // strstrip - Remove white space from the front and back of a string    /*{{{*/
  82 // ---------------------------------------------------------------------
  83 /* This is handy to use when parsing a file. It also removes \n's left
  84    over from fgets and company */
  85 char *_strstrip(char *String)
  86 {
  87    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
  88
  89    if (*String == 0)
  90       return String;
  91
  92    char *End = String + strlen(String) - 1;
  93    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
  94                                *End == '\r'); End--);
  95    End++;
  96    *End = 0;
  97    return String;
  98 };
  99                                                                         /*}}}*/
 100 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 101 // ---------------------------------------------------------------------
 102 /* */
 103 char *_strtabexpand(char *String,size_t Len)
 104 {
 105    for (char *I = String; I != I + Len && *I != 0; I++)
 106    {
 107       if (*I != '\t')
 108          continue;
 109       if (I + 8 > String + Len)
 110       {
 111          *I = 0;
 112          return String;
 113       }
 114
 115       /* Assume the start of the string is 0 and find the next 8 char
 116          division */
 117       int Len;
 118       if (String == I)
 119          Len = 1;
 120       else
 121          Len = 8 - ((String - I) % 8);
 122       Len -= 2;
 123       if (Len <= 0)
 124       {
 125          *I = ' ';
 126          continue;
 127       }
 128
 129       memmove(I + Len,I + 1,strlen(I) + 1);
 130       for (char *J = I; J + Len != I; *I = ' ', I++);
 131    }
 132    return String;
 133 }
 134                                                                         /*}}}*/
 135 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 136 // ---------------------------------------------------------------------
 137 /* This grabs a single word, converts any % escaped characters to their
 138    proper values and advances the pointer. Double quotes are understood
 139    and stripped out as well. This is for URI/URL parsing. It also can
 140    understand [] brackets.*/
 141 bool ParseQuoteWord(const char *&String,string &Res)
 142 {
 143    // Skip leading whitespace
 144    const char *C = String;
 145    for (;*C != 0 && *C == ' '; C++);
 146    if (*C == 0)
 147       return false;
 148
 149    // Jump to the next word
 150    for (;*C != 0 && isspace(*C) == 0; C++)
 151    {
 152       if (*C == '"')
 153       {
 154          for (C++; *C != 0 && *C != '"'; C++);
 155          if (*C == 0)
 156             return false;
 157       }
 158       if (*C == '[')
 159       {
 160          for (C++; *C != 0 && *C != ']'; C++);
 161          if (*C == 0)
 162             return false;
 163       }
 164    }
 165
 166    // Now de-quote characters
 167    char Buffer[1024];
 168    char Tmp[3];
 169    const char *Start = String;
 170    char *I;
 171    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 172    {
 173       if (*Start == '%' && Start + 2 < C)
 174       {
 175          Tmp[0] = Start[1];
 176          Tmp[1] = Start[2];
 177          Tmp[2] = 0;
 178          *I = (char)strtol(Tmp,0,16);
 179          Start += 3;
 180          continue;
 181       }
 182       if (*Start != '"')
 183          *I = *Start;
 184       else
 185          I--;
 186       Start++;
 187    }
 188    *I = 0;
 189    Res = Buffer;
 190
 191    // Skip ending white space
 192    for (;*C != 0 && isspace(*C) != 0; C++);
 193    String = C;
 194    return true;
 195 }
 196                                                                         /*}}}*/
 197 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 198 // ---------------------------------------------------------------------
 199 /* This expects a series of space separated strings enclosed in ""'s.
 200    It concatenates the ""'s into a single string. */
 201 bool ParseCWord(const char *&String,string &Res)
 202 {
 203    // Skip leading whitespace
 204    const char *C = String;
 205    for (;*C != 0 && *C == ' '; C++);
 206    if (*C == 0)
 207       return false;
 208
 209    char Buffer[1024];
 210    char *Buf = Buffer;
 211    if (strlen(String) >= sizeof(Buffer))
 212        return false;
 213
 214    for (; *C != 0; C++)
 215    {
 216       if (*C == '"')
 217       {
 218          for (C++; *C != 0 && *C != '"'; C++)
 219             *Buf++ = *C;
 220
 221          if (*C == 0)
 222             return false;
 223
 224          continue;
 225       }
 226
 227       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 228          continue;
 229       if (isspace(*C) == 0)
 230          return false;
 231       *Buf++ = ' ';
 232    }
 233    *Buf = 0;
 234    Res = Buffer;
 235    String = C;
 236    return true;
 237 }
 238                                                                         /*}}}*/
 239 // QuoteString - Convert a string into quoted form                      /*{{{*/
 240 // ---------------------------------------------------------------------
 241 /* */
 242 string QuoteString(const string &Str, const char *Bad)
 243 {
 244    string Res;
 245    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 246    {
 247       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 248           *I <= 0x20 || *I >= 0x7F)
 249       {
 250          char Buf[10];
 251          sprintf(Buf,"%%%02x",(int)*I);
 252          Res += Buf;
 253       }
 254       else
 255          Res += *I;
 256    }
 257    return Res;
 258 }
 259                                                                         /*}}}*/
 260 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 261 // ---------------------------------------------------------------------
 262 /* This undoes QuoteString */
 263 string DeQuoteString(const string &Str)
 264 {
 265    string Res;
 266    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 267    {
 268       if (*I == '%' && I + 2 < Str.end())
 269       {
 270          char Tmp[3];
 271          Tmp[0] = I[1];
 272          Tmp[1] = I[2];
 273          Tmp[2] = 0;
 274          Res += (char)strtol(Tmp,0,16);
 275          I += 2;
 276          continue;
 277       }
 278       else
 279          Res += *I;
 280    }
 281    return Res;
 282 }
 283
 284                                                                         /*}}}*/
 285 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 286 // ---------------------------------------------------------------------
 287 /* A max of 4 digits are shown before conversion to the next highest unit.
 288    The max length of the string will be 5 chars unless the size is > 10
 289    YottaBytes (E24) */
 290 string SizeToStr(double Size)
 291 {
 292    char S[300];
 293    double ASize;
 294    if (Size >= 0)
 295       ASize = Size;
 296    else
 297       ASize = -1*Size;
 298
 299    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 300       ExaBytes, ZettaBytes, YottaBytes */
 301    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 302    int I = 0;
 303    while (I <= 8)
 304    {
 305       if (ASize < 100 && I != 0)
 306       {
 307          sprintf(S,"%.1f%c",ASize,Ext[I]);
 308          break;
 309       }
 310
 311       if (ASize < 10000)
 312       {
 313          sprintf(S,"%.0f%c",ASize,Ext[I]);
 314          break;
 315       }
 316       ASize /= 1000.0;
 317       I++;
 318    }
 319
 320    return S;
 321 }
 322                                                                         /*}}}*/
 323 // TimeToStr - Convert the time into a string                           /*{{{*/
 324 // ---------------------------------------------------------------------
 325 /* Converts a number of seconds to a hms format */
 326 string TimeToStr(unsigned long Sec)
 327 {
 328    char S[300];
 329
 330    while (1)
 331    {
 332       if (Sec > 60*60*24)
 333       {
 334          //d means days, h means hours, min means minutes, s means seconds
 335          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 336          break;
 337       }
 338
 339       if (Sec > 60*60)
 340       {
 341          //h means hours, min means minutes, s means seconds
 342          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 343          break;
 344       }
 345
 346       if (Sec > 60)
 347       {
 348          //min means minutes, s means seconds
 349          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 350          break;
 351       }
 352
 353       //s means seconds
 354       sprintf(S,_("%lis"),Sec);
 355       break;
 356    }
 357
 358    return S;
 359 }
 360                                                                         /*}}}*/
 361 // SubstVar - Substitute a string for another string                    /*{{{*/
 362 // ---------------------------------------------------------------------
 363 /* This replaces all occurrences of Subst with Contents in Str. */
 364 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 365 {
 366    string::size_type Pos = 0;
 367    string::size_type OldPos = 0;
 368    string Temp;
 369
 370    while (OldPos < Str.length() &&
 371           (Pos = Str.find(Subst,OldPos)) != string::npos)
 372    {
 373       Temp += string(Str,OldPos,Pos) + Contents;
 374       OldPos = Pos + Subst.length();
 375    }
 376
 377    if (OldPos == 0)
 378       return Str;
 379
 380    return Temp + string(Str,OldPos);
 381 }
 382
 383 string SubstVar(string Str,const struct SubstVar *Vars)
 384 {
 385    for (; Vars->Subst != 0; Vars++)
 386       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 387    return Str;
 388 }
 389                                                                         /*}}}*/
 390 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 391 // ---------------------------------------------------------------------
 392 /* This converts a URI into a safe filename. It quotes all unsafe characters
 393    and converts / to _ and removes the scheme identifier. The resulting
 394    file name should be unique and never occur again for a different file */
 395 string URItoFileName(const string &URI)
 396 {
 397    // Nuke 'sensitive' items
 398    ::URI U(URI);
 399    U.User.clear();
 400    U.Password.clear();
 401    U.Access.clear();
 402
 403    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 404    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 405    replace(NewURI.begin(),NewURI.end(),'/','_');
 406    return NewURI;
 407 }
 408                                                                         /*}}}*/
 409 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 410 // ---------------------------------------------------------------------
 411 /* This routine performs a base64 transformation on a string. It was ripped
 412    from wget and then patched and bug fixed.
 413
 414    This spec can be found in rfc2045 */
 415 string Base64Encode(const string &S)
 416 {
 417    // Conversion table.
 418    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 419                           'I','J','K','L','M','N','O','P',
 420                           'Q','R','S','T','U','V','W','X',
 421                           'Y','Z','a','b','c','d','e','f',
 422                           'g','h','i','j','k','l','m','n',
 423                           'o','p','q','r','s','t','u','v',
 424                           'w','x','y','z','0','1','2','3',
 425                           '4','5','6','7','8','9','+','/'};
 426
 427    // Pre-allocate some space
 428    string Final;
 429    Final.reserve((4*S.length() + 2)/3 + 2);
 430
 431    /* Transform the 3x8 bits to 4x6 bits, as required by
 432       base64.  */
 433    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 434    {
 435       char Bits[3] = {0,0,0};
 436       Bits[0] = I[0];
 437       if (I + 1 < S.end())
 438          Bits[1] = I[1];
 439       if (I + 2 < S.end())
 440          Bits[2] = I[2];
 441
 442       Final += tbl[Bits[0] >> 2];
 443       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 444
 445       if (I + 1 >= S.end())
 446          break;
 447
 448       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 449
 450       if (I + 2 >= S.end())
 451          break;
 452
 453       Final += tbl[Bits[2] & 0x3f];
 454    }
 455
 456    /* Apply the padding elements, this tells how many bytes the remote
 457       end should discard */
 458    if (S.length() % 3 == 2)
 459       Final += '=';
 460    if (S.length() % 3 == 1)
 461       Final += "==";
 462
 463    return Final;
 464 }
 465                                                                         /*}}}*/
 466 // stringcmp - Arbitrary string compare                                 /*{{{*/
 467 // ---------------------------------------------------------------------
 468 /* This safely compares two non-null terminated strings of arbitrary
 469    length */
 470 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 471 {
 472    for (; A != AEnd && B != BEnd; A++, B++)
 473       if (*A != *B)
 474          break;
 475
 476    if (A == AEnd && B == BEnd)
 477       return 0;
 478    if (A == AEnd)
 479       return 1;
 480    if (B == BEnd)
 481       return -1;
 482    if (*A < *B)
 483       return -1;
 484    return 1;
 485 }
 486
 487 #if __GNUC__ >= 3
 488 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 489               const char *B,const char *BEnd)
 490 {
 491    for (; A != AEnd && B != BEnd; A++, B++)
 492       if (*A != *B)
 493          break;
 494
 495    if (A == AEnd && B == BEnd)
 496       return 0;
 497    if (A == AEnd)
 498       return 1;
 499    if (B == BEnd)
 500       return -1;
 501    if (*A < *B)
 502       return -1;
 503    return 1;
 504 }
 505 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 506               string::const_iterator B,string::const_iterator BEnd)
 507 {
 508    for (; A != AEnd && B != BEnd; A++, B++)
 509       if (*A != *B)
 510          break;
 511
 512    if (A == AEnd && B == BEnd)
 513       return 0;
 514    if (A == AEnd)
 515       return 1;
 516    if (B == BEnd)
 517       return -1;
 518    if (*A < *B)
 519       return -1;
 520    return 1;
 521 }
 522 #endif
 523                                                                         /*}}}*/
 524 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 525 // ---------------------------------------------------------------------
 526 /* */
 527 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 528 {
 529    for (; A != AEnd && B != BEnd; A++, B++)
 530       if (toupper(*A) != toupper(*B))
 531          break;
 532
 533    if (A == AEnd && B == BEnd)
 534       return 0;
 535    if (A == AEnd)
 536       return 1;
 537    if (B == BEnd)
 538       return -1;
 539    if (toupper(*A) < toupper(*B))
 540       return -1;
 541    return 1;
 542 }
 543 #if __GNUC__ >= 3
 544 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 545                   const char *B,const char *BEnd)
 546 {
 547    for (; A != AEnd && B != BEnd; A++, B++)
 548       if (toupper(*A) != toupper(*B))
 549          break;
 550
 551    if (A == AEnd && B == BEnd)
 552       return 0;
 553    if (A == AEnd)
 554       return 1;
 555    if (B == BEnd)
 556       return -1;
 557    if (toupper(*A) < toupper(*B))
 558       return -1;
 559    return 1;
 560 }
 561 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 562                   string::const_iterator B,string::const_iterator BEnd)
 563 {
 564    for (; A != AEnd && B != BEnd; A++, B++)
 565       if (toupper(*A) != toupper(*B))
 566          break;
 567
 568    if (A == AEnd && B == BEnd)
 569       return 0;
 570    if (A == AEnd)
 571       return 1;
 572    if (B == BEnd)
 573       return -1;
 574    if (toupper(*A) < toupper(*B))
 575       return -1;
 576    return 1;
 577 }
 578 #endif
 579                                                                         /*}}}*/
 580 // LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 581 // ---------------------------------------------------------------------
 582 /* The format is like those used in package files and the method
 583    communication system */
 584 string LookupTag(const string &Message,const char *Tag,const char *Default)
 585 {
 586    // Look for a matching tag.
 587    int Length = strlen(Tag);
 588    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 589    {
 590       // Found the tag
 591       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 592       {
 593          // Find the end of line and strip the leading/trailing spaces
 594          string::const_iterator J;
 595          I += Length + 1;
 596          for (; isspace(*I) != 0 && I < Message.end(); I++);
 597          for (J = I; *J != '\n' && J < Message.end(); J++);
 598          for (; J > I && isspace(J[-1]) != 0; J--);
 599
 600          return string(I,J);
 601       }
 602
 603       for (; *I != '\n' && I < Message.end(); I++);
 604    }
 605
 606    // Failed to find a match
 607    if (Default == 0)
 608       return string();
 609    return Default;
 610 }
 611                                                                         /*}}}*/
 612 // StringToBool - Converts a string into a boolean                      /*{{{*/
 613 // ---------------------------------------------------------------------
 614 /* This inspects the string to see if it is true or if it is false and
 615    then returns the result. Several variants of true/false are checked. */
 616 int StringToBool(const string &Text,int Default)
 617 {
 618    char *End;
 619    int Res = strtol(Text.c_str(),&End,0);
 620    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 621       return Res;
 622
 623    // Check for positives
 624    if (strcasecmp(Text.c_str(),"no") == 0 ||
 625        strcasecmp(Text.c_str(),"false") == 0 ||
 626        strcasecmp(Text.c_str(),"without") == 0 ||
 627        strcasecmp(Text.c_str(),"off") == 0 ||
 628        strcasecmp(Text.c_str(),"disable") == 0)
 629       return 0;
 630
 631    // Check for negatives
 632    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 633        strcasecmp(Text.c_str(),"true") == 0 ||
 634        strcasecmp(Text.c_str(),"with") == 0 ||
 635        strcasecmp(Text.c_str(),"on") == 0 ||
 636        strcasecmp(Text.c_str(),"enable") == 0)
 637       return 1;
 638
 639    return Default;
 640 }
 641                                                                         /*}}}*/
 642 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 643 // ---------------------------------------------------------------------
 644 /* This converts a time_t into a string time representation that is
 645    year 2000 compliant and timezone neutral */
 646 string TimeRFC1123(time_t Date)
 647 {
 648    struct tm Conv = *gmtime(&Date);
 649    char Buf[300];
 650
 651    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 652    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 653                           "Aug","Sep","Oct","Nov","Dec"};
 654
 655    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 656            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 657            Conv.tm_min,Conv.tm_sec);
 658    return Buf;
 659 }
 660                                                                         /*}}}*/
 661 // ReadMessages - Read messages from the FD                             /*{{{*/
 662 // ---------------------------------------------------------------------
 663 /* This pulls full messages from the input FD into the message buffer.
 664    It assumes that messages will not pause during transit so no
 665    fancy buffering is used.
 666
 667    In particular: this reads blocks from the input until it believes
 668    that it's run out of input text.  Each block is terminated by a
 669    double newline ('\n' followed by '\n').  As noted below, there is a
 670    bug in this code: it assumes that all the blocks have been read if
 671    it doesn't see additional text in the buffer after the last one is
 672    parsed, which will cause it to lose blocks if the last block
 673    coincides with the end of the buffer.
 674  */
 675 bool ReadMessages(int Fd, vector<string> &List)
 676 {
 677    char Buffer[64000];
 678    char *End = Buffer;
 679    // Represents any left-over from the previous iteration of the
 680    // parse loop.  (i.e., if a message is split across the end
 681    // of the buffer, it goes here)
 682    string PartialMessage;
 683
 684    while (1)
 685    {
 686       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 687       if (Res < 0 && errno == EINTR)
 688          continue;
 689
 690       // Process is dead, this is kind of bad..
 691       if (Res == 0)
 692          return false;
 693
 694       // No data
 695       if (Res < 0 && errno == EAGAIN)
 696          return true;
 697       if (Res < 0)
 698          return false;
 699
 700       End += Res;
 701
 702       // Look for the end of the message
 703       for (char *I = Buffer; I + 1 < End; I++)
 704       {
 705          if (I[0] != '\n' || I[1] != '\n')
 706             continue;
 707
 708          // Pull the message out
 709          string Message(Buffer,I-Buffer);
 710          PartialMessage += Message;
 711
 712          // Fix up the buffer
 713          for (; I < End && *I == '\n'; I++);
 714          End -= I-Buffer;
 715          memmove(Buffer,I,End-Buffer);
 716          I = Buffer;
 717
 718          List.push_back(PartialMessage);
 719          PartialMessage.clear();
 720       }
 721       if (End != Buffer)
 722         {
 723           // If there's text left in the buffer, store it
 724           // in PartialMessage and throw the rest of the buffer
 725           // away.  This allows us to handle messages that
 726           // are longer than the static buffer size.
 727           PartialMessage += string(Buffer, End);
 728           End = Buffer;
 729         }
 730       else
 731         {
 732           // BUG ALERT: if a message block happens to end at a
 733           // multiple of 64000 characters, this will cause it to
 734           // terminate early, leading to a badly formed block and
 735           // probably crashing the method.  However, this is the only
 736           // way we have to find the end of the message block.  I have
 737           // an idea of how to fix this, but it will require changes
 738           // to the protocol (essentially to mark the beginning and
 739           // end of the block).
 740           //
 741           //  -- dburrows 2008-04-02
 742           return true;
 743         }
 744
 745       if (WaitFd(Fd) == false)
 746          return false;
 747    }
 748 }
 749                                                                         /*}}}*/
 750 // MonthConv - Converts a month string into a number                    /*{{{*/
 751 // ---------------------------------------------------------------------
 752 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 753    Made it a bit more robust with a few touppers though. */
 754 static int MonthConv(char *Month)
 755 {
 756    switch (toupper(*Month))
 757    {
 758       case 'A':
 759       return toupper(Month[1]) == 'P'?3:7;
 760       case 'D':
 761       return 11;
 762       case 'F':
 763       return 1;
 764       case 'J':
 765       if (toupper(Month[1]) == 'A')
 766          return 0;
 767       return toupper(Month[2]) == 'N'?5:6;
 768       case 'M':
 769       return toupper(Month[2]) == 'R'?2:4;
 770       case 'N':
 771       return 10;
 772       case 'O':
 773       return 9;
 774       case 'S':
 775       return 8;
 776
 777       // Pretend it is January..
 778       default:
 779       return 0;
 780    }
 781 }
 782                                                                         /*}}}*/
 783 // timegm - Internal timegm function if gnu is not available            /*{{{*/
 784 // ---------------------------------------------------------------------
 785 /* Ripped this evil little function from wget - I prefer the use of
 786    GNU timegm if possible as this technique will have interesting problems
 787    with leap seconds, timezones and other.
 788
 789    Converts struct tm to time_t, assuming the data in tm is UTC rather
 790    than local timezone (mktime assumes the latter).
 791
 792    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
 793    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
 794
 795 /* Turned it into an autoconf check, because GNU is not the only thing which
 796    can provide timegm. -- 2002-09-22, Joel Baker */
 797
 798 #ifndef HAVE_TIMEGM // Now with autoconf!
 799 static time_t timegm(struct tm *t)
 800 {
 801    time_t tl, tb;
 802
 803    tl = mktime (t);
 804    if (tl == -1)
 805       return -1;
 806    tb = mktime (gmtime (&tl));
 807    return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
 808 }
 809 #endif
 810                                                                         /*}}}*/
 811 // StrToTime - Converts a string into a time_t                          /*{{{*/
 812 // ---------------------------------------------------------------------
 813 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
 814    and the C library asctime format. It requires the GNU library function
 815    'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
 816    reason the C library does not provide any such function :< This also
 817    handles the weird, but unambiguous FTP time format. */
 818 bool StrToTime(const string &Val,time_t &Result)
 819 {
 820    struct tm Tm;
 821    char Month[10];
 822    const char *I = Val.c_str();
 823
 824    // Skip the day of the week
 825    for (;*I != 0  && *I != ' '; I++);
 826
 827    // Handle RFC 1123 time
 828    Month[0] = 0;
 829    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 830               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 831    {
 832       // Handle RFC 1036 time
 833       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 834                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 835          Tm.tm_year += 1900;
 836       else
 837       {
 838          // asctime format
 839          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 840                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 841          {
 842             // 'ftp' time
 843             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 844                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 845                return false;
 846             Tm.tm_mon--;
 847          }
 848       }
 849    }
 850
 851    Tm.tm_isdst = 0;
 852    if (Month[0] != 0)
 853       Tm.tm_mon = MonthConv(Month);
 854    Tm.tm_year -= 1900;
 855
 856    // Convert to local time and then to GMT
 857    Result = timegm(&Tm);
 858    return true;
 859 }
 860                                                                         /*}}}*/
 861 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 862 // ---------------------------------------------------------------------
 863 /* This is used in decoding the crazy fixed length string headers in
 864    tar and ar files. */
 865 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 866 {
 867    char S[30];
 868    if (Len >= sizeof(S))
 869       return false;
 870    memcpy(S,Str,Len);
 871    S[Len] = 0;
 872
 873    // All spaces is a zero
 874    Res = 0;
 875    unsigned I;
 876    for (I = 0; S[I] == ' '; I++);
 877    if (S[I] == 0)
 878       return true;
 879
 880    char *End;
 881    Res = strtoul(S,&End,Base);
 882    if (End == S)
 883       return false;
 884
 885    return true;
 886 }
 887                                                                         /*}}}*/
 888 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 889 // ---------------------------------------------------------------------
 890 /* Helper for Hex2Num */
 891 static int HexDigit(int c)
 892 {
 893    if (c >= '0' && c <= '9')
 894       return c - '0';
 895    if (c >= 'a' && c <= 'f')
 896       return c - 'a' + 10;
 897    if (c >= 'A' && c <= 'F')
 898       return c - 'A' + 10;
 899    return 0;
 900 }
 901                                                                         /*}}}*/
 902 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 903 // ---------------------------------------------------------------------
 904 /* The length of the buffer must be exactly 1/2 the length of the string. */
 905 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 906 {
 907    if (Str.length() != Length*2)
 908       return false;
 909
 910    // Convert each digit. We store it in the same order as the string
 911    int J = 0;
 912    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 913    {
 914       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 915          return false;
 916
 917       Num[J] = HexDigit(I[0]) << 4;
 918       Num[J] += HexDigit(I[1]);
 919    }
 920
 921    return true;
 922 }
 923                                                                         /*}}}*/
 924 // TokSplitString - Split a string up by a given token                  /*{{{*/
 925 // ---------------------------------------------------------------------
 926 /* This is intended to be a faster splitter, it does not use dynamic
 927    memories. Input is changed to insert nulls at each token location. */
 928 bool TokSplitString(char Tok,char *Input,char **List,
 929                     unsigned long ListMax)
 930 {
 931    // Strip any leading spaces
 932    char *Start = Input;
 933    char *Stop = Start + strlen(Start);
 934    for (; *Start != 0 && isspace(*Start) != 0; Start++);
 935
 936    unsigned long Count = 0;
 937    char *Pos = Start;
 938    while (Pos != Stop)
 939    {
 940       // Skip to the next Token
 941       for (; Pos != Stop && *Pos != Tok; Pos++);
 942
 943       // Back remove spaces
 944       char *End = Pos;
 945       for (; End > Start && (End[-1] == Tok || isspace(End[-1]) != 0); End--);
 946       *End = 0;
 947
 948       List[Count++] = Start;
 949       if (Count >= ListMax)
 950       {
 951          List[Count-1] = 0;
 952          return false;
 953       }
 954
 955       // Advance pos
 956       for (; Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0); Pos++);
 957       Start = Pos;
 958    }
 959
 960    List[Count] = 0;
 961    return true;
 962 }
 963                                                                         /*}}}*/
 964 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
 965 // ---------------------------------------------------------------------
 966 /* */
 967 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
 968                       const char **ListEnd)
 969 {
 970    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 971       R->Hit = false;
 972
 973    unsigned long Hits = 0;
 974    for (; ListBegin != ListEnd; ListBegin++)
 975    {
 976       // Check if the name is a regex
 977       const char *I;
 978       bool Regex = true;
 979       for (I = *ListBegin; *I != 0; I++)
 980          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
 981             break;
 982       if (*I == 0)
 983          Regex = false;
 984
 985       // Compile the regex pattern
 986       regex_t Pattern;
 987       if (Regex == true)
 988          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
 989                      REG_NOSUB) != 0)
 990             Regex = false;
 991
 992       // Search the list
 993       bool Done = false;
 994       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 995       {
 996          if (R->Str[0] == 0)
 997             continue;
 998
 999          if (strcasecmp(R->Str,*ListBegin) != 0)
1000          {
1001             if (Regex == false)
1002                continue;
1003             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1004                continue;
1005          }
1006          Done = true;
1007
1008          if (R->Hit == false)
1009             Hits++;
1010
1011          R->Hit = true;
1012       }
1013
1014       if (Regex == true)
1015          regfree(&Pattern);
1016
1017       if (Done == false)
1018          _error->Warning(_("Selection %s not found"),*ListBegin);
1019    }
1020
1021    return Hits;
1022 }
1023                                                                         /*}}}*/
1024 // ioprintf - C format string outputter to C++ iostreams                /*{{{*/
1025 // ---------------------------------------------------------------------
/* Formats the printf style arguments and writes the result to out. This is
   used to make the internationalization strings easier to translate and to
   allow reordering of parameters. The text is rendered into a fixed 4096
   byte buffer, so longer output is silently truncated. */
void ioprintf(std::ostream &out,const char *format,...)
{
   // Start out empty so a failing vsnprintf cannot make us print garbage
   char S[4096];
   S[0] = '\0';

   // sprintf the description
   va_list args;
   va_start(args,format);
   vsnprintf(S,sizeof(S),format,args);
   // Every va_start has to be matched by a va_end before returning
   va_end(args);

   out << S;
}
1038                                                                         /*}}}*/
1039 // strprintf - C format string outputter to C++ strings                 /*{{{*/
1040 // ---------------------------------------------------------------------
/* Formats the printf style arguments and stores the result in out,
   replacing its previous contents. This is used to make the
   internationalization strings easier to translate and to allow reordering
   of parameters. The text is rendered into a fixed 4096 byte buffer, so
   longer output is silently truncated. */
void strprintf(std::string &out,const char *format,...)
{
   // Start out empty so a failing vsnprintf cannot hand back garbage
   char S[4096];
   S[0] = '\0';

   // sprintf the description
   va_list args;
   va_start(args,format);
   vsnprintf(S,sizeof(S),format,args);
   // Every va_start has to be matched by a va_end before returning
   va_end(args);

   out.assign(S);
}
1053                                                                         /*}}}*/
1054 // safe_snprintf - Safer snprintf                                       /*{{{*/
1055 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string, ready to be passed as Buffer to the
   next call. If the output did not fit it is truncated and End is returned.
   After a successful or truncated format the string is null terminated;
   nothing is written at all when Buffer >= End. Should the underlying
   vsnprintf report an error End is returned as well, which makes any
   chained follow-up calls no-ops. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room left (or a bogus range): do not even start on the arguments
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   // Keep the result signed, vsnprintf reports errors as a negative value
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Error, or the output needed at least as much room as we have
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1075                                                                         /*}}}*/
1076
1077 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1078 // ---------------------------------------------------------------------
/* Maps the upper case letters 'A'-'Z' to their lower case counterpart by
   plain arithmetic, without consulting the locale. Every other value is
   returned unchanged. */
int tolower_ascii(int c)
{
   // Distance between an upper case letter and its lower case form
   int const CaseOffset = 32;
   bool const IsUpper = (c >= 'A' && c <= 'Z');
   return IsUpper ? c + CaseOffset : c;
}
1086                                                                         /*}}}*/
1087
// CheckDomainList - See if Host is in a , separated list               /*{{{*/
1089 // ---------------------------------------------------------------------
1090 /* The domain list is a comma seperate list of domains that are suffix
1091    matched against the argument */
1092 bool CheckDomainList(const string &Host,const string &List)
1093 {
1094    string::const_iterator Start = List.begin();
1095    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1096    {
1097       if (Cur < List.end() && *Cur != ',')
1098          continue;
1099
1100       // Match the end of the string..
1101       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1102           Cur - Start != 0 &&
1103           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1104          return true;
1105
1106       Start = Cur + 1;
1107    }
1108    return false;
1109 }
1110                                                                         /*}}}*/
1111
1112 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1113 // ---------------------------------------------------------------------
1114 /* This parses the URI into all of its components */
1115 void URI::CopyFrom(const string &U)
1116 {
1117    string::const_iterator I = U.begin();
1118
1119    // Locate the first colon, this separates the scheme
1120    for (; I < U.end() && *I != ':' ; I++);
1121    string::const_iterator FirstColon = I;
1122
1123    /* Determine if this is a host type URI with a leading double //
1124       and then search for the first single / */
1125    string::const_iterator SingleSlash = I;
1126    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1127       SingleSlash += 3;
1128
1129    /* Find the / indicating the end of the hostname, ignoring /'s in the
1130       square brackets */
1131    bool InBracket = false;
1132    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1133    {
1134       if (*SingleSlash == '[')
1135          InBracket = true;
1136       if (InBracket == true && *SingleSlash == ']')
1137          InBracket = false;
1138    }
1139
1140    if (SingleSlash > U.end())
1141       SingleSlash = U.end();
1142
1143    // We can now write the access and path specifiers
1144    Access.assign(U.begin(),FirstColon);
1145    if (SingleSlash != U.end())
1146       Path.assign(SingleSlash,U.end());
1147    if (Path.empty() == true)
1148       Path = "/";
1149
1150    // Now we attempt to locate a user:pass@host fragment
1151    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1152       FirstColon += 3;
1153    else
1154       FirstColon += 1;
1155    if (FirstColon >= U.end())
1156       return;
1157
1158    if (FirstColon > SingleSlash)
1159       FirstColon = SingleSlash;
1160
1161    // Find the colon...
1162    I = FirstColon + 1;
1163    if (I > SingleSlash)
1164       I = SingleSlash;
1165    for (; I < SingleSlash && *I != ':'; I++);
1166    string::const_iterator SecondColon = I;
1167
1168    // Search for the @ after the colon
1169    for (; I < SingleSlash && *I != '@'; I++);
1170    string::const_iterator At = I;
1171
1172    // Now write the host and user/pass
1173    if (At == SingleSlash)
1174    {
1175       if (FirstColon < SingleSlash)
1176          Host.assign(FirstColon,SingleSlash);
1177    }
1178    else
1179    {
1180       Host.assign(At+1,SingleSlash);
1181       User.assign(FirstColon,SecondColon);
1182       if (SecondColon < At)
1183          Password.assign(SecondColon+1,At);
1184    }
1185
1186    // Now we parse the RFC 2732 [] hostnames.
1187    unsigned long PortEnd = 0;
1188    InBracket = false;
1189    for (unsigned I = 0; I != Host.length();)
1190    {
1191       if (Host[I] == '[')
1192       {
1193          InBracket = true;
1194          Host.erase(I,1);
1195          continue;
1196       }
1197
1198       if (InBracket == true && Host[I] == ']')
1199       {
1200          InBracket = false;
1201          Host.erase(I,1);
1202          PortEnd = I;
1203          continue;
1204       }
1205       I++;
1206    }
1207
1208    // Tsk, weird.
1209    if (InBracket == true)
1210    {
1211       Host.clear();
1212       return;
1213    }
1214
1215    // Now we parse off a port number from the hostname
1216    Port = 0;
1217    string::size_type Pos = Host.rfind(':');
1218    if (Pos == string::npos || Pos < PortEnd)
1219       return;
1220
1221    Port = atoi(string(Host,Pos+1).c_str());
1222    Host.assign(Host,0,Pos);
1223 }
1224                                                                         /*}}}*/
1225 // URI::operator string - Convert the URI to a string                   /*{{{*/
1226 // ---------------------------------------------------------------------
1227 /* */
1228 URI::operator string()
1229 {
1230    string Res;
1231
1232    if (Access.empty() == false)
1233       Res = Access + ':';
1234
1235    if (Host.empty() == false)
1236    {
1237       if (Access.empty() == false)
1238          Res += "//";
1239
1240       if (User.empty() == false)
1241       {
1242          Res +=  User;
1243          if (Password.empty() == false)
1244             Res += ":" + Password;
1245          Res += "@";
1246       }
1247
1248       // Add RFC 2732 escaping characters
1249       if (Access.empty() == false &&
1250           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1251          Res += '[' + Host + ']';
1252       else
1253          Res += Host;
1254
1255       if (Port != 0)
1256       {
1257          char S[30];
1258          sprintf(S,":%u",Port);
1259          Res += S;
1260       }
1261    }
1262
1263    if (Path.empty() == false)
1264    {
1265       if (Path[0] != '/')
1266          Res += "/" + Path;
1267       else
1268          Res += Path;
1269    }
1270
1271    return Res;
1272 }
1273                                                                         /*}}}*/
1274 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1275 // ---------------------------------------------------------------------
1276 /* */
1277 string URI::SiteOnly(const string &URI)
1278 {
1279    ::URI U(URI);
1280    U.User.clear();
1281    U.Password.clear();
1282    U.Path.clear();
1283    U.Port = 0;
1284    return U;
1285 }
1286                                                                         /*}}}*/