apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before displaying information to the end user */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf, *outptr;
  47   size_t insize, outsize;
  48
  49   cd = iconv_open(codeset, "UTF-8");
  50   if (cd == (iconv_t)(-1)) {
  51      // Something went wrong
  52      if (errno == EINVAL)
  53         _error->Error("conversion from 'UTF-8' to '%s' not available",
  54                codeset);
  55      else
  56         perror("iconv_open");
  57
  58      // Clean the destination string
  59      *dest = "";
  60
  61      return false;
  62   }
  63
  64   insize = outsize = orig.size();
  65   inbuf = orig.data();
  66   inptr = (char *)inbuf;
  67   outbuf = new char[insize+1];
  68   outptr = outbuf;
  69
  70   iconv(cd, &inptr, &insize, &outptr, &outsize);
  71   *outptr = '\0';
  72
  73   *dest = outbuf;
  74   delete[] outbuf;
  75
  76   iconv_close(cd);
  77
  78   return true;
  79 }
  80                                                                         /*}}}*/
  81 // strstrip - Remove white space from the front and back of a string    /*{{{*/
  82 // ---------------------------------------------------------------------
  83 /* This is handy to use when parsing a file. It also removes \n's left
  84    over from fgets and company */
  85 char *_strstrip(char *String)
  86 {
  87    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
  88
  89    if (*String == 0)
  90       return String;
  91
  92    char *End = String + strlen(String) - 1;
  93    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
  94                                *End == '\r'); End--);
  95    End++;
  96    *End = 0;
  97    return String;
  98 };
  99                                                                         /*}}}*/
 100 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 101 // ---------------------------------------------------------------------
 102 /* */
 103 char *_strtabexpand(char *String,size_t Len)
 104 {
 105    for (char *I = String; I != I + Len && *I != 0; I++)
 106    {
 107       if (*I != '\t')
 108          continue;
 109       if (I + 8 > String + Len)
 110       {
 111          *I = 0;
 112          return String;
 113       }
 114
 115       /* Assume the start of the string is 0 and find the next 8 char
 116          division */
 117       int Len;
 118       if (String == I)
 119          Len = 1;
 120       else
 121          Len = 8 - ((String - I) % 8);
 122       Len -= 2;
 123       if (Len <= 0)
 124       {
 125          *I = ' ';
 126          continue;
 127       }
 128
 129       memmove(I + Len,I + 1,strlen(I) + 1);
 130       for (char *J = I; J + Len != I; *I = ' ', I++);
 131    }
 132    return String;
 133 }
 134                                                                         /*}}}*/
 135 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 136 // ---------------------------------------------------------------------
 137 /* This grabs a single word, converts any % escaped characters to their
 138    proper values and advances the pointer. Double quotes are understood
 139    and stripped out as well. This is for URI/URL parsing. It also can
 140    understand [] brackets.*/
 141 bool ParseQuoteWord(const char *&String,string &Res)
 142 {
 143    // Skip leading whitespace
 144    const char *C = String;
 145    for (;*C != 0 && *C == ' '; C++);
 146    if (*C == 0)
 147       return false;
 148
 149    // Jump to the next word
 150    for (;*C != 0 && isspace(*C) == 0; C++)
 151    {
 152       if (*C == '"')
 153       {
 154          for (C++; *C != 0 && *C != '"'; C++);
 155          if (*C == 0)
 156             return false;
 157       }
 158       if (*C == '[')
 159       {
 160          for (C++; *C != 0 && *C != ']'; C++);
 161          if (*C == 0)
 162             return false;
 163       }
 164    }
 165
 166    // Now de-quote characters
 167    char Buffer[1024];
 168    char Tmp[3];
 169    const char *Start = String;
 170    char *I;
 171    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 172    {
 173       if (*Start == '%' && Start + 2 < C)
 174       {
 175          Tmp[0] = Start[1];
 176          Tmp[1] = Start[2];
 177          Tmp[2] = 0;
 178          *I = (char)strtol(Tmp,0,16);
 179          Start += 3;
 180          continue;
 181       }
 182       if (*Start != '"')
 183          *I = *Start;
 184       else
 185          I--;
 186       Start++;
 187    }
 188    *I = 0;
 189    Res = Buffer;
 190
 191    // Skip ending white space
 192    for (;*C != 0 && isspace(*C) != 0; C++);
 193    String = C;
 194    return true;
 195 }
 196                                                                         /*}}}*/
 197 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 198 // ---------------------------------------------------------------------
 199 /* This expects a series of space separated strings enclosed in ""'s.
 200    It concatenates the ""'s into a single string. */
 201 bool ParseCWord(const char *&String,string &Res)
 202 {
 203    // Skip leading whitespace
 204    const char *C = String;
 205    for (;*C != 0 && *C == ' '; C++);
 206    if (*C == 0)
 207       return false;
 208
 209    char Buffer[1024];
 210    char *Buf = Buffer;
 211    if (strlen(String) >= sizeof(Buffer))
 212        return false;
 213
 214    for (; *C != 0; C++)
 215    {
 216       if (*C == '"')
 217       {
 218          for (C++; *C != 0 && *C != '"'; C++)
 219             *Buf++ = *C;
 220
 221          if (*C == 0)
 222             return false;
 223
 224          continue;
 225       }
 226
 227       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 228          continue;
 229       if (isspace(*C) == 0)
 230          return false;
 231       *Buf++ = ' ';
 232    }
 233    *Buf = 0;
 234    Res = Buffer;
 235    String = C;
 236    return true;
 237 }
 238                                                                         /*}}}*/
 239 // QuoteString - Convert a string into quoted form                      /*{{{*/
 240 // ---------------------------------------------------------------------
 241 /* */
 242 string QuoteString(const string &Str, const char *Bad)
 243 {
 244    string Res;
 245    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 246    {
 247       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 248           *I <= 0x20 || *I >= 0x7F)
 249       {
 250          char Buf[10];
 251          sprintf(Buf,"%%%02x",(int)*I);
 252          Res += Buf;
 253       }
 254       else
 255          Res += *I;
 256    }
 257    return Res;
 258 }
 259                                                                         /*}}}*/
 260 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 261 // ---------------------------------------------------------------------
 262 /* This undoes QuoteString */
 263 string DeQuoteString(const string &Str)
 264 {
 265    string Res;
 266    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 267    {
 268       if (*I == '%' && I + 2 < Str.end())
 269       {
 270          char Tmp[3];
 271          Tmp[0] = I[1];
 272          Tmp[1] = I[2];
 273          Tmp[2] = 0;
 274          Res += (char)strtol(Tmp,0,16);
 275          I += 2;
 276          continue;
 277       }
 278       else
 279          Res += *I;
 280    }
 281    return Res;
 282 }
 283
 284                                                                         /*}}}*/
 285 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 286 // ---------------------------------------------------------------------
 287 /* A max of 4 digits are shown before conversion to the next highest unit.
 288    The max length of the string will be 5 chars unless the size is > 10
 289    YottaBytes (E24) */
 290 string SizeToStr(double Size)
 291 {
 292    char S[300];
 293    double ASize;
 294    if (Size >= 0)
 295       ASize = Size;
 296    else
 297       ASize = -1*Size;
 298
 299    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 300       ExaBytes, ZettaBytes, YottaBytes */
 301    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 302    int I = 0;
 303    while (I <= 8)
 304    {
 305       if (ASize < 100 && I != 0)
 306       {
 307          sprintf(S,"%.1f%c",ASize,Ext[I]);
 308          break;
 309       }
 310
 311       if (ASize < 10000)
 312       {
 313          sprintf(S,"%.0f%c",ASize,Ext[I]);
 314          break;
 315       }
 316       ASize /= 1000.0;
 317       I++;
 318    }
 319
 320    return S;
 321 }
 322                                                                         /*}}}*/
 323 // TimeToStr - Convert the time into a string                           /*{{{*/
 324 // ---------------------------------------------------------------------
 325 /* Converts a number of seconds to a hms format */
 326 string TimeToStr(unsigned long Sec)
 327 {
 328    char S[300];
 329
 330    while (1)
 331    {
 332       if (Sec > 60*60*24)
 333       {
 334          //d means days, h means hours, min means minutes, s means seconds
 335          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 336          break;
 337       }
 338
 339       if (Sec > 60*60)
 340       {
 341          //h means hours, min means minutes, s means seconds
 342          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 343          break;
 344       }
 345
 346       if (Sec > 60)
 347       {
 348          //min means minutes, s means seconds
 349          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 350          break;
 351       }
 352
 353       //s means seconds
 354       sprintf(S,_("%lis"),Sec);
 355       break;
 356    }
 357
 358    return S;
 359 }
 360                                                                         /*}}}*/
 361 // SubstVar - Substitute a string for another string                    /*{{{*/
 362 // ---------------------------------------------------------------------
 363 /* This replaces all occurrences of Subst with Contents in Str. */
 364 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 365 {
 366    string::size_type Pos = 0;
 367    string::size_type OldPos = 0;
 368    string Temp;
 369
 370    while (OldPos < Str.length() &&
 371           (Pos = Str.find(Subst,OldPos)) != string::npos)
 372    {
 373       Temp += string(Str,OldPos,Pos) + Contents;
 374       OldPos = Pos + Subst.length();
 375    }
 376
 377    if (OldPos == 0)
 378       return Str;
 379
 380    return Temp + string(Str,OldPos);
 381 }
 382
 383 string SubstVar(string Str,const struct SubstVar *Vars)
 384 {
 385    for (; Vars->Subst != 0; Vars++)
 386       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 387    return Str;
 388 }
 389                                                                         /*}}}*/
 390 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 391 // ---------------------------------------------------------------------
 392 /* This converts a URI into a safe filename. It quotes all unsafe characters
 393    and converts / to _ and removes the scheme identifier. The resulting
 394    file name should be unique and never occur again for a different file */
 395 string URItoFileName(const string &URI)
 396 {
 397    // Nuke 'sensitive' items
 398    ::URI U(URI);
 399    U.User.clear();
 400    U.Password.clear();
 401    U.Access.clear();
 402
 403    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 404    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 405    replace(NewURI.begin(),NewURI.end(),'/','_');
 406    return NewURI;
 407 }
 408                                                                         /*}}}*/
 409 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 410 // ---------------------------------------------------------------------
 411 /* This routine performs a base64 transformation on a string. It was ripped
 412    from wget and then patched and bug fixed.
 413
 414    This spec can be found in rfc2045 */
 415 string Base64Encode(const string &S)
 416 {
 417    // Conversion table.
 418    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 419                           'I','J','K','L','M','N','O','P',
 420                           'Q','R','S','T','U','V','W','X',
 421                           'Y','Z','a','b','c','d','e','f',
 422                           'g','h','i','j','k','l','m','n',
 423                           'o','p','q','r','s','t','u','v',
 424                           'w','x','y','z','0','1','2','3',
 425                           '4','5','6','7','8','9','+','/'};
 426
 427    // Pre-allocate some space
 428    string Final;
 429    Final.reserve((4*S.length() + 2)/3 + 2);
 430
 431    /* Transform the 3x8 bits to 4x6 bits, as required by
 432       base64.  */
 433    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 434    {
 435       char Bits[3] = {0,0,0};
 436       Bits[0] = I[0];
 437       if (I + 1 < S.end())
 438          Bits[1] = I[1];
 439       if (I + 2 < S.end())
 440          Bits[2] = I[2];
 441
 442       Final += tbl[Bits[0] >> 2];
 443       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 444
 445       if (I + 1 >= S.end())
 446          break;
 447
 448       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 449
 450       if (I + 2 >= S.end())
 451          break;
 452
 453       Final += tbl[Bits[2] & 0x3f];
 454    }
 455
 456    /* Apply the padding elements, this tells how many bytes the remote
 457       end should discard */
 458    if (S.length() % 3 == 2)
 459       Final += '=';
 460    if (S.length() % 3 == 1)
 461       Final += "==";
 462
 463    return Final;
 464 }
 465                                                                         /*}}}*/
 466 // stringcmp - Arbitrary string compare                                 /*{{{*/
 467 // ---------------------------------------------------------------------
 468 /* This safely compares two non-null terminated strings of arbitrary
 469    length */
 470 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 471 {
 472    for (; A != AEnd && B != BEnd; A++, B++)
 473       if (*A != *B)
 474          break;
 475
 476    if (A == AEnd && B == BEnd)
 477       return 0;
 478    if (A == AEnd)
 479       return 1;
 480    if (B == BEnd)
 481       return -1;
 482    if (*A < *B)
 483       return -1;
 484    return 1;
 485 }
 486
 487 #if __GNUC__ >= 3
 488 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 489               const char *B,const char *BEnd)
 490 {
 491    for (; A != AEnd && B != BEnd; A++, B++)
 492       if (*A != *B)
 493          break;
 494
 495    if (A == AEnd && B == BEnd)
 496       return 0;
 497    if (A == AEnd)
 498       return 1;
 499    if (B == BEnd)
 500       return -1;
 501    if (*A < *B)
 502       return -1;
 503    return 1;
 504 }
 505 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 506               string::const_iterator B,string::const_iterator BEnd)
 507 {
 508    for (; A != AEnd && B != BEnd; A++, B++)
 509       if (*A != *B)
 510          break;
 511
 512    if (A == AEnd && B == BEnd)
 513       return 0;
 514    if (A == AEnd)
 515       return 1;
 516    if (B == BEnd)
 517       return -1;
 518    if (*A < *B)
 519       return -1;
 520    return 1;
 521 }
 522 #endif
 523                                                                         /*}}}*/
 524 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 525 // ---------------------------------------------------------------------
 526 /* */
 527 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 528 {
 529    for (; A != AEnd && B != BEnd; A++, B++)
 530       if (toupper(*A) != toupper(*B))
 531          break;
 532
 533    if (A == AEnd && B == BEnd)
 534       return 0;
 535    if (A == AEnd)
 536       return 1;
 537    if (B == BEnd)
 538       return -1;
 539    if (toupper(*A) < toupper(*B))
 540       return -1;
 541    return 1;
 542 }
 543 #if __GNUC__ >= 3
 544 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 545                   const char *B,const char *BEnd)
 546 {
 547    for (; A != AEnd && B != BEnd; A++, B++)
 548       if (toupper(*A) != toupper(*B))
 549          break;
 550
 551    if (A == AEnd && B == BEnd)
 552       return 0;
 553    if (A == AEnd)
 554       return 1;
 555    if (B == BEnd)
 556       return -1;
 557    if (toupper(*A) < toupper(*B))
 558       return -1;
 559    return 1;
 560 }
 561 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 562                   string::const_iterator B,string::const_iterator BEnd)
 563 {
 564    for (; A != AEnd && B != BEnd; A++, B++)
 565       if (toupper(*A) != toupper(*B))
 566          break;
 567
 568    if (A == AEnd && B == BEnd)
 569       return 0;
 570    if (A == AEnd)
 571       return 1;
 572    if (B == BEnd)
 573       return -1;
 574    if (toupper(*A) < toupper(*B))
 575       return -1;
 576    return 1;
 577 }
 578 #endif
 579                                                                         /*}}}*/
 580 // LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 581 // ---------------------------------------------------------------------
 582 /* The format is like those used in package files and the method
 583    communication system */
 584 string LookupTag(const string &Message,const char *Tag,const char *Default)
 585 {
 586    // Look for a matching tag.
 587    int Length = strlen(Tag);
 588    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 589    {
 590       // Found the tag
 591       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 592       {
 593          // Find the end of line and strip the leading/trailing spaces
 594          string::const_iterator J;
 595          I += Length + 1;
 596          for (; isspace(*I) != 0 && I < Message.end(); I++);
 597          for (J = I; *J != '\n' && J < Message.end(); J++);
 598          for (; J > I && isspace(J[-1]) != 0; J--);
 599
 600          return string(I,J);
 601       }
 602
 603       for (; *I != '\n' && I < Message.end(); I++);
 604    }
 605
 606    // Failed to find a match
 607    if (Default == 0)
 608       return string();
 609    return Default;
 610 }
 611                                                                         /*}}}*/
 612 // StringToBool - Converts a string into a boolean                      /*{{{*/
 613 // ---------------------------------------------------------------------
 614 /* This inspects the string to see if it is true or if it is false and
 615    then returns the result. Several variants on true/false are checked. */
 616 int StringToBool(const string &Text,int Default)
 617 {
 618    char *End;
 619    int Res = strtol(Text.c_str(),&End,0);
 620    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 621       return Res;
 622
 623    // Check for positives
 624    if (strcasecmp(Text.c_str(),"no") == 0 ||
 625        strcasecmp(Text.c_str(),"false") == 0 ||
 626        strcasecmp(Text.c_str(),"without") == 0 ||
 627        strcasecmp(Text.c_str(),"off") == 0 ||
 628        strcasecmp(Text.c_str(),"disable") == 0)
 629       return 0;
 630
 631    // Check for negatives
 632    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 633        strcasecmp(Text.c_str(),"true") == 0 ||
 634        strcasecmp(Text.c_str(),"with") == 0 ||
 635        strcasecmp(Text.c_str(),"on") == 0 ||
 636        strcasecmp(Text.c_str(),"enable") == 0)
 637       return 1;
 638
 639    return Default;
 640 }
 641                                                                         /*}}}*/
 642 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 643 // ---------------------------------------------------------------------
 644 /* This converts a time_t into a string time representation that is
 645    year 2000 compliant and timezone neutral */
 646 string TimeRFC1123(time_t Date)
 647 {
 648    struct tm Conv = *gmtime(&Date);
 649    char Buf[300];
 650
 651    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 652    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 653                           "Aug","Sep","Oct","Nov","Dec"};
 654
 655    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 656            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 657            Conv.tm_min,Conv.tm_sec);
 658    return Buf;
 659 }
 660                                                                         /*}}}*/
 661 // ReadMessages - Read messages from the FD                             /*{{{*/
 662 // ---------------------------------------------------------------------
 663 /* This pulls full messages from the input FD into the message buffer.
 664    It assumes that messages will not pause during transit so no
 665    fancy buffering is used.
 666
 667    In particular: this reads blocks from the input until it believes
 668    that it's run out of input text.  Each block is terminated by a
 669    double newline ('\n' followed by '\n').  As noted below, there is a
 670    bug in this code: it assumes that all the blocks have been read if
 671    it doesn't see additional text in the buffer after the last one is
 672    parsed, which will cause it to lose blocks if the last block
 673    coincides with the end of the buffer.
 674  */
 675 bool ReadMessages(int Fd, vector<string> &List)
 676 {
 677    char Buffer[64000];
 678    char *End = Buffer;
 679    // Represents any left-over from the previous iteration of the
 680    // parse loop.  (i.e., if a message is split across the end
 681    // of the buffer, it goes here)
 682    string PartialMessage;
 683
 684    while (1)
 685    {
 686       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 687       if (Res < 0 && errno == EINTR)
 688          continue;
 689
 690       // Process is dead, this is kind of bad..
 691       if (Res == 0)
 692          return false;
 693
 694       // No data
 695       if (Res < 0 && errno == EAGAIN)
 696          return true;
 697       if (Res < 0)
 698          return false;
 699
 700       End += Res;
 701
 702       // Look for the end of the message
 703       for (char *I = Buffer; I + 1 < End; I++)
 704       {
 705          if (I[0] != '\n' || I[1] != '\n')
 706             continue;
 707
 708          // Pull the message out
 709          string Message(Buffer,I-Buffer);
 710          PartialMessage += Message;
 711
 712          // Fix up the buffer
 713          for (; I < End && *I == '\n'; I++);
 714          End -= I-Buffer;
 715          memmove(Buffer,I,End-Buffer);
 716          I = Buffer;
 717
 718          List.push_back(PartialMessage);
 719          PartialMessage.clear();
 720       }
 721       if (End != Buffer)
 722         {
 723           // If there's text left in the buffer, store it
 724           // in PartialMessage and throw the rest of the buffer
 725           // away.  This allows us to handle messages that
 726           // are longer than the static buffer size.
 727           PartialMessage += string(Buffer, End);
 728           End = Buffer;
 729         }
 730       else
 731         {
 732           // BUG ALERT: if a message block happens to end at a
 733           // multiple of 64000 characters, this will cause it to
 734           // terminate early, leading to a badly formed block and
 735           // probably crashing the method.  However, this is the only
 736           // way we have to find the end of the message block.  I have
 737           // an idea of how to fix this, but it will require changes
 738           // to the protocol (essentially to mark the beginning and
 739           // end of the block).
 740           //
 741           //  -- dburrows 2008-04-02
 742           return true;
 743         }
 744
 745       if (WaitFd(Fd) == false)
 746          return false;
 747    }
 748 }
 749                                                                         /*}}}*/
 750 // MonthConv - Converts a month string into a number                    /*{{{*/
 751 // ---------------------------------------------------------------------
 752 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 753    Made it a bit more robust with a few touppers though. */
 754 static int MonthConv(char *Month)
 755 {
 756    switch (toupper(*Month))
 757    {
 758       case 'A':
 759       return toupper(Month[1]) == 'P'?3:7;
 760       case 'D':
 761       return 11;
 762       case 'F':
 763       return 1;
 764       case 'J':
 765       if (toupper(Month[1]) == 'A')
 766          return 0;
 767       return toupper(Month[2]) == 'N'?5:6;
 768       case 'M':
 769       return toupper(Month[2]) == 'R'?2:4;
 770       case 'N':
 771       return 10;
 772       case 'O':
 773       return 9;
 774       case 'S':
 775       return 8;
 776
 777       // Pretend it is January..
 778       default:
 779       return 0;
 780    }
 781 }
 782                                                                         /*}}}*/
 783 // timegm - Internal timegm function if gnu is not available            /*{{{*/
 784 // ---------------------------------------------------------------------
 785 /* Ripped this evil little function from wget - I prefer the use of
 786    GNU timegm if possible as this technique will have interesting problems
 787    with leap seconds, timezones and other.
 788
 789    Converts struct tm to time_t, assuming the data in tm is UTC rather
 790    than local timezone (mktime assumes the latter).
 791
 792    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
 793    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
 794
 795 /* Turned it into an autoconf check, because GNU is not the only thing which
 796    can provide timegm. -- 2002-09-22, Joel Baker */
 797
 798 #ifndef HAVE_TIMEGM // Now with autoconf!
 799 static time_t timegm(struct tm *t)
 800 {
 801    time_t tl, tb;
 802
 803    tl = mktime (t);
 804    if (tl == -1)
 805       return -1;
 806    tb = mktime (gmtime (&tl));
 807    return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
 808 }
 809 #endif
 810                                                                         /*}}}*/
 811 // StrToTime - Converts a string into a time_t                          /*{{{*/
 812 // ---------------------------------------------------------------------
 813 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
 814    and the C library asctime format. It requires the GNU library function
 815    'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
 816    reason the C library does not provide any such function :< This also
 817    handles the weird, but unambiguous FTP time format*/
 818 bool StrToTime(const string &Val,time_t &Result)
 819 {
 820    struct tm Tm;
 821    char Month[10];
 822    const char *I = Val.c_str();
 823
 824    // Skip the day of the week
 825    for (;*I != 0  && *I != ' '; I++);
 826
 827    // Handle RFC 1123 time
 828    Month[0] = 0;
 829    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 830               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 831    {
 832       // Handle RFC 1036 time
 833       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 834                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 835          Tm.tm_year += 1900;
 836       else
 837       {
 838          // asctime format
 839          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 840                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 841          {
 842             // 'ftp' time
 843             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 844                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 845                return false;
 846             Tm.tm_mon--;
 847          }
 848       }
 849    }
 850
 851    Tm.tm_isdst = 0;
 852    if (Month[0] != 0)
 853       Tm.tm_mon = MonthConv(Month);
 854    Tm.tm_year -= 1900;
 855
 856    // Convert to local time and then to GMT
 857    Result = timegm(&Tm);
 858    return true;
 859 }
 860                                                                         /*}}}*/
 861 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 862 // ---------------------------------------------------------------------
 863 /* This is used in decoding the crazy fixed length string headers in
 864    tar and ar files. */
 865 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 866 {
 867    char S[30];
 868    if (Len >= sizeof(S))
 869       return false;
 870    memcpy(S,Str,Len);
 871    S[Len] = 0;
 872
 873    // All spaces is a zero
 874    Res = 0;
 875    unsigned I;
 876    for (I = 0; S[I] == ' '; I++);
 877    if (S[I] == 0)
 878       return true;
 879
 880    char *End;
 881    Res = strtoul(S,&End,Base);
 882    if (End == S)
 883       return false;
 884
 885    return true;
 886 }
 887                                                                         /*}}}*/
 888 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 889 // ---------------------------------------------------------------------
 890 /* Helper for Hex2Num */
 891 static int HexDigit(int c)
 892 {
 893    if (c >= '0' && c <= '9')
 894       return c - '0';
 895    if (c >= 'a' && c <= 'f')
 896       return c - 'a' + 10;
 897    if (c >= 'A' && c <= 'F')
 898       return c - 'A' + 10;
 899    return 0;
 900 }
 901                                                                         /*}}}*/
 902 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 903 // ---------------------------------------------------------------------
 904 /* The length of the buffer must be exactly 1/2 the length of the string. */
 905 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 906 {
 907    if (Str.length() != Length*2)
 908       return false;
 909
 910    // Convert each digit. We store it in the same order as the string
 911    int J = 0;
 912    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 913    {
 914       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 915          return false;
 916
 917       Num[J] = HexDigit(I[0]) << 4;
 918       Num[J] += HexDigit(I[1]);
 919    }
 920
 921    return true;
 922 }
 923                                                                         /*}}}*/
 924 // TokSplitString - Split a string up by a given token                  /*{{{*/
 925 // ---------------------------------------------------------------------
 926 /* This is intended to be a faster splitter, it does not use dynamic
 927    memories. Input is changed to insert nulls at each token location. */
 928 bool TokSplitString(char Tok,char *Input,char **List,
 929                     unsigned long ListMax)
 930 {
 931    // Strip any leading spaces
 932    char *Start = Input;
 933    char *Stop = Start + strlen(Start);
 934    for (; *Start != 0 && isspace(*Start) != 0; Start++);
 935
 936    unsigned long Count = 0;
 937    char *Pos = Start;
 938    while (Pos != Stop)
 939    {
 940       // Skip to the next Token
 941       for (; Pos != Stop && *Pos != Tok; Pos++);
 942
 943       // Back remove spaces
 944       char *End = Pos;
 945       for (; End > Start && (End[-1] == Tok || isspace(End[-1]) != 0); End--);
 946       *End = 0;
 947
 948       List[Count++] = Start;
 949       if (Count >= ListMax)
 950       {
 951          List[Count-1] = 0;
 952          return false;
 953       }
 954
 955       // Advance pos
 956       for (; Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0); Pos++);
 957       Start = Pos;
 958    }
 959
 960    List[Count] = 0;
 961    return true;
 962 }
 963                                                                         /*}}}*/
 964 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
 965 // ---------------------------------------------------------------------
 966 /* */
 967 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
 968                       const char **ListEnd)
 969 {
 970    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 971       R->Hit = false;
 972
 973    unsigned long Hits = 0;
 974    for (; ListBegin != ListEnd; ListBegin++)
 975    {
 976       // Check if the name is a regex
 977       const char *I;
 978       bool Regex = true;
 979       for (I = *ListBegin; *I != 0; I++)
 980          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
 981             break;
 982       if (*I == 0)
 983          Regex = false;
 984
 985       // Compile the regex pattern
 986       regex_t Pattern;
 987       if (Regex == true)
 988          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
 989                      REG_NOSUB) != 0)
 990             Regex = false;
 991
 992       // Search the list
 993       bool Done = false;
 994       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 995       {
 996          if (R->Str[0] == 0)
 997             continue;
 998
 999          if (strcasecmp(R->Str,*ListBegin) != 0)
1000          {
1001             if (Regex == false)
1002                continue;
1003             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1004                continue;
1005          }
1006          Done = true;
1007
1008          if (R->Hit == false)
1009             Hits++;
1010
1011          R->Hit = true;
1012       }
1013
1014       if (Regex == true)
1015          regfree(&Pattern);
1016
1017       if (Done == false)
1018          _error->Warning(_("Selection %s not found"),*ListBegin);
1019    }
1020
1021    return Hits;
1022 }
1023                                                                         /*}}}*/
// ioprintf - C format string outputter to C++ iostreams                /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The text is formatted with
   vsnprintf and then written to out. Short results are built in a stack
   buffer; anything that does not fit is formatted a second time into a
   buffer of the exact size, so the output is never truncated. Nothing is
   written if vsnprintf reports an error. */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;

   // First attempt in a fixed buffer, this also tells us the real length
   char S[400];
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   // Formatting failed, S holds nothing we could print
   if (Needed < 0)
      return;

   if (static_cast<size_t>(Needed) < sizeof(S))
   {
      out << S;
      return;
   }

   /* Too long for the stack buffer. The argument list was consumed by the
      first pass, so it is started again before formatting a second time */
   string Big(static_cast<size_t>(Needed) + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << Big.c_str();
}
1038                                                                         /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. End is returned when there is no
   room at all (End <= Buffer), when the output had to be truncated or
   when vsnprintf fails. This is a better alternative to using consecutive
   snprintfs: the return value can be handed straight in as Buffer of the
   next call. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room left, do not even start the argument list
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   /* The result has to stay signed: vsnprintf reports errors with a
      negative value and truncation with the length it would have needed */
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1060                                                                         /*}}}*/
1061
1062 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1063 // ---------------------------------------------------------------------
1064 /* The domain list is a comma seperate list of domains that are suffix
1065    matched against the argument */
1066 bool CheckDomainList(const string &Host,const string &List)
1067 {
1068    string::const_iterator Start = List.begin();
1069    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1070    {
1071       if (Cur < List.end() && *Cur != ',')
1072          continue;
1073
1074       // Match the end of the string..
1075       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1076           Cur - Start != 0 &&
1077           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1078          return true;
1079
1080       Start = Cur + 1;
1081    }
1082    return false;
1083 }
1084                                                                         /*}}}*/
1085
1086 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1087 // ---------------------------------------------------------------------
1088 /* This parses the URI into all of its components */
1089 void URI::CopyFrom(const string &U)
1090 {
1091    string::const_iterator I = U.begin();
1092
1093    // Locate the first colon, this separates the scheme
1094    for (; I < U.end() && *I != ':' ; I++);
1095    string::const_iterator FirstColon = I;
1096
1097    /* Determine if this is a host type URI with a leading double //
1098       and then search for the first single / */
1099    string::const_iterator SingleSlash = I;
1100    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1101       SingleSlash += 3;
1102
1103    /* Find the / indicating the end of the hostname, ignoring /'s in the
1104       square brackets */
1105    bool InBracket = false;
1106    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1107    {
1108       if (*SingleSlash == '[')
1109          InBracket = true;
1110       if (InBracket == true && *SingleSlash == ']')
1111          InBracket = false;
1112    }
1113
1114    if (SingleSlash > U.end())
1115       SingleSlash = U.end();
1116
1117    // We can now write the access and path specifiers
1118    Access.assign(U.begin(),FirstColon);
1119    if (SingleSlash != U.end())
1120       Path.assign(SingleSlash,U.end());
1121    if (Path.empty() == true)
1122       Path = "/";
1123
1124    // Now we attempt to locate a user:pass@host fragment
1125    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1126       FirstColon += 3;
1127    else
1128       FirstColon += 1;
1129    if (FirstColon >= U.end())
1130       return;
1131
1132    if (FirstColon > SingleSlash)
1133       FirstColon = SingleSlash;
1134
1135    // Find the colon...
1136    I = FirstColon + 1;
1137    if (I > SingleSlash)
1138       I = SingleSlash;
1139    for (; I < SingleSlash && *I != ':'; I++);
1140    string::const_iterator SecondColon = I;
1141
1142    // Search for the @ after the colon
1143    for (; I < SingleSlash && *I != '@'; I++);
1144    string::const_iterator At = I;
1145
1146    // Now write the host and user/pass
1147    if (At == SingleSlash)
1148    {
1149       if (FirstColon < SingleSlash)
1150          Host.assign(FirstColon,SingleSlash);
1151    }
1152    else
1153    {
1154       Host.assign(At+1,SingleSlash);
1155       User.assign(FirstColon,SecondColon);
1156       if (SecondColon < At)
1157          Password.assign(SecondColon+1,At);
1158    }
1159
1160    // Now we parse the RFC 2732 [] hostnames.
1161    unsigned long PortEnd = 0;
1162    InBracket = false;
1163    for (unsigned I = 0; I != Host.length();)
1164    {
1165       if (Host[I] == '[')
1166       {
1167          InBracket = true;
1168          Host.erase(I,1);
1169          continue;
1170       }
1171
1172       if (InBracket == true && Host[I] == ']')
1173       {
1174          InBracket = false;
1175          Host.erase(I,1);
1176          PortEnd = I;
1177          continue;
1178       }
1179       I++;
1180    }
1181
1182    // Tsk, weird.
1183    if (InBracket == true)
1184    {
1185       Host.clear();
1186       return;
1187    }
1188
1189    // Now we parse off a port number from the hostname
1190    Port = 0;
1191    string::size_type Pos = Host.rfind(':');
1192    if (Pos == string::npos || Pos < PortEnd)
1193       return;
1194
1195    Port = atoi(string(Host,Pos+1).c_str());
1196    Host.assign(Host,0,Pos);
1197 }
1198                                                                         /*}}}*/
1199 // URI::operator string - Convert the URI to a string                   /*{{{*/
1200 // ---------------------------------------------------------------------
1201 /* */
1202 URI::operator string()
1203 {
1204    string Res;
1205
1206    if (Access.empty() == false)
1207       Res = Access + ':';
1208
1209    if (Host.empty() == false)
1210    {
1211       if (Access.empty() == false)
1212          Res += "//";
1213
1214       if (User.empty() == false)
1215       {
1216          Res +=  User;
1217          if (Password.empty() == false)
1218             Res += ":" + Password;
1219          Res += "@";
1220       }
1221
1222       // Add RFC 2732 escaping characters
1223       if (Access.empty() == false &&
1224           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1225          Res += '[' + Host + ']';
1226       else
1227          Res += Host;
1228
1229       if (Port != 0)
1230       {
1231          char S[30];
1232          sprintf(S,":%u",Port);
1233          Res += S;
1234       }
1235    }
1236
1237    if (Path.empty() == false)
1238    {
1239       if (Path[0] != '/')
1240          Res += "/" + Path;
1241       else
1242          Res += Path;
1243    }
1244
1245    return Res;
1246 }
1247                                                                         /*}}}*/
1248 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1249 // ---------------------------------------------------------------------
1250 /* */
1251 string URI::SiteOnly(const string &URI)
1252 {
1253    ::URI U(URI);
1254    U.User.clear();
1255    U.Password.clear();
1256    U.Path.clear();
1257    U.Port = 0;
1258    return U;
1259 }
1260                                                                         /*}}}*/