apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before display some information for enduser  */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf, *outptr;
  47   size_t insize, outsize;
  48
  49   cd = iconv_open(codeset, "UTF-8");
  50   if (cd == (iconv_t)(-1)) {
  51      // Something went wrong
  52      if (errno == EINVAL)
  53         _error->Error("conversion from 'UTF-8' to '%s' not available",
  54                codeset);
  55      else
  56         perror("iconv_open");
  57
  58      // Clean the destination string
  59      *dest = "";
  60
  61      return false;
  62   }
  63
  64   insize = outsize = orig.size();
  65   inbuf = orig.data();
  66   inptr = (char *)inbuf;
  67   outbuf = new char[insize+1];
  68   outptr = outbuf;
  69
  70   iconv(cd, &inptr, &insize, &outptr, &outsize);
  71   *outptr = '\0';
  72
  73   *dest = outbuf;
  74   delete[] outbuf;
  75
  76   iconv_close(cd);
  77
  78   return true;
  79 }
  80                                                                         /*}}}*/
  81 // strstrip - Remove white space from the front and back of a string    /*{{{*/
  82 // ---------------------------------------------------------------------
  83 /* This is handy to use when parsing a file. It also removes \n's left
  84    over from fgets and company */
  85 char *_strstrip(char *String)
  86 {
  87    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
  88
  89    if (*String == 0)
  90       return String;
  91
  92    char *End = String + strlen(String) - 1;
  93    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
  94                                *End == '\r'); End--);
  95    End++;
  96    *End = 0;
  97    return String;
  98 };
  99                                                                         /*}}}*/
 100 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 101 // ---------------------------------------------------------------------
 102 /* */
 103 char *_strtabexpand(char *String,size_t Len)
 104 {
 105    for (char *I = String; I != I + Len && *I != 0; I++)
 106    {
 107       if (*I != '\t')
 108          continue;
 109       if (I + 8 > String + Len)
 110       {
 111          *I = 0;
 112          return String;
 113       }
 114
 115       /* Assume the start of the string is 0 and find the next 8 char
 116          division */
 117       int Len;
 118       if (String == I)
 119          Len = 1;
 120       else
 121          Len = 8 - ((String - I) % 8);
 122       Len -= 2;
 123       if (Len <= 0)
 124       {
 125          *I = ' ';
 126          continue;
 127       }
 128
 129       memmove(I + Len,I + 1,strlen(I) + 1);
 130       for (char *J = I; J + Len != I; *I = ' ', I++);
 131    }
 132    return String;
 133 }
 134                                                                         /*}}}*/
 135 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 136 // ---------------------------------------------------------------------
 137 /* This grabs a single word, converts any % escaped characters to their
 138    proper values and advances the pointer. Double quotes are understood
 139    and stripped out as well. This is for URI/URL parsing. It also can
 140    understand [] brackets.*/
 141 bool ParseQuoteWord(const char *&String,string &Res)
 142 {
 143    // Skip leading whitespace
 144    const char *C = String;
 145    for (;*C != 0 && *C == ' '; C++);
 146    if (*C == 0)
 147       return false;
 148
 149    // Jump to the next word
 150    for (;*C != 0 && isspace(*C) == 0; C++)
 151    {
 152       if (*C == '"')
 153       {
 154          for (C++; *C != 0 && *C != '"'; C++);
 155          if (*C == 0)
 156             return false;
 157       }
 158       if (*C == '[')
 159       {
 160          for (C++; *C != 0 && *C != ']'; C++);
 161          if (*C == 0)
 162             return false;
 163       }
 164    }
 165
 166    // Now de-quote characters
 167    char Buffer[1024];
 168    char Tmp[3];
 169    const char *Start = String;
 170    char *I;
 171    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 172    {
 173       if (*Start == '%' && Start + 2 < C)
 174       {
 175          Tmp[0] = Start[1];
 176          Tmp[1] = Start[2];
 177          Tmp[2] = 0;
 178          *I = (char)strtol(Tmp,0,16);
 179          Start += 3;
 180          continue;
 181       }
 182       if (*Start != '"')
 183          *I = *Start;
 184       else
 185          I--;
 186       Start++;
 187    }
 188    *I = 0;
 189    Res = Buffer;
 190
 191    // Skip ending white space
 192    for (;*C != 0 && isspace(*C) != 0; C++);
 193    String = C;
 194    return true;
 195 }
 196                                                                         /*}}}*/
 197 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 198 // ---------------------------------------------------------------------
 199 /* This expects a series of space separated strings enclosed in ""'s.
 200    It concatenates the ""'s into a single string. */
 201 bool ParseCWord(const char *&String,string &Res)
 202 {
 203    // Skip leading whitespace
 204    const char *C = String;
 205    for (;*C != 0 && *C == ' '; C++);
 206    if (*C == 0)
 207       return false;
 208
 209    char Buffer[1024];
 210    char *Buf = Buffer;
 211    if (strlen(String) >= sizeof(Buffer))
 212        return false;
 213
 214    for (; *C != 0; C++)
 215    {
 216       if (*C == '"')
 217       {
 218          for (C++; *C != 0 && *C != '"'; C++)
 219             *Buf++ = *C;
 220
 221          if (*C == 0)
 222             return false;
 223
 224          continue;
 225       }
 226
 227       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 228          continue;
 229       if (isspace(*C) == 0)
 230          return false;
 231       *Buf++ = ' ';
 232    }
 233    *Buf = 0;
 234    Res = Buffer;
 235    String = C;
 236    return true;
 237 }
 238                                                                         /*}}}*/
 239 // QuoteString - Convert a string into quoted form                      /*{{{*/
 240 // ---------------------------------------------------------------------
 241 /* */
 242 string QuoteString(const string &Str, const char *Bad)
 243 {
 244    string Res;
 245    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 246    {
 247       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 248           *I <= 0x20 || *I >= 0x7F)
 249       {
 250          char Buf[10];
 251          sprintf(Buf,"%%%02x",(int)*I);
 252          Res += Buf;
 253       }
 254       else
 255          Res += *I;
 256    }
 257    return Res;
 258 }
 259                                                                         /*}}}*/
 260 // DeQuoteString - Convert a string from quoted form                    /*{{{*/
 261 // ---------------------------------------------------------------------
 262 /* This undoes QuoteString */
 263 string DeQuoteString(const string &Str)
 264 {
 265    string Res;
 266    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 267    {
 268       if (*I == '%' && I + 2 < Str.end())
 269       {
 270          char Tmp[3];
 271          Tmp[0] = I[1];
 272          Tmp[1] = I[2];
 273          Tmp[2] = 0;
 274          Res += (char)strtol(Tmp,0,16);
 275          I += 2;
 276          continue;
 277       }
 278       else
 279          Res += *I;
 280    }
 281    return Res;
 282 }
 283
 284                                                                         /*}}}*/
 285 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 286 // ---------------------------------------------------------------------
 287 /* A max of 4 digits are shown before conversion to the next highest unit.
 288    The max length of the string will be 5 chars unless the size is > 10
 289    YottaBytes (E24) */
 290 string SizeToStr(double Size)
 291 {
 292    char S[300];
 293    double ASize;
 294    if (Size >= 0)
 295       ASize = Size;
 296    else
 297       ASize = -1*Size;
 298
 299    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 300       ExaBytes, ZettaBytes, YottaBytes */
 301    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 302    int I = 0;
 303    while (I <= 8)
 304    {
 305       if (ASize < 100 && I != 0)
 306       {
 307          sprintf(S,"%'.1f%c",ASize,Ext[I]);
 308          break;
 309       }
 310
 311       if (ASize < 10000)
 312       {
 313          sprintf(S,"%'.0f%c",ASize,Ext[I]);
 314          break;
 315       }
 316       ASize /= 1000.0;
 317       I++;
 318    }
 319
 320    return S;
 321 }
 322                                                                         /*}}}*/
 323 // TimeToStr - Convert the time into a string                           /*{{{*/
 324 // ---------------------------------------------------------------------
 325 /* Converts a number of seconds to a hms format */
 326 string TimeToStr(unsigned long Sec)
 327 {
 328    char S[300];
 329
 330    while (1)
 331    {
 332       if (Sec > 60*60*24)
 333       {
 334          //d means days, h means hours, min means minutes, s means seconds
 335          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 336          break;
 337       }
 338
 339       if (Sec > 60*60)
 340       {
 341          //h means hours, min means minutes, s means seconds
 342          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 343          break;
 344       }
 345
 346       if (Sec > 60)
 347       {
 348          //min means minutes, s means seconds
 349          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 350          break;
 351       }
 352
 353       //s means seconds
 354       sprintf(S,_("%lis"),Sec);
 355       break;
 356    }
 357
 358    return S;
 359 }
 360                                                                         /*}}}*/
 361 // SubstVar - Substitute a string for another string                    /*{{{*/
 362 // ---------------------------------------------------------------------
 363 /* This replaces all occurrences of Subst with Contents in Str. */
 364 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 365 {
 366    string::size_type Pos = 0;
 367    string::size_type OldPos = 0;
 368    string Temp;
 369
 370    while (OldPos < Str.length() &&
 371           (Pos = Str.find(Subst,OldPos)) != string::npos)
 372    {
 373       Temp += string(Str,OldPos,Pos) + Contents;
 374       OldPos = Pos + Subst.length();
 375    }
 376
 377    if (OldPos == 0)
 378       return Str;
 379
 380    return Temp + string(Str,OldPos);
 381 }
 382
 383 string SubstVar(string Str,const struct SubstVar *Vars)
 384 {
 385    for (; Vars->Subst != 0; Vars++)
 386       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 387    return Str;
 388 }
 389                                                                         /*}}}*/
 390 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 391 // ---------------------------------------------------------------------
 392 /* Returns a string with the supplied separator depth + 1 times in it */
 393 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 394 {
 395    std::string output = "";
 396    for(unsigned long d=Depth+1; d > 0; d--)
 397       output.append(Separator);
 398    return output;
 399 }
 400                                                                         /*}}}*/
 401 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 402 // ---------------------------------------------------------------------
 403 /* This converts a URI into a safe filename. It quotes all unsafe characters
 404    and converts / to _ and removes the scheme identifier. The resulting
 405    file name should be unique and never occur again for a different file */
 406 string URItoFileName(const string &URI)
 407 {
 408    // Nuke 'sensitive' items
 409    ::URI U(URI);
 410    U.User.clear();
 411    U.Password.clear();
 412    U.Access.clear();
 413
 414    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 415    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 416    replace(NewURI.begin(),NewURI.end(),'/','_');
 417    return NewURI;
 418 }
 419                                                                         /*}}}*/
 420 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 421 // ---------------------------------------------------------------------
 422 /* This routine performs a base64 transformation on a string. It was ripped
 423    from wget and then patched and bug fixed.
 424
 425    This spec can be found in rfc2045 */
 426 string Base64Encode(const string &S)
 427 {
 428    // Conversion table.
 429    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 430                           'I','J','K','L','M','N','O','P',
 431                           'Q','R','S','T','U','V','W','X',
 432                           'Y','Z','a','b','c','d','e','f',
 433                           'g','h','i','j','k','l','m','n',
 434                           'o','p','q','r','s','t','u','v',
 435                           'w','x','y','z','0','1','2','3',
 436                           '4','5','6','7','8','9','+','/'};
 437
 438    // Pre-allocate some space
 439    string Final;
 440    Final.reserve((4*S.length() + 2)/3 + 2);
 441
 442    /* Transform the 3x8 bits to 4x6 bits, as required by
 443       base64.  */
 444    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 445    {
 446       char Bits[3] = {0,0,0};
 447       Bits[0] = I[0];
 448       if (I + 1 < S.end())
 449          Bits[1] = I[1];
 450       if (I + 2 < S.end())
 451          Bits[2] = I[2];
 452
 453       Final += tbl[Bits[0] >> 2];
 454       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 455
 456       if (I + 1 >= S.end())
 457          break;
 458
 459       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 460
 461       if (I + 2 >= S.end())
 462          break;
 463
 464       Final += tbl[Bits[2] & 0x3f];
 465    }
 466
 467    /* Apply the padding elements, this tells how many bytes the remote
 468       end should discard */
 469    if (S.length() % 3 == 2)
 470       Final += '=';
 471    if (S.length() % 3 == 1)
 472       Final += "==";
 473
 474    return Final;
 475 }
 476                                                                         /*}}}*/
 477 // stringcmp - Arbitrary string compare                                 /*{{{*/
 478 // ---------------------------------------------------------------------
 479 /* This safely compares two non-null terminated strings of arbitrary
 480    length */
 481 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 482 {
 483    for (; A != AEnd && B != BEnd; A++, B++)
 484       if (*A != *B)
 485          break;
 486
 487    if (A == AEnd && B == BEnd)
 488       return 0;
 489    if (A == AEnd)
 490       return 1;
 491    if (B == BEnd)
 492       return -1;
 493    if (*A < *B)
 494       return -1;
 495    return 1;
 496 }
 497
 498 #if __GNUC__ >= 3
 499 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 500               const char *B,const char *BEnd)
 501 {
 502    for (; A != AEnd && B != BEnd; A++, B++)
 503       if (*A != *B)
 504          break;
 505
 506    if (A == AEnd && B == BEnd)
 507       return 0;
 508    if (A == AEnd)
 509       return 1;
 510    if (B == BEnd)
 511       return -1;
 512    if (*A < *B)
 513       return -1;
 514    return 1;
 515 }
 516 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 517               string::const_iterator B,string::const_iterator BEnd)
 518 {
 519    for (; A != AEnd && B != BEnd; A++, B++)
 520       if (*A != *B)
 521          break;
 522
 523    if (A == AEnd && B == BEnd)
 524       return 0;
 525    if (A == AEnd)
 526       return 1;
 527    if (B == BEnd)
 528       return -1;
 529    if (*A < *B)
 530       return -1;
 531    return 1;
 532 }
 533 #endif
 534                                                                         /*}}}*/
 535 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 536 // ---------------------------------------------------------------------
 537 /* */
 538 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 539 {
 540    for (; A != AEnd && B != BEnd; A++, B++)
 541       if (toupper(*A) != toupper(*B))
 542          break;
 543
 544    if (A == AEnd && B == BEnd)
 545       return 0;
 546    if (A == AEnd)
 547       return 1;
 548    if (B == BEnd)
 549       return -1;
 550    if (toupper(*A) < toupper(*B))
 551       return -1;
 552    return 1;
 553 }
 554 #if __GNUC__ >= 3
 555 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 556                   const char *B,const char *BEnd)
 557 {
 558    for (; A != AEnd && B != BEnd; A++, B++)
 559       if (toupper(*A) != toupper(*B))
 560          break;
 561
 562    if (A == AEnd && B == BEnd)
 563       return 0;
 564    if (A == AEnd)
 565       return 1;
 566    if (B == BEnd)
 567       return -1;
 568    if (toupper(*A) < toupper(*B))
 569       return -1;
 570    return 1;
 571 }
 572 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 573                   string::const_iterator B,string::const_iterator BEnd)
 574 {
 575    for (; A != AEnd && B != BEnd; A++, B++)
 576       if (toupper(*A) != toupper(*B))
 577          break;
 578
 579    if (A == AEnd && B == BEnd)
 580       return 0;
 581    if (A == AEnd)
 582       return 1;
 583    if (B == BEnd)
 584       return -1;
 585    if (toupper(*A) < toupper(*B))
 586       return -1;
 587    return 1;
 588 }
 589 #endif
 590                                                                         /*}}}*/
 591 // LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 592 // ---------------------------------------------------------------------
 593 /* The format is like those used in package files and the method
 594    communication system */
 595 string LookupTag(const string &Message,const char *Tag,const char *Default)
 596 {
 597    // Look for a matching tag.
 598    int Length = strlen(Tag);
 599    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 600    {
 601       // Found the tag
 602       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 603       {
 604          // Find the end of line and strip the leading/trailing spaces
 605          string::const_iterator J;
 606          I += Length + 1;
 607          for (; isspace(*I) != 0 && I < Message.end(); I++);
 608          for (J = I; *J != '\n' && J < Message.end(); J++);
 609          for (; J > I && isspace(J[-1]) != 0; J--);
 610
 611          return string(I,J);
 612       }
 613
 614       for (; *I != '\n' && I < Message.end(); I++);
 615    }
 616
 617    // Failed to find a match
 618    if (Default == 0)
 619       return string();
 620    return Default;
 621 }
 622                                                                         /*}}}*/
 623 // StringToBool - Converts a string into a boolean                      /*{{{*/
 624 // ---------------------------------------------------------------------
 625 /* This inspects the string to see if it is true or if it is false and
 626    then returns the result. Several variants on true/false are checked. */
 627 int StringToBool(const string &Text,int Default)
 628 {
 629    char *End;
 630    int Res = strtol(Text.c_str(),&End,0);
 631    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 632       return Res;
 633
 634    // Check for positives
 635    if (strcasecmp(Text.c_str(),"no") == 0 ||
 636        strcasecmp(Text.c_str(),"false") == 0 ||
 637        strcasecmp(Text.c_str(),"without") == 0 ||
 638        strcasecmp(Text.c_str(),"off") == 0 ||
 639        strcasecmp(Text.c_str(),"disable") == 0)
 640       return 0;
 641
 642    // Check for negatives
 643    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 644        strcasecmp(Text.c_str(),"true") == 0 ||
 645        strcasecmp(Text.c_str(),"with") == 0 ||
 646        strcasecmp(Text.c_str(),"on") == 0 ||
 647        strcasecmp(Text.c_str(),"enable") == 0)
 648       return 1;
 649
 650    return Default;
 651 }
 652                                                                         /*}}}*/
 653 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 654 // ---------------------------------------------------------------------
 655 /* This converts a time_t into a string time representation that is
 656    year 2000 compliant and timezone neutral */
 657 string TimeRFC1123(time_t Date)
 658 {
 659    struct tm Conv = *gmtime(&Date);
 660    char Buf[300];
 661
 662    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 663    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 664                           "Aug","Sep","Oct","Nov","Dec"};
 665
 666    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 667            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 668            Conv.tm_min,Conv.tm_sec);
 669    return Buf;
 670 }
 671                                                                         /*}}}*/
 672 // ReadMessages - Read messages from the FD                             /*{{{*/
 673 // ---------------------------------------------------------------------
 674 /* This pulls full messages from the input FD into the message buffer.
 675    It assumes that messages will not pause during transit so no
 676    fancy buffering is used.
 677
 678    In particular: this reads blocks from the input until it believes
 679    that it's run out of input text.  Each block is terminated by a
 680    double newline ('\n' followed by '\n').  As noted below, there is a
 681    bug in this code: it assumes that all the blocks have been read if
 682    it doesn't see additional text in the buffer after the last one is
 683    parsed, which will cause it to lose blocks if the last block
 684    coincides with the end of the buffer.
 685  */
 686 bool ReadMessages(int Fd, vector<string> &List)
 687 {
 688    char Buffer[64000];
 689    char *End = Buffer;
 690    // Represents any left-over from the previous iteration of the
 691    // parse loop.  (i.e., if a message is split across the end
 692    // of the buffer, it goes here)
 693    string PartialMessage;
 694
 695    while (1)
 696    {
 697       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 698       if (Res < 0 && errno == EINTR)
 699          continue;
 700
 701       // Process is dead, this is kind of bad..
 702       if (Res == 0)
 703          return false;
 704
 705       // No data
 706       if (Res < 0 && errno == EAGAIN)
 707          return true;
 708       if (Res < 0)
 709          return false;
 710
 711       End += Res;
 712
 713       // Look for the end of the message
 714       for (char *I = Buffer; I + 1 < End; I++)
 715       {
 716          if (I[0] != '\n' || I[1] != '\n')
 717             continue;
 718
 719          // Pull the message out
 720          string Message(Buffer,I-Buffer);
 721          PartialMessage += Message;
 722
 723          // Fix up the buffer
 724          for (; I < End && *I == '\n'; I++);
 725          End -= I-Buffer;
 726          memmove(Buffer,I,End-Buffer);
 727          I = Buffer;
 728
 729          List.push_back(PartialMessage);
 730          PartialMessage.clear();
 731       }
 732       if (End != Buffer)
 733         {
 734           // If there's text left in the buffer, store it
 735           // in PartialMessage and throw the rest of the buffer
 736           // away.  This allows us to handle messages that
 737           // are longer than the static buffer size.
 738           PartialMessage += string(Buffer, End);
 739           End = Buffer;
 740         }
 741       else
 742         {
 743           // BUG ALERT: if a message block happens to end at a
 744           // multiple of 64000 characters, this will cause it to
 745           // terminate early, leading to a badly formed block and
 746           // probably crashing the method.  However, this is the only
 747           // way we have to find the end of the message block.  I have
 748           // an idea of how to fix this, but it will require changes
 749           // to the protocol (essentially to mark the beginning and
 750           // end of the block).
 751           //
 752           //  -- dburrows 2008-04-02
 753           return true;
 754         }
 755
 756       if (WaitFd(Fd) == false)
 757          return false;
 758    }
 759 }
 760                                                                         /*}}}*/
 761 // MonthConv - Converts a month string into a number                    /*{{{*/
 762 // ---------------------------------------------------------------------
 763 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 764    Made it a bit more robust with a few touppers though. */
 765 static int MonthConv(char *Month)
 766 {
 767    switch (toupper(*Month))
 768    {
 769       case 'A':
 770       return toupper(Month[1]) == 'P'?3:7;
 771       case 'D':
 772       return 11;
 773       case 'F':
 774       return 1;
 775       case 'J':
 776       if (toupper(Month[1]) == 'A')
 777          return 0;
 778       return toupper(Month[2]) == 'N'?5:6;
 779       case 'M':
 780       return toupper(Month[2]) == 'R'?2:4;
 781       case 'N':
 782       return 10;
 783       case 'O':
 784       return 9;
 785       case 'S':
 786       return 8;
 787
 788       // Pretend it is January..
 789       default:
 790       return 0;
 791    }
 792 }
 793                                                                         /*}}}*/
 794 // timegm - Internal timegm function if gnu is not available            /*{{{*/
 795 // ---------------------------------------------------------------------
 796 /* Ripped this evil little function from wget - I prefer the use of
 797    GNU timegm if possible as this technique will have interesting problems
 798    with leap seconds, timezones and other.
 799
 800    Converts struct tm to time_t, assuming the data in tm is UTC rather
 801    than local timezone (mktime assumes the latter).
 802
 803    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
 804    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
 805
 806 /* Turned it into an autoconf check, because GNU is not the only thing which
 807    can provide timegm. -- 2002-09-22, Joel Baker */
 808
 809 #ifndef HAVE_TIMEGM // Now with autoconf!
 810 static time_t timegm(struct tm *t)
 811 {
 812    time_t tl, tb;
 813
 814    tl = mktime (t);
 815    if (tl == -1)
 816       return -1;
 817    tb = mktime (gmtime (&tl));
 818    return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
 819 }
 820 #endif
 821                                                                         /*}}}*/
 822 // StrToTime - Converts a string into a time_t                          /*{{{*/
 823 // ---------------------------------------------------------------------
 824 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
 825    and the C library asctime format. It requires the GNU library function
 826    'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
 827    reason the C library does not provide any such function :< This also
 828    handles the weird, but unambiguous FTP time format*/
 829 bool StrToTime(const string &Val,time_t &Result)
 830 {
 831    struct tm Tm;
 832    char Month[10];
 833    const char *I = Val.c_str();
 834
 835    // Skip the day of the week
 836    for (;*I != 0  && *I != ' '; I++);
 837
 838    // Handle RFC 1123 time
 839    Month[0] = 0;
 840    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 841               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 842    {
 843       // Handle RFC 1036 time
 844       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 845                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 846          Tm.tm_year += 1900;
 847       else
 848       {
 849          // asctime format
 850          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 851                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 852          {
 853             // 'ftp' time
 854             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 855                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 856                return false;
 857             Tm.tm_mon--;
 858          }
 859       }
 860    }
 861
 862    Tm.tm_isdst = 0;
 863    if (Month[0] != 0)
 864       Tm.tm_mon = MonthConv(Month);
 865    Tm.tm_year -= 1900;
 866
 867    // Convert to local time and then to GMT
 868    Result = timegm(&Tm);
 869    return true;
 870 }
 871                                                                         /*}}}*/
 872 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 873 // ---------------------------------------------------------------------
 874 /* This is used in decoding the crazy fixed length string headers in
 875    tar and ar files. */
 876 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 877 {
 878    char S[30];
 879    if (Len >= sizeof(S))
 880       return false;
 881    memcpy(S,Str,Len);
 882    S[Len] = 0;
 883
 884    // All spaces is a zero
 885    Res = 0;
 886    unsigned I;
 887    for (I = 0; S[I] == ' '; I++);
 888    if (S[I] == 0)
 889       return true;
 890
 891    char *End;
 892    Res = strtoul(S,&End,Base);
 893    if (End == S)
 894       return false;
 895
 896    return true;
 897 }
 898                                                                         /*}}}*/
 899 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 900 // ---------------------------------------------------------------------
 901 /* Helper for Hex2Num */
 902 static int HexDigit(int c)
 903 {
 904    if (c >= '0' && c <= '9')
 905       return c - '0';
 906    if (c >= 'a' && c <= 'f')
 907       return c - 'a' + 10;
 908    if (c >= 'A' && c <= 'F')
 909       return c - 'A' + 10;
 910    return 0;
 911 }
 912                                                                         /*}}}*/
 913 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 914 // ---------------------------------------------------------------------
 915 /* The length of the buffer must be exactly 1/2 the length of the string. */
 916 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 917 {
 918    if (Str.length() != Length*2)
 919       return false;
 920
 921    // Convert each digit. We store it in the same order as the string
 922    int J = 0;
 923    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 924    {
 925       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 926          return false;
 927
 928       Num[J] = HexDigit(I[0]) << 4;
 929       Num[J] += HexDigit(I[1]);
 930    }
 931
 932    return true;
 933 }
 934                                                                         /*}}}*/
 935 // TokSplitString - Split a string up by a given token                  /*{{{*/
 936 // ---------------------------------------------------------------------
 937 /* This is intended to be a faster splitter, it does not use dynamic
 938    memories. Input is changed to insert nulls at each token location. */
 939 bool TokSplitString(char Tok,char *Input,char **List,
 940                     unsigned long ListMax)
 941 {
 942    // Strip any leading spaces
 943    char *Start = Input;
 944    char *Stop = Start + strlen(Start);
 945    for (; *Start != 0 && isspace(*Start) != 0; Start++);
 946
 947    unsigned long Count = 0;
 948    char *Pos = Start;
 949    while (Pos != Stop)
 950    {
 951       // Skip to the next Token
 952       for (; Pos != Stop && *Pos != Tok; Pos++);
 953
 954       // Back remove spaces
 955       char *End = Pos;
 956       for (; End > Start && (End[-1] == Tok || isspace(End[-1]) != 0); End--);
 957       *End = 0;
 958
 959       List[Count++] = Start;
 960       if (Count >= ListMax)
 961       {
 962          List[Count-1] = 0;
 963          return false;
 964       }
 965
 966       // Advance pos
 967       for (; Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0); Pos++);
 968       Start = Pos;
 969    }
 970
 971    List[Count] = 0;
 972    return true;
 973 }
 974                                                                         /*}}}*/
 975 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
 976 // ---------------------------------------------------------------------
 977 /* */
 978 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
 979                       const char **ListEnd)
 980 {
 981    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 982       R->Hit = false;
 983
 984    unsigned long Hits = 0;
 985    for (; ListBegin != ListEnd; ListBegin++)
 986    {
 987       // Check if the name is a regex
 988       const char *I;
 989       bool Regex = true;
 990       for (I = *ListBegin; *I != 0; I++)
 991          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
 992             break;
 993       if (*I == 0)
 994          Regex = false;
 995
 996       // Compile the regex pattern
 997       regex_t Pattern;
 998       if (Regex == true)
 999          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1000                      REG_NOSUB) != 0)
1001             Regex = false;
1002
1003       // Search the list
1004       bool Done = false;
1005       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1006       {
1007          if (R->Str[0] == 0)
1008             continue;
1009
1010          if (strcasecmp(R->Str,*ListBegin) != 0)
1011          {
1012             if (Regex == false)
1013                continue;
1014             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1015                continue;
1016          }
1017          Done = true;
1018
1019          if (R->Hit == false)
1020             Hits++;
1021
1022          R->Hit = true;
1023       }
1024
1025       if (Regex == true)
1026          regfree(&Pattern);
1027
1028       if (Done == false)
1029          _error->Warning(_("Selection %s not found"),*ListBegin);
1030    }
1031
1032    return Hits;
1033 }
1034                                                                         /*}}}*/
// ioprintf - C format string outputter to C++ iostreams                /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The text is formatted into a
   fixed 4096 byte buffer, so longer output is truncated. Nothing is
   written to the stream if the formatting itself fails. */
void ioprintf(ostream &out,const char *format,...)
{
   char S[4096];

   // sprintf the description; every va_start needs its matching va_end
   va_list args;
   va_start(args,format);
   const int Length = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   // On failure the buffer contents are not something we want to print
   if (Length < 0)
      return;

   out << S;
}
1049                                                                         /*}}}*/
// strprintf - C format string outputter to C++ strings                 /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The previous content of out is
   replaced. The text is formatted into a fixed 4096 byte buffer, so
   longer results are truncated. If the formatting itself fails out is
   left empty. */
void strprintf(string &out,const char *format,...)
{
   char S[4096];

   // sprintf the description; every va_start needs its matching va_end
   va_list args;
   va_start(args,format);
   const int Length = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   // On failure the buffer contents are not something we want to keep
   if (Length < 0)
   {
      out.clear();
      return;
   }

   out = S;
}
1064                                                                         /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs: the result can be passed straight back in as the
   next Buffer. End is returned when there is no room at all, when the
   output had to be truncated or filled the buffer completely, and when
   the formatting itself failed (in which case Buffer is set to the empty
   string). */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // Check for room first so the va_list is never started without an end
   if (End <= Buffer)
      return End;
   const size_t Avail = End - Buffer;

   va_list args;
   va_start(args,Format);
   // Keep the result signed, a negative value is how failure is reported
   const int Did = vsnprintf(Buffer,Avail,Format,args);
   va_end(args);

   if (Did < 0)
   {
      *Buffer = 0;
      return End;
   }

   /* Compare lengths rather than pointers so that no pointer beyond End
      is ever formed when the output was truncated. */
   if (static_cast<size_t>(Did) >= Avail)
      return End;
   return Buffer + Did;
}
1086                                                                         /*}}}*/
1087
// tolower_ascii - tolower() function that ignores the locale           /*{{{*/
// ---------------------------------------------------------------------
/* Values between 'A' and 'Z' are shifted up by 32, everything else is
   returned unchanged. No locale is consulted. */
int tolower_ascii(int c)
{
   const int CaseOffset = 32;
   const bool IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + CaseOffset : c;
}
1097                                                                         /*}}}*/
1098
1099 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1100 // ---------------------------------------------------------------------
1101 /* The domain list is a comma seperate list of domains that are suffix
1102    matched against the argument */
1103 bool CheckDomainList(const string &Host,const string &List)
1104 {
1105    string::const_iterator Start = List.begin();
1106    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1107    {
1108       if (Cur < List.end() && *Cur != ',')
1109          continue;
1110
1111       // Match the end of the string..
1112       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1113           Cur - Start != 0 &&
1114           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1115          return true;
1116
1117       Start = Cur + 1;
1118    }
1119    return false;
1120 }
1121                                                                         /*}}}*/
1122
1123 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1124 // ---------------------------------------------------------------------
1125 /* This parses the URI into all of its components */
1126 void URI::CopyFrom(const string &U)
1127 {
1128    string::const_iterator I = U.begin();
1129
1130    // Locate the first colon, this separates the scheme
1131    for (; I < U.end() && *I != ':' ; I++);
1132    string::const_iterator FirstColon = I;
1133
1134    /* Determine if this is a host type URI with a leading double //
1135       and then search for the first single / */
1136    string::const_iterator SingleSlash = I;
1137    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1138       SingleSlash += 3;
1139
1140    /* Find the / indicating the end of the hostname, ignoring /'s in the
1141       square brackets */
1142    bool InBracket = false;
1143    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1144    {
1145       if (*SingleSlash == '[')
1146          InBracket = true;
1147       if (InBracket == true && *SingleSlash == ']')
1148          InBracket = false;
1149    }
1150
1151    if (SingleSlash > U.end())
1152       SingleSlash = U.end();
1153
1154    // We can now write the access and path specifiers
1155    Access.assign(U.begin(),FirstColon);
1156    if (SingleSlash != U.end())
1157       Path.assign(SingleSlash,U.end());
1158    if (Path.empty() == true)
1159       Path = "/";
1160
1161    // Now we attempt to locate a user:pass@host fragment
1162    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1163       FirstColon += 3;
1164    else
1165       FirstColon += 1;
1166    if (FirstColon >= U.end())
1167       return;
1168
1169    if (FirstColon > SingleSlash)
1170       FirstColon = SingleSlash;
1171
1172    // Find the colon...
1173    I = FirstColon + 1;
1174    if (I > SingleSlash)
1175       I = SingleSlash;
1176    for (; I < SingleSlash && *I != ':'; I++);
1177    string::const_iterator SecondColon = I;
1178
1179    // Search for the @ after the colon
1180    for (; I < SingleSlash && *I != '@'; I++);
1181    string::const_iterator At = I;
1182
1183    // Now write the host and user/pass
1184    if (At == SingleSlash)
1185    {
1186       if (FirstColon < SingleSlash)
1187          Host.assign(FirstColon,SingleSlash);
1188    }
1189    else
1190    {
1191       Host.assign(At+1,SingleSlash);
1192       User.assign(FirstColon,SecondColon);
1193       if (SecondColon < At)
1194          Password.assign(SecondColon+1,At);
1195    }
1196
1197    // Now we parse the RFC 2732 [] hostnames.
1198    unsigned long PortEnd = 0;
1199    InBracket = false;
1200    for (unsigned I = 0; I != Host.length();)
1201    {
1202       if (Host[I] == '[')
1203       {
1204          InBracket = true;
1205          Host.erase(I,1);
1206          continue;
1207       }
1208
1209       if (InBracket == true && Host[I] == ']')
1210       {
1211          InBracket = false;
1212          Host.erase(I,1);
1213          PortEnd = I;
1214          continue;
1215       }
1216       I++;
1217    }
1218
1219    // Tsk, weird.
1220    if (InBracket == true)
1221    {
1222       Host.clear();
1223       return;
1224    }
1225
1226    // Now we parse off a port number from the hostname
1227    Port = 0;
1228    string::size_type Pos = Host.rfind(':');
1229    if (Pos == string::npos || Pos < PortEnd)
1230       return;
1231
1232    Port = atoi(string(Host,Pos+1).c_str());
1233    Host.assign(Host,0,Pos);
1234 }
1235                                                                         /*}}}*/
1236 // URI::operator string - Convert the URI to a string                   /*{{{*/
1237 // ---------------------------------------------------------------------
1238 /* */
1239 URI::operator string()
1240 {
1241    string Res;
1242
1243    if (Access.empty() == false)
1244       Res = Access + ':';
1245
1246    if (Host.empty() == false)
1247    {
1248       if (Access.empty() == false)
1249          Res += "//";
1250
1251       if (User.empty() == false)
1252       {
1253          Res +=  User;
1254          if (Password.empty() == false)
1255             Res += ":" + Password;
1256          Res += "@";
1257       }
1258
1259       // Add RFC 2732 escaping characters
1260       if (Access.empty() == false &&
1261           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1262          Res += '[' + Host + ']';
1263       else
1264          Res += Host;
1265
1266       if (Port != 0)
1267       {
1268          char S[30];
1269          sprintf(S,":%u",Port);
1270          Res += S;
1271       }
1272    }
1273
1274    if (Path.empty() == false)
1275    {
1276       if (Path[0] != '/')
1277          Res += "/" + Path;
1278       else
1279          Res += Path;
1280    }
1281
1282    return Res;
1283 }
1284                                                                         /*}}}*/
1285 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1286 // ---------------------------------------------------------------------
1287 /* */
1288 string URI::SiteOnly(const string &URI)
1289 {
1290    ::URI U(URI);
1291    U.User.clear();
1292    U.Password.clear();
1293    U.Path.clear();
1294    U.Port = 0;
1295    return U;
1296 }
1297                                                                         /*}}}*/