apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
  41 /* This is handy to use before display some information for enduser  */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf;
  47   size_t insize, bufsize;
  48   dest->clear();
  49
  50   cd = iconv_open(codeset, "UTF-8");
  51   if (cd == (iconv_t)(-1)) {
  52      // Something went wrong
  53      if (errno == EINVAL)
  54         _error->Error("conversion from 'UTF-8' to '%s' not available",
  55                codeset);
  56      else
  57         perror("iconv_open");
  58
  59      return false;
  60   }
  61
  62   insize = bufsize = orig.size();
  63   inbuf = orig.data();
  64   inptr = (char *)inbuf;
  65   outbuf = new char[bufsize];
  66   size_t lastError = -1;
  67
  68   while (insize != 0)
  69   {
  70      char *outptr = outbuf;
  71      size_t outsize = bufsize;
  72      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  73      dest->append(outbuf, outptr - outbuf);
  74      if (err == (size_t)(-1))
  75      {
  76         switch (errno)
  77         {
  78         case EILSEQ:
  79            insize--;
  80            inptr++;
  81            // replace a series of unknown multibytes with a single "?"
  82            if (lastError != insize) {
  83               lastError = insize - 1;
  84               dest->append("?");
  85            }
  86            break;
  87         case EINVAL:
  88            insize = 0;
  89            break;
  90         case E2BIG:
  91            if (outptr == outbuf)
  92            {
  93               bufsize *= 2;
  94               delete[] outbuf;
  95               outbuf = new char[bufsize];
  96            }
  97            break;
  98         }
  99      }
 100   }
 101
 102   delete[] outbuf;
 103
 104   iconv_close(cd);
 105
 106   return true;
 107 }
 108                                                                         /*}}}*/
 109 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 110 // ---------------------------------------------------------------------
 111 /* This is handy to use when parsing a file. It also removes \n's left
 112    over from fgets and company */
 113 char *_strstrip(char *String)
 114 {
 115    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 116
 117    if (*String == 0)
 118       return String;
 119
 120    char *End = String + strlen(String) - 1;
 121    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 122                                *End == '\r'); End--);
 123    End++;
 124    *End = 0;
 125    return String;
 126 };
 127                                                                         /*}}}*/
 128 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 129 // ---------------------------------------------------------------------
 130 /* */
 131 char *_strtabexpand(char *String,size_t Len)
 132 {
 133    for (char *I = String; I != I + Len && *I != 0; I++)
 134    {
 135       if (*I != '\t')
 136          continue;
 137       if (I + 8 > String + Len)
 138       {
 139          *I = 0;
 140          return String;
 141       }
 142
 143       /* Assume the start of the string is 0 and find the next 8 char
 144          division */
 145       int Len;
 146       if (String == I)
 147          Len = 1;
 148       else
 149          Len = 8 - ((String - I) % 8);
 150       Len -= 2;
 151       if (Len <= 0)
 152       {
 153          *I = ' ';
 154          continue;
 155       }
 156
 157       memmove(I + Len,I + 1,strlen(I) + 1);
 158       for (char *J = I; J + Len != I; *I = ' ', I++);
 159    }
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 164 // ---------------------------------------------------------------------
 165 /* This grabs a single word, converts any % escaped characters to their
 166    proper values and advances the pointer. Double quotes are understood
 167    and striped out as well. This is for URI/URL parsing. It also can
 168    understand [] brackets.*/
 169 bool ParseQuoteWord(const char *&String,string &Res)
 170 {
 171    // Skip leading whitespace
 172    const char *C = String;
 173    for (;*C != 0 && *C == ' '; C++);
 174    if (*C == 0)
 175       return false;
 176
 177    // Jump to the next word
 178    for (;*C != 0 && isspace(*C) == 0; C++)
 179    {
 180       if (*C == '"')
 181       {
 182          for (C++; *C != 0 && *C != '"'; C++);
 183          if (*C == 0)
 184             return false;
 185       }
 186       if (*C == '[')
 187       {
 188          for (C++; *C != 0 && *C != ']'; C++);
 189          if (*C == 0)
 190             return false;
 191       }
 192    }
 193
 194    // Now de-quote characters
 195    char Buffer[1024];
 196    char Tmp[3];
 197    const char *Start = String;
 198    char *I;
 199    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 200    {
 201       if (*Start == '%' && Start + 2 < C &&
 202           isxdigit(Start[1]) && isxdigit(Start[2]))
 203       {
 204          Tmp[0] = Start[1];
 205          Tmp[1] = Start[2];
 206          Tmp[2] = 0;
 207          *I = (char)strtol(Tmp,0,16);
 208          Start += 3;
 209          continue;
 210       }
 211       if (*Start != '"')
 212          *I = *Start;
 213       else
 214          I--;
 215       Start++;
 216    }
 217    *I = 0;
 218    Res = Buffer;
 219
 220    // Skip ending white space
 221    for (;*C != 0 && isspace(*C) != 0; C++);
 222    String = C;
 223    return true;
 224 }
 225                                                                         /*}}}*/
 226 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 227 // ---------------------------------------------------------------------
 228 /* This expects a series of space separated strings enclosed in ""'s.
 229    It concatenates the ""'s into a single string. */
 230 bool ParseCWord(const char *&String,string &Res)
 231 {
 232    // Skip leading whitespace
 233    const char *C = String;
 234    for (;*C != 0 && *C == ' '; C++);
 235    if (*C == 0)
 236       return false;
 237
 238    char Buffer[1024];
 239    char *Buf = Buffer;
 240    if (strlen(String) >= sizeof(Buffer))
 241        return false;
 242
 243    for (; *C != 0; C++)
 244    {
 245       if (*C == '"')
 246       {
 247          for (C++; *C != 0 && *C != '"'; C++)
 248             *Buf++ = *C;
 249
 250          if (*C == 0)
 251             return false;
 252
 253          continue;
 254       }
 255
 256       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 257          continue;
 258       if (isspace(*C) == 0)
 259          return false;
 260       *Buf++ = ' ';
 261    }
 262    *Buf = 0;
 263    Res = Buffer;
 264    String = C;
 265    return true;
 266 }
 267                                                                         /*}}}*/
 268 // QuoteString - Convert a string into quoted from                      /*{{{*/
 269 // ---------------------------------------------------------------------
 270 /* */
 271 string QuoteString(const string &Str, const char *Bad)
 272 {
 273    string Res;
 274    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 275    {
 276       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 277           *I == 0x25 || // percent '%' char
 278           *I <= 0x20 || *I >= 0x7F) // control chars
 279       {
 280          char Buf[10];
 281          sprintf(Buf,"%%%02x",(int)*I);
 282          Res += Buf;
 283       }
 284       else
 285          Res += *I;
 286    }
 287    return Res;
 288 }
 289                                                                         /*}}}*/
 290 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 291 // ---------------------------------------------------------------------
 292 /* This undoes QuoteString */
 293 string DeQuoteString(const string &Str)
 294 {
 295    return DeQuoteString(Str.begin(),Str.end());
 296 }
 297 string DeQuoteString(string::const_iterator const &begin,
 298                         string::const_iterator const &end)
 299 {
 300    string Res;
 301    for (string::const_iterator I = begin; I != end; I++)
 302    {
 303       if (*I == '%' && I + 2 < end &&
 304           isxdigit(I[1]) && isxdigit(I[2]))
 305       {
 306          char Tmp[3];
 307          Tmp[0] = I[1];
 308          Tmp[1] = I[2];
 309          Tmp[2] = 0;
 310          Res += (char)strtol(Tmp,0,16);
 311          I += 2;
 312          continue;
 313       }
 314       else
 315          Res += *I;
 316    }
 317    return Res;
 318 }
 319
 320                                                                         /*}}}*/
 321 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 322 // ---------------------------------------------------------------------
 323 /* A max of 4 digits are shown before conversion to the next highest unit.
 324    The max length of the string will be 5 chars unless the size is > 10
 325    YottaBytes (E24) */
 326 string SizeToStr(double Size)
 327 {
 328    char S[300];
 329    double ASize;
 330    if (Size >= 0)
 331       ASize = Size;
 332    else
 333       ASize = -1*Size;
 334
 335    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 336       ExaBytes, ZettaBytes, YottaBytes */
 337    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 338    int I = 0;
 339    while (I <= 8)
 340    {
 341       if (ASize < 100 && I != 0)
 342       {
 343          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 344          break;
 345       }
 346
 347       if (ASize < 10000)
 348       {
 349          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 350          break;
 351       }
 352       ASize /= 1000.0;
 353       I++;
 354    }
 355
 356    return S;
 357 }
 358                                                                         /*}}}*/
 359 // TimeToStr - Convert the time into a string                           /*{{{*/
 360 // ---------------------------------------------------------------------
 361 /* Converts a number of seconds to a hms format */
 362 string TimeToStr(unsigned long Sec)
 363 {
 364    char S[300];
 365
 366    while (1)
 367    {
 368       if (Sec > 60*60*24)
 369       {
 370          //d means days, h means hours, min means minutes, s means seconds
 371          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 372          break;
 373       }
 374
 375       if (Sec > 60*60)
 376       {
 377          //h means hours, min means minutes, s means seconds
 378          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 379          break;
 380       }
 381
 382       if (Sec > 60)
 383       {
 384          //min means minutes, s means seconds
 385          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 386          break;
 387       }
 388
 389       //s means seconds
 390       sprintf(S,_("%lis"),Sec);
 391       break;
 392    }
 393
 394    return S;
 395 }
 396                                                                         /*}}}*/
 397 // SubstVar - Substitute a string for another string                    /*{{{*/
 398 // ---------------------------------------------------------------------
 399 /* This replaces all occurances of Subst with Contents in Str. */
 400 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 401 {
 402    string::size_type Pos = 0;
 403    string::size_type OldPos = 0;
 404    string Temp;
 405
 406    while (OldPos < Str.length() &&
 407           (Pos = Str.find(Subst,OldPos)) != string::npos)
 408    {
 409       Temp += string(Str,OldPos,Pos) + Contents;
 410       OldPos = Pos + Subst.length();
 411    }
 412
 413    if (OldPos == 0)
 414       return Str;
 415
 416    return Temp + string(Str,OldPos);
 417 }
 418
 419 string SubstVar(string Str,const struct SubstVar *Vars)
 420 {
 421    for (; Vars->Subst != 0; Vars++)
 422       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 423    return Str;
 424 }
 425                                                                         /*}}}*/
 426 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 427 // ---------------------------------------------------------------------
 428 /* Returns a string with the supplied separator depth + 1 times in it */
 429 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 430 {
 431    std::string output = "";
 432    for(unsigned long d=Depth+1; d > 0; d--)
 433       output.append(Separator);
 434    return output;
 435 }
 436                                                                         /*}}}*/
 437 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 438 // ---------------------------------------------------------------------
 439 /* This converts a URI into a safe filename. It quotes all unsafe characters
 440    and converts / to _ and removes the scheme identifier. The resulting
 441    file name should be unique and never occur again for a different file */
 442 string URItoFileName(const string &URI)
 443 {
 444    // Nuke 'sensitive' items
 445    ::URI U(URI);
 446    U.User.clear();
 447    U.Password.clear();
 448    U.Access.clear();
 449
 450    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 451    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 452    replace(NewURI.begin(),NewURI.end(),'/','_');
 453    return NewURI;
 454 }
 455                                                                         /*}}}*/
 456 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 457 // ---------------------------------------------------------------------
 458 /* This routine performs a base64 transformation on a string. It was ripped
 459    from wget and then patched and bug fixed.
 460
 461    This spec can be found in rfc2045 */
 462 string Base64Encode(const string &S)
 463 {
 464    // Conversion table.
 465    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 466                           'I','J','K','L','M','N','O','P',
 467                           'Q','R','S','T','U','V','W','X',
 468                           'Y','Z','a','b','c','d','e','f',
 469                           'g','h','i','j','k','l','m','n',
 470                           'o','p','q','r','s','t','u','v',
 471                           'w','x','y','z','0','1','2','3',
 472                           '4','5','6','7','8','9','+','/'};
 473
 474    // Pre-allocate some space
 475    string Final;
 476    Final.reserve((4*S.length() + 2)/3 + 2);
 477
 478    /* Transform the 3x8 bits to 4x6 bits, as required by
 479       base64.  */
 480    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 481    {
 482       char Bits[3] = {0,0,0};
 483       Bits[0] = I[0];
 484       if (I + 1 < S.end())
 485          Bits[1] = I[1];
 486       if (I + 2 < S.end())
 487          Bits[2] = I[2];
 488
 489       Final += tbl[Bits[0] >> 2];
 490       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 491
 492       if (I + 1 >= S.end())
 493          break;
 494
 495       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 496
 497       if (I + 2 >= S.end())
 498          break;
 499
 500       Final += tbl[Bits[2] & 0x3f];
 501    }
 502
 503    /* Apply the padding elements, this tells how many bytes the remote
 504       end should discard */
 505    if (S.length() % 3 == 2)
 506       Final += '=';
 507    if (S.length() % 3 == 1)
 508       Final += "==";
 509
 510    return Final;
 511 }
 512                                                                         /*}}}*/
 513 // stringcmp - Arbitrary string compare                                 /*{{{*/
 514 // ---------------------------------------------------------------------
 515 /* This safely compares two non-null terminated strings of arbitrary
 516    length */
 517 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 518 {
 519    for (; A != AEnd && B != BEnd; A++, B++)
 520       if (*A != *B)
 521          break;
 522
 523    if (A == AEnd && B == BEnd)
 524       return 0;
 525    if (A == AEnd)
 526       return 1;
 527    if (B == BEnd)
 528       return -1;
 529    if (*A < *B)
 530       return -1;
 531    return 1;
 532 }
 533
 534 #if __GNUC__ >= 3
 535 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 536               const char *B,const char *BEnd)
 537 {
 538    for (; A != AEnd && B != BEnd; A++, B++)
 539       if (*A != *B)
 540          break;
 541
 542    if (A == AEnd && B == BEnd)
 543       return 0;
 544    if (A == AEnd)
 545       return 1;
 546    if (B == BEnd)
 547       return -1;
 548    if (*A < *B)
 549       return -1;
 550    return 1;
 551 }
 552 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 553               string::const_iterator B,string::const_iterator BEnd)
 554 {
 555    for (; A != AEnd && B != BEnd; A++, B++)
 556       if (*A != *B)
 557          break;
 558
 559    if (A == AEnd && B == BEnd)
 560       return 0;
 561    if (A == AEnd)
 562       return 1;
 563    if (B == BEnd)
 564       return -1;
 565    if (*A < *B)
 566       return -1;
 567    return 1;
 568 }
 569 #endif
 570                                                                         /*}}}*/
 571 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 572 // ---------------------------------------------------------------------
 573 /* */
 574 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 575 {
 576    for (; A != AEnd && B != BEnd; A++, B++)
 577       if (tolower_ascii(*A) != tolower_ascii(*B))
 578          break;
 579
 580    if (A == AEnd && B == BEnd)
 581       return 0;
 582    if (A == AEnd)
 583       return 1;
 584    if (B == BEnd)
 585       return -1;
 586    if (tolower_ascii(*A) < tolower_ascii(*B))
 587       return -1;
 588    return 1;
 589 }
 590 #if __GNUC__ >= 3
 591 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 592                   const char *B,const char *BEnd)
 593 {
 594    for (; A != AEnd && B != BEnd; A++, B++)
 595       if (tolower_ascii(*A) != tolower_ascii(*B))
 596          break;
 597
 598    if (A == AEnd && B == BEnd)
 599       return 0;
 600    if (A == AEnd)
 601       return 1;
 602    if (B == BEnd)
 603       return -1;
 604    if (tolower_ascii(*A) < tolower_ascii(*B))
 605       return -1;
 606    return 1;
 607 }
 608 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 609                   string::const_iterator B,string::const_iterator BEnd)
 610 {
 611    for (; A != AEnd && B != BEnd; A++, B++)
 612       if (tolower_ascii(*A) != tolower_ascii(*B))
 613          break;
 614
 615    if (A == AEnd && B == BEnd)
 616       return 0;
 617    if (A == AEnd)
 618       return 1;
 619    if (B == BEnd)
 620       return -1;
 621    if (tolower_ascii(*A) < tolower_ascii(*B))
 622       return -1;
 623    return 1;
 624 }
 625 #endif
 626                                                                         /*}}}*/
 627 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 628 // ---------------------------------------------------------------------
 629 /* The format is like those used in package files and the method
 630    communication system */
 631 string LookupTag(const string &Message,const char *Tag,const char *Default)
 632 {
 633    // Look for a matching tag.
 634    int Length = strlen(Tag);
 635    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 636    {
 637       // Found the tag
 638       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 639       {
 640          // Find the end of line and strip the leading/trailing spaces
 641          string::const_iterator J;
 642          I += Length + 1;
 643          for (; isspace(*I) != 0 && I < Message.end(); I++);
 644          for (J = I; *J != '\n' && J < Message.end(); J++);
 645          for (; J > I && isspace(J[-1]) != 0; J--);
 646
 647          return string(I,J);
 648       }
 649
 650       for (; *I != '\n' && I < Message.end(); I++);
 651    }
 652
 653    // Failed to find a match
 654    if (Default == 0)
 655       return string();
 656    return Default;
 657 }
 658                                                                         /*}}}*/
 659 // StringToBool - Converts a string into a boolean                      /*{{{*/
 660 // ---------------------------------------------------------------------
 661 /* This inspects the string to see if it is true or if it is false and
 662    then returns the result. Several varients on true/false are checked. */
 663 int StringToBool(const string &Text,int Default)
 664 {
 665    char *End;
 666    int Res = strtol(Text.c_str(),&End,0);
 667    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 668       return Res;
 669
 670    // Check for positives
 671    if (strcasecmp(Text.c_str(),"no") == 0 ||
 672        strcasecmp(Text.c_str(),"false") == 0 ||
 673        strcasecmp(Text.c_str(),"without") == 0 ||
 674        strcasecmp(Text.c_str(),"off") == 0 ||
 675        strcasecmp(Text.c_str(),"disable") == 0)
 676       return 0;
 677
 678    // Check for negatives
 679    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 680        strcasecmp(Text.c_str(),"true") == 0 ||
 681        strcasecmp(Text.c_str(),"with") == 0 ||
 682        strcasecmp(Text.c_str(),"on") == 0 ||
 683        strcasecmp(Text.c_str(),"enable") == 0)
 684       return 1;
 685
 686    return Default;
 687 }
 688                                                                         /*}}}*/
 689 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 690 // ---------------------------------------------------------------------
 691 /* This converts a time_t into a string time representation that is
 692    year 2000 complient and timezone neutral */
 693 string TimeRFC1123(time_t Date)
 694 {
 695    struct tm Conv = *gmtime(&Date);
 696    char Buf[300];
 697
 698    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 699    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 700                           "Aug","Sep","Oct","Nov","Dec"};
 701
 702    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 703            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 704            Conv.tm_min,Conv.tm_sec);
 705    return Buf;
 706 }
 707                                                                         /*}}}*/
 708 // ReadMessages - Read messages from the FD                             /*{{{*/
 709 // ---------------------------------------------------------------------
 710 /* This pulls full messages from the input FD into the message buffer.
 711    It assumes that messages will not pause during transit so no
 712    fancy buffering is used.
 713
 714    In particular: this reads blocks from the input until it believes
 715    that it's run out of input text.  Each block is terminated by a
 716    double newline ('\n' followed by '\n').  As noted below, there is a
 717    bug in this code: it assumes that all the blocks have been read if
 718    it doesn't see additional text in the buffer after the last one is
 719    parsed, which will cause it to lose blocks if the last block
 720    coincides with the end of the buffer.
 721  */
 722 bool ReadMessages(int Fd, vector<string> &List)
 723 {
 724    char Buffer[64000];
 725    char *End = Buffer;
 726    // Represents any left-over from the previous iteration of the
 727    // parse loop.  (i.e., if a message is split across the end
 728    // of the buffer, it goes here)
 729    string PartialMessage;
 730
 731    while (1)
 732    {
 733       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 734       if (Res < 0 && errno == EINTR)
 735          continue;
 736
 737       // Process is dead, this is kind of bad..
 738       if (Res == 0)
 739          return false;
 740
 741       // No data
 742       if (Res < 0 && errno == EAGAIN)
 743          return true;
 744       if (Res < 0)
 745          return false;
 746
 747       End += Res;
 748
 749       // Look for the end of the message
 750       for (char *I = Buffer; I + 1 < End; I++)
 751       {
 752          if (I[0] != '\n' || I[1] != '\n')
 753             continue;
 754
 755          // Pull the message out
 756          string Message(Buffer,I-Buffer);
 757          PartialMessage += Message;
 758
 759          // Fix up the buffer
 760          for (; I < End && *I == '\n'; I++);
 761          End -= I-Buffer;
 762          memmove(Buffer,I,End-Buffer);
 763          I = Buffer;
 764
 765          List.push_back(PartialMessage);
 766          PartialMessage.clear();
 767       }
 768       if (End != Buffer)
 769         {
 770           // If there's text left in the buffer, store it
 771           // in PartialMessage and throw the rest of the buffer
 772           // away.  This allows us to handle messages that
 773           // are longer than the static buffer size.
 774           PartialMessage += string(Buffer, End);
 775           End = Buffer;
 776         }
 777       else
 778         {
 779           // BUG ALERT: if a message block happens to end at a
 780           // multiple of 64000 characters, this will cause it to
 781           // terminate early, leading to a badly formed block and
 782           // probably crashing the method.  However, this is the only
 783           // way we have to find the end of the message block.  I have
 784           // an idea of how to fix this, but it will require changes
 785           // to the protocol (essentially to mark the beginning and
 786           // end of the block).
 787           //
 788           //  -- dburrows 2008-04-02
 789           return true;
 790         }
 791
 792       if (WaitFd(Fd) == false)
 793          return false;
 794    }
 795 }
 796                                                                         /*}}}*/
 797 // MonthConv - Converts a month string into a number                    /*{{{*/
 798 // ---------------------------------------------------------------------
 799 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 800    Made it a bit more robust with a few tolower_ascii though. */
 801 static int MonthConv(char *Month)
 802 {
 803    switch (tolower_ascii(*Month))
 804    {
 805       case 'a':
 806       return tolower_ascii(Month[1]) == 'p'?3:7;
 807       case 'd':
 808       return 11;
 809       case 'f':
 810       return 1;
 811       case 'j':
 812       if (tolower_ascii(Month[1]) == 'a')
 813          return 0;
 814       return tolower_ascii(Month[2]) == 'n'?5:6;
 815       case 'm':
 816       return tolower_ascii(Month[2]) == 'r'?2:4;
 817       case 'n':
 818       return 10;
 819       case 'o':
 820       return 9;
 821       case 's':
 822       return 8;
 823
 824       // Pretend it is January..
 825       default:
 826       return 0;
 827    }
 828 }
 829                                                                         /*}}}*/
 830 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 831 // ---------------------------------------------------------------------
 832 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 833    than local timezone (mktime assumes the latter).
 834
 835    This function is a nonstandard GNU extension that is also present on
 836    the BSDs and maybe other systems. For others we follow the advice of
 837    the manpage of timegm and use his portable replacement. */
 838 #ifndef HAVE_TIMEGM
 839 static time_t timegm(struct tm *t)
 840 {
 841    char *tz = getenv("TZ");
 842    setenv("TZ", "", 1);
 843    tzset();
 844    time_t ret = mktime(t);
 845    if (tz)
 846       setenv("TZ", tz, 1);
 847    else
 848       unsetenv("TZ");
 849    tzset();
 850    return ret;
 851 }
 852 #endif
 853                                                                         /*}}}*/
 854 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t  /*{{{*/
 855 // ---------------------------------------------------------------------
 856 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
 857    with one exception: All timezones (%Z) are accepted but the protocol
 858    says that it MUST be GMT, but this one is equal to UTC which we will
 859    encounter from time to time (e.g. in Release files) so we accept all
 860    here and just assume it is GMT (or UTC) later on */
 861 bool RFC1123StrToTime(const char* const str,time_t &time)
 862 {
 863    struct tm Tm;
 864    setlocale (LC_ALL,"C");
 865    bool const invalid =
 866    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 867       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 868    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 869        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 870    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 871        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 872    setlocale (LC_ALL,"");
 873    if (invalid == true)
 874       return false;
 875
 876    time = timegm(&Tm);
 877    return true;
 878 }
 879                                                                         /*}}}*/
 880 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 881 // ---------------------------------------------------------------------
 882 /* */
 883 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 884 {
 885    struct tm Tm;
 886    // MDTM includes no whitespaces but recommend and ignored by strptime
 887    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 888       return false;
 889
 890    time = timegm(&Tm);
 891    return true;
 892 }
 893                                                                         /*}}}*/
 894 // StrToTime - Converts a string into a time_t                          /*{{{*/
 895 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format */
 901 bool StrToTime(const string &Val,time_t &Result)
 902 {
 903    struct tm Tm;
 904    char Month[10];
 905    const char *I = Val.c_str();
 906
 907    // Skip the day of the week
 908    for (;*I != 0  && *I != ' '; I++);
 909
 910    // Handle RFC 1123 time
 911    Month[0] = 0;
 912    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 913               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 914    {
 915       // Handle RFC 1036 time
 916       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 917                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 918          Tm.tm_year += 1900;
 919       else
 920       {
 921          // asctime format
 922          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 923                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 924          {
 925             // 'ftp' time
 926             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 927                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 928                return false;
 929             Tm.tm_mon--;
 930          }
 931       }
 932    }
 933
 934    Tm.tm_isdst = 0;
 935    if (Month[0] != 0)
 936       Tm.tm_mon = MonthConv(Month);
 937    Tm.tm_year -= 1900;
 938
 939    // Convert to local time and then to GMT
 940    Result = timegm(&Tm);
 941    return true;
 942 }
 943                                                                         /*}}}*/
 944 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 945 // ---------------------------------------------------------------------
 946 /* This is used in decoding the crazy fixed length string headers in
 947    tar and ar files. */
 948 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 949 {
 950    char S[30];
 951    if (Len >= sizeof(S))
 952       return false;
 953    memcpy(S,Str,Len);
 954    S[Len] = 0;
 955
 956    // All spaces is a zero
 957    Res = 0;
 958    unsigned I;
 959    for (I = 0; S[I] == ' '; I++);
 960    if (S[I] == 0)
 961       return true;
 962
 963    char *End;
 964    Res = strtoul(S,&End,Base);
 965    if (End == S)
 966       return false;
 967
 968    return true;
 969 }
 970                                                                         /*}}}*/
 971 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 972 // ---------------------------------------------------------------------
 973 /* Helper for Hex2Num */
 974 static int HexDigit(int c)
 975 {
 976    if (c >= '0' && c <= '9')
 977       return c - '0';
 978    if (c >= 'a' && c <= 'f')
 979       return c - 'a' + 10;
 980    if (c >= 'A' && c <= 'F')
 981       return c - 'A' + 10;
 982    return 0;
 983 }
 984                                                                         /*}}}*/
 985 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 986 // ---------------------------------------------------------------------
 987 /* The length of the buffer must be exactly 1/2 the length of the string. */
 988 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 989 {
 990    if (Str.length() != Length*2)
 991       return false;
 992
 993    // Convert each digit. We store it in the same order as the string
 994    int J = 0;
 995    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 996    {
 997       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 998          return false;
 999
1000       Num[J] = HexDigit(I[0]) << 4;
1001       Num[J] += HexDigit(I[1]);
1002    }
1003
1004    return true;
1005 }
1006                                                                         /*}}}*/
1007 // TokSplitString - Split a string up by a given token                  /*{{{*/
1008 // ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at each token location. */
/* Input is split in place: every item is trimmed of surrounding
   whitespace, terminated by writing a null byte into Input and its start
   stored in List. List is closed by a null pointer, so it holds at most
   ListMax-1 items. Returns false if the items did not fit; List is null
   terminated even then, unless ListMax is 0 and it is not touched. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without room for even the terminating entry nothing can be stored
   if (ListMax == 0)
      return false;

   /* Strip any leading spaces. The casts are needed because isspace()
      is only defined for unsigned char values and plain char may be
      negative. */
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator, drop the item
         List[Count-1] = 0;
         return false;
      }

      /* Advance pos over the separator, the whitespace following it and
         the null byte that may just have been written above */
      for (; Pos != Stop && (*Pos == Tok ||
             isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1046                                                                         /*}}}*/
1047 // VectorizeString - Split a string up into a vector of strings         /*{{{*/
1048 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we have an iterable vector */
/* Returns the pieces of haystack found between occurrences of split.
   Empty pieces are kept, with one exception: a separator that is the very
   last character does not produce a trailing empty piece. An empty
   haystack yields a single empty string. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (true)
   {
      string::const_iterator const end = std::find(start, last, split);
      exploded.push_back(string(start, end));

      // No separator left, the piece just stored was the final one
      if (end == last)
         break;

      /* end refers to a separator here, so stepping over it is always
         valid. We never move an iterator beyond haystack.end(). */
      start = end + 1;
      if (start == last)
         break;
   }
   return exploded;
}
1064                                                                         /*}}}*/
1065 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1066 // ---------------------------------------------------------------------
1067 /* */
1068 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1069                       const char **ListEnd)
1070 {
1071    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1072       R->Hit = false;
1073
1074    unsigned long Hits = 0;
1075    for (; ListBegin != ListEnd; ListBegin++)
1076    {
1077       // Check if the name is a regex
1078       const char *I;
1079       bool Regex = true;
1080       for (I = *ListBegin; *I != 0; I++)
1081          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1082             break;
1083       if (*I == 0)
1084          Regex = false;
1085
1086       // Compile the regex pattern
1087       regex_t Pattern;
1088       if (Regex == true)
1089          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1090                      REG_NOSUB) != 0)
1091             Regex = false;
1092
1093       // Search the list
1094       bool Done = false;
1095       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1096       {
1097          if (R->Str[0] == 0)
1098             continue;
1099
1100          if (strcasecmp(R->Str,*ListBegin) != 0)
1101          {
1102             if (Regex == false)
1103                continue;
1104             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1105                continue;
1106          }
1107          Done = true;
1108
1109          if (R->Hit == false)
1110             Hits++;
1111
1112          R->Hit = true;
1113       }
1114
1115       if (Regex == true)
1116          regfree(&Pattern);
1117
1118       if (Done == false)
1119          _error->Warning(_("Selection %s not found"),*ListBegin);
1120    }
1121
1122    return Hits;
1123 }
1124                                                                         /*}}}*/
1125 // ioprintf - C format string outputter to C++ iostreams                /*{{{*/
1126 // ---------------------------------------------------------------------
1127 /* This is used to make the internationalization strings easier to translate
1128    and to allow reordering of parameters */
/* Formats like printf and writes the result to 'out'. Output of up to
   4095 characters is produced in a stack buffer; anything longer is
   formatted a second time into a heap buffer of the required size instead
   of being cut off. Nothing is written if formatting fails. */
void ioprintf(ostream &out,const char *format,...)
{
   char Small[4096];
   va_list args;

   // First try: vsnprintf reports the length the complete output needs
   va_start(args,format);
   int const Needed = vsnprintf(Small,sizeof(Small),format,args);
   va_end(args);

   // Formatting failed, the buffer contents are not usable
   if (Needed < 0)
      return;

   if (static_cast<size_t>(Needed) < sizeof(Small))
   {
      out << Small;
      return;
   }

   /* The output was truncated. The argument list cannot be reused after
      vsnprintf has walked it, so start over for the second pass. */
   vector<char> Big(static_cast<size_t>(Needed) + 1);
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << &Big[0];
}
1139                                                                         /*}}}*/
1140 // strprintf - C format string outputter to C++ strings                 /*{{{*/
1141 // ---------------------------------------------------------------------
1142 /* This is used to make the internationalization strings easier to translate
1143    and to allow reordering of parameters */
/* Formats like printf and stores the result in 'out', replacing its
   previous content. Output of up to 4095 characters is produced in a
   stack buffer; anything longer is formatted a second time into a heap
   buffer of the required size instead of being cut off. 'out' is only
   assigned after formatting has finished, so its old content may be
   passed as one of the arguments. If formatting fails 'out' is emptied. */
void strprintf(string &out,const char *format,...)
{
   char Small[4096];
   va_list args;

   // First try: vsnprintf reports the length the complete output needs
   va_start(args,format);
   int const Needed = vsnprintf(Small,sizeof(Small),format,args);
   va_end(args);

   // Formatting failed, the buffer contents are not usable
   if (Needed < 0)
   {
      out.clear();
      return;
   }

   if (static_cast<size_t>(Needed) < sizeof(Small))
   {
      out = Small;
      return;
   }

   /* The output was truncated. The argument list cannot be reused after
      vsnprintf has walked it, so start over for the second pass. */
   vector<char> Big(static_cast<size_t>(Needed) + 1);
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out = &Big[0];
}
1154                                                                         /*}}}*/
1155 // safe_snprintf - Safer snprintf                                       /*{{{*/
1156 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
/* Formats into [Buffer,End) and returns the position at which the next
   piece of text should be written: the terminating null byte if
   everything fit, End if the output was cut off, formatting failed or
   there was no room to begin with. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room at all; checked before va_start so nothing is left open
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* Did is the length the complete output needs. Lengths are compared
      rather than pointers, so that no pointer beyond End is ever formed. */
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1176                                                                         /*}}}*/
1177
1178 // tolower_ascii - tolower() function that ignores the locale           /*{{{*/
1179 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is noticeably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
int tolower_ascii(int const c)
{
   // Only 'A'-'Z' are mapped, every other value is handed back untouched
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1190                                                                         /*}}}*/
1191
// CheckDomainList - See if Host is in a , separated list               /*{{{*/
// ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1196 bool CheckDomainList(const string &Host,const string &List)
1197 {
1198    string::const_iterator Start = List.begin();
1199    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1200    {
1201       if (Cur < List.end() && *Cur != ',')
1202          continue;
1203
1204       // Match the end of the string..
1205       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1206           Cur - Start != 0 &&
1207           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1208          return true;
1209
1210       Start = Cur + 1;
1211    }
1212    return false;
1213 }
1214                                                                         /*}}}*/
1215
1216 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1217 // ---------------------------------------------------------------------
1218 /* This parses the URI into all of its components */
1219 void URI::CopyFrom(const string &U)
1220 {
1221    string::const_iterator I = U.begin();
1222
1223    // Locate the first colon, this separates the scheme
1224    for (; I < U.end() && *I != ':' ; I++);
1225    string::const_iterator FirstColon = I;
1226
1227    /* Determine if this is a host type URI with a leading double //
1228       and then search for the first single / */
1229    string::const_iterator SingleSlash = I;
1230    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1231       SingleSlash += 3;
1232
1233    /* Find the / indicating the end of the hostname, ignoring /'s in the
1234       square brackets */
1235    bool InBracket = false;
1236    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1237    {
1238       if (*SingleSlash == '[')
1239          InBracket = true;
1240       if (InBracket == true && *SingleSlash == ']')
1241          InBracket = false;
1242    }
1243
1244    if (SingleSlash > U.end())
1245       SingleSlash = U.end();
1246
1247    // We can now write the access and path specifiers
1248    Access.assign(U.begin(),FirstColon);
1249    if (SingleSlash != U.end())
1250       Path.assign(SingleSlash,U.end());
1251    if (Path.empty() == true)
1252       Path = "/";
1253
1254    // Now we attempt to locate a user:pass@host fragment
1255    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1256       FirstColon += 3;
1257    else
1258       FirstColon += 1;
1259    if (FirstColon >= U.end())
1260       return;
1261
1262    if (FirstColon > SingleSlash)
1263       FirstColon = SingleSlash;
1264
1265    // Find the colon...
1266    I = FirstColon + 1;
1267    if (I > SingleSlash)
1268       I = SingleSlash;
1269    for (; I < SingleSlash && *I != ':'; I++);
1270    string::const_iterator SecondColon = I;
1271
1272    // Search for the @ after the colon
1273    for (; I < SingleSlash && *I != '@'; I++);
1274    string::const_iterator At = I;
1275
1276    // Now write the host and user/pass
1277    if (At == SingleSlash)
1278    {
1279       if (FirstColon < SingleSlash)
1280          Host.assign(FirstColon,SingleSlash);
1281    }
1282    else
1283    {
1284       Host.assign(At+1,SingleSlash);
1285       // username and password must be encoded (RFC 3986)
1286       User.assign(DeQuoteString(FirstColon,SecondColon));
1287       if (SecondColon < At)
1288          Password.assign(DeQuoteString(SecondColon+1,At));
1289    }
1290
1291    // Now we parse the RFC 2732 [] hostnames.
1292    unsigned long PortEnd = 0;
1293    InBracket = false;
1294    for (unsigned I = 0; I != Host.length();)
1295    {
1296       if (Host[I] == '[')
1297       {
1298          InBracket = true;
1299          Host.erase(I,1);
1300          continue;
1301       }
1302
1303       if (InBracket == true && Host[I] == ']')
1304       {
1305          InBracket = false;
1306          Host.erase(I,1);
1307          PortEnd = I;
1308          continue;
1309       }
1310       I++;
1311    }
1312
1313    // Tsk, weird.
1314    if (InBracket == true)
1315    {
1316       Host.clear();
1317       return;
1318    }
1319
1320    // Now we parse off a port number from the hostname
1321    Port = 0;
1322    string::size_type Pos = Host.rfind(':');
1323    if (Pos == string::npos || Pos < PortEnd)
1324       return;
1325
1326    Port = atoi(string(Host,Pos+1).c_str());
1327    Host.assign(Host,0,Pos);
1328 }
1329                                                                         /*}}}*/
1330 // URI::operator string - Convert the URI to a string                   /*{{{*/
1331 // ---------------------------------------------------------------------
1332 /* */
1333 URI::operator string()
1334 {
1335    string Res;
1336
1337    if (Access.empty() == false)
1338       Res = Access + ':';
1339
1340    if (Host.empty() == false)
1341    {
1342       if (Access.empty() == false)
1343          Res += "//";
1344
1345       if (User.empty() == false)
1346       {
1347          Res +=  User;
1348          if (Password.empty() == false)
1349             Res += ":" + Password;
1350          Res += "@";
1351       }
1352
1353       // Add RFC 2732 escaping characters
1354       if (Access.empty() == false &&
1355           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1356          Res += '[' + Host + ']';
1357       else
1358          Res += Host;
1359
1360       if (Port != 0)
1361       {
1362          char S[30];
1363          sprintf(S,":%u",Port);
1364          Res += S;
1365       }
1366    }
1367
1368    if (Path.empty() == false)
1369    {
1370       if (Path[0] != '/')
1371          Res += "/" + Path;
1372       else
1373          Res += Path;
1374    }
1375
1376    return Res;
1377 }
1378                                                                         /*}}}*/
1379 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1380 // ---------------------------------------------------------------------
1381 /* */
1382 string URI::SiteOnly(const string &URI)
1383 {
1384    ::URI U(URI);
1385    U.User.clear();
1386    U.Password.clear();
1387    U.Path.clear();
1388    U.Port = 0;
1389    return U;
1390 }
1391                                                                         /*}}}*/
1392 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1393 // ---------------------------------------------------------------------
1394 /* */
1395 string URI::NoUserPassword(const string &URI)
1396 {
1397    ::URI U(URI);
1398    U.User.clear();
1399    U.Password.clear();
1400    U.Port = 0;
1401    return U;
1402 }
1403                                                                         /*}}}*/