apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #ifdef __GNUG__
  19 #pragma implementation "apt-pkg/strutl.h"
  20 #endif
  21
  22 #include <apt-pkg/strutl.h>
  23 #include <apt-pkg/fileutl.h>
  24 #include <apt-pkg/error.h>
  25
  26 #include <apti18n.h>
  27
  28 #include <ctype.h>
  29 #include <string.h>
  30 #include <stdio.h>
  31 #include <algorithm>
  32 #include <unistd.h>
  33 #include <regex.h>
  34 #include <errno.h>
  35 #include <stdarg.h>
  36 #include <iconv.h>
  37
  38 #include "config.h"
  39
  40 using namespace std;
  41                                                                         /*}}}*/
  42
  43 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  44 // ---------------------------------------------------------------------
  45 /* This is handy to use before display some information for enduser  */
  46 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  47 {
  48   iconv_t cd;
  49   const char *inbuf;
  50   char *inptr, *outbuf, *outptr;
  51   size_t insize, outsize;
  52
  53   cd = iconv_open(codeset, "UTF-8");
  54   if (cd == (iconv_t)(-1)) {
  55      // Something went wrong
  56      if (errno == EINVAL)
  57         _error->Error("conversion from 'UTF-8' to '%s' not available",
  58                codeset);
  59      else
  60         perror("iconv_open");
  61
  62      // Clean the destination string
  63      *dest = "";
  64
  65      return false;
  66   }
  67
  68   insize = outsize = orig.size();
  69   inbuf = orig.data();
  70   inptr = (char *)inbuf;
  71   outbuf = new char[insize+1];
  72   outptr = outbuf;
  73
  74   iconv(cd, &inptr, &insize, &outptr, &outsize);
  75   *outptr = '\0';
  76
  77   *dest = outbuf;
  78   delete[] outbuf;
  79
  80   iconv_close(cd);
  81
  82   return true;
  83 }
  84                                                                         /*}}}*/
  85 // strstrip - Remove white space from the front and back of a string    /*{{{*/
  86 // ---------------------------------------------------------------------
  87 /* This is handy to use when parsing a file. It also removes \n's left
  88    over from fgets and company */
  89 char *_strstrip(char *String)
  90 {
  91    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
  92
  93    if (*String == 0)
  94       return String;
  95
  96    char *End = String + strlen(String) - 1;
  97    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
  98                                *End == '\r'); End--);
  99    End++;
 100    *End = 0;
 101    return String;
 102 };
 103                                                                         /*}}}*/
 104 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 105 // ---------------------------------------------------------------------
 106 /* */
 107 char *_strtabexpand(char *String,size_t Len)
 108 {
 109    for (char *I = String; I != I + Len && *I != 0; I++)
 110    {
 111       if (*I != '\t')
 112          continue;
 113       if (I + 8 > String + Len)
 114       {
 115          *I = 0;
 116          return String;
 117       }
 118
 119       /* Assume the start of the string is 0 and find the next 8 char
 120          division */
 121       int Len;
 122       if (String == I)
 123          Len = 1;
 124       else
 125          Len = 8 - ((String - I) % 8);
 126       Len -= 2;
 127       if (Len <= 0)
 128       {
 129          *I = ' ';
 130          continue;
 131       }
 132
 133       memmove(I + Len,I + 1,strlen(I) + 1);
 134       for (char *J = I; J + Len != I; *I = ' ', I++);
 135    }
 136    return String;
 137 }
 138                                                                         /*}}}*/
 139 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 140 // ---------------------------------------------------------------------
 141 /* This grabs a single word, converts any % escaped characters to their
 142    proper values and advances the pointer. Double quotes are understood
 143    and striped out as well. This is for URI/URL parsing. It also can
 144    understand [] brackets.*/
 145 bool ParseQuoteWord(const char *&String,string &Res)
 146 {
 147    // Skip leading whitespace
 148    const char *C = String;
 149    for (;*C != 0 && *C == ' '; C++);
 150    if (*C == 0)
 151       return false;
 152
 153    // Jump to the next word
 154    for (;*C != 0 && isspace(*C) == 0; C++)
 155    {
 156       if (*C == '"')
 157       {
 158          for (C++; *C != 0 && *C != '"'; C++);
 159          if (*C == 0)
 160             return false;
 161       }
 162       if (*C == '[')
 163       {
 164          for (C++; *C != 0 && *C != ']'; C++);
 165          if (*C == 0)
 166             return false;
 167       }
 168    }
 169
 170    // Now de-quote characters
 171    char Buffer[1024];
 172    char Tmp[3];
 173    const char *Start = String;
 174    char *I;
 175    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 176    {
 177       if (*Start == '%' && Start + 2 < C)
 178       {
 179          Tmp[0] = Start[1];
 180          Tmp[1] = Start[2];
 181          Tmp[2] = 0;
 182          *I = (char)strtol(Tmp,0,16);
 183          Start += 3;
 184          continue;
 185       }
 186       if (*Start != '"')
 187          *I = *Start;
 188       else
 189          I--;
 190       Start++;
 191    }
 192    *I = 0;
 193    Res = Buffer;
 194
 195    // Skip ending white space
 196    for (;*C != 0 && isspace(*C) != 0; C++);
 197    String = C;
 198    return true;
 199 }
 200                                                                         /*}}}*/
 201 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 202 // ---------------------------------------------------------------------
 203 /* This expects a series of space separated strings enclosed in ""'s.
 204    It concatenates the ""'s into a single string. */
 205 bool ParseCWord(const char *&String,string &Res)
 206 {
 207    // Skip leading whitespace
 208    const char *C = String;
 209    for (;*C != 0 && *C == ' '; C++);
 210    if (*C == 0)
 211       return false;
 212
 213    char Buffer[1024];
 214    char *Buf = Buffer;
 215    if (strlen(String) >= sizeof(Buffer))
 216        return false;
 217
 218    for (; *C != 0; C++)
 219    {
 220       if (*C == '"')
 221       {
 222          for (C++; *C != 0 && *C != '"'; C++)
 223             *Buf++ = *C;
 224
 225          if (*C == 0)
 226             return false;
 227
 228          continue;
 229       }
 230
 231       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 232          continue;
 233       if (isspace(*C) == 0)
 234          return false;
 235       *Buf++ = ' ';
 236    }
 237    *Buf = 0;
 238    Res = Buffer;
 239    String = C;
 240    return true;
 241 }
 242                                                                         /*}}}*/
 243 // QuoteString - Convert a string into quoted from                      /*{{{*/
 244 // ---------------------------------------------------------------------
 245 /* */
 246 string QuoteString(const string &Str, const char *Bad)
 247 {
 248    string Res;
 249    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 250    {
 251       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 252           *I <= 0x20 || *I >= 0x7F)
 253       {
 254          char Buf[10];
 255          sprintf(Buf,"%%%02x",(int)*I);
 256          Res += Buf;
 257       }
 258       else
 259          Res += *I;
 260    }
 261    return Res;
 262 }
 263                                                                         /*}}}*/
 264 // DeQuoteString - Convert a string from quoted from                    /*{{{*/
 265 // ---------------------------------------------------------------------
 266 /* This undoes QuoteString */
 267 string DeQuoteString(const string &Str)
 268 {
 269    string Res;
 270    for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
 271    {
 272       if (*I == '%' && I + 2 < Str.end())
 273       {
 274          char Tmp[3];
 275          Tmp[0] = I[1];
 276          Tmp[1] = I[2];
 277          Tmp[2] = 0;
 278          Res += (char)strtol(Tmp,0,16);
 279          I += 2;
 280          continue;
 281       }
 282       else
 283          Res += *I;
 284    }
 285    return Res;
 286 }
 287
 288                                                                         /*}}}*/
 289 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 290 // ---------------------------------------------------------------------
 291 /* A max of 4 digits are shown before conversion to the next highest unit.
 292    The max length of the string will be 5 chars unless the size is > 10
 293    YottaBytes (E24) */
 294 string SizeToStr(double Size)
 295 {
 296    char S[300];
 297    double ASize;
 298    if (Size >= 0)
 299       ASize = Size;
 300    else
 301       ASize = -1*Size;
 302
 303    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 304       ExaBytes, ZettaBytes, YottaBytes */
 305    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 306    int I = 0;
 307    while (I <= 8)
 308    {
 309       if (ASize < 100 && I != 0)
 310       {
 311          sprintf(S,"%.1f%c",ASize,Ext[I]);
 312          break;
 313       }
 314
 315       if (ASize < 10000)
 316       {
 317          sprintf(S,"%.0f%c",ASize,Ext[I]);
 318          break;
 319       }
 320       ASize /= 1000.0;
 321       I++;
 322    }
 323
 324    return S;
 325 }
 326                                                                         /*}}}*/
 327 // TimeToStr - Convert the time into a string                           /*{{{*/
 328 // ---------------------------------------------------------------------
 329 /* Converts a number of seconds to a hms format */
 330 string TimeToStr(unsigned long Sec)
 331 {
 332    char S[300];
 333
 334    while (1)
 335    {
 336       if (Sec > 60*60*24)
 337       {
 338          sprintf(S,"%lid %lih%lim%lis",Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 339          break;
 340       }
 341
 342       if (Sec > 60*60)
 343       {
 344          sprintf(S,"%lih%lim%lis",Sec/60/60,(Sec/60) % 60,Sec % 60);
 345          break;
 346       }
 347
 348       if (Sec > 60)
 349       {
 350          sprintf(S,"%lim%lis",Sec/60,Sec % 60);
 351          break;
 352       }
 353
 354       sprintf(S,"%lis",Sec);
 355       break;
 356    }
 357
 358    return S;
 359 }
 360                                                                         /*}}}*/
 361 // SubstVar - Substitute a string for another string                    /*{{{*/
 362 // ---------------------------------------------------------------------
 363 /* This replaces all occurances of Subst with Contents in Str. */
 364 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 365 {
 366    string::size_type Pos = 0;
 367    string::size_type OldPos = 0;
 368    string Temp;
 369
 370    while (OldPos < Str.length() &&
 371           (Pos = Str.find(Subst,OldPos)) != string::npos)
 372    {
 373       Temp += string(Str,OldPos,Pos) + Contents;
 374       OldPos = Pos + Subst.length();
 375    }
 376
 377    if (OldPos == 0)
 378       return Str;
 379
 380    return Temp + string(Str,OldPos);
 381 }
 382
 383 string SubstVar(string Str,const struct SubstVar *Vars)
 384 {
 385    for (; Vars->Subst != 0; Vars++)
 386       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 387    return Str;
 388 }
 389                                                                         /*}}}*/
 390 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 391 // ---------------------------------------------------------------------
 392 /* This converts a URI into a safe filename. It quotes all unsafe characters
 393    and converts / to _ and removes the scheme identifier. The resulting
 394    file name should be unique and never occur again for a different file */
 395 string URItoFileName(const string &URI)
 396 {
 397    // Nuke 'sensitive' items
 398    ::URI U(URI);
 399    U.User.clear();
 400    U.Password.clear();
 401    U.Access.clear();
 402
 403    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 404    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 405    replace(NewURI.begin(),NewURI.end(),'/','_');
 406    return NewURI;
 407 }
 408                                                                         /*}}}*/
 409 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 410 // ---------------------------------------------------------------------
 411 /* This routine performs a base64 transformation on a string. It was ripped
 412    from wget and then patched and bug fixed.
 413
 414    This spec can be found in rfc2045 */
 415 string Base64Encode(const string &S)
 416 {
 417    // Conversion table.
 418    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 419                           'I','J','K','L','M','N','O','P',
 420                           'Q','R','S','T','U','V','W','X',
 421                           'Y','Z','a','b','c','d','e','f',
 422                           'g','h','i','j','k','l','m','n',
 423                           'o','p','q','r','s','t','u','v',
 424                           'w','x','y','z','0','1','2','3',
 425                           '4','5','6','7','8','9','+','/'};
 426
 427    // Pre-allocate some space
 428    string Final;
 429    Final.reserve((4*S.length() + 2)/3 + 2);
 430
 431    /* Transform the 3x8 bits to 4x6 bits, as required by
 432       base64.  */
 433    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 434    {
 435       char Bits[3] = {0,0,0};
 436       Bits[0] = I[0];
 437       if (I + 1 < S.end())
 438          Bits[1] = I[1];
 439       if (I + 2 < S.end())
 440          Bits[2] = I[2];
 441
 442       Final += tbl[Bits[0] >> 2];
 443       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 444
 445       if (I + 1 >= S.end())
 446          break;
 447
 448       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 449
 450       if (I + 2 >= S.end())
 451          break;
 452
 453       Final += tbl[Bits[2] & 0x3f];
 454    }
 455
 456    /* Apply the padding elements, this tells how many bytes the remote
 457       end should discard */
 458    if (S.length() % 3 == 2)
 459       Final += '=';
 460    if (S.length() % 3 == 1)
 461       Final += "==";
 462
 463    return Final;
 464 }
 465                                                                         /*}}}*/
 466 // stringcmp - Arbitary string compare                                  /*{{{*/
 467 // ---------------------------------------------------------------------
 468 /* This safely compares two non-null terminated strings of arbitary
 469    length */
 470 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 471 {
 472    for (; A != AEnd && B != BEnd; A++, B++)
 473       if (*A != *B)
 474          break;
 475
 476    if (A == AEnd && B == BEnd)
 477       return 0;
 478    if (A == AEnd)
 479       return 1;
 480    if (B == BEnd)
 481       return -1;
 482    if (*A < *B)
 483       return -1;
 484    return 1;
 485 }
 486
 487 #if __GNUC__ >= 3
 488 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 489               const char *B,const char *BEnd)
 490 {
 491    for (; A != AEnd && B != BEnd; A++, B++)
 492       if (*A != *B)
 493          break;
 494
 495    if (A == AEnd && B == BEnd)
 496       return 0;
 497    if (A == AEnd)
 498       return 1;
 499    if (B == BEnd)
 500       return -1;
 501    if (*A < *B)
 502       return -1;
 503    return 1;
 504 }
 505 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 506               string::const_iterator B,string::const_iterator BEnd)
 507 {
 508    for (; A != AEnd && B != BEnd; A++, B++)
 509       if (*A != *B)
 510          break;
 511
 512    if (A == AEnd && B == BEnd)
 513       return 0;
 514    if (A == AEnd)
 515       return 1;
 516    if (B == BEnd)
 517       return -1;
 518    if (*A < *B)
 519       return -1;
 520    return 1;
 521 }
 522 #endif
 523                                                                         /*}}}*/
 524 // stringcasecmp - Arbitary case insensitive string compare             /*{{{*/
 525 // ---------------------------------------------------------------------
 526 /* */
 527 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 528 {
 529    for (; A != AEnd && B != BEnd; A++, B++)
 530       if (toupper(*A) != toupper(*B))
 531          break;
 532
 533    if (A == AEnd && B == BEnd)
 534       return 0;
 535    if (A == AEnd)
 536       return 1;
 537    if (B == BEnd)
 538       return -1;
 539    if (toupper(*A) < toupper(*B))
 540       return -1;
 541    return 1;
 542 }
 543 #if __GNUC__ >= 3
 544 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 545                   const char *B,const char *BEnd)
 546 {
 547    for (; A != AEnd && B != BEnd; A++, B++)
 548       if (toupper(*A) != toupper(*B))
 549          break;
 550
 551    if (A == AEnd && B == BEnd)
 552       return 0;
 553    if (A == AEnd)
 554       return 1;
 555    if (B == BEnd)
 556       return -1;
 557    if (toupper(*A) < toupper(*B))
 558       return -1;
 559    return 1;
 560 }
 561 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 562                   string::const_iterator B,string::const_iterator BEnd)
 563 {
 564    for (; A != AEnd && B != BEnd; A++, B++)
 565       if (toupper(*A) != toupper(*B))
 566          break;
 567
 568    if (A == AEnd && B == BEnd)
 569       return 0;
 570    if (A == AEnd)
 571       return 1;
 572    if (B == BEnd)
 573       return -1;
 574    if (toupper(*A) < toupper(*B))
 575       return -1;
 576    return 1;
 577 }
 578 #endif
 579                                                                         /*}}}*/
 580 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 581 // ---------------------------------------------------------------------
 582 /* The format is like those used in package files and the method
 583    communication system */
 584 string LookupTag(const string &Message,const char *Tag,const char *Default)
 585 {
 586    // Look for a matching tag.
 587    int Length = strlen(Tag);
 588    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
 589    {
 590       // Found the tag
 591       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 592       {
 593          // Find the end of line and strip the leading/trailing spaces
 594          string::const_iterator J;
 595          I += Length + 1;
 596          for (; isspace(*I) != 0 && I < Message.end(); I++);
 597          for (J = I; *J != '\n' && J < Message.end(); J++);
 598          for (; J > I && isspace(J[-1]) != 0; J--);
 599
 600          return string(I,J);
 601       }
 602
 603       for (; *I != '\n' && I < Message.end(); I++);
 604    }
 605
 606    // Failed to find a match
 607    if (Default == 0)
 608       return string();
 609    return Default;
 610 }
 611                                                                         /*}}}*/
 612 // StringToBool - Converts a string into a boolean                      /*{{{*/
 613 // ---------------------------------------------------------------------
 614 /* This inspects the string to see if it is true or if it is false and
 615    then returns the result. Several varients on true/false are checked. */
 616 int StringToBool(const string &Text,int Default)
 617 {
 618    char *End;
 619    int Res = strtol(Text.c_str(),&End,0);
 620    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 621       return Res;
 622
 623    // Check for positives
 624    if (strcasecmp(Text.c_str(),"no") == 0 ||
 625        strcasecmp(Text.c_str(),"false") == 0 ||
 626        strcasecmp(Text.c_str(),"without") == 0 ||
 627        strcasecmp(Text.c_str(),"off") == 0 ||
 628        strcasecmp(Text.c_str(),"disable") == 0)
 629       return 0;
 630
 631    // Check for negatives
 632    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 633        strcasecmp(Text.c_str(),"true") == 0 ||
 634        strcasecmp(Text.c_str(),"with") == 0 ||
 635        strcasecmp(Text.c_str(),"on") == 0 ||
 636        strcasecmp(Text.c_str(),"enable") == 0)
 637       return 1;
 638
 639    return Default;
 640 }
 641                                                                         /*}}}*/
 642 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 643 // ---------------------------------------------------------------------
 644 /* This converts a time_t into a string time representation that is
 645    year 2000 complient and timezone neutral */
 646 string TimeRFC1123(time_t Date)
 647 {
 648    struct tm Conv = *gmtime(&Date);
 649    char Buf[300];
 650
 651    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 652    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 653                           "Aug","Sep","Oct","Nov","Dec"};
 654
 655    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 656            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 657            Conv.tm_min,Conv.tm_sec);
 658    return Buf;
 659 }
 660                                                                         /*}}}*/
 661 // ReadMessages - Read messages from the FD                             /*{{{*/
 662 // ---------------------------------------------------------------------
 663 /* This pulls full messages from the input FD into the message buffer.
 664    It assumes that messages will not pause during transit so no
 665    fancy buffering is used. */
 666 bool ReadMessages(int Fd, vector<string> &List)
 667 {
 668    char Buffer[64000];
 669    char *End = Buffer;
 670
 671    while (1)
 672    {
 673       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 674       if (Res < 0 && errno == EINTR)
 675          continue;
 676
 677       // Process is dead, this is kind of bad..
 678       if (Res == 0)
 679          return false;
 680
 681       // No data
 682       if (Res < 0 && errno == EAGAIN)
 683          return true;
 684       if (Res < 0)
 685          return false;
 686
 687       End += Res;
 688
 689       // Look for the end of the message
 690       for (char *I = Buffer; I + 1 < End; I++)
 691       {
 692          if (I[0] != '\n' || I[1] != '\n')
 693             continue;
 694
 695          // Pull the message out
 696          string Message(Buffer,I-Buffer);
 697
 698          // Fix up the buffer
 699          for (; I < End && *I == '\n'; I++);
 700          End -= I-Buffer;
 701          memmove(Buffer,I,End-Buffer);
 702          I = Buffer;
 703
 704          List.push_back(Message);
 705       }
 706       if (End == Buffer)
 707          return true;
 708
 709       if (WaitFd(Fd) == false)
 710          return false;
 711    }
 712 }
 713                                                                         /*}}}*/
 714 // MonthConv - Converts a month string into a number                    /*{{{*/
 715 // ---------------------------------------------------------------------
 716 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 717    Made it a bit more robust with a few touppers though. */
 718 static int MonthConv(char *Month)
 719 {
 720    switch (toupper(*Month))
 721    {
 722       case 'A':
 723       return toupper(Month[1]) == 'P'?3:7;
 724       case 'D':
 725       return 11;
 726       case 'F':
 727       return 1;
 728       case 'J':
 729       if (toupper(Month[1]) == 'A')
 730          return 0;
 731       return toupper(Month[2]) == 'N'?5:6;
 732       case 'M':
 733       return toupper(Month[2]) == 'R'?2:4;
 734       case 'N':
 735       return 10;
 736       case 'O':
 737       return 9;
 738       case 'S':
 739       return 8;
 740
 741       // Pretend it is January..
 742       default:
 743       return 0;
 744    }
 745 }
 746                                                                         /*}}}*/
 747 // timegm - Internal timegm function if gnu is not available            /*{{{*/
 748 // ---------------------------------------------------------------------
 749 /* Ripped this evil little function from wget - I prefer the use of
 750    GNU timegm if possible as this technique will have interesting problems
 751    with leap seconds, timezones and other.
 752
 753    Converts struct tm to time_t, assuming the data in tm is UTC rather
 754    than local timezone (mktime assumes the latter).
 755
 756    Contributed by Roger Beeman <beeman@cisco.com>, with the help of
 757    Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
 758
 759 /* Turned it into an autoconf check, because GNU is not the only thing which
 760    can provide timegm. -- 2002-09-22, Joel Baker */
 761
 762 #ifndef HAVE_TIMEGM // Now with autoconf!
 763 static time_t timegm(struct tm *t)
 764 {
 765    time_t tl, tb;
 766
 767    tl = mktime (t);
 768    if (tl == -1)
 769       return -1;
 770    tb = mktime (gmtime (&tl));
 771    return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
 772 }
 773 #endif
 774                                                                         /*}}}*/
 775 // StrToTime - Converts a string into a time_t                          /*{{{*/
 776 // ---------------------------------------------------------------------
 777 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
 778    and the C library asctime format. It requires the GNU library function
 779    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 780    reason the C library does not provide any such function :< This also
 781    handles the weird, but unambiguous FTP time format*/
 782 bool StrToTime(const string &Val,time_t &Result)
 783 {
 784    struct tm Tm;
 785    char Month[10];
 786    const char *I = Val.c_str();
 787
 788    // Skip the day of the week
 789    for (;*I != 0  && *I != ' '; I++);
 790
 791    // Handle RFC 1123 time
 792    Month[0] = 0;
 793    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 794               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 795    {
 796       // Handle RFC 1036 time
 797       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 798                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 799          Tm.tm_year += 1900;
 800       else
 801       {
 802          // asctime format
 803          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 804                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 805          {
 806             // 'ftp' time
 807             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 808                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 809                return false;
 810             Tm.tm_mon--;
 811          }
 812       }
 813    }
 814
 815    Tm.tm_isdst = 0;
 816    if (Month[0] != 0)
 817       Tm.tm_mon = MonthConv(Month);
 818    Tm.tm_year -= 1900;
 819
 820    // Convert to local time and then to GMT
 821    Result = timegm(&Tm);
 822    return true;
 823 }
 824                                                                         /*}}}*/
 825 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 826 // ---------------------------------------------------------------------
 827 /* This is used in decoding the crazy fixed length string headers in
 828    tar and ar files. */
 829 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 830 {
 831    char S[30];
 832    if (Len >= sizeof(S))
 833       return false;
 834    memcpy(S,Str,Len);
 835    S[Len] = 0;
 836
 837    // All spaces is a zero
 838    Res = 0;
 839    unsigned I;
 840    for (I = 0; S[I] == ' '; I++);
 841    if (S[I] == 0)
 842       return true;
 843
 844    char *End;
 845    Res = strtoul(S,&End,Base);
 846    if (End == S)
 847       return false;
 848
 849    return true;
 850 }
 851                                                                         /*}}}*/
 852 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 853 // ---------------------------------------------------------------------
 854 /* Helper for Hex2Num */
 855 static int HexDigit(int c)
 856 {
 857    if (c >= '0' && c <= '9')
 858       return c - '0';
 859    if (c >= 'a' && c <= 'f')
 860       return c - 'a' + 10;
 861    if (c >= 'A' && c <= 'F')
 862       return c - 'A' + 10;
 863    return 0;
 864 }
 865                                                                         /*}}}*/
 866 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
 867 // ---------------------------------------------------------------------
 868 /* The length of the buffer must be exactly 1/2 the length of the string. */
 869 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
 870 {
 871    if (Str.length() != Length*2)
 872       return false;
 873
 874    // Convert each digit. We store it in the same order as the string
 875    int J = 0;
 876    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
 877    {
 878       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
 879          return false;
 880
 881       Num[J] = HexDigit(I[0]) << 4;
 882       Num[J] += HexDigit(I[1]);
 883    }
 884
 885    return true;
 886 }
 887                                                                         /*}}}*/
 888 // TokSplitString - Split a string up by a given token                  /*{{{*/
 889 // ---------------------------------------------------------------------
 890 /* This is intended to be a faster splitter, it does not use dynamic
 891    memories. Input is changed to insert nulls at each token location. */
 892 bool TokSplitString(char Tok,char *Input,char **List,
 893                     unsigned long ListMax)
 894 {
 895    // Strip any leading spaces
 896    char *Start = Input;
 897    char *Stop = Start + strlen(Start);
 898    for (; *Start != 0 && isspace(*Start) != 0; Start++);
 899
 900    unsigned long Count = 0;
 901    char *Pos = Start;
 902    while (Pos != Stop)
 903    {
 904       // Skip to the next Token
 905       for (; Pos != Stop && *Pos != Tok; Pos++);
 906
 907       // Back remove spaces
 908       char *End = Pos;
 909       for (; End > Start && (End[-1] == Tok || isspace(End[-1]) != 0); End--);
 910       *End = 0;
 911
 912       List[Count++] = Start;
 913       if (Count >= ListMax)
 914       {
 915          List[Count-1] = 0;
 916          return false;
 917       }
 918
 919       // Advance pos
 920       for (; Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0); Pos++);
 921       Start = Pos;
 922    }
 923
 924    List[Count] = 0;
 925    return true;
 926 }
 927                                                                         /*}}}*/
 928 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
 929 // ---------------------------------------------------------------------
 930 /* */
 931 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
 932                       const char **ListEnd)
 933 {
 934    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 935       R->Hit = false;
 936
 937    unsigned long Hits = 0;
 938    for (; ListBegin != ListEnd; ListBegin++)
 939    {
 940       // Check if the name is a regex
 941       const char *I;
 942       bool Regex = true;
 943       for (I = *ListBegin; *I != 0; I++)
 944          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
 945             break;
 946       if (*I == 0)
 947          Regex = false;
 948
 949       // Compile the regex pattern
 950       regex_t Pattern;
 951       if (Regex == true)
 952          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
 953                      REG_NOSUB) != 0)
 954             Regex = false;
 955
 956       // Search the list
 957       bool Done = false;
 958       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
 959       {
 960          if (R->Str[0] == 0)
 961             continue;
 962
 963          if (strcasecmp(R->Str,*ListBegin) != 0)
 964          {
 965             if (Regex == false)
 966                continue;
 967             if (regexec(&Pattern,R->Str,0,0,0) != 0)
 968                continue;
 969          }
 970          Done = true;
 971
 972          if (R->Hit == false)
 973             Hits++;
 974
 975          R->Hit = true;
 976       }
 977
 978       if (Regex == true)
 979          regfree(&Pattern);
 980
 981       if (Done == false)
 982          _error->Warning(_("Selection %s not found"),*ListBegin);
 983    }
 984
 985    return Hits;
 986 }
 987                                                                         /*}}}*/
 988 // ioprintf - C format string outputter to C++ iostreams                /*{{{*/
 989 // ---------------------------------------------------------------------
 990 /* This is used to make the internationalization strings easier to translate
 991    and to allow reordering of parameters */
 992 void ioprintf(ostream &out,const char *format,...)
 993 {
 994    va_list args;
 995    va_start(args,format);
 996
 997    // sprintf the description
 998    char S[400];
 999    vsnprintf(S,sizeof(S),format,args);
1000    out << S;
1001 }
1002                                                                         /*}}}*/
1003 // safe_snprintf - Safer snprintf                                       /*{{{*/
1004 // ---------------------------------------------------------------------
1005 /* This is a snprintf that will never (ever) go past 'End' and returns a
1006    pointer to the end of the new string. The returned string is always null
1007    terminated unless Buffer == end. This is a better alternative to using
1008    consecutive snprintfs. */
/* Format into [Buffer,End) and return a pointer to the terminating null of
   what was written, ready to be passed as Buffer to the next call.
   Returns End when there is no room at all (End <= Buffer), when the
   output had to be truncated, or when formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   // Keep the result signed, vsnprintf reports errors as a negative value
   const int Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* A result that is not smaller than the space available means the text
      was cut short; in both failure cases the buffer counts as used up. */
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1024                                                                         /*}}}*/
1025
1026 // CheckDomainList - See if Host is in a , separated list               /*{{{*/
1027 // ---------------------------------------------------------------------
1028 /* The domain list is a comma separated list of domains that are suffix
1029    matched against the argument */
1030 bool CheckDomainList(const string &Host,const string &List)
1031 {
1032    string::const_iterator Start = List.begin();
1033    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1034    {
1035       if (Cur < List.end() && *Cur != ',')
1036          continue;
1037
1038       // Match the end of the string..
1039       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1040           Cur - Start != 0 &&
1041           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1042          return true;
1043
1044       Start = Cur + 1;
1045    }
1046    return false;
1047 }
1048                                                                         /*}}}*/
1049
1050 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1051 // ---------------------------------------------------------------------
1052 /* This parses the URI into all of its components */
1053 void URI::CopyFrom(const string &U)
1054 {
1055    string::const_iterator I = U.begin();
1056
1057    // Locate the first colon, this separates the scheme
1058    for (; I < U.end() && *I != ':' ; I++);
1059    string::const_iterator FirstColon = I;
1060
1061    /* Determine if this is a host type URI with a leading double //
1062       and then search for the first single / */
1063    string::const_iterator SingleSlash = I;
1064    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1065       SingleSlash += 3;
1066
1067    /* Find the / indicating the end of the hostname, ignoring /'s in the
1068       square brackets */
1069    bool InBracket = false;
1070    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1071    {
1072       if (*SingleSlash == '[')
1073          InBracket = true;
1074       if (InBracket == true && *SingleSlash == ']')
1075          InBracket = false;
1076    }
1077
1078    if (SingleSlash > U.end())
1079       SingleSlash = U.end();
1080
1081    // We can now write the access and path specifiers
1082    Access.assign(U.begin(),FirstColon);
1083    if (SingleSlash != U.end())
1084       Path.assign(SingleSlash,U.end());
1085    if (Path.empty() == true)
1086       Path = "/";
1087
1088    // Now we attempt to locate a user:pass@host fragment
1089    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1090       FirstColon += 3;
1091    else
1092       FirstColon += 1;
1093    if (FirstColon >= U.end())
1094       return;
1095
1096    if (FirstColon > SingleSlash)
1097       FirstColon = SingleSlash;
1098
1099    // Find the colon...
1100    I = FirstColon + 1;
1101    if (I > SingleSlash)
1102       I = SingleSlash;
1103    for (; I < SingleSlash && *I != ':'; I++);
1104    string::const_iterator SecondColon = I;
1105
1106    // Search for the @ after the colon
1107    for (; I < SingleSlash && *I != '@'; I++);
1108    string::const_iterator At = I;
1109
1110    // Now write the host and user/pass
1111    if (At == SingleSlash)
1112    {
1113       if (FirstColon < SingleSlash)
1114          Host.assign(FirstColon,SingleSlash);
1115    }
1116    else
1117    {
1118       Host.assign(At+1,SingleSlash);
1119       User.assign(FirstColon,SecondColon);
1120       if (SecondColon < At)
1121          Password.assign(SecondColon+1,At);
1122    }
1123
1124    // Now we parse the RFC 2732 [] hostnames.
1125    unsigned long PortEnd = 0;
1126    InBracket = false;
1127    for (unsigned I = 0; I != Host.length();)
1128    {
1129       if (Host[I] == '[')
1130       {
1131          InBracket = true;
1132          Host.erase(I,1);
1133          continue;
1134       }
1135
1136       if (InBracket == true && Host[I] == ']')
1137       {
1138          InBracket = false;
1139          Host.erase(I,1);
1140          PortEnd = I;
1141          continue;
1142       }
1143       I++;
1144    }
1145
1146    // Tsk, weird.
1147    if (InBracket == true)
1148    {
1149       Host.clear();
1150       return;
1151    }
1152
1153    // Now we parse off a port number from the hostname
1154    Port = 0;
1155    string::size_type Pos = Host.rfind(':');
1156    if (Pos == string::npos || Pos < PortEnd)
1157       return;
1158
1159    Port = atoi(string(Host,Pos+1).c_str());
1160    Host.assign(Host,0,Pos);
1161 }
1162                                                                         /*}}}*/
1163 // URI::operator string - Convert the URI to a string                   /*{{{*/
1164 // ---------------------------------------------------------------------
1165 /* */
1166 URI::operator string()
1167 {
1168    string Res;
1169
1170    if (Access.empty() == false)
1171       Res = Access + ':';
1172
1173    if (Host.empty() == false)
1174    {
1175       if (Access.empty() == false)
1176          Res += "//";
1177
1178       if (User.empty() == false)
1179       {
1180          Res +=  User;
1181          if (Password.empty() == false)
1182             Res += ":" + Password;
1183          Res += "@";
1184       }
1185
1186       // Add RFC 2732 escaping characters
1187       if (Access.empty() == false &&
1188           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1189          Res += '[' + Host + ']';
1190       else
1191          Res += Host;
1192
1193       if (Port != 0)
1194       {
1195          char S[30];
1196          sprintf(S,":%u",Port);
1197          Res += S;
1198       }
1199    }
1200
1201    if (Path.empty() == false)
1202    {
1203       if (Path[0] != '/')
1204          Res += "/" + Path;
1205       else
1206          Res += Path;
1207    }
1208
1209    return Res;
1210 }
1211                                                                         /*}}}*/
1212 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1213 // ---------------------------------------------------------------------
1214 /* */
1215 string URI::SiteOnly(const string &URI)
1216 {
1217    ::URI U(URI);
1218    U.User.clear();
1219    U.Password.clear();
1220    U.Path.clear();
1221    U.Port = 0;
1222    return U;
1223 }
1224                                                                         /*}}}*/