apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
   4 /* ######################################################################
   5
   6    String Util - Some useful string functions.
   7
   8    These have been collected from here and there to do all sorts of useful
   9    things to strings. They are useful in file parsers, URI handlers and
  10    especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
  18 #include <apt-pkg/strutl.h>
  19 #include <apt-pkg/fileutl.h>
  20 #include <apt-pkg/error.h>
  21
  22 #include <apti18n.h>
  23
  24 #include <ctype.h>
  25 #include <string.h>
  26 #include <stdio.h>
  27 #include <algorithm>
  28 #include <unistd.h>
  29 #include <regex.h>
  30 #include <errno.h>
  31 #include <stdarg.h>
  32 #include <iconv.h>
  33
  34 #include "config.h"
  35
  36 using namespace std;
  37                                                                         /*}}}*/
  38
  39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset           /*{{{*/
  40 // ---------------------------------------------------------------------
/* This is handy to use before displaying information to the end user */
  42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
  43 {
  44   iconv_t cd;
  45   const char *inbuf;
  46   char *inptr, *outbuf;
  47   size_t insize, bufsize;
  48   dest->clear();
  49
  50   cd = iconv_open(codeset, "UTF-8");
  51   if (cd == (iconv_t)(-1)) {
  52      // Something went wrong
  53      if (errno == EINVAL)
  54         _error->Error("conversion from 'UTF-8' to '%s' not available",
  55                codeset);
  56      else
  57         perror("iconv_open");
  58
  59      return false;
  60   }
  61
  62   insize = bufsize = orig.size();
  63   inbuf = orig.data();
  64   inptr = (char *)inbuf;
  65   outbuf = new char[bufsize];
  66   size_t lastError = -1;
  67
  68   while (insize != 0)
  69   {
  70      char *outptr = outbuf;
  71      size_t outsize = bufsize;
  72      size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
  73      dest->append(outbuf, outptr - outbuf);
  74      if (err == (size_t)(-1))
  75      {
  76         switch (errno)
  77         {
  78         case EILSEQ:
  79            insize--;
  80            inptr++;
  81            // replace a series of unknown multibytes with a single "?"
  82            if (lastError != insize) {
  83               lastError = insize - 1;
  84               dest->append("?");
  85            }
  86            break;
  87         case EINVAL:
  88            insize = 0;
  89            break;
  90         case E2BIG:
  91            if (outptr == outbuf)
  92            {
  93               bufsize *= 2;
  94               delete[] outbuf;
  95               outbuf = new char[bufsize];
  96            }
  97            break;
  98         }
  99      }
 100   }
 101
 102   delete[] outbuf;
 103
 104   iconv_close(cd);
 105
 106   return true;
 107 }
 108                                                                         /*}}}*/
 109 // strstrip - Remove white space from the front and back of a string    /*{{{*/
 110 // ---------------------------------------------------------------------
 111 /* This is handy to use when parsing a file. It also removes \n's left
 112    over from fgets and company */
 113 char *_strstrip(char *String)
 114 {
 115    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
 116
 117    if (*String == 0)
 118       return String;
 119
 120    char *End = String + strlen(String) - 1;
 121    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
 122                                *End == '\r'); End--);
 123    End++;
 124    *End = 0;
 125    return String;
 126 };
 127                                                                         /*}}}*/
 128 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
 129 // ---------------------------------------------------------------------
 130 /* */
 131 char *_strtabexpand(char *String,size_t Len)
 132 {
 133    for (char *I = String; I != I + Len && *I != 0; I++)
 134    {
 135       if (*I != '\t')
 136          continue;
 137       if (I + 8 > String + Len)
 138       {
 139          *I = 0;
 140          return String;
 141       }
 142
 143       /* Assume the start of the string is 0 and find the next 8 char
 144          division */
 145       int Len;
 146       if (String == I)
 147          Len = 1;
 148       else
 149          Len = 8 - ((String - I) % 8);
 150       Len -= 2;
 151       if (Len <= 0)
 152       {
 153          *I = ' ';
 154          continue;
 155       }
 156
 157       memmove(I + Len,I + 1,strlen(I) + 1);
 158       for (char *J = I; J + Len != I; *I = ' ', I++);
 159    }
 160    return String;
 161 }
 162                                                                         /*}}}*/
 163 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
 164 // ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It can also
   understand [] brackets. */
 169 bool ParseQuoteWord(const char *&String,string &Res)
 170 {
 171    // Skip leading whitespace
 172    const char *C = String;
 173    for (;*C != 0 && *C == ' '; C++);
 174    if (*C == 0)
 175       return false;
 176
 177    // Jump to the next word
 178    for (;*C != 0 && isspace(*C) == 0; C++)
 179    {
 180       if (*C == '"')
 181       {
 182          for (C++; *C != 0 && *C != '"'; C++);
 183          if (*C == 0)
 184             return false;
 185       }
 186       if (*C == '[')
 187       {
 188          for (C++; *C != 0 && *C != ']'; C++);
 189          if (*C == 0)
 190             return false;
 191       }
 192    }
 193
 194    // Now de-quote characters
 195    char Buffer[1024];
 196    char Tmp[3];
 197    const char *Start = String;
 198    char *I;
 199    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 200    {
 201       if (*Start == '%' && Start + 2 < C &&
 202           isxdigit(Start[1]) && isxdigit(Start[2]))
 203       {
 204          Tmp[0] = Start[1];
 205          Tmp[1] = Start[2];
 206          Tmp[2] = 0;
 207          *I = (char)strtol(Tmp,0,16);
 208          Start += 3;
 209          continue;
 210       }
 211       if (*Start != '"')
 212          *I = *Start;
 213       else
 214          I--;
 215       Start++;
 216    }
 217    *I = 0;
 218    Res = Buffer;
 219
 220    // Skip ending white space
 221    for (;*C != 0 && isspace(*C) != 0; C++);
 222    String = C;
 223    return true;
 224 }
 225                                                                         /*}}}*/
 226 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 227 // ---------------------------------------------------------------------
 228 /* This expects a series of space separated strings enclosed in ""'s.
 229    It concatenates the ""'s into a single string. */
 230 bool ParseCWord(const char *&String,string &Res)
 231 {
 232    // Skip leading whitespace
 233    const char *C = String;
 234    for (;*C != 0 && *C == ' '; C++);
 235    if (*C == 0)
 236       return false;
 237
 238    char Buffer[1024];
 239    char *Buf = Buffer;
 240    if (strlen(String) >= sizeof(Buffer))
 241        return false;
 242
 243    for (; *C != 0; C++)
 244    {
 245       if (*C == '"')
 246       {
 247          for (C++; *C != 0 && *C != '"'; C++)
 248             *Buf++ = *C;
 249
 250          if (*C == 0)
 251             return false;
 252
 253          continue;
 254       }
 255
 256       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 257          continue;
 258       if (isspace(*C) == 0)
 259          return false;
 260       *Buf++ = ' ';
 261    }
 262    *Buf = 0;
 263    Res = Buffer;
 264    String = C;
 265    return true;
 266 }
 267                                                                         /*}}}*/
// QuoteString - Convert a string into quoted form                      /*{{{*/
 269 // ---------------------------------------------------------------------
 270 /* */
 271 string QuoteString(const string &Str, const char *Bad)
 272 {
 273    string Res;
 274    for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
 275    {
 276       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 277           *I == 0x25 || // percent '%' char
 278           *I <= 0x20 || *I >= 0x7F) // control chars
 279       {
 280          char Buf[10];
 281          sprintf(Buf,"%%%02x",(int)*I);
 282          Res += Buf;
 283       }
 284       else
 285          Res += *I;
 286    }
 287    return Res;
 288 }
 289                                                                         /*}}}*/
// DeQuoteString - Convert a string from quoted form                    /*{{{*/
 291 // ---------------------------------------------------------------------
 292 /* This undoes QuoteString */
 293 string DeQuoteString(const string &Str)
 294 {
 295    return DeQuoteString(Str.begin(),Str.end());
 296 }
 297 string DeQuoteString(string::const_iterator const &begin,
 298                         string::const_iterator const &end)
 299 {
 300    string Res;
 301    for (string::const_iterator I = begin; I != end; ++I)
 302    {
 303       if (*I == '%' && I + 2 < end &&
 304           isxdigit(I[1]) && isxdigit(I[2]))
 305       {
 306          char Tmp[3];
 307          Tmp[0] = I[1];
 308          Tmp[1] = I[2];
 309          Tmp[2] = 0;
 310          Res += (char)strtol(Tmp,0,16);
 311          I += 2;
 312          continue;
 313       }
 314       else
 315          Res += *I;
 316    }
 317    return Res;
 318 }
 319
 320                                                                         /*}}}*/
 321 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 322 // ---------------------------------------------------------------------
 323 /* A max of 4 digits are shown before conversion to the next highest unit.
 324    The max length of the string will be 5 chars unless the size is > 10
 325    YottaBytes (E24) */
 326 string SizeToStr(double Size)
 327 {
 328    char S[300];
 329    double ASize;
 330    if (Size >= 0)
 331       ASize = Size;
 332    else
 333       ASize = -1*Size;
 334
 335    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 336       ExaBytes, ZettaBytes, YottaBytes */
 337    char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
 338    int I = 0;
 339    while (I <= 8)
 340    {
 341       if (ASize < 100 && I != 0)
 342       {
 343          sprintf(S,"%'.1f %c",ASize,Ext[I]);
 344          break;
 345       }
 346
 347       if (ASize < 10000)
 348       {
 349          sprintf(S,"%'.0f %c",ASize,Ext[I]);
 350          break;
 351       }
 352       ASize /= 1000.0;
 353       I++;
 354    }
 355
 356    return S;
 357 }
 358                                                                         /*}}}*/
 359 // TimeToStr - Convert the time into a string                           /*{{{*/
 360 // ---------------------------------------------------------------------
 361 /* Converts a number of seconds to a hms format */
 362 string TimeToStr(unsigned long Sec)
 363 {
 364    char S[300];
 365
 366    while (1)
 367    {
 368       if (Sec > 60*60*24)
 369       {
 370          //d means days, h means hours, min means minutes, s means seconds
 371          sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 372          break;
 373       }
 374
 375       if (Sec > 60*60)
 376       {
 377          //h means hours, min means minutes, s means seconds
 378          sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
 379          break;
 380       }
 381
 382       if (Sec > 60)
 383       {
 384          //min means minutes, s means seconds
 385          sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
 386          break;
 387       }
 388
 389       //s means seconds
 390       sprintf(S,_("%lis"),Sec);
 391       break;
 392    }
 393
 394    return S;
 395 }
 396                                                                         /*}}}*/
 397 // SubstVar - Substitute a string for another string                    /*{{{*/
 398 // ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str. */
 400 string SubstVar(const string &Str,const string &Subst,const string &Contents)
 401 {
 402    string::size_type Pos = 0;
 403    string::size_type OldPos = 0;
 404    string Temp;
 405
 406    while (OldPos < Str.length() &&
 407           (Pos = Str.find(Subst,OldPos)) != string::npos)
 408    {
 409       Temp += string(Str,OldPos,Pos) + Contents;
 410       OldPos = Pos + Subst.length();
 411    }
 412
 413    if (OldPos == 0)
 414       return Str;
 415
 416    return Temp + string(Str,OldPos);
 417 }
 418
 419 string SubstVar(string Str,const struct SubstVar *Vars)
 420 {
 421    for (; Vars->Subst != 0; Vars++)
 422       Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
 423    return Str;
 424 }
 425                                                                         /*}}}*/
 426 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
 427 // ---------------------------------------------------------------------
 428 /* Returns a string with the supplied separator depth + 1 times in it */
 429 std::string OutputInDepth(const unsigned long Depth, const char* Separator)
 430 {
 431    std::string output = "";
 432    for(unsigned long d=Depth+1; d > 0; d--)
 433       output.append(Separator);
 434    return output;
 435 }
 436                                                                         /*}}}*/
 437 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 438 // ---------------------------------------------------------------------
 439 /* This converts a URI into a safe filename. It quotes all unsafe characters
 440    and converts / to _ and removes the scheme identifier. The resulting
 441    file name should be unique and never occur again for a different file */
 442 string URItoFileName(const string &URI)
 443 {
 444    // Nuke 'sensitive' items
 445    ::URI U(URI);
 446    U.User.clear();
 447    U.Password.clear();
 448    U.Access.clear();
 449
 450    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 451    string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
 452    replace(NewURI.begin(),NewURI.end(),'/','_');
 453    return NewURI;
 454 }
 455                                                                         /*}}}*/
 456 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 457 // ---------------------------------------------------------------------
 458 /* This routine performs a base64 transformation on a string. It was ripped
 459    from wget and then patched and bug fixed.
 460
 461    This spec can be found in rfc2045 */
 462 string Base64Encode(const string &S)
 463 {
 464    // Conversion table.
 465    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 466                           'I','J','K','L','M','N','O','P',
 467                           'Q','R','S','T','U','V','W','X',
 468                           'Y','Z','a','b','c','d','e','f',
 469                           'g','h','i','j','k','l','m','n',
 470                           'o','p','q','r','s','t','u','v',
 471                           'w','x','y','z','0','1','2','3',
 472                           '4','5','6','7','8','9','+','/'};
 473
 474    // Pre-allocate some space
 475    string Final;
 476    Final.reserve((4*S.length() + 2)/3 + 2);
 477
 478    /* Transform the 3x8 bits to 4x6 bits, as required by
 479       base64.  */
 480    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 481    {
 482       char Bits[3] = {0,0,0};
 483       Bits[0] = I[0];
 484       if (I + 1 < S.end())
 485          Bits[1] = I[1];
 486       if (I + 2 < S.end())
 487          Bits[2] = I[2];
 488
 489       Final += tbl[Bits[0] >> 2];
 490       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 491
 492       if (I + 1 >= S.end())
 493          break;
 494
 495       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 496
 497       if (I + 2 >= S.end())
 498          break;
 499
 500       Final += tbl[Bits[2] & 0x3f];
 501    }
 502
 503    /* Apply the padding elements, this tells how many bytes the remote
 504       end should discard */
 505    if (S.length() % 3 == 2)
 506       Final += '=';
 507    if (S.length() % 3 == 1)
 508       Final += "==";
 509
 510    return Final;
 511 }
 512                                                                         /*}}}*/
 513 // stringcmp - Arbitrary string compare                                 /*{{{*/
 514 // ---------------------------------------------------------------------
 515 /* This safely compares two non-null terminated strings of arbitrary
 516    length */
 517 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 518 {
 519    for (; A != AEnd && B != BEnd; A++, B++)
 520       if (*A != *B)
 521          break;
 522
 523    if (A == AEnd && B == BEnd)
 524       return 0;
 525    if (A == AEnd)
 526       return 1;
 527    if (B == BEnd)
 528       return -1;
 529    if (*A < *B)
 530       return -1;
 531    return 1;
 532 }
 533
 534 #if __GNUC__ >= 3
 535 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 536               const char *B,const char *BEnd)
 537 {
 538    for (; A != AEnd && B != BEnd; A++, B++)
 539       if (*A != *B)
 540          break;
 541
 542    if (A == AEnd && B == BEnd)
 543       return 0;
 544    if (A == AEnd)
 545       return 1;
 546    if (B == BEnd)
 547       return -1;
 548    if (*A < *B)
 549       return -1;
 550    return 1;
 551 }
 552 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
 553               string::const_iterator B,string::const_iterator BEnd)
 554 {
 555    for (; A != AEnd && B != BEnd; A++, B++)
 556       if (*A != *B)
 557          break;
 558
 559    if (A == AEnd && B == BEnd)
 560       return 0;
 561    if (A == AEnd)
 562       return 1;
 563    if (B == BEnd)
 564       return -1;
 565    if (*A < *B)
 566       return -1;
 567    return 1;
 568 }
 569 #endif
 570                                                                         /*}}}*/
 571 // stringcasecmp - Arbitrary case insensitive string compare            /*{{{*/
 572 // ---------------------------------------------------------------------
 573 /* */
 574 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 575 {
 576    for (; A != AEnd && B != BEnd; A++, B++)
 577       if (tolower_ascii(*A) != tolower_ascii(*B))
 578          break;
 579
 580    if (A == AEnd && B == BEnd)
 581       return 0;
 582    if (A == AEnd)
 583       return 1;
 584    if (B == BEnd)
 585       return -1;
 586    if (tolower_ascii(*A) < tolower_ascii(*B))
 587       return -1;
 588    return 1;
 589 }
 590 #if __GNUC__ >= 3
 591 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 592                   const char *B,const char *BEnd)
 593 {
 594    for (; A != AEnd && B != BEnd; A++, B++)
 595       if (tolower_ascii(*A) != tolower_ascii(*B))
 596          break;
 597
 598    if (A == AEnd && B == BEnd)
 599       return 0;
 600    if (A == AEnd)
 601       return 1;
 602    if (B == BEnd)
 603       return -1;
 604    if (tolower_ascii(*A) < tolower_ascii(*B))
 605       return -1;
 606    return 1;
 607 }
 608 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
 609                   string::const_iterator B,string::const_iterator BEnd)
 610 {
 611    for (; A != AEnd && B != BEnd; A++, B++)
 612       if (tolower_ascii(*A) != tolower_ascii(*B))
 613          break;
 614
 615    if (A == AEnd && B == BEnd)
 616       return 0;
 617    if (A == AEnd)
 618       return 1;
 619    if (B == BEnd)
 620       return -1;
 621    if (tolower_ascii(*A) < tolower_ascii(*B))
 622       return -1;
 623    return 1;
 624 }
 625 #endif
 626                                                                         /*}}}*/
// LookupTag - Lookup the value of a tag in a tagged string             /*{{{*/
 628 // ---------------------------------------------------------------------
 629 /* The format is like those used in package files and the method
 630    communication system */
 631 string LookupTag(const string &Message,const char *Tag,const char *Default)
 632 {
 633    // Look for a matching tag.
 634    int Length = strlen(Tag);
 635    for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
 636    {
 637       // Found the tag
 638       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 639       {
 640          // Find the end of line and strip the leading/trailing spaces
 641          string::const_iterator J;
 642          I += Length + 1;
 643          for (; isspace(*I) != 0 && I < Message.end(); ++I);
 644          for (J = I; *J != '\n' && J < Message.end(); ++J);
 645          for (; J > I && isspace(J[-1]) != 0; --J);
 646
 647          return string(I,J);
 648       }
 649
 650       for (; *I != '\n' && I < Message.end(); ++I);
 651    }
 652
 653    // Failed to find a match
 654    if (Default == 0)
 655       return string();
 656    return Default;
 657 }
 658                                                                         /*}}}*/
 659 // StringToBool - Converts a string into a boolean                      /*{{{*/
 660 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
 663 int StringToBool(const string &Text,int Default)
 664 {
 665    char *End;
 666    int Res = strtol(Text.c_str(),&End,0);
 667    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 668       return Res;
 669
 670    // Check for positives
 671    if (strcasecmp(Text.c_str(),"no") == 0 ||
 672        strcasecmp(Text.c_str(),"false") == 0 ||
 673        strcasecmp(Text.c_str(),"without") == 0 ||
 674        strcasecmp(Text.c_str(),"off") == 0 ||
 675        strcasecmp(Text.c_str(),"disable") == 0)
 676       return 0;
 677
 678    // Check for negatives
 679    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 680        strcasecmp(Text.c_str(),"true") == 0 ||
 681        strcasecmp(Text.c_str(),"with") == 0 ||
 682        strcasecmp(Text.c_str(),"on") == 0 ||
 683        strcasecmp(Text.c_str(),"enable") == 0)
 684       return 1;
 685
 686    return Default;
 687 }
 688                                                                         /*}}}*/
 689 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 690 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
 693 string TimeRFC1123(time_t Date)
 694 {
 695    struct tm Conv;
 696    if (gmtime_r(&Date, &Conv) == NULL)
 697       return "";
 698
 699    char Buf[300];
 700    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 701    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 702                           "Aug","Sep","Oct","Nov","Dec"};
 703
 704    snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 705            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 706            Conv.tm_min,Conv.tm_sec);
 707    return Buf;
 708 }
 709                                                                         /*}}}*/
 710 // ReadMessages - Read messages from the FD                             /*{{{*/
 711 // ---------------------------------------------------------------------
 712 /* This pulls full messages from the input FD into the message buffer.
 713    It assumes that messages will not pause during transit so no
 714    fancy buffering is used.
 715
 716    In particular: this reads blocks from the input until it believes
 717    that it's run out of input text.  Each block is terminated by a
 718    double newline ('\n' followed by '\n').  As noted below, there is a
 719    bug in this code: it assumes that all the blocks have been read if
 720    it doesn't see additional text in the buffer after the last one is
 721    parsed, which will cause it to lose blocks if the last block
 722    coincides with the end of the buffer.
 723  */
/* Reads from Fd and appends every complete message found to List. A
   message is the text in front of a pair of consecutive newlines; the
   newlines themselves are not part of the stored message.

   Returns true when read() fails with EAGAIN, or when everything read so
   far has been consumed as complete messages. Returns false when read()
   returns 0, when it fails with any error other than EINTR/EAGAIN, or
   when WaitFd() returns false. */
bool ReadMessages(int Fd, vector<string> &List)
{
   char Buffer[64000];
   // One past the last valid byte currently held in Buffer
   char *End = Buffer;
   // Represents any left-over from the previous iteration of the
   // parse loop.  (i.e., if a message is split across the end
   // of the buffer, it goes here)
   string PartialMessage;

   while (1)
   {
      // Fill whatever room is left behind the data already buffered
      int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
      // Interrupted by a signal: simply try again
      if (Res < 0 && errno == EINTR)
         continue;

      // Process is dead, this is kind of bad..
      if (Res == 0)
         return false;

      // No data
      if (Res < 0 && errno == EAGAIN)
         return true;
      if (Res < 0)
         return false;

      End += Res;

      // Look for the end of the message
      for (char *I = Buffer; I + 1 < End; I++)
      {
         if (I[0] != '\n' || I[1] != '\n')
            continue;

         // Pull the message out
         string Message(Buffer,I-Buffer);
         PartialMessage += Message;

         // Fix up the buffer
         // Skip the whole run of newlines, then move the unread rest to
         // the front; End is adjusted first so that End-Buffer is the
         // number of bytes that remain
         for (; I < End && *I == '\n'; I++);
         End -= I-Buffer;
         memmove(Buffer,I,End-Buffer);
         // Restart the scan on the shifted data (the loop increment moves
         // on to Buffer + 1 before the next comparison)
         I = Buffer;

         List.push_back(PartialMessage);
         PartialMessage.clear();
      }
      if (End != Buffer)
        {
          // If there's text left in the buffer, store it
          // in PartialMessage and throw the rest of the buffer
          // away.  This allows us to handle messages that
          // are longer than the static buffer size.
          // NOTE(review): only Buffer is scanned for "\n\n", so a
          // separator whose first newline is moved into PartialMessage
          // here and whose second arrives with the next read does not
          // appear to be recognised -- confirm.
          PartialMessage += string(Buffer, End);
          End = Buffer;
        }
      else
        {
          // BUG ALERT: if a message block happens to end at a
          // multiple of 64000 characters, this will cause it to
          // terminate early, leading to a badly formed block and
          // probably crashing the method.  However, this is the only
          // way we have to find the end of the message block.  I have
          // an idea of how to fix this, but it will require changes
          // to the protocol (essentially to mark the beginning and
          // end of the block).
          //
          //  -- dburrows 2008-04-02
          return true;
        }

      // An incomplete message is pending: wait for more input on Fd
      if (WaitFd(Fd) == false)
         return false;
   }
}
 798                                                                         /*}}}*/
 799 // MonthConv - Converts a month string into a number                    /*{{{*/
 800 // ---------------------------------------------------------------------
 801 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 802    Made it a bit more robust with a few tolower_ascii though. */
 803 static int MonthConv(char *Month)
 804 {
 805    switch (tolower_ascii(*Month))
 806    {
 807       case 'a':
 808       return tolower_ascii(Month[1]) == 'p'?3:7;
 809       case 'd':
 810       return 11;
 811       case 'f':
 812       return 1;
 813       case 'j':
 814       if (tolower_ascii(Month[1]) == 'a')
 815          return 0;
 816       return tolower_ascii(Month[2]) == 'n'?5:6;
 817       case 'm':
 818       return tolower_ascii(Month[2]) == 'r'?2:4;
 819       case 'n':
 820       return 10;
 821       case 'o':
 822       return 9;
 823       case 's':
 824       return 8;
 825
 826       // Pretend it is January..
 827       default:
 828       return 0;
 829    }
 830 }
 831                                                                         /*}}}*/
 832 // timegm - Internal timegm if the gnu version is not available         /*{{{*/
 833 // ---------------------------------------------------------------------
 834 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
 835    than local timezone (mktime assumes the latter).
 836
 837    This function is a nonstandard GNU extension that is also present on
 838    the BSDs and maybe other systems. For others we follow the advice of
   the manpage of timegm and use its portable replacement. */
 840 #ifndef HAVE_TIMEGM
 841 static time_t timegm(struct tm *t)
 842 {
 843    char *tz = getenv("TZ");
 844    setenv("TZ", "", 1);
 845    tzset();
 846    time_t ret = mktime(t);
 847    if (tz)
 848       setenv("TZ", tz, 1);
 849    else
 850       unsetenv("TZ");
 851    tzset();
 852    return ret;
 853 }
 854 #endif
 855                                                                         /*}}}*/
// RFC1123StrToTime - Converts an HTTP/1.1 full date string to a time_t /*{{{*/
 857 // ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC 2616 Section 3.3.1
   with one exception: all timezones (%Z) are accepted. The protocol
   says that the zone MUST be GMT, but GMT is equal to UTC, which we will
   encounter from time to time (e.g. in Release files), so we accept any
   zone here and just assume it is GMT (or UTC) later on */
 863 bool RFC1123StrToTime(const char* const str,time_t &time)
 864 {
 865    struct tm Tm;
 866    setlocale (LC_ALL,"C");
 867    bool const invalid =
 868    // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
 869       (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
 870    // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
 871        strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
 872    // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
 873        strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
 874    setlocale (LC_ALL,"");
 875    if (invalid == true)
 876       return false;
 877
 878    time = timegm(&Tm);
 879    return true;
 880 }
 881                                                                         /*}}}*/
 882 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t    /*{{{*/
 883 // ---------------------------------------------------------------------
 884 /* */
 885 bool FTPMDTMStrToTime(const char* const str,time_t &time)
 886 {
 887    struct tm Tm;
 888    // MDTM includes no whitespaces but recommend and ignored by strptime
 889    if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
 890       return false;
 891
 892    time = timegm(&Tm);
 893    return true;
 894 }
 895                                                                         /*}}}*/
 896 // StrToTime - Converts a string into a time_t                          /*{{{*/
 897 // ---------------------------------------------------------------------
 898 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
 899    and the C library asctime format. It requires the GNU library function
 900    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 901    reason the C library does not provide any such function :< This also
 902    handles the weird, but unambiguous FTP time format*/
 903 bool StrToTime(const string &Val,time_t &Result)
 904 {
 905    struct tm Tm;
 906    char Month[10];
 907    const char *I = Val.c_str();
 908
 909    // Skip the day of the week
 910    for (;*I != 0  && *I != ' '; I++);
 911
 912    // Handle RFC 1123 time
 913    Month[0] = 0;
 914    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 915               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 916    {
 917       // Handle RFC 1036 time
 918       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 919                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 920          Tm.tm_year += 1900;
 921       else
 922       {
 923          // asctime format
 924          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 925                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 926          {
 927             // 'ftp' time
 928             if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
 929                        &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 930                return false;
 931             Tm.tm_mon--;
 932          }
 933       }
 934    }
 935
 936    Tm.tm_isdst = 0;
 937    if (Month[0] != 0)
 938       Tm.tm_mon = MonthConv(Month);
 939    Tm.tm_year -= 1900;
 940
 941    // Convert to local time and then to GMT
 942    Result = timegm(&Tm);
 943    return true;
 944 }
 945                                                                         /*}}}*/
 // StrToNum - Convert a fixed length string to a number                 /*{{{*/
 // ---------------------------------------------------------------------
 /* This is used in decoding the crazy fixed length string headers in
    tar and ar files.

    Str  - start of the field, it does not need to be NUL terminated
    Res  - receives the number; it is set even if false is returned
    Len  - number of bytes of the field, must be below 30
    Base - number base handed to strtoul
    A field consisting only of spaces (or of no bytes at all) counts as 0.
    Returns false if the field is too long or strtoul finds no number. */
 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
 {
    // Work on a NUL terminated private copy of the field
    char Field[30];
    if (Len >= sizeof(Field))
       return false;
    memcpy(Field,Str,Len);
    Field[Len] = 0;

    // All spaces is a zero
    Res = 0;
    const char *Scan = Field;
    while (*Scan == ' ')
       ++Scan;
    if (*Scan == 0)
       return true;

    // strtoul leaves Stop at the start if it could not convert anything
    char *Stop;
    Res = strtoul(Field,&Stop,Base);
    return Stop != Field;
 }
 972                                                                         /*}}}*/
 // Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
 // ---------------------------------------------------------------------
 /* This is used in decoding the base-256 encoded fixed length fields in
    tar files. The first byte must have its high bit set to mark the
    encoding; its remaining seven bits and all following bytes form the
    number, most significant byte first.

    Str - start of the field
    Res - receives the number, only written on success
    Len - number of bytes of the field
    Returns false if the field is empty, lacks the marker bit or encodes a
    value which does not fit into an unsigned long. */
 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
 {
    if (Len == 0)
       return false;

    unsigned char const First = static_cast<unsigned char>(Str[0]);
    if ((First & 0x80) == 0)
       return false;

    // Anything above this would lose bits on the next shift
    unsigned long const Limit = ~0UL >> 8;

    unsigned long Value = First & 0x7F;
    for (unsigned int i = 1; i < Len; ++i)
    {
       if (Value > Limit)
          return false;
       // plain char may be signed: go through unsigned char so that bytes
       // >= 0x80 are not sign extended
       Value = (Value << 8) | static_cast<unsigned char>(Str[i]);
    }

    Res = Value;
    return true;
 }
 989                                                                         /*}}}*/
 // HexDigit - Convert a hex character into an integer                   /*{{{*/
 // ---------------------------------------------------------------------
 /* Helper for Hex2Num. Maps '0'-'9', 'a'-'f' and 'A'-'F' to 0..15. Every
    other input yields 0 as well, so a caller which needs to tell "not a
    hex digit" apart from '0' has to check the character itself. */
 static int HexDigit(int c)
 {
    struct DigitRange
    {
       int First;
       int Last;
       int Value;
    };
    static DigitRange const Ranges[] = {
       {'0', '9', 0},
       {'a', 'f', 10},
       {'A', 'F', 10},
    };

    for (unsigned int i = 0; i != sizeof(Ranges)/sizeof(Ranges[0]); ++i)
       if (c >= Ranges[i].First && c <= Ranges[i].Last)
          return c - Ranges[i].First + Ranges[i].Value;
    return 0;
 }
1003                                                                         /*}}}*/
1004 // Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
1005 // ---------------------------------------------------------------------
1006 /* The length of the buffer must be exactly 1/2 the length of the string. */
1007 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1008 {
1009    if (Str.length() != Length*2)
1010       return false;
1011
1012    // Convert each digit. We store it in the same order as the string
1013    int J = 0;
1014    for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1015    {
1016       if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1017          return false;
1018
1019       Num[J] = HexDigit(I[0]) << 4;
1020       Num[J] += HexDigit(I[1]);
1021    }
1022
1023    return true;
1024 }
1025                                                                         /*}}}*/
// TokSplitString - Split a string up by a given token                  /*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memories. Input is changed to insert nulls at each token location.

   Tok     - the separator
   Input   - NUL terminated string, modified in place
   List    - receives pointers into Input, one per piece, followed by a
             null pointer
   ListMax - number of entries List can hold including the terminating
             null pointer, so at most ListMax-1 pieces fit
   Whitespace around the pieces is stripped. Returns false if List is too
   small; the pieces found so far stay in List, null terminated. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Not even the terminating entry fits, so do not touch List at all
   if (ListMax == 0)
      return false;

   /* The ctype functions are only defined for values of unsigned char,
      hence the casts of the possibly negative plain char below */

   // Strip any leading spaces
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over the token, spaces and the null written above
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1065                                                                         /*}}}*/
// VectorizeString - Split a string up into a vector of strings         /*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.

   haystack - the string to split
   split    - the separator character
   Empty pieces are kept, with one exception: a separator at the very end
   of the string does not produce a trailing empty piece. An empty string
   results in a vector holding a single empty string. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   std::string::const_iterator const last = haystack.end();
   std::string::const_iterator start = haystack.begin();
   while (true)
   {
      std::string::const_iterator const stop = std::find(start, last, split);
      exploded.push_back(std::string(start, stop));

      // Never step beyond the end of the string
      if (stop == last)
         break;
      start = stop + 1;

      // Nothing behind the final separator
      if (start == last)
         break;
   }
   return exploded;
}
1083                                                                         /*}}}*/
1084 // RegexChoice - Simple regex list/list matcher                         /*{{{*/
1085 // ---------------------------------------------------------------------
1086 /* */
1087 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1088                       const char **ListEnd)
1089 {
1090    for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1091       R->Hit = false;
1092
1093    unsigned long Hits = 0;
1094    for (; ListBegin != ListEnd; ListBegin++)
1095    {
1096       // Check if the name is a regex
1097       const char *I;
1098       bool Regex = true;
1099       for (I = *ListBegin; *I != 0; I++)
1100          if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1101             break;
1102       if (*I == 0)
1103          Regex = false;
1104
1105       // Compile the regex pattern
1106       regex_t Pattern;
1107       if (Regex == true)
1108          if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1109                      REG_NOSUB) != 0)
1110             Regex = false;
1111
1112       // Search the list
1113       bool Done = false;
1114       for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1115       {
1116          if (R->Str[0] == 0)
1117             continue;
1118
1119          if (strcasecmp(R->Str,*ListBegin) != 0)
1120          {
1121             if (Regex == false)
1122                continue;
1123             if (regexec(&Pattern,R->Str,0,0,0) != 0)
1124                continue;
1125          }
1126          Done = true;
1127
1128          if (R->Hit == false)
1129             Hits++;
1130
1131          R->Hit = true;
1132       }
1133
1134       if (Regex == true)
1135          regfree(&Pattern);
1136
1137       if (Done == false)
1138          _error->Warning(_("Selection %s not found"),*ListBegin);
1139    }
1140
1141    return Hits;
1142 }
1143                                                                         /*}}}*/
// ioprintf - C format string outputter to C++ iostreams                /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters.

   out    - stream the formatted text is written to
   format - printf style format string, followed by its arguments
   Text of any length is written completely. Nothing is written if
   vsnprintf reports an error. */
void ioprintf(std::ostream &out,const char *format,...)
{
   // sprintf the description, the common case fits on the stack
   char S[4096];
   va_list args;
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
      return;

   if (static_cast<size_t>(Needed) < sizeof(S))
   {
      out << S;
      return;
   }

   /* vsnprintf returned the length the complete text has, so format once
      more into a buffer of exactly that size. The argument list was used
      up by the first pass and has to be started again. */
   std::vector<char> Big(static_cast<size_t>(Needed) + 1);
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << &Big[0];
}
1158                                                                         /*}}}*/
// strprintf - C format string outputter to C++ strings                 /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters.

   out    - receives the formatted text, its previous content is replaced
   format - printf style format string, followed by its arguments
   Text of any length is stored completely. out is left empty if vsnprintf
   reports an error. */
void strprintf(std::string &out,const char *format,...)
{
   // sprintf the description, the common case fits on the stack
   char S[4096];
   va_list args;
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
   {
      out.clear();
      return;
   }

   if (static_cast<size_t>(Needed) < sizeof(S))
   {
      out.assign(S,static_cast<size_t>(Needed));
      return;
   }

   /* vsnprintf returned the length the complete text has, so format once
      more into a buffer of exactly that size. The argument list was used
      up by the first pass and has to be started again. */
   std::vector<char> Big(static_cast<size_t>(Needed) + 1);
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out.assign(&Big[0]);
}
1173                                                                         /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs.

   Buffer - where to write
   End    - one past the last usable byte of the buffer
   Format - printf style format string, followed by its arguments
   Returns the position of the terminating null so that the next call can
   continue there, or End if the text did not fit (it is cut short then),
   an error occurred or there was no room to begin with. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room at all, not even for the terminating null
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* Did is the length the complete text would have had. Compare lengths
      instead of building a pointer which may lie far beyond End. */
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1195                                                                         /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
// ---------------------------------------------------------------------
/* Returns everything behind the first colon of VerStr, or VerStr itself
   if it contains no colon. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   return (Colon == std::string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
                                                                        /*}}}*/
1205
// tolower_ascii - tolower() function that ignores the locale           /*{{{*/
// ---------------------------------------------------------------------
/* This little function is the most called method we have and tries
   therefore to do the absolute minimum - and is noticeably faster than
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. Only 'A'-'Z' are
   changed, every other value is returned as it is. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1218                                                                         /*}}}*/
1219
1220 // CheckDomainList - See if Host is in a , seperate list                /*{{{*/
1221 // ---------------------------------------------------------------------
1222 /* The domain list is a comma seperate list of domains that are suffix
1223    matched against the argument */
1224 bool CheckDomainList(const string &Host,const string &List)
1225 {
1226    string::const_iterator Start = List.begin();
1227    for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1228    {
1229       if (Cur < List.end() && *Cur != ',')
1230          continue;
1231
1232       // Match the end of the string..
1233       if ((Host.size() >= (unsigned)(Cur - Start)) &&
1234           Cur - Start != 0 &&
1235           stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1236          return true;
1237
1238       Start = Cur + 1;
1239    }
1240    return false;
1241 }
1242                                                                         /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which
     \\    is replaced by a single backslash,
     \0XX  is replaced by the character with the octal code XX and
     \xXX  is replaced by the character with the hexadecimal code XX.
   A backslash followed by any other character is dropped together with
   that character, a backslash at the very end is dropped as well. If
   fewer than two characters follow a \0 or \x, only these two characters
   are dropped and whatever is left is copied as ordinary text. */
std::string DeEscapeString(const std::string &input)
{
   std::string output;
   std::string::const_iterator const last = input.end();
   for (std::string::const_iterator it = input.begin(); it != last; ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (last - it < 2)
         continue;

      // read it
      ++it;
      int base = 0;
      switch (*it)
      {
         case '\\':
            // deal with double escape
            output += '\\';
            continue;
         case '0':
            base = 8;
            break;
         case 'x':
            base = 16;
            break;
         default:
            // FIXME: raise exception here?
            continue;
      }

      /* Both digits have to be inside of the string. Reading them without
         this check would touch the end of the string and afterwards move
         the iterator beyond it, so that the loop would never terminate. */
      if (last - it < 3)
         continue;

      char const digits[3] = { it[1], it[2], 0 };
      output += static_cast<char>(strtol(digits, 0, base));
      it += 2;
   }
   return output;
}
1304                                                                         /*}}}*/
1305 // URI::CopyFrom - Copy from an object                                  /*{{{*/
1306 // ---------------------------------------------------------------------
1307 /* This parses the URI into all of its components */
1308 void URI::CopyFrom(const string &U)
1309 {
1310    string::const_iterator I = U.begin();
1311
1312    // Locate the first colon, this separates the scheme
1313    for (; I < U.end() && *I != ':' ; ++I);
1314    string::const_iterator FirstColon = I;
1315
1316    /* Determine if this is a host type URI with a leading double //
1317       and then search for the first single / */
1318    string::const_iterator SingleSlash = I;
1319    if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1320       SingleSlash += 3;
1321
1322    /* Find the / indicating the end of the hostname, ignoring /'s in the
1323       square brackets */
1324    bool InBracket = false;
1325    for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1326    {
1327       if (*SingleSlash == '[')
1328          InBracket = true;
1329       if (InBracket == true && *SingleSlash == ']')
1330          InBracket = false;
1331    }
1332
1333    if (SingleSlash > U.end())
1334       SingleSlash = U.end();
1335
1336    // We can now write the access and path specifiers
1337    Access.assign(U.begin(),FirstColon);
1338    if (SingleSlash != U.end())
1339       Path.assign(SingleSlash,U.end());
1340    if (Path.empty() == true)
1341       Path = "/";
1342
1343    // Now we attempt to locate a user:pass@host fragment
1344    if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1345       FirstColon += 3;
1346    else
1347       FirstColon += 1;
1348    if (FirstColon >= U.end())
1349       return;
1350
1351    if (FirstColon > SingleSlash)
1352       FirstColon = SingleSlash;
1353
1354    // Find the colon...
1355    I = FirstColon + 1;
1356    if (I > SingleSlash)
1357       I = SingleSlash;
1358    for (; I < SingleSlash && *I != ':'; ++I);
1359    string::const_iterator SecondColon = I;
1360
1361    // Search for the @ after the colon
1362    for (; I < SingleSlash && *I != '@'; ++I);
1363    string::const_iterator At = I;
1364
1365    // Now write the host and user/pass
1366    if (At == SingleSlash)
1367    {
1368       if (FirstColon < SingleSlash)
1369          Host.assign(FirstColon,SingleSlash);
1370    }
1371    else
1372    {
1373       Host.assign(At+1,SingleSlash);
1374       // username and password must be encoded (RFC 3986)
1375       User.assign(DeQuoteString(FirstColon,SecondColon));
1376       if (SecondColon < At)
1377          Password.assign(DeQuoteString(SecondColon+1,At));
1378    }
1379
1380    // Now we parse the RFC 2732 [] hostnames.
1381    unsigned long PortEnd = 0;
1382    InBracket = false;
1383    for (unsigned I = 0; I != Host.length();)
1384    {
1385       if (Host[I] == '[')
1386       {
1387          InBracket = true;
1388          Host.erase(I,1);
1389          continue;
1390       }
1391
1392       if (InBracket == true && Host[I] == ']')
1393       {
1394          InBracket = false;
1395          Host.erase(I,1);
1396          PortEnd = I;
1397          continue;
1398       }
1399       I++;
1400    }
1401
1402    // Tsk, weird.
1403    if (InBracket == true)
1404    {
1405       Host.clear();
1406       return;
1407    }
1408
1409    // Now we parse off a port number from the hostname
1410    Port = 0;
1411    string::size_type Pos = Host.rfind(':');
1412    if (Pos == string::npos || Pos < PortEnd)
1413       return;
1414
1415    Port = atoi(string(Host,Pos+1).c_str());
1416    Host.assign(Host,0,Pos);
1417 }
1418                                                                         /*}}}*/
1419 // URI::operator string - Convert the URI to a string                   /*{{{*/
1420 // ---------------------------------------------------------------------
1421 /* */
1422 URI::operator string()
1423 {
1424    string Res;
1425
1426    if (Access.empty() == false)
1427       Res = Access + ':';
1428
1429    if (Host.empty() == false)
1430    {
1431       if (Access.empty() == false)
1432          Res += "//";
1433
1434       if (User.empty() == false)
1435       {
1436          Res +=  User;
1437          if (Password.empty() == false)
1438             Res += ":" + Password;
1439          Res += "@";
1440       }
1441
1442       // Add RFC 2732 escaping characters
1443       if (Access.empty() == false &&
1444           (Host.find('/') != string::npos || Host.find(':') != string::npos))
1445          Res += '[' + Host + ']';
1446       else
1447          Res += Host;
1448
1449       if (Port != 0)
1450       {
1451          char S[30];
1452          sprintf(S,":%u",Port);
1453          Res += S;
1454       }
1455    }
1456
1457    if (Path.empty() == false)
1458    {
1459       if (Path[0] != '/')
1460          Res += "/" + Path;
1461       else
1462          Res += Path;
1463    }
1464
1465    return Res;
1466 }
1467                                                                         /*}}}*/
1468 // URI::SiteOnly - Return the schema and site for the URI               /*{{{*/
1469 // ---------------------------------------------------------------------
1470 /* */
1471 string URI::SiteOnly(const string &URI)
1472 {
1473    ::URI U(URI);
1474    U.User.clear();
1475    U.Password.clear();
1476    U.Path.clear();
1477    U.Port = 0;
1478    return U;
1479 }
1480                                                                         /*}}}*/
1481 // URI::NoUserPassword - Return the schema, site and path for the URI   /*{{{*/
1482 // ---------------------------------------------------------------------
1483 /* */
1484 string URI::NoUserPassword(const string &URI)
1485 {
1486    ::URI U(URI);
1487    U.User.clear();
1488    U.Password.clear();
1489    U.Port = 0;
1490    return U;
1491 }
1492                                                                         /*}}}*/