apt-pkg/contrib/strutl.cc

   1 // -*- mode: cpp; mode: fold -*-
   2 // Description                                                          /*{{{*/
   3 // $Id: strutl.cc,v 1.8 1998/10/24 04:58:07 jgg Exp $
   4 /* ######################################################################
   5
   String Util - Some useful string functions.

   These have been collected from here and there to do all sorts of useful
   things to strings. They are useful in file parsers, URI handlers and
   especially in APT methods.
  11
  12    This source is placed in the Public Domain, do with it what you will
  13    It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
  14
  15    ##################################################################### */
  16                                                                         /*}}}*/
  17 // Includes                                                             /*{{{*/
#include <strutl.h>
#include <apt-pkg/fileutl.h>

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <time.h>
  25                                                                         /*}}}*/
  26
  27 // strstrip - Remove white space from the front and back of a string    /*{{{*/
  28 // ---------------------------------------------------------------------
  29 /* This is handy to use when parsing a file. It also removes \n's left
  30    over from fgets and company */
  31 char *_strstrip(char *String)
  32 {
  33    for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
  34
  35    if (*String == 0)
  36       return String;
  37
  38    char *End = String + strlen(String) - 1;
  39    for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
  40                                *End == '\r'); End--);
  41    End++;
  42    *End = 0;
  43    return String;
  44 };
  45                                                                         /*}}}*/
  46 // strtabexpand - Converts tabs into 8 spaces                           /*{{{*/
  47 // ---------------------------------------------------------------------
  48 /* */
  49 char *_strtabexpand(char *String,size_t Len)
  50 {
  51    for (char *I = String; I != I + Len && *I != 0; I++)
  52    {
  53       if (*I != '\t')
  54          continue;
  55       if (I + 8 > String + Len)
  56       {
  57          *I = 0;
  58          return String;
  59       }
  60
  61       /* Assume the start of the string is 0 and find the next 8 char
  62          division */
  63       int Len;
  64       if (String == I)
  65          Len = 1;
  66       else
  67          Len = 8 - ((String - I) % 8);
  68       Len -= 2;
  69       if (Len <= 0)
  70       {
  71          *I = ' ';
  72          continue;
  73       }
  74
  75       memmove(I + Len,I + 1,strlen(I) + 1);
  76       for (char *J = I; J + Len != I; *I = ' ', I++);
  77    }
  78    return String;
  79 }
  80                                                                         /*}}}*/
  81 // ParseQuoteWord - Parse a single word out of a string                 /*{{{*/
  82 // ---------------------------------------------------------------------
  83 /* This grabs a single word, converts any % escaped characters to their
  84    proper values and advances the pointer. Double quotes are understood
  85    and striped out as well. This is for URI/URL parsing. */
  86 bool ParseQuoteWord(const char *&String,string &Res)
  87 {
  88    // Skip leading whitespace
  89    const char *C = String;
  90    for (;*C != 0 && *C == ' '; C++);
  91    if (*C == 0)
  92       return false;
  93
  94    // Jump to the next word
  95    for (;*C != 0 && *C != ' '; C++)
  96    {
  97       if (*C == '"')
  98       {
  99          for (C++;*C != 0 && *C != '"'; C++);
 100          if (*C == 0)
 101             return false;
 102       }
 103    }
 104
 105    // Now de-quote characters
 106    char Buffer[1024];
 107    char Tmp[3];
 108    const char *Start = String;
 109    char *I;
 110    for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
 111    {
 112       if (*Start == '%' && Start + 2 < C)
 113       {
 114          Tmp[0] = Start[1];
 115          Tmp[1] = Start[2];
 116          Tmp[3] = 0;
 117          *I = (char)strtol(Tmp,0,16);
 118          Start += 3;
 119          continue;
 120       }
 121       if (*Start != '"')
 122          *I = *Start;
 123       else
 124          I--;
 125       Start++;
 126    }
 127    *I = 0;
 128    Res = Buffer;
 129
 130    // Skip ending white space
 131    for (;*C != 0 && *C == ' '; C++);
 132    String = C;
 133    return true;
 134 }
 135                                                                         /*}}}*/
 136 // ParseCWord - Parses a string like a C "" expression                  /*{{{*/
 137 // ---------------------------------------------------------------------
 138 /* This expects a series of space seperated strings enclosed in ""'s.
 139    It concatenates the ""'s into a single string. */
 140 bool ParseCWord(const char *String,string &Res)
 141 {
 142    // Skip leading whitespace
 143    const char *C = String;
 144    for (;*C != 0 && *C == ' '; C++);
 145    if (*C == 0)
 146       return false;
 147
 148    char Buffer[1024];
 149    char *Buf = Buffer;
 150    if (strlen(String) >= sizeof(Buffer))
 151        return false;
 152
 153    for (; *C != 0; C++)
 154    {
 155       if (*C == '"')
 156       {
 157          for (C++; *C != 0 && *C != '"'; C++)
 158             *Buf++ = *C;
 159
 160          if (*C == 0)
 161             return false;
 162
 163          continue;
 164       }
 165
 166       if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
 167          continue;
 168       if (isspace(*C) == 0)
 169          return false;
 170       *Buf++ = ' ';
 171    }
 172    *Buf = 0;
 173    Res = Buffer;
 174    return true;
 175 }
 176                                                                         /*}}}*/
 177 // QuoteString - Convert a string into quoted from                      /*{{{*/
 178 // ---------------------------------------------------------------------
 179 /* */
 180 string QuoteString(string Str,const char *Bad)
 181 {
 182    string Res;
 183    for (string::iterator I = Str.begin(); I != Str.end(); I++)
 184    {
 185       if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
 186           *I <= 0x20 || *I >= 0x7F)
 187       {
 188          char Buf[10];
 189          sprintf(Buf,"%%%02x",(int)*I);
 190          Res += Buf;
 191       }
 192       else
 193          Res += *I;
 194    }
 195    return Res;
 196 }
 197                                                                         /*}}}*/
 198 // SizeToStr - Convert a long into a human readable size                /*{{{*/
 199 // ---------------------------------------------------------------------
 200 /* A max of 4 digits are shown before conversion to the next highest unit.
 201    The max length of the string will be 5 chars unless the size is > 10
 202    YottaBytes (E24) */
 203 string SizeToStr(double Size)
 204 {
 205    char S[300];
 206    double ASize;
 207    if (Size >= 0)
 208       ASize = Size;
 209    else
 210       ASize = -1*Size;
 211
 212    /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
 213       ExaBytes, ZettaBytes, YottaBytes */
 214    char Ext[] = {'b','k','M','G','T','P','E','Z','Y'};
 215    int I = 0;
 216    while (I <= 8)
 217    {
 218       if (ASize < 100 && I != 0)
 219       {
 220          sprintf(S,"%.1f%c",ASize,Ext[I]);
 221          break;
 222       }
 223
 224       if (ASize < 10000)
 225       {
 226          sprintf(S,"%.0f%c",ASize,Ext[I]);
 227          break;
 228       }
 229       ASize /= 1000.0;
 230       I++;
 231    }
 232
 233    return S;
 234 }
 235                                                                         /*}}}*/
 236 // TimeToStr - Convert the time into a string                           /*{{{*/
 237 // ---------------------------------------------------------------------
 238 /* Converts a number of seconds to a hms format */
 239 string TimeToStr(unsigned long Sec)
 240 {
 241    char S[300];
 242
 243    while (1)
 244    {
 245       if (Sec > 60*60*24)
 246       {
 247          sprintf(S,"%lid %lih%lim%lis",Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
 248          break;
 249       }
 250
 251       if (Sec > 60*60)
 252       {
 253          sprintf(S,"%lih%lim%lis",Sec/60/60,(Sec/60) % 60,Sec % 60);
 254          break;
 255       }
 256
 257       if (Sec > 60)
 258       {
 259          sprintf(S,"%lim%lis",Sec/60,Sec % 60);
 260          break;
 261       }
 262
 263       sprintf(S,"%lis",Sec);
 264       break;
 265    }
 266
 267    return S;
 268 }
 269                                                                         /*}}}*/
 270 // SubstVar - Substitute a string for another string                    /*{{{*/
 271 // ---------------------------------------------------------------------
 272 /* This replaces all occurances of Subst with Contents in Str. */
 273 string SubstVar(string Str,string Subst,string Contents)
 274 {
 275    string::size_type Pos = 0;
 276    string::size_type OldPos = 0;
 277    string Temp;
 278
 279    while (OldPos < Str.length() &&
 280           (Pos = Str.find(Subst,OldPos)) != string::npos)
 281    {
 282       Temp += string(Str,OldPos,Pos) + Contents;
 283       OldPos = Pos + Subst.length();
 284    }
 285
 286    if (OldPos == 0)
 287       return Str;
 288
 289    return Temp + string(Str,OldPos);
 290 }
 291                                                                         /*}}}*/
 292 // URItoFileName - Convert the uri into a unique file name              /*{{{*/
 293 // ---------------------------------------------------------------------
 294 /* This converts a URI into a safe filename. It quotes all unsafe characters
 295    and converts / to _ and removes the scheme identifier. The resulting
 296    file name should be unique and never occur again for a different file */
 297 string URItoFileName(string URI)
 298 {
 299    string::const_iterator I = URI.begin() + URI.find(':') + 1;
 300    for (; I < URI.end() && *I == '/'; I++);
 301
 302    // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
 303    URI = QuoteString(string(I,URI.end() - I),"\\|{}[]<>\"^~_=!@#$%^&*");
 304    string::iterator J = URI.begin();
 305    for (; J != URI.end(); J++)
 306       if (*J == '/')
 307          *J = '_';
 308    return URI;
 309 }
 310                                                                         /*}}}*/
 311 // URIAccess - Return the access method for the URI                     /*{{{*/
 312 // ---------------------------------------------------------------------
 313 /* */
 314 string URIAccess(string URI)
 315 {
 316    string::size_type Pos = URI.find(':');
 317    if (Pos == string::npos)
 318       return URI;
 319    return string(URI,0,Pos);
 320 }
 321                                                                         /*}}}*/
 322 // Base64Encode - Base64 Encoding routine for short strings             /*{{{*/
 323 // ---------------------------------------------------------------------
 324 /* This routine performs a base64 transformation on a string. It was ripped
 325    from wget and then patched and bug fixed.
 326
 327    This spec can be found in rfc2045 */
 328 string Base64Encode(string S)
 329 {
 330    // Conversion table.
 331    static char tbl[64] = {'A','B','C','D','E','F','G','H',
 332                           'I','J','K','L','M','N','O','P',
 333                           'Q','R','S','T','U','V','W','X',
 334                           'Y','Z','a','b','c','d','e','f',
 335                           'g','h','i','j','k','l','m','n',
 336                           'o','p','q','r','s','t','u','v',
 337                           'w','x','y','z','0','1','2','3',
 338                           '4','5','6','7','8','9','+','/'};
 339
 340    // Pre-allocate some space
 341    string Final;
 342    Final.reserve((4*S.length() + 2)/3 + 2);
 343
 344    /* Transform the 3x8 bits to 4x6 bits, as required by
 345       base64.  */
 346    for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
 347    {
 348       char Bits[3] = {0,0,0};
 349       Bits[0] = I[0];
 350       if (I + 1 < S.end())
 351          Bits[1] = I[1];
 352       if (I + 2 < S.end())
 353          Bits[2] = I[2];
 354
 355       Final += tbl[Bits[0] >> 2];
 356       Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];
 357
 358       if (I + 1 >= S.end())
 359          break;
 360
 361       Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];
 362
 363       if (I + 2 >= S.end())
 364          break;
 365
 366       Final += tbl[Bits[2] & 0x3f];
 367    }
 368
 369    /* Apply the padding elements, this tells how many bytes the remote
 370       end should discard */
 371    if (S.length() % 3 == 2)
 372       Final += '=';
 373    if (S.length() % 3 == 1)
 374       Final += "==";
 375
 376    return Final;
 377 }
 378                                                                         /*}}}*/
 379 // stringcmp - Arbitary string compare                                  /*{{{*/
 380 // ---------------------------------------------------------------------
 381 /* This safely compares two non-null terminated strings of arbitary
 382    length */
 383 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 384 {
 385    for (; A != AEnd && B != BEnd; A++, B++)
 386       if (*A != *B)
 387          break;
 388
 389    if (A == AEnd && B == BEnd)
 390       return 0;
 391    if (A == AEnd)
 392       return 1;
 393    if (B == BEnd)
 394       return -1;
 395    if (*A < *B)
 396       return -1;
 397    return 1;
 398 }
 399                                                                         /*}}}*/
 400 // stringcasecmp - Arbitary case insensitive string compare             /*{{{*/
 401 // ---------------------------------------------------------------------
 402 /* */
 403 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
 404 {
 405    for (; A != AEnd && B != BEnd; A++, B++)
 406       if (toupper(*A) != toupper(*B))
 407          break;
 408
 409    if (A == AEnd && B == BEnd)
 410       return 0;
 411    if (A == AEnd)
 412       return 1;
 413    if (B == BEnd)
 414       return -1;
 415    if (toupper(*A) < toupper(*B))
 416       return -1;
 417    return 1;
 418 }
 419                                                                         /*}}}*/
 420 // LookupTag - Lookup the value of a tag in a taged string              /*{{{*/
 421 // ---------------------------------------------------------------------
 422 /* The format is like those used in package files and the method
 423    communication system */
 424 string LookupTag(string Message,const char *Tag,const char *Default)
 425 {
 426    // Look for a matching tag.
 427    int Length = strlen(Tag);
 428    for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
 429    {
 430       // Found the tag
 431       if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
 432       {
 433          // Find the end of line and strip the leading/trailing spaces
 434          string::iterator J;
 435          I += Length + 1;
 436          for (; isspace(*I) != 0 && I < Message.end(); I++);
 437          for (J = I; *J != '\n' && J < Message.end(); J++);
 438          for (; J > I && isspace(J[-1]) != 0; J--);
 439
 440          return string(I,J-I);
 441       }
 442
 443       for (; *I != '\n' && I < Message.end(); I++);
 444    }
 445
 446    // Failed to find a match
 447    if (Default == 0)
 448       return string();
 449    return Default;
 450 }
 451                                                                         /*}}}*/
 452 // StringToBool - Converts a string into a boolean                      /*{{{*/
 453 // ---------------------------------------------------------------------
 454 /* This inspects the string to see if it is true or if it is false and
 455    then returns the result. Several varients on true/false are checked. */
 456 int StringToBool(string Text,int Default = -1)
 457 {
 458    char *End;
 459    int Res = strtol(Text.c_str(),&End,0);
 460    if (End != Text.c_str() && Res >= 0 && Res <= 1)
 461       return Res;
 462
 463    // Check for positives
 464    if (strcasecmp(Text.c_str(),"no") == 0 ||
 465        strcasecmp(Text.c_str(),"false") == 0 ||
 466        strcasecmp(Text.c_str(),"without") == 0 ||
 467        strcasecmp(Text.c_str(),"disable") == 0)
 468       return 0;
 469
 470    // Check for negatives
 471    if (strcasecmp(Text.c_str(),"yes") == 0 ||
 472        strcasecmp(Text.c_str(),"true") == 0 ||
 473        strcasecmp(Text.c_str(),"with") == 0 ||
 474        strcasecmp(Text.c_str(),"enable") == 0)
 475       return 1;
 476
 477    return Default;
 478 }
 479                                                                         /*}}}*/
 480 // TimeRFC1123 - Convert a time_t into RFC1123 format                   /*{{{*/
 481 // ---------------------------------------------------------------------
 482 /* This converts a time_t into a string time representation that is
 483    year 2000 complient and timezone neutral */
 484 string TimeRFC1123(time_t Date)
 485 {
 486    struct tm Conv = *gmtime(&Date);
 487    char Buf[300];
 488
 489    const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
 490    const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
 491                           "Aug","Sep","Oct","Nov","Dec"};
 492
 493    sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
 494            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
 495            Conv.tm_min,Conv.tm_sec);
 496    return Buf;
 497 }
 498                                                                         /*}}}*/
 499 // ReadMessages - Read messages from the FD                             /*{{{*/
 500 // ---------------------------------------------------------------------
 501 /* This pulls full messages from the input FD into the message buffer.
 502    It assumes that messages will not pause during transit so no
 503    fancy buffering is used. */
 504 bool ReadMessages(int Fd, vector<string> &List)
 505 {
 506    char Buffer[4000];
 507    char *End = Buffer;
 508
 509    while (1)
 510    {
 511       int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
 512
 513       // Process is dead, this is kind of bad..
 514       if (Res == 0)
 515          return false;
 516
 517       // No data
 518       if (Res <= 0)
 519          return true;
 520
 521       End += Res;
 522
 523       // Look for the end of the message
 524       for (char *I = Buffer; I + 1 < End; I++)
 525       {
 526          if (I[0] != '\n' || I[1] != '\n')
 527             continue;
 528
 529          // Pull the message out
 530          string Message(Buffer,0,I-Buffer);
 531
 532          // Fix up the buffer
 533          for (; I < End && *I == '\n'; I++);
 534          End -= I-Buffer;
 535          memmove(Buffer,I,End-Buffer);
 536          I = Buffer;
 537
 538          List.push_back(Message);
 539       }
 540       if (End == Buffer)
 541          return true;
 542
 543       if (WaitFd(Fd) == false)
 544          return false;
 545    }
 546 }
 547                                                                         /*}}}*/
 548 // MonthConv - Converts a month string into a number                    /*{{{*/
 549 // ---------------------------------------------------------------------
 550 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
 551    Made it a bit more robust with a few touppers though. */
 552 static int MonthConv(char *Month)
 553 {
 554    switch (toupper(*Month))
 555    {
 556       case 'A':
 557       return toupper(Month[1]) == 'P'?3:7;
 558       case 'D':
 559       return 11;
 560       case 'F':
 561       return 1;
 562       case 'J':
 563       if (toupper(Month[1]) == 'A')
 564          return 0;
 565       return toupper(Month[2]) == 'N'?5:6;
 566       case 'M':
 567       return toupper(Month[2]) == 'R'?2:4;
 568       case 'N':
 569       return 10;
 570       case 'O':
 571       return 9;
 572       case 'S':
 573       return 8;
 574
 575       // Pretend it is January..
 576       default:
 577       return 0;
 578    }
 579 }
 580                                                                         /*}}}*/
 581 // StrToTime - Converts a string into a time_t                          /*{{{*/
 582 // ---------------------------------------------------------------------
 583 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
 584    and the C library asctime format. It requires the GNU library function
 585    'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
 586    reason the C library does not provide any such function :<*/
 587 bool StrToTime(string Val,time_t &Result)
 588 {
 589    struct tm Tm;
 590    char Month[10];
 591    const char *I = Val.c_str();
 592
 593    // Skip the day of the week
 594    for (;*I != 0  && *I != ' '; I++);
 595
 596    // Handle RFC 1123 time
 597    if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
 598               &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
 599    {
 600       // Handle RFC 1036 time
 601       if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
 602                  &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
 603          Tm.tm_year += 1900;
 604       else
 605       {
 606          // asctime format
 607          if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
 608                     &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
 609             return false;
 610       }
 611    }
 612
 613    Tm.tm_isdst = 0;
 614    Tm.tm_mon = MonthConv(Month);
 615    Tm.tm_year -= 1900;
 616
 617    // Convert to local time and then to GMT
 618    Result = timegm(&Tm);
 619    return true;
 620 }
 621                                                                         /*}}}*/