1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.26 1999/06/27 04:55:54 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
32 // strstrip - Remove white space from the front and back of a string /*{{{*/
33 // ---------------------------------------------------------------------
34 /* This is handy to use when parsing a file. It also removes \n's left
35 over from fgets and company */
36 char *_strstrip(char *String
)
38 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
43 char *End
= String
+ strlen(String
) - 1;
44 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
45 *End
== '\r'); End
--);
51 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
52 // ---------------------------------------------------------------------
54 char *_strtabexpand(char *String
,size_t Len
)
56 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
60 if (I
+ 8 > String
+ Len
)
66 /* Assume the start of the string is 0 and find the next 8 char
72 Len
= 8 - ((String
- I
) % 8);
80 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
81 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
86 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
87 // ---------------------------------------------------------------------
88 /* This grabs a single word, converts any % escaped characters to their
89 proper values and advances the pointer. Double quotes are understood
90 and stripped out as well. This is for URI/URL parsing. */
91 bool ParseQuoteWord(const char *&String
,string
&Res
)
93 // Skip leading whitespace
94 const char *C
= String
;
95 for (;*C
!= 0 && *C
== ' '; C
++);
99 // Jump to the next word
100 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
104 for (C
++;*C
!= 0 && *C
!= '"'; C
++);
110 // Now de-quote characters
113 const char *Start
= String
;
115 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
117 if (*Start
== '%' && Start
+ 2 < C
)
122 *I
= (char)strtol(Tmp
,0,16);
135 // Skip ending white space
136 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
141 // ParseCWord - Parses a string like a C "" expression /*{{{*/
142 // ---------------------------------------------------------------------
143 /* This expects a series of space separated strings enclosed in ""'s.
144 It concatenates the ""'s into a single string. */
145 bool ParseCWord(const char *String
,string
&Res
)
147 // Skip leading whitespace
148 const char *C
= String
;
149 for (;*C
!= 0 && *C
== ' '; C
++);
155 if (strlen(String
) >= sizeof(Buffer
))
162 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
171 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
173 if (isspace(*C
) == 0)
182 // QuoteString - Convert a string into quoted form /*{{{*/
183 // ---------------------------------------------------------------------
/* Str is the text to encode and Bad is a NUL terminated list of additional
   characters that must be escaped. Every byte that appears in Bad, or that
   lies outside the range 0x21..0x7E, is written as '%' followed by exactly
   two lower case hex digits; all other bytes are copied through unchanged.
   The encoded string is returned. */
string QuoteString(string Str,const char *Bad)
{
   static const char Hex[] = "0123456789abcdef";

   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
   {
      /* Work on the unsigned byte value. A plain char may be signed, in
         which case bytes >= 0x80 would sign extend into an eight digit
         escape that a two digit decoder can not undo. */
      const unsigned char C = static_cast<unsigned char>(*I);

      // The range test also covers NUL, so strchr never sees a 0 to match
      if (C <= 0x20 || C >= 0x7F || strchr(Bad,C) != 0)
      {
         Res += '%';
         Res += Hex[C >> 4];
         Res += Hex[C & 0xF];
      }
      else
         Res += *I;
   }
   return Res;
}
203 // DeQuoteString - Convert a string from quoted form /*{{{*/
204 // ---------------------------------------------------------------------
205 /* This undoes QuoteString */
206 string
DeQuoteString(string Str
)
209 for (string::iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
211 if (*I
== '%' && I
+ 2 < Str
.end())
217 Res
+= (char)strtol(Tmp
,0,16);
228 // SizeToStr - Convert a long into a human readable size /*{{{*/
229 // ---------------------------------------------------------------------
230 /* A max of 4 digits are shown before conversion to the next highest unit.
231 The max length of the string will be 5 chars unless the size is > 10
233 string
SizeToStr(double Size
)
242 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
243 ExaBytes, ZettaBytes, YottaBytes */
244 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
248 if (ASize
< 100 && I
!= 0)
250 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
256 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
266 // TimeToStr - Convert the time into a string /*{{{*/
267 // ---------------------------------------------------------------------
268 /* Converts a number of seconds to a hms format */
269 string
TimeToStr(unsigned long Sec
)
277 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
283 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
289 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
293 sprintf(S
,"%lis",Sec
);
300 // SubstVar - Substitute a string for another string /*{{{*/
301 // ---------------------------------------------------------------------
302 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns a copy of Str in which each non-overlapping occurrence of Subst,
   scanning left to right, has been replaced by Contents. Inserted text is
   not rescanned. If Subst is empty or does not occur, Str is returned
   unchanged. */
string SubstVar(string Str,string Subst,string Contents)
{
   /* An empty pattern is found at every offset without ever advancing, so
      it would spin forever in the loop below */
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Substring operations take a length, not an end position
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing matched, hand back the input untouched
   if (OldPos == 0)
      return Str;

   // Append whatever follows the final match
   return Temp + string(Str,OldPos);
}
322 // URItoFileName - Convert the uri into a unique file name /*{{{*/
323 // ---------------------------------------------------------------------
324 /* This converts a URI into a safe filename. It quotes all unsafe characters
325 and converts / to _ and removes the scheme identifier. The resulting
326 file name should be unique and never occur again for a different file */
327 string
URItoFileName(string URI
)
329 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
330 for (; I
< URI
.end() && *I
== '/'; I
++);
332 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
333 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
334 string::iterator J
= URI
.begin();
335 for (; J
!= URI
.end(); J
++)
341 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
342 // ---------------------------------------------------------------------
343 /* This routine performs a base64 transformation on a string. It was ripped
344 from wget and then patched and bug fixed.
346 This spec can be found in rfc2045 */
/* Returns the base64 encoding of the bytes of S using the standard
   alphabet (A-Z, a-z, 0-9, '+', '/'). The output is padded with '=' so its
   length is always a multiple of four; an empty input gives an empty
   result. No line breaks are inserted. */
string Base64Encode(string S)
{
   // Conversion table, index is the 6 bit group value
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   const string::size_type Len = S.length();

   // Pre-allocate some space
   string Final;
   Final.reserve((4*Len + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits. Indexing by offset keeps us from
      ever forming an iterator beyond the end of the string. */
   for (string::size_type I = 0; I < Len; I += 3)
   {
      /* The bytes must be unsigned; with a signed char any input byte
         >= 0x80 would shift to a negative value and index before tbl */
      unsigned char Bits[3] = {0,0,0};
      for (string::size_type J = 0; J != 3 && I + J < Len; J++)
         Bits[J] = static_cast<unsigned char>(S[I + J]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // Only one input byte was left in this group
      if (I + 1 >= Len)
         break;
      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Only two input bytes were left in this group
      if (I + 2 >= Len)
         break;
      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
398 // stringcmp - Arbitrary string compare /*{{{*/
399 // ---------------------------------------------------------------------
400 /* This safely compares two non-null terminated strings of arbitrary
402 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
404 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
408 if (A
== AEnd
&& B
== BEnd
)
419 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
420 // ---------------------------------------------------------------------
422 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
424 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
425 if (toupper(*A
) != toupper(*B
))
428 if (A
== AEnd
&& B
== BEnd
)
434 if (toupper(*A
) < toupper(*B
))
439 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
440 // ---------------------------------------------------------------------
441 /* The format is like those used in package files and the method
442 communication system */
443 string
LookupTag(string Message
,const char *Tag
,const char *Default
)
445 // Look for a matching tag.
446 int Length
= strlen(Tag
);
447 for (string::iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
450 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
452 // Find the end of line and strip the leading/trailing spaces
455 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
456 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
457 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
459 return string(I
,J
-I
);
462 for (; *I
!= '\n' && I
< Message
.end(); I
++);
465 // Failed to find a match
471 // StringToBool - Converts a string into a boolean /*{{{*/
472 // ---------------------------------------------------------------------
473 /* This inspects the string to see if it is true or if it is false and
474 then returns the result. Several variants on true/false are checked. */
475 int StringToBool(string Text
,int Default
= -1)
478 int Res
= strtol(Text
.c_str(),&End
,0);
479 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
482 // Check for negatives
483 if (strcasecmp(Text
.c_str(),"no") == 0 ||
484 strcasecmp(Text
.c_str(),"false") == 0 ||
485 strcasecmp(Text
.c_str(),"without") == 0 ||
486 strcasecmp(Text
.c_str(),"off") == 0 ||
487 strcasecmp(Text
.c_str(),"disable") == 0)
490 // Check for positives
491 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
492 strcasecmp(Text
.c_str(),"true") == 0 ||
493 strcasecmp(Text
.c_str(),"with") == 0 ||
494 strcasecmp(Text
.c_str(),"on") == 0 ||
495 strcasecmp(Text
.c_str(),"enable") == 0)
501 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
502 // ---------------------------------------------------------------------
503 /* This converts a time_t into a string time representation that is
504 year 2000 compliant and timezone neutral */
/* Formats Date, interpreted as UTC, as "Day, DD Mon YYYY HH:MM:SS GMT"
   with English day and month abbreviations regardless of locale. An empty
   string is returned if Date can not be broken down into a calendar
   time. */
string TimeRFC1123(time_t Date)
{
   // gmtime hands back a null pointer when the value is not representable
   const struct tm * const Broken = gmtime(&Date);
   if (Broken == 0)
      return string();

   // Copy it out, the library owns the storage gmtime points at
   const struct tm Conv = *Broken;

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   // Bounded write so an unexpectedly wide year can never overrun Buf
   char Buf[64];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
520 // ReadMessages - Read messages from the FD /*{{{*/
521 // ---------------------------------------------------------------------
522 /* This pulls full messages from the input FD into the message buffer.
523 It assumes that messages will not pause during transit so no
524 fancy buffering is used. */
525 bool ReadMessages(int Fd
, vector
<string
> &List
)
532 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
533 if (Res
< 0 && errno
== EINTR
)
536 // Process is dead, this is kind of bad..
546 // Look for the end of the message
547 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
549 if (I
[0] != '\n' || I
[1] != '\n')
552 // Pull the message out
553 string
Message(Buffer
,0,I
-Buffer
);
556 for (; I
< End
&& *I
== '\n'; I
++);
558 memmove(Buffer
,I
,End
-Buffer
);
561 List
.push_back(Message
);
566 if (WaitFd(Fd
) == false)
571 // MonthConv - Converts a month string into a number /*{{{*/
572 // ---------------------------------------------------------------------
573 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
574 Made it a bit more robust with a few touppers though. */
575 static int MonthConv(char *Month
)
577 switch (toupper(*Month
))
580 return toupper(Month
[1]) == 'P'?3:7;
586 if (toupper(Month
[1]) == 'A')
588 return toupper(Month
[2]) == 'N'?5:6;
590 return toupper(Month
[2]) == 'R'?2:4;
598 // Pretend it is January..
604 // timegm - Internal timegm function if gnu is not available /*{{{*/
605 // ---------------------------------------------------------------------
606 /* Ripped this evil little function from wget - I prefer the use of
607 GNU timegm if possible as this technique will have interesting problems
608 with leap seconds, timezones and other.
610 Converts struct tm to time_t, assuming the data in tm is UTC rather
611 than local timezone (mktime assumes the latter).
613 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
614 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
615 #ifndef __USE_MISC // glib sets this
616 static time_t timegm(struct tm
*t
)
623 tb
= mktime (gmtime (&tl
));
624 return (tl
<= tb
? (tl
+ (tl
- tb
)) : (tl
- (tb
- tl
)));
628 // StrToTime - Converts a string into a time_t /*{{{*/
629 // ---------------------------------------------------------------------
630 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
631 and the C library asctime format. It requires the GNU library function
632 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
633 reason the C library does not provide any such function :<*/
634 bool StrToTime(string Val
,time_t &Result
)
638 const char *I
= Val
.c_str();
640 // Skip the day of the week
641 for (;*I
!= 0 && *I
!= ' '; I
++);
643 // Handle RFC 1123 time
644 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
645 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
647 // Handle RFC 1036 time
648 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
649 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
654 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
655 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
661 Tm
.tm_mon
= MonthConv(Month
);
664 // Convert to local time and then to GMT
665 Result
= timegm(&Tm
);
670 // URI::CopyFrom - Copy from an object /*{{{*/
671 // ---------------------------------------------------------------------
672 /* This parses the URI into all of its components */
673 void URI::CopyFrom(string U
)
675 string::const_iterator I
= U
.begin();
677 // Locate the first colon, this separates the scheme
678 for (; I
< U
.end() && *I
!= ':' ; I
++);
679 string::const_iterator FirstColon
= I
;
681 /* Determine if this is a host type URI with a leading double //
682 and then search for the first single / */
683 string::const_iterator SingleSlash
= I
;
684 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
686 for (; SingleSlash
< U
.end() && *SingleSlash
!= '/'; SingleSlash
++);
687 if (SingleSlash
> U
.end())
688 SingleSlash
= U
.end();
690 // We can now write the access and path specifiers
691 Access
= string(U
,0,FirstColon
- U
.begin());
692 if (SingleSlash
!= U
.end())
693 Path
= string(U
,SingleSlash
- U
.begin());
694 if (Path
.empty() == true)
697 // Now we attempt to locate a user:pass@host fragment
698 if (FirstColon
[1] == '/' && FirstColon
[2] == '/')
702 if (FirstColon
>= U
.end())
705 if (FirstColon
> SingleSlash
)
706 FirstColon
= SingleSlash
;
712 for (; I
< SingleSlash
&& *I
!= ':'; I
++);
713 string::const_iterator SecondColon
= I
;
715 // Search for the @ after the colon
716 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
717 string::const_iterator At
= I
;
719 // Now write the host and user/pass
720 if (At
== SingleSlash
)
722 if (FirstColon
< SingleSlash
)
723 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
727 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
728 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
729 if (SecondColon
< At
)
730 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
733 // Now we parse off a port number from the hostname
735 string::size_type Pos
= Host
.rfind(':');
736 if (Pos
== string::npos
)
739 Port
= atoi(string(Host
,Pos
+1).c_str());
740 Host
= string(Host
,0,Pos
);
743 // URI::operator string - Convert the URI to a string /*{{{*/
744 // ---------------------------------------------------------------------
746 URI::operator string()
748 string Res
= Access
+ ':';
749 if (Host
.empty() == false)
752 if (User
.empty() == false)
755 if (Password
.empty() == false)
756 Res
+= ":" + Password
;
763 sprintf(S
,":%u",Port
);
768 if (Path
.empty() == false)