1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.34 2000/01/16 05:36:17 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* Skips leading spaces and tabs, then truncates the string in place after
   the last character that is not a space, tab, \n or \r, so line endings
   left over from fgets and company are removed as well. The string is
   modified in place and the returned pointer points into the caller's
   buffer (at the first non-blank character). */
char *_strstrip(char *String)
{
   // Only ' ' and '\t' are skipped at the front
   while (*String == ' ' || *String == '\t')
      String++;

   /* End is kept one past the last retained character. This avoids ever
      forming a pointer before String and makes the empty and all-blank
      cases fall out naturally without a special check. */
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' || End[-1] == '\n' ||
           End[-1] == '\r'))
      End--;
   *End = 0;

   return String;
}
51 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
52 // ---------------------------------------------------------------------
54 char *_strtabexpand(char *String
,size_t Len
)
56 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
60 if (I
+ 8 > String
+ Len
)
66 /* Assume the start of the string is 0 and find the next 8 char
72 Len
= 8 - ((String
- I
) % 8);
80 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
81 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
86 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
87 // ---------------------------------------------------------------------
88 /* This grabs a single word, converts any % escaped characters to their
89 proper values and advances the pointer. Double quotes are understood
90 and stripped out as well. This is for URI/URL parsing. It also can
91 understand [] brackets.*/
92 bool ParseQuoteWord(const char *&String
,string
&Res
)
94 // Skip leading whitespace
95 const char *C
= String
;
96 for (;*C
!= 0 && *C
== ' '; C
++);
100 // Jump to the next word
101 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
105 for (C
++; *C
!= 0 && *C
!= '"'; C
++);
111 for (C
++; *C
!= 0 && *C
!= ']'; C
++);
117 // Now de-quote characters
120 const char *Start
= String
;
122 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
124 if (*Start
== '%' && Start
+ 2 < C
)
129 *I
= (char)strtol(Tmp
,0,16);
142 // Skip ending white space
143 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
148 // ParseCWord - Parses a string like a C "" expression /*{{{*/
149 // ---------------------------------------------------------------------
150 /* This expects a series of space separated strings enclosed in ""'s.
151 It concatenates the ""'s into a single string. */
152 bool ParseCWord(const char *String
,string
&Res
)
154 // Skip leading whitespace
155 const char *C
= String
;
156 for (;*C
!= 0 && *C
== ' '; C
++);
162 if (strlen(String
) >= sizeof(Buffer
))
169 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
178 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
180 if (isspace(*C
) == 0)
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that appears in Bad, is
   not printable, or lies outside the range 0x21-0x7E is replaced by a
   %xx escape made of two lower case hex digits. All other characters are
   copied through unchanged. */
std::string QuoteString(std::string Str,const char *Bad)
{
   std::string Res;
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the unsigned value of the byte. Where plain char is signed
         a byte >= 0x80 is negative, which is not a valid argument for
         isprint and would be sign extended by an int cast, producing
         escapes such as %ffffffe9 instead of %e9. */
      const unsigned char Ch = static_cast<unsigned char>(*I);
      if (Ch <= 0x20 || Ch >= 0x7F || isprint(Ch) == 0 ||
          strchr(Bad,Ch) != 0)
      {
         // '%' + two hex digits + terminator
         char Buf[4];
         snprintf(Buf,sizeof(Buf),"%%%02x",static_cast<unsigned>(Ch));
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
210 // DeQuoteString - Convert a string from quoted form /*{{{*/
211 // ---------------------------------------------------------------------
212 /* This undoes QuoteString */
213 string
DeQuoteString(string Str
)
216 for (string::iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
218 if (*I
== '%' && I
+ 2 < Str
.end())
224 Res
+= (char)strtol(Tmp
,0,16);
235 // SizeToStr - Convert a long into a human readable size /*{{{*/
236 // ---------------------------------------------------------------------
237 /* A max of 4 digits are shown before conversion to the next highest unit.
238 The max length of the string will be 5 chars unless the size is > 10
240 string
SizeToStr(double Size
)
249 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
250 ExaBytes, ZettaBytes, YottaBytes */
251 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
255 if (ASize
< 100 && I
!= 0)
257 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
263 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
// TimeToStr - Convert the time into a string				/*{{{*/
// ---------------------------------------------------------------------
/* Converts a number of seconds to a d/h/m/s format. Only the units that
   are needed are shown: "Nd Nh Nm Ns" above one day, "NhNmNs" above one
   hour, "NmNs" above one minute and "Ns" otherwise. Each threshold is a
   strict greater-than, so exactly 60 seconds is printed as "60s". */
std::string TimeToStr(unsigned long Sec)
{
   const unsigned long Minute = 60;
   const unsigned long Hour = 60*Minute;
   const unsigned long Day = 24*Hour;

   /* The values are unsigned long, so they are printed with %lu (a signed
      conversion does not match the argument type), and snprintf keeps the
      write bounded by the buffer size. */
   char S[300];
   if (Sec > Day)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Sec/Day,(Sec/Hour) % 24,
               (Sec/Minute) % 60,Sec % 60);
   else if (Sec > Hour)
      snprintf(S,sizeof(S),"%luh%lum%lus",Sec/Hour,(Sec/Minute) % 60,
               Sec % 60);
   else if (Sec > Minute)
      snprintf(S,sizeof(S),"%lum%lus",Sec/Minute,Sec % 60);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every non-overlapping occurrence of Subst
   has been replaced with Contents, scanning left to right. If Subst does
   not occur (or is empty) Str is returned unchanged. */
std::string SubstVar(std::string Str,std::string Subst,std::string Contents)
{
   /* An empty pattern is found at every position without consuming
      anything, so the scan below would never advance. */
   if (Subst.empty() == true)
      return Str;

   std::string Temp;
   std::string::size_type OldPos = 0;
   std::string::size_type Pos = 0;
   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != std::string::npos)
   {
      /* Copy the text between the previous match and this one. The last
         argument is a character count, not an end position, so it must be
         Pos - OldPos; passing Pos copies too much for every match after
         the first. */
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was replaced
   if (OldPos == 0)
      return Str;

   // Append whatever follows the final match
   Temp.append(Str,OldPos,std::string::npos);
   return Temp;
}
329 // URItoFileName - Convert the uri into a unique file name /*{{{*/
330 // ---------------------------------------------------------------------
331 /* This converts a URI into a safe filename. It quotes all unsafe characters
332 and converts / to _ and removes the scheme identifier. The resulting
333 file name should be unique and never occur again for a different file */
334 string
URItoFileName(string URI
)
336 // Nuke 'sensitive' items
339 U
.Password
= string();
342 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
343 URI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
344 string::iterator J
= URI
.begin();
345 for (; J
!= URI
.end(); J
++)
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string and returns
   the encoded text, padded with '=' to a multiple of four characters.
   It was ripped from wget and then patched and bug fixed.

   The spec can be found in rfc2045 */
std::string Base64Encode(std::string S)
{
   // Conversion table, indexed by the 6 bit value
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. An index
      is used instead of an iterator so that stepping by three never moves
      past the end of the string. */
   const std::string::size_type Len = S.length();
   for (std::string::size_type I = 0; I < Len; I += 3)
   {
      /* The bytes must be unsigned: with a signed char any input byte
         >= 0x80 is negative and the shifts below would yield a negative
         table index. Missing bytes in the last group stay zero. */
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = static_cast<unsigned char>(S[I]);
      if (I + 1 < Len)
         Bits[1] = static_cast<unsigned char>(S[I + 1]);
      if (I + 2 < Len)
         Bits[2] = static_cast<unsigned char>(S[I + 2]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // Only one input byte in this group
      if (I + 1 >= Len)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Only two input bytes in this group
      if (I + 2 >= Len)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
408 // stringcmp - Arbitrary string compare /*{{{*/
409 // ---------------------------------------------------------------------
410 /* This safely compares two non-null terminated strings of arbitrary
412 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
414 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
418 if (A
== AEnd
&& B
== BEnd
)
429 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
430 // ---------------------------------------------------------------------
432 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
434 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
435 if (toupper(*A
) != toupper(*B
))
438 if (A
== AEnd
&& B
== BEnd
)
444 if (toupper(*A
) < toupper(*B
))
// LookupTag - Lookup the value of a tag in a tagged string		/*{{{*/
// ---------------------------------------------------------------------
/* The format is like those used in package files and the method
   communication system: lines of the form "Tag: value". The tag is only
   looked for at the start of a line and is compared without regard to
   case. The value of the first matching line is returned with leading and
   trailing white space removed. If no line matches, Default is returned,
   or an empty string when Default is null. */
std::string LookupTag(std::string Message,const char *Tag,const char *Default)
{
   const std::string::size_type Length = strlen(Tag);
   const std::string::size_type Size = Message.length();

   /* Indices are used rather than iterators so that every bound is checked
      before the character is read and nothing is advanced past the end. */
   std::string::size_type I = 0;
   while (I + Length < Size)
   {
      // A candidate line has the ':' exactly Length characters in
      if (Message[I + Length] == ':')
      {
         // Case insensitive comparison of the tag name
         std::string::size_type K = 0;
         while (K != Length &&
                toupper(static_cast<unsigned char>(Message[I + K])) ==
                toupper(static_cast<unsigned char>(Tag[K])))
            K++;

         if (K == Length)
         {
            // Find the end of line and strip the leading/trailing spaces
            std::string::size_type Start = I + Length + 1;
            while (Start < Size &&
                   isspace(static_cast<unsigned char>(Message[Start])) != 0)
               Start++;

            std::string::size_type Stop = Start;
            while (Stop < Size && Message[Stop] != '\n')
               Stop++;
            while (Stop > Start &&
                   isspace(static_cast<unsigned char>(Message[Stop - 1])) != 0)
               Stop--;

            return std::string(Message,Start,Stop - Start);
         }
      }

      // No match here, move to the character after the next newline
      while (I < Size && Message[I] != '\n')
         I++;
      I++;
   }

   // Failed to find a match
   if (Default == 0)
      return std::string();
   return Default;
}
481 // StringToBool - Converts a string into a boolean /*{{{*/
482 // ---------------------------------------------------------------------
483 /* This inspects the string to see if it is true or if it is false and
484 then returns the result. Several variants on true/false are checked. */
485 int StringToBool(string Text
,int Default
= -1)
488 int Res
= strtol(Text
.c_str(),&End
,0);
489 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
492 // Check for negatives
493 if (strcasecmp(Text
.c_str(),"no") == 0 ||
494 strcasecmp(Text
.c_str(),"false") == 0 ||
495 strcasecmp(Text
.c_str(),"without") == 0 ||
496 strcasecmp(Text
.c_str(),"off") == 0 ||
497 strcasecmp(Text
.c_str(),"disable") == 0)
500 // Check for positives
501 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
502 strcasecmp(Text
.c_str(),"true") == 0 ||
503 strcasecmp(Text
.c_str(),"with") == 0 ||
504 strcasecmp(Text
.c_str(),"on") == 0 ||
505 strcasecmp(Text
.c_str(),"enable") == 0)
511 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
512 // ---------------------------------------------------------------------
513 /* This converts a time_t into a string time representation that is
514 year 2000 compliant and timezone neutral */
515 string
TimeRFC1123(time_t Date
)
517 struct tm Conv
= *gmtime(&Date
);
520 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
521 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
522 "Aug","Sep","Oct","Nov","Dec"};
524 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
525 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
526 Conv
.tm_min
,Conv
.tm_sec
);
530 // ReadMessages - Read messages from the FD /*{{{*/
531 // ---------------------------------------------------------------------
532 /* This pulls full messages from the input FD into the message buffer.
533 It assumes that messages will not pause during transit so no
534 fancy buffering is used. */
535 bool ReadMessages(int Fd
, vector
<string
> &List
)
542 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
543 if (Res
< 0 && errno
== EINTR
)
546 // Process is dead, this is kind of bad..
556 // Look for the end of the message
557 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
559 if (I
[0] != '\n' || I
[1] != '\n')
562 // Pull the message out
563 string
Message(Buffer
,0,I
-Buffer
);
566 for (; I
< End
&& *I
== '\n'; I
++);
568 memmove(Buffer
,I
,End
-Buffer
);
571 List
.push_back(Message
);
576 if (WaitFd(Fd
) == false)
581 // MonthConv - Converts a month string into a number /*{{{*/
582 // ---------------------------------------------------------------------
583 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
584 Made it a bit more robust with a few touppers though. */
585 static int MonthConv(char *Month
)
587 switch (toupper(*Month
))
590 return toupper(Month
[1]) == 'P'?3:7;
596 if (toupper(Month
[1]) == 'A')
598 return toupper(Month
[2]) == 'N'?5:6;
600 return toupper(Month
[2]) == 'R'?2:4;
608 // Pretend it is January..
614 // timegm - Internal timegm function if gnu is not available /*{{{*/
615 // ---------------------------------------------------------------------
616 /* Ripped this evil little function from wget - I prefer the use of
617 GNU timegm if possible as this technique will have interesting problems
618 with leap seconds, timezones and other.
620 Converts struct tm to time_t, assuming the data in tm is UTC rather
621 than local timezone (mktime assumes the latter).
623 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
624 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
625 #ifndef __USE_MISC // glib sets this
626 static time_t timegm(struct tm
*t
)
633 tb
= mktime (gmtime (&tl
));
634 return (tl
<= tb
? (tl
+ (tl
- tb
)) : (tl
- (tb
- tl
)));
638 // StrToTime - Converts a string into a time_t /*{{{*/
639 // ---------------------------------------------------------------------
640 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
641 and the C library asctime format. It requires the GNU library function
642 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
643 reason the C library does not provide any such function :< This also
644 handles the weird, but unambiguous FTP time format*/
645 bool StrToTime(string Val
,time_t &Result
)
649 const char *I
= Val
.c_str();
651 // Skip the day of the week
652 for (;*I
!= 0 && *I
!= ' '; I
++);
654 // Handle RFC 1123 time
656 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
657 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
659 // Handle RFC 1036 time
660 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
661 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
666 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
667 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
670 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
671 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
680 Tm
.tm_mon
= MonthConv(Month
);
683 // Convert to local time and then to GMT
684 Result
= timegm(&Tm
);
688 // StrToNum - Convert a fixed length string to a number /*{{{*/
689 // ---------------------------------------------------------------------
690 /* This is used in decoding the crazy fixed length string headers in
692 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
695 if (Len
>= sizeof(S
))
700 // All spaces is a zero
703 for (I
= 0; S
[I
] == ' '; I
++);
708 Res
= strtoul(S
,&End
,Base
);
715 // HexDigit - Convert a hex character into an integer /*{{{*/
716 // ---------------------------------------------------------------------
717 /* Helper for Hex2Num */
718 static int HexDigit(int c
)
720 if (c
>= '0' && c
<= '9')
722 if (c
>= 'a' && c
<= 'f')
724 if (c
>= 'A' && c
<= 'F')
729 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
730 // ---------------------------------------------------------------------
731 /* The length of the buffer must be exactly 1/2 the length of the string. */
732 bool Hex2Num(const char *Start
,const char *End
,unsigned char *Num
,
735 if (End
- Start
!= (signed)(Length
*2))
738 // Convert each digit. We store it in the same order as the string
740 for (const char *I
= Start
; I
< End
;J
++, I
+= 2)
742 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
745 Num
[J
] = HexDigit(I
[0]) << 4;
746 Num
[J
] += HexDigit(I
[1]);
753 // URI::CopyFrom - Copy from an object /*{{{*/
754 // ---------------------------------------------------------------------
755 /* This parses the URI into all of its components */
756 void URI::CopyFrom(string U
)
758 string::const_iterator I
= U
.begin();
760 // Locate the first colon, this separates the scheme
761 for (; I
< U
.end() && *I
!= ':' ; I
++);
762 string::const_iterator FirstColon
= I
;
764 /* Determine if this is a host type URI with a leading double //
765 and then search for the first single / */
766 string::const_iterator SingleSlash
= I
;
767 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
770 /* Find the / indicating the end of the hostname, ignoring /'s in the
772 bool InBracket
= false;
773 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); SingleSlash
++)
775 if (*SingleSlash
== '[')
777 if (InBracket
== true && *SingleSlash
== ']')
781 if (SingleSlash
> U
.end())
782 SingleSlash
= U
.end();
784 // We can now write the access and path specifiers
785 Access
= string(U
,0,FirstColon
- U
.begin());
786 if (SingleSlash
!= U
.end())
787 Path
= string(U
,SingleSlash
- U
.begin());
788 if (Path
.empty() == true)
791 // Now we attempt to locate a user:pass@host fragment
792 if (FirstColon
[1] == '/' && FirstColon
[2] == '/')
796 if (FirstColon
>= U
.end())
799 if (FirstColon
> SingleSlash
)
800 FirstColon
= SingleSlash
;
806 for (; I
< SingleSlash
&& *I
!= ':'; I
++);
807 string::const_iterator SecondColon
= I
;
809 // Search for the @ after the colon
810 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
811 string::const_iterator At
= I
;
813 // Now write the host and user/pass
814 if (At
== SingleSlash
)
816 if (FirstColon
< SingleSlash
)
817 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
821 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
822 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
823 if (SecondColon
< At
)
824 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
827 // Now we parse the RFC 2732 [] hostnames.
828 unsigned long PortEnd
= 0;
830 for (unsigned I
= 0; I
!= Host
.length();)
839 if (InBracket
== true && Host
[I
] == ']')
850 if (InBracket
== true)
856 // Now we parse off a port number from the hostname
858 string::size_type Pos
= Host
.rfind(':');
859 if (Pos
== string::npos
|| Pos
< PortEnd
)
862 Port
= atoi(string(Host
,Pos
+1).c_str());
863 Host
= string(Host
,0,Pos
);
866 // URI::operator string - Convert the URI to a string /*{{{*/
867 // ---------------------------------------------------------------------
869 URI::operator string()
873 if (Access
.empty() == false)
876 if (Host
.empty() == false)
878 if (Access
.empty() == false)
881 if (User
.empty() == false)
884 if (Password
.empty() == false)
885 Res
+= ":" + Password
;
889 // Add RFC 2732 escaping characters
890 if (Access
.empty() == false &&
891 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
892 Res
+= '[' + Host
+ ']';
899 sprintf(S
,":%u",Port
);
904 if (Path
.empty() == false)