1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.21 1999/03/15 08:10:39 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg//fileutl.h>
31 // strstrip - Remove white space from the front and back of a string /*{{{*/
32 // ---------------------------------------------------------------------
33 /* This is handy to use when parsing a file. It also removes \n's left
34 over from fgets and company */
35 char *_strstrip(char *String
)
37 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
42 char *End
= String
+ strlen(String
) - 1;
43 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
44 *End
== '\r'); End
--);
50 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
51 // ---------------------------------------------------------------------
53 char *_strtabexpand(char *String
,size_t Len
)
55 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
59 if (I
+ 8 > String
+ Len
)
65 /* Assume the start of the string is 0 and find the next 8 char
71 Len
= 8 - ((String
- I
) % 8);
79 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
80 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
85 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
86 // ---------------------------------------------------------------------
87 /* This grabs a single word, converts any % escaped characters to their
88 proper values and advances the pointer. Double quotes are understood
89 and stripped out as well. This is for URI/URL parsing. */
90 bool ParseQuoteWord(const char *&String
,string
&Res
)
92 // Skip leading whitespace
93 const char *C
= String
;
94 for (;*C
!= 0 && *C
== ' '; C
++);
98 // Jump to the next word
99 for (;*C
!= 0 && *C
!= ' '; C
++)
103 for (C
++;*C
!= 0 && *C
!= '"'; C
++);
109 // Now de-quote characters
112 const char *Start
= String
;
114 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
116 if (*Start
== '%' && Start
+ 2 < C
)
121 *I
= (char)strtol(Tmp
,0,16);
134 // Skip ending white space
135 for (;*C
!= 0 && *C
== ' '; C
++);
140 // ParseCWord - Parses a string like a C "" expression /*{{{*/
141 // ---------------------------------------------------------------------
142 /* This expects a series of space separated strings enclosed in ""'s.
143 It concatenates the ""'s into a single string. */
144 bool ParseCWord(const char *String
,string
&Res
)
146 // Skip leading whitespace
147 const char *C
= String
;
148 for (;*C
!= 0 && *C
== ' '; C
++);
154 if (strlen(String
) >= sizeof(Buffer
))
161 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
170 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
172 if (isspace(*C
) == 0)
181 // QuoteString - Convert a string into quoted form /*{{{*/
182 // ---------------------------------------------------------------------
184 string
QuoteString(string Str
,const char *Bad
)
187 for (string::iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
189 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
190 *I
<= 0x20 || *I
>= 0x7F)
193 sprintf(Buf
,"%%%02x",(int)*I
);
202 // DeQuoteString - Convert a string from quoted form /*{{{*/
203 // ---------------------------------------------------------------------
204 /* This undoes QuoteString */
205 string
DeQuoteString(string Str
)
208 for (string::iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
210 if (*I
== '%' && I
+ 2 < Str
.end())
216 Res
+= (char)strtol(Tmp
,0,16);
227 // SizeToStr - Convert a long into a human readable size /*{{{*/
228 // ---------------------------------------------------------------------
229 /* A max of 4 digits are shown before conversion to the next highest unit.
230 The max length of the string will be 5 chars unless the size is > 10
232 string
SizeToStr(double Size
)
241 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
242 ExaBytes, ZettaBytes, YottaBytes */
243 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
247 if (ASize
< 100 && I
!= 0)
249 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
255 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
265 // TimeToStr - Convert the time into a string /*{{{*/
266 // ---------------------------------------------------------------------
267 /* Converts a number of seconds to a hms format */
268 string
TimeToStr(unsigned long Sec
)
276 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
282 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
288 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
292 sprintf(S
,"%lis",Sec
);
299 // SubstVar - Substitute a string for another string /*{{{*/
300 // ---------------------------------------------------------------------
301 /* This replaces all occurrences of Subst with Contents in Str. */
302 string
SubstVar(string Str
,string Subst
,string Contents
)
304 string::size_type Pos
= 0;
305 string::size_type OldPos
= 0;
308 while (OldPos
< Str
.length() &&
309 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
311 Temp
+= string(Str
,OldPos
,Pos
) + Contents
;
312 OldPos
= Pos
+ Subst
.length();
318 return Temp
+ string(Str
,OldPos
);
321 // URItoFileName - Convert the uri into a unique file name /*{{{*/
322 // ---------------------------------------------------------------------
323 /* This converts a URI into a safe filename. It quotes all unsafe characters
324 and converts / to _ and removes the scheme identifier. The resulting
325 file name should be unique and never occur again for a different file */
326 string
URItoFileName(string URI
)
328 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
329 for (; I
< URI
.end() && *I
== '/'; I
++);
331 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
332 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
333 string::iterator J
= URI
.begin();
334 for (; J
!= URI
.end(); J
++)
340 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
341 // ---------------------------------------------------------------------
342 /* This routine performs a base64 transformation on a string. It was ripped
343 from wget and then patched and bug fixed.
345 The spec can be found in RFC 2045 */
346 string
Base64Encode(string S
)
349 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
350 'I','J','K','L','M','N','O','P',
351 'Q','R','S','T','U','V','W','X',
352 'Y','Z','a','b','c','d','e','f',
353 'g','h','i','j','k','l','m','n',
354 'o','p','q','r','s','t','u','v',
355 'w','x','y','z','0','1','2','3',
356 '4','5','6','7','8','9','+','/'};
358 // Pre-allocate some space
360 Final
.reserve((4*S
.length() + 2)/3 + 2);
362 /* Transform the 3x8 bits to 4x6 bits, as required by
364 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
366 char Bits
[3] = {0,0,0};
373 Final
+= tbl
[Bits
[0] >> 2];
374 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
376 if (I
+ 1 >= S
.end())
379 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
381 if (I
+ 2 >= S
.end())
384 Final
+= tbl
[Bits
[2] & 0x3f];
387 /* Apply the padding elements, this tells how many bytes the remote
388 end should discard */
389 if (S
.length() % 3 == 2)
391 if (S
.length() % 3 == 1)
397 // stringcmp - Arbitrary string compare /*{{{*/
398 // ---------------------------------------------------------------------
399 /* This safely compares two non-null terminated strings of arbitrary
401 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
403 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
407 if (A
== AEnd
&& B
== BEnd
)
418 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
419 // ---------------------------------------------------------------------
421 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
423 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
424 if (toupper(*A
) != toupper(*B
))
427 if (A
== AEnd
&& B
== BEnd
)
433 if (toupper(*A
) < toupper(*B
))
438 // LookupTag - Look up the value of a tag in a tagged string /*{{{*/
439 // ---------------------------------------------------------------------
440 /* The format is like those used in package files and the method
441 communication system */
442 string
LookupTag(string Message
,const char *Tag
,const char *Default
)
444 // Look for a matching tag.
445 int Length
= strlen(Tag
);
446 for (string::iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
449 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
451 // Find the end of line and strip the leading/trailing spaces
454 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
455 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
456 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
458 return string(I
,J
-I
);
461 for (; *I
!= '\n' && I
< Message
.end(); I
++);
464 // Failed to find a match
470 // StringToBool - Converts a string into a boolean /*{{{*/
471 // ---------------------------------------------------------------------
472 /* This inspects the string to see if it is true or if it is false and
473 then returns the result. Several variants of true/false are checked. */
474 int StringToBool(string Text
,int Default
= -1)
477 int Res
= strtol(Text
.c_str(),&End
,0);
478 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
481 // Check for negatives
482 if (strcasecmp(Text
.c_str(),"no") == 0 ||
483 strcasecmp(Text
.c_str(),"false") == 0 ||
484 strcasecmp(Text
.c_str(),"without") == 0 ||
485 strcasecmp(Text
.c_str(),"off") == 0 ||
486 strcasecmp(Text
.c_str(),"disable") == 0)
489 // Check for positives
490 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
491 strcasecmp(Text
.c_str(),"true") == 0 ||
492 strcasecmp(Text
.c_str(),"with") == 0 ||
493 strcasecmp(Text
.c_str(),"on") == 0 ||
494 strcasecmp(Text
.c_str(),"enable") == 0)
500 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
501 // ---------------------------------------------------------------------
502 /* This converts a time_t into a string time representation that is
503 year 2000 compliant and timezone neutral */
504 string
TimeRFC1123(time_t Date
)
506 struct tm Conv
= *gmtime(&Date
);
509 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
510 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
511 "Aug","Sep","Oct","Nov","Dec"};
513 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
514 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
515 Conv
.tm_min
,Conv
.tm_sec
);
519 // ReadMessages - Read messages from the FD /*{{{*/
520 // ---------------------------------------------------------------------
521 /* This pulls full messages from the input FD into the message buffer.
522 It assumes that messages will not pause during transit so no
523 fancy buffering is used. */
524 bool ReadMessages(int Fd
, vector
<string
> &List
)
531 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
533 // Process is dead, this is kind of bad..
543 // Look for the end of the message
544 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
546 if (I
[0] != '\n' || I
[1] != '\n')
549 // Pull the message out
550 string
Message(Buffer
,0,I
-Buffer
);
553 for (; I
< End
&& *I
== '\n'; I
++);
555 memmove(Buffer
,I
,End
-Buffer
);
558 List
.push_back(Message
);
563 if (WaitFd(Fd
) == false)
568 // MonthConv - Converts a month string into a number /*{{{*/
569 // ---------------------------------------------------------------------
570 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
571 Made it a bit more robust with a few touppers though. */
572 static int MonthConv(char *Month
)
574 switch (toupper(*Month
))
577 return toupper(Month
[1]) == 'P'?3:7;
583 if (toupper(Month
[1]) == 'A')
585 return toupper(Month
[2]) == 'N'?5:6;
587 return toupper(Month
[2]) == 'R'?2:4;
595 // Pretend it is January..
601 // timegm - Internal timegm function if gnu is not available /*{{{*/
602 // ---------------------------------------------------------------------
603 /* Ripped this evil little function from wget - I prefer the use of
604 GNU timegm if possible as this technique will have interesting problems
605 with leap seconds, timezones and other.
607 Converts struct tm to time_t, assuming the data in tm is UTC rather
608 than local timezone (mktime assumes the latter).
610 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
611 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
612 #ifndef __USE_MISC // glib sets this
613 static time_t timegm(struct tm
*t
)
620 tb
= mktime (gmtime (&tl
));
621 return (tl
<= tb
? (tl
+ (tl
- tb
)) : (tl
- (tb
- tl
)));
625 // StrToTime - Converts a string into a time_t /*{{{*/
626 // ---------------------------------------------------------------------
627 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
628 and the C library asctime format. It requires the GNU library function
629 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
630 reason the C library does not provide any such function :<*/
631 bool StrToTime(string Val
,time_t &Result
)
635 const char *I
= Val
.c_str();
637 // Skip the day of the week
638 for (;*I
!= 0 && *I
!= ' '; I
++);
640 // Handle RFC 1123 time
641 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
642 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
644 // Handle RFC 1036 time
645 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
646 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
651 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
652 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
658 Tm
.tm_mon
= MonthConv(Month
);
661 // Convert to local time and then to GMT
662 Result
= timegm(&Tm
);
667 // URI::CopyFrom - Copy from an object /*{{{*/
668 // ---------------------------------------------------------------------
669 /* This parses the URI into all of its components */
670 void URI::CopyFrom(string U
)
672 string::const_iterator I
= U
.begin();
674 // Locate the first colon, this separates the scheme
675 for (; I
< U
.end() && *I
!= ':' ; I
++);
676 string::const_iterator FirstColon
= I
;
678 /* Determine if this is a host type URI with a leading double //
679 and then search for the first single / */
680 string::const_iterator SingleSlash
= I
;
681 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
683 for (; SingleSlash
< U
.end() && *SingleSlash
!= '/'; SingleSlash
++);
684 if (SingleSlash
> U
.end())
685 SingleSlash
= U
.end();
687 // We can now write the access and path specifiers
688 Access
= string(U
,0,FirstColon
- U
.begin());
689 if (SingleSlash
!= U
.end())
690 Path
= string(U
,SingleSlash
- U
.begin());
691 if (Path
.empty() == true)
694 // Now we attempt to locate a user:pass@host fragment
695 if (FirstColon
[1] == '/' && FirstColon
[2] == '/')
699 if (FirstColon
>= U
.end())
702 if (FirstColon
> SingleSlash
)
703 FirstColon
= SingleSlash
;
707 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
708 string::const_iterator At
= I
;
710 // Colon in the @ section
712 for (; I
< At
&& *I
!= ':'; I
++);
713 string::const_iterator SecondColon
= I
;
715 // Now write the host and user/pass
716 if (At
== SingleSlash
)
718 if (FirstColon
< SingleSlash
)
719 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
723 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
724 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
725 if (SecondColon
< At
)
726 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
729 // Now we parse off a port number from the hostname
731 string::size_type Pos
= Host
.rfind(':');
732 if (Pos
== string::npos
)
735 Port
= atoi(string(Host
,Pos
+1).c_str());
736 Host
= string(Host
,0,Pos
);
739 // URI::operator string - Convert the URI to a string /*{{{*/
740 // ---------------------------------------------------------------------
742 URI::operator string()
744 string Res
= Access
+ ':';
745 if (Host
.empty() == false)
748 if (User
.empty() == false)
751 if (Password
.empty() == false)
752 Res
+= ":" + Password
;
759 sprintf(S
,":%u",Port
);
764 if (Path
.empty() == false)