1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.19 1999/02/01 08:11:57 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
30 // strstrip - Remove white space from the front and back of a string /*{{{*/
31 // ---------------------------------------------------------------------
32 /* This is handy to use when parsing a file. It also removes \n's left
33 over from fgets and company */
/* Strips white space from both ends of String, in place.
   Leading spaces and tabs are skipped by advancing the returned pointer;
   trailing spaces, tabs, '\n' and '\r' are cut off by writing a
   terminating 0 into the buffer.
   String must be a writable, 0 terminated buffer.
   Returns a pointer into that same buffer at the first kept character. */
char *_strstrip(char *String)
{
   // Skip the leading blanks (the terminating 0 stops the scan)
   for (; *String == ' ' || *String == '\t'; String++);

   /* End is kept one past the last retained character, so it never has
      to step in front of the buffer even if everything is stripped */
   char *End = String + strlen(String);
   for (; End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'); End--);
   *End = 0;

   return String;
}
49 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
50 // ---------------------------------------------------------------------
/* Expands every tab in String, in place, into the number of spaces needed
   to reach the next 8 column division (the start of the string is taken
   to be column 0).
   String - writable, 0 terminated buffer
   Len    - total size of that buffer in bytes
   If an expansion would not fit inside Len bytes the string is cut off
   at that tab instead. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      // Distance from the tab to the next 8 char division (1..8)
      const size_t Column = I - String;
      const size_t Width = 8 - (Column % 8);

      // Text following the tab, including its terminating 0
      const size_t Tail = strlen(I + 1) + 1;

      // No room left for the expanded text, truncate here
      if (Column + Width + Tail > Len)
      {
         *I = 0;
         return String;
      }

      // Open a gap of Width bytes and fill it with spaces
      memmove(I + Width,I + 1,Tail);
      memset(I,' ',Width);

      // Continue after the inserted spaces (the loop adds the last step)
      I += Width - 1;
   }

   return String;
}
84 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
85 // ---------------------------------------------------------------------
86 /* This grabs a single word, converts any % escaped characters to their
87 proper values and advances the pointer. Double quotes are understood
88 and stripped out as well. This is for URI/URL parsing. */
89 bool ParseQuoteWord(const char *&String
,string
&Res
)
91 // Skip leading whitespace
92 const char *C
= String
;
93 for (;*C
!= 0 && *C
== ' '; C
++);
97 // Jump to the next word
98 for (;*C
!= 0 && *C
!= ' '; C
++)
102 for (C
++;*C
!= 0 && *C
!= '"'; C
++);
108 // Now de-quote characters
111 const char *Start
= String
;
113 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
115 if (*Start
== '%' && Start
+ 2 < C
)
120 *I
= (char)strtol(Tmp
,0,16);
133 // Skip ending white space
134 for (;*C
!= 0 && *C
== ' '; C
++);
139 // ParseCWord - Parses a string like a C "" expression /*{{{*/
140 // ---------------------------------------------------------------------
141 /* This expects a series of space separated strings enclosed in ""'s.
142 It concatenates the ""'s into a single string. */
143 bool ParseCWord(const char *String
,string
&Res
)
145 // Skip leading whitespace
146 const char *C
= String
;
147 for (;*C
!= 0 && *C
== ' '; C
++);
153 if (strlen(String
) >= sizeof(Buffer
))
160 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
169 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
171 if (isspace(*C
) == 0)
180 // DeQuoteString - Convert a string from quoted form /*{{{*/
181 // ---------------------------------------------------------------------
182 /* This undoes QuoteString */
/* Replaces every %XX escape in Str (XX being two hex digits) with the
   byte it encodes and returns the result. A '%' that is not followed by
   two hex digits is copied through unchanged, so a malformed or cut off
   escape never turns into a stray 0 byte. */
std::string DeQuoteString(std::string Str)
{
   std::string Res;
   Res.reserve(Str.length());

   for (std::string::size_type I = 0; I < Str.length(); I++)
   {
      // Indexes are used so nothing is ever advanced past the end
      if (Str[I] == '%' && I + 2 < Str.length() &&
          isxdigit((unsigned char)Str[I + 1]) != 0 &&
          isxdigit((unsigned char)Str[I + 2]) != 0)
      {
         const char Tmp[3] = {Str[I + 1],Str[I + 2],0};
         Res += (char)strtol(Tmp,0,16);

         // Skip the two digits, the loop steps over the '%'
         I += 2;
         continue;
      }

      Res += Str[I];
   }

   return Res;
}
204 // QuoteString - Convert a string into quoted form /*{{{*/
205 // ---------------------------------------------------------------------
/* Returns a copy of Str in which every unsafe byte is written as %xx
   (two lower case hex digits). A byte is unsafe if it is listed in Bad,
   is a control character or space (<= 0x20), or is >= 0x7F.
   Bad must be a 0 terminated string; it may be empty. */
std::string QuoteString(std::string Str,const char *Bad)
{
   std::string Res;
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the unsigned value: with a signed char the high bytes are
         negative and would be printed sign extended (%ffffffe9) */
      const unsigned char Ch = (unsigned char)*I;

      if (Ch <= 0x20 || Ch >= 0x7F || strchr(Bad,Ch) != 0)
      {
         char Buf[4];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }

   return Res;
}
225 // SizeToStr - Convert a long into a human readable size /*{{{*/
226 // ---------------------------------------------------------------------
227 /* A max of 4 digits are shown before conversion to the next highest unit.
228 The max length of the string will be 5 chars unless the size is > 10
230 string
SizeToStr(double Size
)
239 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
240 ExaBytes, ZettaBytes, YottaBytes */
241 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
245 if (ASize
< 100 && I
!= 0)
247 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
253 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
263 // TimeToStr - Convert the time into a string /*{{{*/
264 // ---------------------------------------------------------------------
265 /* Converts a number of seconds to a hms format */
266 string
TimeToStr(unsigned long Sec
)
274 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
280 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
286 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
290 sprintf(S
,"%lis",Sec
);
297 // SubstVar - Substitute a string for another string /*{{{*/
298 // ---------------------------------------------------------------------
299 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns a copy of Str with every occurrence of Subst replaced by
   Contents. Matches are searched left to right and do not overlap; the
   inserted Contents is never rescanned. An empty Subst matches nothing
   and Str is returned unchanged. */
std::string SubstVar(std::string Str,std::string Subst,std::string Contents)
{
   // An empty pattern would match at every position without advancing
   if (Subst.empty() == true)
      return Str;

   std::string Temp;
   std::string::size_type OldPos = 0;
   std::string::size_type Pos;
   while ((Pos = Str.find(Subst,OldPos)) != std::string::npos)
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Whatever follows the last match (all of Str if nothing matched)
   Temp.append(Str,OldPos,std::string::npos);
   return Temp;
}
319 // URItoFileName - Convert the uri into a unique file name /*{{{*/
320 // ---------------------------------------------------------------------
321 /* This converts a URI into a safe filename. It quotes all unsafe characters
322 and converts / to _ and removes the scheme identifier. The resulting
323 file name should be unique and never occur again for a different file */
324 string
URItoFileName(string URI
)
326 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
327 for (; I
< URI
.end() && *I
== '/'; I
++);
329 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
330 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
331 string::iterator J
= URI
.begin();
332 for (; J
!= URI
.end(); J
++)
338 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
339 // ---------------------------------------------------------------------
340 /* This routine performs a base64 transformation on a string. It was ripped
341 from wget and then patched and bug fixed.
343 This spec can be found in rfc2045 */
/* Returns the base64 encoding (RFC 2045 alphabet) of the bytes in S.
   Every 3 input bytes become 4 output characters; a final group of one
   or two bytes is padded with "==" or "=". No line breaks are inserted.
   An empty S gives an empty string. */
std::string Base64Encode(std::string S)
{
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate the exact output size
   std::string Final;
   Final.reserve(4*((S.length() + 2)/3));

   /* Transform the 3x8 bits to 4x6 bits. The bytes are held unsigned,
      a signed char >= 0x80 would give a negative table index */
   const std::string::size_type Len = S.length();
   for (std::string::size_type I = 0; I < Len; I += 3)
   {
      const std::string::size_type Avail = Len - I;

      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[I];
      if (Avail > 1)
         Bits[1] = S[I + 1];
      if (Avail > 2)
         Bits[2] = S[I + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) | (Bits[1] >> 4)];

      // One byte in the last group: two padding elements
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((Bits[1] & 0xf) << 2) | (Bits[2] >> 6)];

      // Two bytes in the last group: one padding element
      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[Bits[2] & 0x3f];
   }

   return Final;
}
395 // stringcmp - Arbitrary string compare /*{{{*/
396 // ---------------------------------------------------------------------
397 /* This safely compares two non-null terminated strings of arbitrary
399 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
401 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
405 if (A
== AEnd
&& B
== BEnd
)
416 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
417 // ---------------------------------------------------------------------
419 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
421 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
422 if (toupper(*A
) != toupper(*B
))
425 if (A
== AEnd
&& B
== BEnd
)
431 if (toupper(*A
) < toupper(*B
))
436 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
437 // ---------------------------------------------------------------------
438 /* The format is like those used in package files and the method
439 communication system */
/* Searches Message, a block of "Tag: value" lines separated by '\n', for
   a line that starts with Tag (compared without regard to case) directly
   followed by ':'.
   Returns the value of the first such line with the white space on both
   sides removed; the value never extends past its own line, so an empty
   value gives an empty string.
   If no line matches, Default is returned, or an empty string when
   Default is 0. */
std::string LookupTag(std::string Message,const char *Tag,const char *Default)
{
   const std::string::size_type Length = strlen(Tag);
   const std::string::size_type Size = Message.length();

   // Walk the message one line at a time, Line is the start of the line
   for (std::string::size_type Line = 0; Line < Size;)
   {
      std::string::size_type Eol = Message.find('\n',Line);
      if (Eol == std::string::npos)
         Eol = Size;

      // The ':' has to sit inside this line, right after the tag
      if (Eol - Line > Length && Message[Line + Length] == ':' &&
          strncasecmp(Message.c_str() + Line,Tag,Length) == 0)
      {
         // Strip the leading/trailing spaces, bounds are checked first
         std::string::size_type First = Line + Length + 1;
         std::string::size_type Last = Eol;
         for (; First < Last && isspace((unsigned char)Message[First]) != 0; First++);
         for (; Last > First && isspace((unsigned char)Message[Last - 1]) != 0; Last--);

         return std::string(Message,First,Last - First);
      }

      Line = Eol + 1;
   }

   // Failed to find a match
   if (Default == 0)
      return std::string();
   return Default;
}
468 // StringToBool - Converts a string into a boolean /*{{{*/
469 // ---------------------------------------------------------------------
470 /* This inspects the string to see if it is true or if it is false and
471 then returns the result. Several variants on true/false are checked. */
/* Interprets Text as a boolean.
   Returns 0 or 1 if Text starts with a number (any base strtol accepts)
   whose value is 0 or 1, 0 for no/false/without/off/disable, 1 for
   yes/true/with/on/enable (words compared without regard to case), and
   Default for everything else. */
int StringToBool(std::string Text,int Default = -1)
{
   /* The value is kept as a long for the range check; narrowing it to an
      int first would let 2^32 + 1 pass as 1 */
   char *End;
   const long Res = strtol(Text.c_str(),&End,0);
   if (End != Text.c_str() && Res >= 0 && Res <= 1)
      return (int)Res;

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"off") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"on") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
498 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
499 // ---------------------------------------------------------------------
500 /* This converts a time_t into a string time representation that is
501 year 2000 compliant and timezone neutral */
/* Formats Date as an RFC 1123 time stamp in GMT, for example
   "Thu, 01 Jan 1970 00:00:00 GMT". The day and month names are fixed
   English abbreviations and do not depend on the locale.
   Returns an empty string if Date cannot be broken down by gmtime. */
std::string TimeRFC1123(time_t Date)
{
   // gmtime hands back 0 for a time it cannot represent
   const struct tm * const Conv = gmtime(&Date);
   if (Conv == 0)
      return std::string();

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[64];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv->tm_wday],
            Conv->tm_mday,Month[Conv->tm_mon],Conv->tm_year+1900,Conv->tm_hour,
            Conv->tm_min,Conv->tm_sec);
   return Buf;
}
517 // ReadMessages - Read messages from the FD /*{{{*/
518 // ---------------------------------------------------------------------
519 /* This pulls full messages from the input FD into the message buffer.
520 It assumes that messages will not pause during transit so no
521 fancy buffering is used. */
522 bool ReadMessages(int Fd
, vector
<string
> &List
)
529 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
531 // Process is dead, this is kind of bad..
541 // Look for the end of the message
542 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
544 if (I
[0] != '\n' || I
[1] != '\n')
547 // Pull the message out
548 string
Message(Buffer
,0,I
-Buffer
);
551 for (; I
< End
&& *I
== '\n'; I
++);
553 memmove(Buffer
,I
,End
-Buffer
);
556 List
.push_back(Message
);
561 if (WaitFd(Fd
) == false)
566 // MonthConv - Converts a month string into a number /*{{{*/
567 // ---------------------------------------------------------------------
568 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
569 Made it a bit more robust with a few touppers though. */
570 static int MonthConv(char *Month
)
572 switch (toupper(*Month
))
575 return toupper(Month
[1]) == 'P'?3:7;
581 if (toupper(Month
[1]) == 'A')
583 return toupper(Month
[2]) == 'N'?5:6;
585 return toupper(Month
[2]) == 'R'?2:4;
593 // Pretend it is January..
599 // StrToTime - Converts a string into a time_t /*{{{*/
600 // ---------------------------------------------------------------------
601 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
602 and the C library asctime format. It requires the GNU library function
603 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
604 reason the C library does not provide any such function :<*/
605 bool StrToTime(string Val
,time_t &Result
)
609 const char *I
= Val
.c_str();
611 // Skip the day of the week
612 for (;*I
!= 0 && *I
!= ' '; I
++);
614 // Handle RFC 1123 time
615 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
616 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
618 // Handle RFC 1036 time
619 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
620 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
625 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
626 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
632 Tm
.tm_mon
= MonthConv(Month
);
635 // Convert to local time and then to GMT
636 Result
= timegm(&Tm
);
641 // URI::CopyFrom - Copy from an object /*{{{*/
642 // ---------------------------------------------------------------------
643 /* This parses the URI into all of its components */
644 void URI::CopyFrom(string U
)
646 string::const_iterator I
= U
.begin();
648 // Locate the first colon, this separates the scheme
649 for (; I
< U
.end() && *I
!= ':' ; I
++);
650 string::const_iterator FirstColon
= I
;
652 /* Determine if this is a host type URI with a leading double //
653 and then search for the first single / */
654 string::const_iterator SingleSlash
= I
;
655 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
657 for (; SingleSlash
< U
.end() && *SingleSlash
!= '/'; SingleSlash
++);
658 if (SingleSlash
> U
.end())
659 SingleSlash
= U
.end();
661 // We can now write the access and path specifiers
662 Access
= string(U
,0,FirstColon
- U
.begin());
663 if (SingleSlash
!= U
.end())
664 Path
= string(U
,SingleSlash
- U
.begin());
665 if (Path
.empty() == true)
668 // Now we attempt to locate a user:pass@host fragment
669 if (FirstColon
[1] == '/' && FirstColon
[2] == '/')
673 if (FirstColon
>= U
.end())
676 if (FirstColon
> SingleSlash
)
677 FirstColon
= SingleSlash
;
681 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
682 string::const_iterator At
= I
;
684 // Colon in the @ section
686 for (; I
< At
&& *I
!= ':'; I
++);
687 string::const_iterator SecondColon
= I
;
689 // Now write the host and user/pass
690 if (At
== SingleSlash
)
692 if (FirstColon
< SingleSlash
)
693 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
697 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
698 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
699 if (SecondColon
< At
)
700 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
703 // Now we parse off a port number from the hostname
705 string::size_type Pos
= Host
.rfind(':');
706 if (Pos
== string::npos
)
709 Port
= atoi(string(Host
,Pos
+1).c_str());
710 Host
= string(Host
,0,Pos
);
713 // URI::operator string - Convert the URI to a string /*{{{*/
714 // ---------------------------------------------------------------------
716 URI::operator string()
718 string Res
= Access
+ ':';
719 if (Host
.empty() == false)
722 if (User
.empty() == false)
725 if (Password
.empty() == false)
726 Res
+= ":" + Password
;
733 sprintf(S
,":%u",Port
);
738 if (Path
.empty() == false)