1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.18 1999/01/27 02:48:52 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
30 // strstrip - Remove white space from the front and back of a string /*{{{*/
31 // ---------------------------------------------------------------------
32 /* This is handy to use when parsing a file. It also removes \n's left
33 over from fgets and company */
// NOTE(review): this listing appears to have lost lines (braces and the
// statements that follow each loop); the comments below describe only the
// statements that are visible here.
// Trims white space from both ends of the mutable C string String.
34 char *_strstrip(char *String
)
// Advance String past any leading spaces and tabs
36 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
// End starts on the last character of what is left of the string
41 char *End
= String
+ strlen(String
) - 1;
// Walk End backwards over trailing spaces, tabs, \n and \r; the walk stops
// at the latest when End is one position before String
42 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
43 *End
== '\r'); End
--);
// strtabexpand - Expand tabs to the next 8 column tab stop		/*{{{*/
// ---------------------------------------------------------------------
/* Expands every tab in String, in place, into the run of spaces that reaches
   the next multiple-of-8 column (column 0 is the start of String). Len is
   the total size of the buffer String lives in, including the room needed
   for the terminating NUL. If expanding a tab would overflow the buffer the
   string is truncated at that tab. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I < String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      // Number of spaces needed to reach the next tab stop (1..8)
      const size_t Column = I - String;
      const size_t Pad = 8 - (Column % 8);

      // Bytes that follow the tab, including the terminating NUL
      const size_t Tail = strlen(I + 1) + 1;

      // Not enough room for the expansion, cut the string off here
      if (Column + Pad + Tail > Len)
      {
         *I = 0;
         return String;
      }

      // Shift the tail to the right and overwrite the tab with spaces
      memmove(I + Pad,I + 1,Tail);
      memset(I,' ',Pad);

      // Continue after the inserted spaces (the loop adds the final step)
      I += Pad - 1;
   }

   return String;
}
									/*}}}*/
84 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
85 // ---------------------------------------------------------------------
86 /* This grabs a single word, converts any % escaped characters to their
87 proper values and advances the pointer. Double quotes are understood
88 and stripped out as well. This is for URI/URL parsing. */
// NOTE(review): this listing appears to have lost lines (braces, the
// declarations of Buffer, Tmp and I, and the bodies of several branches);
// the comments below describe only the statements that are visible here.
// String is taken by reference to a pointer and Res by reference, so both
// can be updated for the caller.
89 bool ParseQuoteWord(const char *&String
,string
&Res
)
91 // Skip leading whitespace (only the space character is skipped)
92 const char *C
= String
;
93 for (;*C
!= 0 && *C
== ' '; C
++);
97 // Jump to the next word: scan forward to the next space or the end
98 for (;*C
!= 0 && *C
!= ' '; C
++)
// Scan forward to the next double quote or the end of the string
102 for (C
++;*C
!= 0 && *C
!= '"'; C
++);
108 // Now de-quote characters
111 const char *Start
= String
;
// Fill Buffer from the range Start..C; the loop also stops when I reaches
// the end of Buffer
113 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
// A '%' is only handled specially when two more characters precede C
115 if (*Start
== '%' && Start
+ 2 < C
)
// Tmp is parsed as a base 16 number and stored as a single character
120 *I
= (char)strtol(Tmp
,0,16);
133 // Skip ending white space
134 for (;*C
!= 0 && *C
== ' '; C
++);
139 // ParseCWord - Parses a string like a C "" expression /*{{{*/
140 // ---------------------------------------------------------------------
141 /* This expects a series of space separated strings enclosed in ""'s.
142 It concatenates the ""'s into a single string. */
// NOTE(review): this listing appears to have lost lines (braces, the
// declaration of Buffer and most branch bodies); the comments below
// describe only the statements that are visible here.
143 bool ParseCWord(const char *String
,string
&Res
)
145 // Skip leading whitespace (only the space character is skipped)
146 const char *C
= String
;
147 for (;*C
!= 0 && *C
== ' '; C
++);
// Inputs that are at least as long as Buffer are treated specially; the
// action taken is not visible in this listing
153 if (strlen(String
) >= sizeof(Buffer
))
// Walk the characters up to the next double quote or the end
160 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
// True when both this character and the one before it are white space
169 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
// True when the current character is not white space
171 if (isspace(*C
) == 0)
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that appears in Bad, is
   not printable, or lies outside the range 0x21-0x7E is replaced by a
   %XX escape, XX being the byte value as two lower case hex digits. */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
   {
      /* Work on the unsigned byte value. A plain char may be negative,
         which is not a valid argument for isprint and would be sign
         extended when formatted as hex. */
      const unsigned char Ch = *I;
      if (strchr(Bad,Ch) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[10];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
									/*}}}*/
201 // SizeToStr - Convert a long into a human readable size /*{{{*/
202 // ---------------------------------------------------------------------
203 /* A max of 4 digits are shown before conversion to the next highest unit.
204 The max length of the string will be 5 chars unless the size is > 10 YottaBytes */
// NOTE(review): this listing appears to have lost lines (braces, the
// declarations of S, ASize and I, and the code that scales the size); the
// comments below describe only the statements that are visible here.
// Formats Size together with a one letter unit suffix.
206 string
SizeToStr(double Size
)
215 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
216 ExaBytes, ZettaBytes, YottaBytes */
// Unit suffix for each index; index 0 is '\0', i.e. no suffix character
217 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
// Values below 100 that carry a unit are printed with one decimal
221 if (ASize
< 100 && I
!= 0)
223 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
// The other visible format prints no decimals
229 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
239 // TimeToStr - Convert the time into a string /*{{{*/
240 // ---------------------------------------------------------------------
241 /* Converts a number of seconds to a hms format */
// NOTE(review): this listing appears to have lost lines (braces, the
// declaration of S and the conditions that choose between the formats
// below); the comments describe only the statements that are visible here.
// NOTE(review): every format uses %li, a signed long conversion, while Sec
// is an unsigned long; %lu would match the argument type.
242 string
TimeToStr(unsigned long Sec
)
// Days, hours (0-23), minutes (0-59) and seconds (0-59)
250 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
// Total hours, minutes (0-59) and seconds (0-59)
256 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
// Total minutes and seconds (0-59)
262 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
// Seconds only
266 sprintf(S
,"%lis",Sec
);
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str with every occurrence of Subst replaced by
   Contents. Occurrences are found left to right and do not overlap, and
   the inserted Contents is never rescanned. An empty Subst matches nothing
   and Str is returned unchanged. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern would match at every position and never advance
   if (Subst.empty() == true)
      return Str;

   string Temp;
   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      /* Copy the text between the previous match and this one. The third
         argument is a length, not an end position. */
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Whatever follows the last match
   Temp.append(Str,OldPos,string::npos);
   return Temp;
}
									/*}}}*/
295 // URItoFileName - Convert the uri into a unique file name /*{{{*/
296 // ---------------------------------------------------------------------
297 /* This converts a URI into a safe filename. It quotes all unsafe characters
298 and converts / to _ and removes the scheme identifier. The resulting
299 file name should be unique and never occur again for a different file */
// NOTE(review): this listing appears to have lost lines (braces, the body
// of the final loop and the return); the comments below describe only the
// statements that are visible here.
300 string
URItoFileName(string URI
)
// Start just after the first ':'. NOTE(review): when there is no ':' find
// gives npos and npos + 1 presumably wraps to 0, i.e. the start - confirm.
302 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
// Skip the run of '/' characters that follows
303 for (; I
< URI
.end() && *I
== '/'; I
++);
305 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
// Quote the remainder of the URI, treating the listed characters as bad
306 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
// Visit every character of the quoted result
307 string::iterator J
= URI
.begin();
308 for (; J
!= URI
.end(); J
++)
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* Returns the base64 encoding of S, using the alphabet and the '=' padding
   described in rfc2045. No line breaks are inserted into the output, so it
   is only intended for short strings. */
string Base64Encode(string S)
{
   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The bytes
      are handled as unsigned values so that characters >= 0x80 can not
      produce a negative table index, and the string is indexed by position
      so that nothing ever steps past its end. */
   const string::size_type Length = S.length();
   for (string::size_type I = 0; I < Length; I += 3)
   {
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[I];
      if (I + 1 < Length)
         Bits[1] = S[I + 1];
      if (I + 2 < Length)
         Bits[2] = S[I + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // Only one input byte was left in this group
      if (I + 1 >= Length)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Only two input bytes were left in this group
      if (I + 2 >= Length)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Length % 3 == 2)
      Final += '=';
   if (Length % 3 == 1)
      Final += "==";

   return Final;
}
									/*}}}*/
371 // stringcmp - Arbitrary string compare /*{{{*/
372 // ---------------------------------------------------------------------
373 /* This safely compares two non-null terminated strings of arbitrary length */
// NOTE(review): this listing appears to have lost lines (the loop body and
// the return statements); the comments below describe only the statements
// that are visible here.
// Compares the character range A..AEnd against the range B..BEnd.
375 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
// Step through both ranges together until either one is exhausted
377 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
// Both ranges were consumed completely
381 if (A
== AEnd
&& B
== BEnd
)
392 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
393 // ---------------------------------------------------------------------
// NOTE(review): this listing appears to have lost lines (braces and the
// return statements); the comments below describe only the statements that
// are visible here.
// Compares the character range A..AEnd against the range B..BEnd, passing
// every character through toupper first.
395 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
// Step through both ranges together until either one is exhausted
397 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
// The characters differ once case is ignored
398 if (toupper(*A
) != toupper(*B
))
// Both ranges were consumed completely
401 if (A
== AEnd
&& B
== BEnd
)
// Order the two characters at the current position, ignoring case
407 if (toupper(*A
) < toupper(*B
))
412 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
413 // ---------------------------------------------------------------------
414 /* The format is like those used in package files and the method
415 communication system */
// NOTE(review): this listing appears to have lost lines (braces, the
// declaration of J, the step that moves I past the tag, and the final
// return that uses Default); the comments below describe only the
// statements that are visible here.
416 string
LookupTag(string Message
,const char *Tag
,const char *Default
)
418 // Look for a matching tag.
419 int Length
= strlen(Tag
);
// Only positions that leave room for the tag plus one more character are
// examined
420 for (string::iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
// The tag must be followed directly by ':' and match ignoring case
423 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
425 // Find the end of line and strip the leading/trailing spaces
// NOTE(review): in the two loops below the iterator is dereferenced before
// it is compared with Message.end(); swapping the operands of && would do
// the bounds check first.
428 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
429 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
// Move J back over white space at the end of the value
430 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
// The value is the J-I characters starting at I
432 return string(I
,J
-I
);
// Skip forward to the end of the current line.
// NOTE(review): same dereference-before-bounds-check order as above.
435 for (; *I
!= '\n' && I
< Message
.end(); I
++);
438 // Failed to find a match
444 // StringToBool - Converts a string into a boolean /*{{{*/
445 // ---------------------------------------------------------------------
446 /* This inspects the string to see if it is true or if it is false and
447 then returns the result. Several variants on true/false are checked. */
// NOTE(review): this listing appears to have lost lines (braces, the
// declaration of End and every return statement); the comments below
// describe only the statements that are visible here.
448 int StringToBool(string Text
,int Default
= -1)
// Try Text as a number first; base 0 lets strtol pick the base from the
// prefix of the text
451 int Res
= strtol(Text
.c_str(),&End
,0);
// Some digits were consumed and the value is 0 or 1
452 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
455 // Check for negatives (compared without regard to case)
456 if (strcasecmp(Text
.c_str(),"no") == 0 ||
457 strcasecmp(Text
.c_str(),"false") == 0 ||
458 strcasecmp(Text
.c_str(),"without") == 0 ||
459 strcasecmp(Text
.c_str(),"off") == 0 ||
460 strcasecmp(Text
.c_str(),"disable") == 0)
463 // Check for positives (compared without regard to case)
464 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
465 strcasecmp(Text
.c_str(),"true") == 0 ||
466 strcasecmp(Text
.c_str(),"with") == 0 ||
467 strcasecmp(Text
.c_str(),"on") == 0 ||
468 strcasecmp(Text
.c_str(),"enable") == 0)
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, for example
   "Sun, 06 Nov 1994 08:49:37 GMT". An empty string is returned if Date
   can not be broken down by gmtime. */
string TimeRFC1123(time_t Date)
{
   // gmtime yields a null pointer when the date can not be represented
   const struct tm *Broken = gmtime(&Date);
   if (Broken == 0)
      return string();

   // Take a copy, the result of gmtime may be overwritten by later calls
   const struct tm Conv = *Broken;

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
									/*}}}*/
493 // ReadMessages - Read messages from the FD /*{{{*/
494 // ---------------------------------------------------------------------
495 /* This pulls full messages from the input FD into the message buffer.
496 It assumes that messages will not pause during transit so no
497 fancy buffering is used. */
// NOTE(review): this listing appears to have lost lines (braces, the
// declarations of Buffer and End, the error handling after read and all
// return statements); the comments below describe only the statements that
// are visible here.
498 bool ReadMessages(int Fd
, vector
<string
> &List
)
// Read into the unused part of Buffer, which starts at End
505 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
507 // Process is dead, this is kind of bad..
517 // Look for the end of the message
518 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
// Two newline characters in a row mark the end of a message
520 if (I
[0] != '\n' || I
[1] != '\n')
523 // Pull the message out
524 string
Message(Buffer
,0,I
-Buffer
);
// Skip the newlines that separate this message from the next one
527 for (; I
< End
&& *I
== '\n'; I
++);
// Move the unprocessed data to the front of Buffer.
// NOTE(review): the length is End-Buffer, but only End-I bytes lie between
// I and End, so this looks like it copies from beyond End - confirm.
529 memmove(Buffer
,I
,End
-Buffer
);
532 List
.push_back(Message
);
// WaitFd is defined elsewhere; a false result is handled here
537 if (WaitFd(Fd
) == false)
542 // MonthConv - Converts a month string into a number /*{{{*/
543 // ---------------------------------------------------------------------
544 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
545 Made it a bit more robust with a few touppers though. */
// NOTE(review): this listing appears to have lost lines (braces, the case
// labels and several returns); the comments below describe only the
// statements that are visible here.
// NOTE(review): the returned numbers look like zero based month indexes
// (0 = January) - confirm against the caller.
546 static int MonthConv(char *Month
)
// Dispatch on the upper cased first letter of the name
548 switch (toupper(*Month
))
// Second letter 'P' gives 3, anything else 7 (presumably Apr vs Aug)
551 return toupper(Month
[1]) == 'P'?3:7;
// Second letter 'A' is tested first (presumably Jan)
557 if (toupper(Month
[1]) == 'A')
// Third letter 'N' gives 5, anything else 6 (presumably Jun vs Jul)
559 return toupper(Month
[2]) == 'N'?5:6;
// Third letter 'R' gives 2, anything else 4 (presumably Mar vs May)
561 return toupper(Month
[2]) == 'R'?2:4;
569 // Pretend it is January..
575 // StrToTime - Converts a string into a time_t /*{{{*/
576 // ---------------------------------------------------------------------
577 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
578 and the C library asctime format. It requires the GNU library function
579 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
580 reason the C library does not provide any such function :<*/
// NOTE(review): this listing appears to have lost lines (braces, the
// declarations of Tm and Month, the year adjustments and all return
// statements); the comments below describe only the statements that are
// visible here.
// Parses the date text in Val and stores the converted time in Result.
581 bool StrToTime(string Val
,time_t &Result
)
585 const char *I
= Val
.c_str();
587 // Skip the day of the week (everything up to the first space)
588 for (;*I
!= 0 && *I
!= ' '; I
++);
590 // Handle RFC 1123 time: "day month year hour:min:sec GMT"
// All six fields must convert for this format to be accepted
591 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
592 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
594 // Handle RFC 1036 time: "day-month-year hour:min:sec GMT"
595 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
596 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
// Third format: "month day hour:min:sec year"
601 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
602 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
// Turn the three letter month name into a month number
608 Tm
.tm_mon
= MonthConv(Month
);
611 // Convert the broken down time to a time_t using timegm
612 Result
= timegm(&Tm
);
617 // URI::CopyFrom - Copy from an object /*{{{*/
618 // ---------------------------------------------------------------------
619 /* This parses the URI into all of its components */
// NOTE(review): this listing appears to have lost lines (braces, else
// branches and some statements); the comments below describe only the
// statements that are visible here.
// Splits U into the members Access, Path, Host, User, Password and Port.
620 void URI::CopyFrom(string U
)
622 string::const_iterator I
= U
.begin();
624 // Locate the first colon, this separates the scheme
625 for (; I
< U
.end() && *I
!= ':' ; I
++);
626 string::const_iterator FirstColon
= I
;
628 /* Determine if this is a host type URI with a leading double //
629 and then search for the first single / */
630 string::const_iterator SingleSlash
= I
;
// At least three more characters must follow, the first two being "//"
631 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
633 for (; SingleSlash
< U
.end() && *SingleSlash
!= '/'; SingleSlash
++);
// Clamp SingleSlash so that it never lies beyond the end of U
634 if (SingleSlash
> U
.end())
635 SingleSlash
= U
.end();
637 // We can now write the access and path specifiers
// Access is everything before the first colon
638 Access
= string(U
,0,FirstColon
- U
.begin());
// Path is everything from the single slash onwards
639 if (SingleSlash
!= U
.end())
640 Path
= string(U
,SingleSlash
- U
.begin());
641 if (Path
.empty() == true)
644 // Now we attempt to locate a user:pass@host fragment
// NOTE(review): FirstColon equals U.end() when U holds no ':', and nothing
// visible here checks that before FirstColon[1] and FirstColon[2] are read
// - confirm.
645 if (FirstColon
[1] == '/' && FirstColon
[2] == '/')
649 if (FirstColon
>= U
.end())
// Keep FirstColon from lying beyond the single slash
652 if (FirstColon
> SingleSlash
)
653 FirstColon
= SingleSlash
;
// Search for an '@' in front of the single slash
657 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
658 string::const_iterator At
= I
;
660 // Colon in the @ section
662 for (; I
< At
&& *I
!= ':'; I
++);
663 string::const_iterator SecondColon
= I
;
665 // Now write the host and user/pass
// At == SingleSlash means the search above found no '@'
666 if (At
== SingleSlash
)
668 if (FirstColon
< SingleSlash
)
669 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
// With an '@' present the host is the text between it and the single slash
673 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
674 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
// A colon in front of the '@' separates the user from the password
675 if (SecondColon
< At
)
676 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
679 // Now we parse off a port number from the hostname
// The last ':' in Host is used as the separator
681 string::size_type Pos
= Host
.rfind(':');
682 if (Pos
== string::npos
)
// Everything after the colon is converted with atoi and removed from Host
685 Port
= atoi(string(Host
,Pos
+1).c_str());
686 Host
= string(Host
,0,Pos
);
689 // URI::operator string - Convert the URI to a string /*{{{*/
690 // ---------------------------------------------------------------------
692 URI::operator string()
694 string Res
= Access
+ ':';
695 if (Host
.empty() == false)
698 if (User
.empty() == false)
701 if (Password
.empty() == false)
702 Res
+= ":" + Password
;
709 sprintf(S
,":%u",Port
);
714 if (Path
.empty() == false)