1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.13 1998/11/05 07:21:44 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "strutl.h"
23 #include <apt-pkg/fileutl.h>
30 // strstrip - Remove white space from the front and back of a string /*{{{*/
31 // ---------------------------------------------------------------------
32 /* This is handy to use when parsing a file. It also removes \n's left
33 over from fgets and company */
/* Trims String in place and returns a pointer to the first character that
   is kept; the returned pointer may lie after String itself.
   Leading spaces and tabs are skipped. Trailing spaces, tabs, '\n' and '\r'
   are removed by writing a new terminating NUL. */
char *_strstrip(char *String)
{
   // Step over the leading spaces and tabs
   for (; *String == ' ' || *String == '\t'; String++);

   if (*String == 0)
      return String;

   /* Walk End back from the terminator while the character in front of it
      is trailing white space. End never drops below String, so no pointer
      before the start of the buffer is ever formed. */
   char *End = String + strlen(String);
   for (; End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'); End--);
   *End = 0;
   return String;
}
49 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
50 // ---------------------------------------------------------------------
/* Expands every tab in String, in place, into the number of spaces needed
   to reach the next multiple-of-8 column. The first character of String is
   taken to be column 0.

   String - NUL terminated text held in a writable buffer
   Len    - total size of that buffer in bytes, terminator included

   If expanding a tab would not fit into Len bytes the string is cut off at
   that tab. If no terminator is found inside the buffer the text is left as
   it is. Always returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      // Find the terminator without ever reading beyond the buffer
      const char *Nul = (const char *)memchr(I,0,String + Len - I);
      if (Nul == 0)
         return String;

      /* Assume the start of the string is 0 and find the next 8 char
         division */
      const size_t Column = I - String;
      const size_t Width = 8 - (Column % 8);
      const size_t Tail = Nul - (I + 1);

      // Text before the tab + the spaces + text after it + terminator
      if (Column + Width + Tail + 1 > Len)
      {
         *I = 0;
         return String;
      }

      // Shift the remainder (with its NUL) right and blank out the gap
      memmove(I + Width,I + 1,Tail + 1);
      memset(I,' ',Width);

      // Continue behind the spaces just written
      I += Width - 1;
   }
   return String;
}
84 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
85 // ---------------------------------------------------------------------
86 /* This grabs a single word, converts any % escaped characters to their
87 proper values and advances the pointer. Double quotes are understood
88 and stripped out as well. This is for URI/URL parsing. */
/* Pulls the next space delimited word out of String.

   String - in/out cursor; on success it is moved past the word and past the
            blanks that follow it. It is left untouched on failure.
   Res    - receives the word with %XX escapes decoded and all double quotes
            removed. Blanks inside a "..." section do not end the word.

   Returns false when only blanks (or nothing) remain, or when a double quote
   is never closed.

   The word is assembled in a string, so a long word can neither overflow a
   fixed buffer nor be truncated. The blanks skipped at the front are not
   copied into Res. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; C++);
   if (*C == 0)
      return false;

   // The word begins here
   const char * const Start = C;

   // Jump to the next word, a quoted section counts as one piece
   for (; *C != 0 && *C != ' '; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
   }

   // Now de-quote characters
   string Word;
   Word.reserve(C - Start);
   for (const char *P = Start; P != C;)
   {
      // A % escape is only decoded when both digits lie inside the word
      if (*P == '%' && P + 2 < C)
      {
         const char Hex[3] = {P[1],P[2],0};
         Word += (char)strtol(Hex,0,16);
         P += 3;
         continue;
      }

      if (*P != '"')
         Word += *P;
      P++;
   }

   // As with the former char buffer, a decoded NUL ends the result
   Res = Word.c_str();

   // Skip ending white space
   for (; *C == ' '; C++);
   String = C;
   return true;
}
139 // ParseCWord - Parses a string like a C "" expression /*{{{*/
140 // ---------------------------------------------------------------------
141 /* This expects a series of space-separated strings enclosed in ""'s.
142 It concatenates the ""'s into a single string. */
/* Parses a run of "..." sections the way adjacent C string literals are
   written and joins their contents.

   String - NUL terminated input
   Res    - receives the joined text; only written on success

   Each run of white space between two sections contributes one blank to the
   result. Returns false if the input is empty or all blanks, if a quote is
   left open, or if any character other than white space appears outside of
   the quotes.

   The result is built in a string, so the input may be of any length. */
bool ParseCWord(const char *String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; C++);
   if (*C == 0)
      return false;

   string Joined;
   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         // Copy everything up to the closing quote
         const char * const Open = C + 1;
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
         Joined.append(Open,C - Open);
         continue;
      }

      // isspace() must not be handed a negative value
      const bool Blank = isspace((unsigned char)*C) != 0;

      // Second and later blanks of a run are dropped
      if (C != String && Blank == true && isspace((unsigned char)C[-1]) != 0)
         continue;
      if (Blank == false)
         return false;
      Joined += ' ';
   }

   Res = Joined;
   return true;
}
180 // QuoteString - Convert a string into quoted form /*{{{*/
181 // ---------------------------------------------------------------------
/* Returns a copy of Str in which every unsafe byte is replaced by a %XX
   escape (two lower case hex digits).

   Str - text to quote
   Bad - NUL terminated list of additional characters that must be escaped

   A byte is escaped when it is <= 0x20, >= 0x7F or listed in Bad. Bytes are
   examined as unsigned values, so a byte with the top bit set produces a
   two digit escape instead of a sign extended one. */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
   {
      const unsigned char Ch = *I;

      // The range test comes first so strchr() never sees a NUL byte
      if (Ch <= 0x20 || Ch >= 0x7F || strchr(Bad,Ch) != 0)
      {
         char Buf[10];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
201 // SizeToStr - Convert a long into a human readable size /*{{{*/
202 // ---------------------------------------------------------------------
203 /* A max of 4 digits are shown before conversion to the next highest unit.
204 The max length of the string will be 5 chars unless the size is > 10 YottaBytes. */
/* Formats Size as a short human readable quantity with a one letter unit.

   The sign of Size is dropped. The value is divided by 1000 until it is
   below 10000 or the largest unit is reached. Scaled values below 100 are
   printed with one decimal place, everything else without decimals.

   Once the units are exhausted the value is printed in the largest unit,
   however many digits that takes, so the result is always well defined. */
string SizeToStr(double Size)
{
   double ASize;
   if (Size >= 0)
      ASize = Size;
   else
      ASize = -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'b','k','M','G','T','P','E','Z','Y'};
   const int Last = sizeof(Ext) - 1;

   // Written as !(x < y) so that a NaN also ends up in the last unit
   int I = 0;
   for (; I != Last && !(ASize < 10000); I++)
      ASize /= 1000.0;

   char S[400];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);

   return S;
}
239 // TimeToStr - Convert the time into a string /*{{{*/
240 // ---------------------------------------------------------------------
241 /* Converts a number of seconds to a hms format */
/* Formats a number of seconds as "Nd Nh Nm Ns", leaving out the leading
   units that are not needed.

   A unit is used as soon as the value reaches it, so 60 is shown as 1m0s
   and not as 60s. The conversions match the unsigned long argument. */
string TimeToStr(unsigned long Sec)
{
   const unsigned long Minute = 60;
   const unsigned long Hour = 60*Minute;
   const unsigned long Day = 24*Hour;

   char S[300];
   if (Sec >= Day)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Sec/Day,(Sec/Hour) % 24,
               (Sec/Minute) % 60,Sec % 60);
   else if (Sec >= Hour)
      snprintf(S,sizeof(S),"%luh%lum%lus",Sec/Hour,(Sec/Minute) % 60,Sec % 60);
   else if (Sec >= Minute)
      snprintf(S,sizeof(S),"%lum%lus",Sec/Minute,Sec % 60);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
273 // SubstVar - Substitute a string for another string /*{{{*/
274 // ---------------------------------------------------------------------
275 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns a copy of Str in which every occurrence of Subst has been
   replaced by Contents. The search is done left to right and continues
   behind each match, so text coming from Contents is never searched again.

   An empty Subst matches nothing and Str is returned unchanged. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern would match at every position without advancing
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one; the third
      // argument is a length, not an end position
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing matched at all
   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
295 // URItoFileName - Convert the uri into a unique file name /*{{{*/
296 // ---------------------------------------------------------------------
297 /* This converts a URI into a safe filename. It quotes all unsafe characters
298 and converts / to _ and removes the scheme identifier. The resulting
299 file name should be unique and never occur again for a different file */
300 string
URItoFileName(string URI
)
302 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
303 for (; I
< URI
.end() && *I
== '/'; I
++);
305 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
306 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
307 string::iterator J
= URI
.begin();
308 for (; J
!= URI
.end(); J
++)
314 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
315 // ---------------------------------------------------------------------
316 /* This routine performs a base64 transformation on a string. It was ripped
317 from wget and then patched and bug fixed.
319 This spec can be found in rfc2045 */
/* Returns the base64 encoding (RFC 2045 alphabet, '=' padding, no line
   breaks) of the bytes in S.

   The input is processed as unsigned bytes, so bytes with the top bit set
   index the table correctly, and positions are used instead of an iterator
   that would be stepped beyond the end of the string. */
string Base64Encode(string S)
{
   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64.  */
   const string::size_type Size = S.length();
   for (string::size_type I = 0; I < Size; I += 3)
   {
      // Bytes missing from the last group count as zero
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[I];
      if (I + 1 < Size)
         Bits[1] = S[I + 1];
      if (I + 2 < Size)
         Bits[2] = S[I + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      if (I + 1 >= Size)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      if (I + 2 >= Size)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Size % 3 == 2)
      Final += '=';
   if (Size % 3 == 1)
      Final += "==";

   return Final;
}
371 // stringcmp - Arbitrary string compare /*{{{*/
372 // ---------------------------------------------------------------------
373 /* This safely compares two non-null-terminated strings of arbitrary length. */
// Compares the character range [A,AEnd) with the range [B,BEnd). Neither
// range needs a terminating NUL because both ends are passed explicitly.
// NOTE(review): the loop body and the return statements of this definition
// are not part of this view, so the sign convention of the result cannot be
// stated from here - confirm against the complete source.
375 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
// Step through both ranges in parallel until one of them is used up
377 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
// Both ranges were consumed completely at the same time
381 if (A
== AEnd
&& B
== BEnd
)
392 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
393 // ---------------------------------------------------------------------
/* Compares the range [A,AEnd) with the range [B,BEnd) ignoring case.
   Neither range has to be NUL terminated.

   Returns 0 if both ranges have the same length and content. If one range
   ends first while everything so far was equal, the range that ended counts
   as the greater one: 1 when A ended, -1 when B ended. Otherwise the first
   differing pair of upper-cased characters decides (-1 or 1).

   Characters are converted to unsigned char before they reach toupper(),
   which must not be given a negative value. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   for (; A != AEnd && B != BEnd; A++, B++)
      if (toupper((unsigned char)*A) != toupper((unsigned char)*B))
         break;

   if (A == AEnd && B == BEnd)
      return 0;
   if (A == AEnd)
      return 1;
   if (B == BEnd)
      return -1;
   if (toupper((unsigned char)*A) < toupper((unsigned char)*B))
      return -1;
   return 1;
}
412 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
413 // ---------------------------------------------------------------------
414 /* The format is like those used in package files and the method
415 communication system */
/* Looks up "Tag: value" in a block of lines and returns the value.

   Message - the text to search, lines are separated by '\n'
   Tag     - tag name without the colon, compared ignoring case
   Default - returned when the tag is missing; a null pointer yields an
             empty string

   A tag is only recognised at the start of a line and must be followed
   directly by ':'. The value runs to the end of that line with the white
   space on both sides removed. A tag with nothing behind the colon yields
   an empty string; the search for the value never moves on to the next
   line. Every access is checked against the length of Message first. */
string LookupTag(string Message,const char *Tag,const char *Default)
{
   // Look for a matching tag.
   const string::size_type Length = strlen(Tag);
   const string::size_type Size = Message.length();
   string::size_type I = 0;
   while (I + Length < Size)
   {
      // Found the tag
      if (Message[I + Length] == ':' &&
          strncasecmp(Message.c_str() + I,Tag,Length) == 0)
      {
         // Find the end of line and strip the leading/trailing spaces
         string::size_type Start = I + Length + 1;
         for (; Start < Size && Message[Start] != '\n' &&
                isspace((unsigned char)Message[Start]) != 0; Start++);
         string::size_type Stop = Start;
         for (; Stop < Size && Message[Stop] != '\n'; Stop++);
         for (; Stop > Start &&
                isspace((unsigned char)Message[Stop - 1]) != 0; Stop--);

         return string(Message,Start,Stop - Start);
      }

      // Move on to the first character of the next line
      for (; I < Size && Message[I] != '\n'; I++);
      I++;
   }

   // Failed to find a match
   if (Default == 0)
      return string();
   return Default;
}
444 // StringToBool - Converts a string into a boolean /*{{{*/
445 // ---------------------------------------------------------------------
446 /* This inspects the string to see if it is true or if it is false and
447 then returns the result. Several variants on true/false are checked. */
/* Interprets Text as a boolean.

   Returns 1 for true, 0 for false and Default when Text is not recognised.
   A text that starts with a number counts if the number is 0 or 1. Besides
   that "no", "false", "without" and "disable" mean false and "yes", "true",
   "with" and "enable" mean true, all compared ignoring case.

   The number is tested as the long that strtol() returns. Narrowing it to
   an int first would let large values wrap around into the 0..1 range. */
int StringToBool(string Text,int Default = -1)
{
   char *End;
   const long Res = strtol(Text.c_str(),&End,0);
   if (End != Text.c_str() && Res >= 0 && Res <= 1)
      return Res;

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
472 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
473 // ---------------------------------------------------------------------
474 /* This converts a time_t into a string time representation that is
475 year 2000 compliant and timezone neutral */
/* Formats Date as an RFC 1123 time stamp in GMT, for example
   "Sun, 06 Nov 1994 08:49:37 GMT".

   The broken down time is written into a local struct by gmtime_r(), so no
   shared static buffer is involved. An empty string is returned if Date
   cannot be converted. */
string TimeRFC1123(time_t Date)
{
   struct tm Conv;
   if (gmtime_r(&Date,&Conv) == 0)
      return string();

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
491 // ReadMessages - Read messages from the FD /*{{{*/
492 // ---------------------------------------------------------------------
493 /* This pulls full messages from the input FD into the message buffer.
494 It assumes that messages will not pause during transit so no
495 fancy buffering is used. */
496 bool ReadMessages(int Fd
, vector
<string
> &List
)
503 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
505 // Process is dead, this is kind of bad..
515 // Look for the end of the message
516 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
518 if (I
[0] != '\n' || I
[1] != '\n')
521 // Pull the message out
522 string
Message(Buffer
,0,I
-Buffer
);
525 for (; I
< End
&& *I
== '\n'; I
++);
527 memmove(Buffer
,I
,End
-Buffer
);
530 List
.push_back(Message
);
535 if (WaitFd(Fd
) == false)
540 // MonthConv - Converts a month string into a number /*{{{*/
541 // ---------------------------------------------------------------------
542 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
543 Made it a bit more robust with a few touppers though. */
/* Converts an English month abbreviation ("Jan", "feb", ...) into a month
   number in the range 0..11. Case is ignored and only as many letters as
   are needed to tell the months apart are looked at. Anything that is not
   recognised is treated as January.

   No letter behind the terminating NUL of a short name is read, and the
   letters are converted to unsigned char before they reach toupper(). */
static int MonthConv(char *Month)
{
   const int First = toupper((unsigned char)Month[0]);
   const int Second = (First != 0)?toupper((unsigned char)Month[1]):0;
   const int Third = (Second != 0)?toupper((unsigned char)Month[2]):0;

   switch (First)
   {
      // Apr or Aug
      case 'A':
      return Second == 'P'?3:7;
      case 'D':
      return 11;
      case 'F':
      return 1;
      // Jan, Jun or Jul
      case 'J':
      if (Second == 'A')
         return 0;
      return Third == 'N'?5:6;
      // Mar or May
      case 'M':
      return Third == 'R'?2:4;
      case 'N':
      return 10;
      case 'O':
      return 9;
      case 'S':
      return 8;

      // Pretend it is January..
      default:
      return 0;
   }
}
573 // StrToTime - Converts a string into a time_t /*{{{*/
574 // ---------------------------------------------------------------------
575 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
576 and the C library asctime format. It requires the GNU library function
577 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
578 reason the C library does not provide any such function :<*/
579 bool StrToTime(string Val
,time_t &Result
)
583 const char *I
= Val
.c_str();
585 // Skip the day of the week
586 for (;*I
!= 0 && *I
!= ' '; I
++);
588 // Handle RFC 1123 time
589 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
590 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
592 // Handle RFC 1036 time
593 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
594 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
599 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
600 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
606 Tm
.tm_mon
= MonthConv(Month
);
609 // Convert to local time and then to GMT
610 Result
= timegm(&Tm
);
615 // URI::CopyFrom - Copy from an object /*{{{*/
616 // ---------------------------------------------------------------------
617 /* This parses the URI into all of its components */
// Splits the URI text U into the members Access, Path, Host, User, Password
// and Port, using iterators into U to mark the separators.
// NOTE(review): several statements of this definition (braces, the branch
// bodies after some of the if lines, the places where I is reset) are not
// part of this view. The comments below only describe what the visible
// lines do - confirm the details against the complete source.
618 void URI::CopyFrom(string U
)
620 string::const_iterator I
= U
.begin();
622 // Locate the first colon, this separates the scheme
623 for (; I
< U
.end() && *I
!= ':' ; I
++);
624 string::const_iterator FirstColon
= I
;
626 /* Determine if this is a host type URI with a leading double //
627 and then search for the first single / */
628 string::const_iterator SingleSlash
= I
;
// True when the two characters behind the colon are both '/'
629 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
// Advance SingleSlash to the next '/' or to the end of U
631 for (; SingleSlash
< U
.end() && *SingleSlash
!= '/'; SingleSlash
++);
// Clamp SingleSlash so it never lies behind the end of U
632 if (SingleSlash
> U
.end())
633 SingleSlash
= U
.end();
635 // We can now write the access and path specifiers
// Access is everything in front of the first colon
636 Access
= string(U
,0,FirstColon
- U
.begin());
// Path is everything from SingleSlash to the end of U
637 if (SingleSlash
!= U
.end())
638 Path
= string(U
,SingleSlash
- U
.begin());
639 if (Path
.empty() == true)
642 // Now we attempt to locate a user:pass@host fragment
644 if (FirstColon
>= U
.end())
// Keep FirstColon from lying behind SingleSlash
647 if (FirstColon
> SingleSlash
)
648 FirstColon
= SingleSlash
;
// Look for an '@' in front of SingleSlash
652 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
653 string::const_iterator At
= I
;
655 // Colon in the @ section
657 for (; I
< At
&& *I
!= ':'; I
++);
658 string::const_iterator SecondColon
= I
;
660 // Now write the host and user/pass
// At == SingleSlash means that no '@' was found
661 if (At
== SingleSlash
)
663 if (FirstColon
< SingleSlash
)
// Host is the text between FirstColon and SingleSlash
664 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
// Host is the text between the '@' and SingleSlash
668 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
// User is the text between FirstColon and SecondColon
669 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
// Password is the text between SecondColon and the '@'
670 if (SecondColon
< At
)
671 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
674 // Now we parse off a port number from the hostname
// The last ':' in Host separates the port number
676 string::size_type Pos
= Host
.rfind(':');
677 if (Pos
== string::npos
)
// The text behind the ':' is converted with atoi and cut off from Host
680 Port
= atoi(string(Host
,Pos
+1).c_str());
681 Host
= string(Host
,0,Pos
);
684 // URI::operator string - Convert the URI to a string /*{{{*/
685 // ---------------------------------------------------------------------
687 URI::operator string()
689 string Res
= Access
+ ':';
690 if (Host
.empty() == false)
693 if (User
.empty() == false)
696 if (Password
.empty() == false)
697 Res
+= ":" + Password
;
704 sprintf(S
,":%u",Port
);
709 if (Path
.empty() == false)