1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.20 1999/02/27 22:29:11 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
31 // strstrip - Remove white space from the front and back of a string /*{{{*/
32 // ---------------------------------------------------------------------
33 /* This is handy to use when parsing a file. It also removes \n's left
34 over from fgets and company */
35 char *_strstrip(char *String
)
37 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
42 char *End
= String
+ strlen(String
) - 1;
43 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
44 *End
== '\r'); End
--);
50 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
51 // ---------------------------------------------------------------------
53 char *_strtabexpand(char *String
,size_t Len
)
55 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
59 if (I
+ 8 > String
+ Len
)
65 /* Assume the start of the string is 0 and find the next 8 char
71 Len
= 8 - ((String
- I
) % 8);
79 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
80 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
85 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
86 // ---------------------------------------------------------------------
87 /* This grabs a single word, converts any % escaped characters to their
88 proper values and advances the pointer. Double quotes are understood
89 and stripped out as well. This is for URI/URL parsing. */
90 bool ParseQuoteWord(const char *&String
,string
&Res
)
92 // Skip leading whitespace
93 const char *C
= String
;
94 for (;*C
!= 0 && *C
== ' '; C
++);
98 // Jump to the next word
99 for (;*C
!= 0 && *C
!= ' '; C
++)
103 for (C
++;*C
!= 0 && *C
!= '"'; C
++);
109 // Now de-quote characters
112 const char *Start
= String
;
114 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
116 if (*Start
== '%' && Start
+ 2 < C
)
121 *I
= (char)strtol(Tmp
,0,16);
134 // Skip ending white space
135 for (;*C
!= 0 && *C
== ' '; C
++);
140 // ParseCWord - Parses a string like a C "" expression /*{{{*/
141 // ---------------------------------------------------------------------
142 /* This expects a series of space separated strings enclosed in ""'s.
143 It concatenates the ""'s into a single string. */
144 bool ParseCWord(const char *String
,string
&Res
)
146 // Skip leading whitespace
147 const char *C
= String
;
148 for (;*C
!= 0 && *C
== ' '; C
++);
154 if (strlen(String
) >= sizeof(Buffer
))
161 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
170 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
172 if (isspace(*C
) == 0)
181 // DeQuoteString - Convert a string from quoted form /*{{{*/
182 // ---------------------------------------------------------------------
183 /* This undoes QuoteString */
184 string
DeQuoteString(string Str
)
187 for (string::iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
189 if (*I
== '%' && I
+ 2 < Str
.end())
195 Res
+= (char)strtol(Tmp
,0,16);
205 // QuoteString - Convert a string into quoted form /*{{{*/
206 // ---------------------------------------------------------------------
208 string
QuoteString(string Str
,const char *Bad
)
211 for (string::iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
213 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
214 *I
<= 0x20 || *I
>= 0x7F)
217 sprintf(Buf
,"%%%02x",(int)*I
);
226 // SizeToStr - Convert a long into a human readable size /*{{{*/
227 // ---------------------------------------------------------------------
228 /* A max of 4 digits are shown before conversion to the next highest unit.
229 The max length of the string will be 5 chars unless the size is > 10
231 string
SizeToStr(double Size
)
240 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
241 ExaBytes, ZettaBytes, YottaBytes */
242 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
246 if (ASize
< 100 && I
!= 0)
248 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
254 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
264 // TimeToStr - Convert the time into a string /*{{{*/
265 // ---------------------------------------------------------------------
266 /* Converts a number of seconds to a hms format */
267 string
TimeToStr(unsigned long Sec
)
275 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
281 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
287 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
291 sprintf(S
,"%lis",Sec
);
298 // SubstVar - Substitute a string for another string /*{{{*/
299 // ---------------------------------------------------------------------
300 /* This replaces all occurrences of Subst with Contents in Str. */
301 string
SubstVar(string Str
,string Subst
,string Contents
)
303 string::size_type Pos
= 0;
304 string::size_type OldPos
= 0;
307 while (OldPos
< Str
.length() &&
308 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
310 Temp
+= string(Str
,OldPos
,Pos
) + Contents
;
311 OldPos
= Pos
+ Subst
.length();
317 return Temp
+ string(Str
,OldPos
);
320 // URItoFileName - Convert the uri into a unique file name /*{{{*/
321 // ---------------------------------------------------------------------
322 /* This converts a URI into a safe filename. It quotes all unsafe characters
323 and converts / to _ and removes the scheme identifier. The resulting
324 file name should be unique and never occur again for a different file */
325 string
URItoFileName(string URI
)
327 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
328 for (; I
< URI
.end() && *I
== '/'; I
++);
330 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
331 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
332 string::iterator J
= URI
.begin();
333 for (; J
!= URI
.end(); J
++)
339 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
340 // ---------------------------------------------------------------------
341 /* This routine performs a base64 transformation on a string. It was ripped
342 from wget and then patched and bug fixed.
344 This spec can be found in rfc2045 */
345 string
Base64Encode(string S
)
348 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
349 'I','J','K','L','M','N','O','P',
350 'Q','R','S','T','U','V','W','X',
351 'Y','Z','a','b','c','d','e','f',
352 'g','h','i','j','k','l','m','n',
353 'o','p','q','r','s','t','u','v',
354 'w','x','y','z','0','1','2','3',
355 '4','5','6','7','8','9','+','/'};
357 // Pre-allocate some space
359 Final
.reserve((4*S
.length() + 2)/3 + 2);
361 /* Transform the 3x8 bits to 4x6 bits, as required by
363 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
365 char Bits
[3] = {0,0,0};
372 Final
+= tbl
[Bits
[0] >> 2];
373 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
375 if (I
+ 1 >= S
.end())
378 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
380 if (I
+ 2 >= S
.end())
383 Final
+= tbl
[Bits
[2] & 0x3f];
386 /* Apply the padding elements, this tells how many bytes the remote
387 end should discard */
388 if (S
.length() % 3 == 2)
390 if (S
.length() % 3 == 1)
396 // stringcmp - Arbitrary string compare /*{{{*/
397 // ---------------------------------------------------------------------
398 /* This safely compares two non-null terminated strings of arbitrary
400 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
402 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
406 if (A
== AEnd
&& B
== BEnd
)
417 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
418 // ---------------------------------------------------------------------
420 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
422 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
423 if (toupper(*A
) != toupper(*B
))
426 if (A
== AEnd
&& B
== BEnd
)
432 if (toupper(*A
) < toupper(*B
))
437 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
438 // ---------------------------------------------------------------------
439 /* The format is like those used in package files and the method
440 communication system */
441 string
LookupTag(string Message
,const char *Tag
,const char *Default
)
443 // Look for a matching tag.
444 int Length
= strlen(Tag
);
445 for (string::iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
448 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
450 // Find the end of line and strip the leading/trailing spaces
453 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
454 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
455 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
457 return string(I
,J
-I
);
460 for (; *I
!= '\n' && I
< Message
.end(); I
++);
463 // Failed to find a match
469 // StringToBool - Converts a string into a boolean /*{{{*/
470 // ---------------------------------------------------------------------
471 /* This inspects the string to see if it is true or if it is false and
472 then returns the result. Several variants on true/false are checked. */
473 int StringToBool(string Text
,int Default
= -1)
476 int Res
= strtol(Text
.c_str(),&End
,0);
477 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
480 // Check for negatives
481 if (strcasecmp(Text
.c_str(),"no") == 0 ||
482 strcasecmp(Text
.c_str(),"false") == 0 ||
483 strcasecmp(Text
.c_str(),"without") == 0 ||
484 strcasecmp(Text
.c_str(),"off") == 0 ||
485 strcasecmp(Text
.c_str(),"disable") == 0)
488 // Check for positives
489 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
490 strcasecmp(Text
.c_str(),"true") == 0 ||
491 strcasecmp(Text
.c_str(),"with") == 0 ||
492 strcasecmp(Text
.c_str(),"on") == 0 ||
493 strcasecmp(Text
.c_str(),"enable") == 0)
499 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
500 // ---------------------------------------------------------------------
501 /* This converts a time_t into a string time representation that is
502 year 2000 compliant and timezone neutral */
503 string
TimeRFC1123(time_t Date
)
505 struct tm Conv
= *gmtime(&Date
);
508 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
509 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
510 "Aug","Sep","Oct","Nov","Dec"};
512 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
513 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
514 Conv
.tm_min
,Conv
.tm_sec
);
518 // ReadMessages - Read messages from the FD /*{{{*/
519 // ---------------------------------------------------------------------
520 /* This pulls full messages from the input FD into the message buffer.
521 It assumes that messages will not pause during transit so no
522 fancy buffering is used. */
523 bool ReadMessages(int Fd
, vector
<string
> &List
)
530 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
532 // Process is dead, this is kind of bad..
542 // Look for the end of the message
543 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
545 if (I
[0] != '\n' || I
[1] != '\n')
548 // Pull the message out
549 string
Message(Buffer
,0,I
-Buffer
);
552 for (; I
< End
&& *I
== '\n'; I
++);
554 memmove(Buffer
,I
,End
-Buffer
);
557 List
.push_back(Message
);
562 if (WaitFd(Fd
) == false)
567 // MonthConv - Converts a month string into a number /*{{{*/
568 // ---------------------------------------------------------------------
569 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
570 Made it a bit more robust with a few touppers though. */
571 static int MonthConv(char *Month
)
573 switch (toupper(*Month
))
576 return toupper(Month
[1]) == 'P'?3:7;
582 if (toupper(Month
[1]) == 'A')
584 return toupper(Month
[2]) == 'N'?5:6;
586 return toupper(Month
[2]) == 'R'?2:4;
594 // Pretend it is January..
600 // StrToTime - Converts a string into a time_t /*{{{*/
601 // ---------------------------------------------------------------------
602 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
603 and the C library asctime format. It requires the GNU library function
604 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
605 reason the C library does not provide any such function :<*/
606 bool StrToTime(string Val
,time_t &Result
)
610 const char *I
= Val
.c_str();
612 // Skip the day of the week
613 for (;*I
!= 0 && *I
!= ' '; I
++);
615 // Handle RFC 1123 time
616 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
617 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
619 // Handle RFC 1036 time
620 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
621 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
626 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
627 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
633 Tm
.tm_mon
= MonthConv(Month
);
636 // Convert the UTC broken-down time to a time_t
637 Result
= timegm(&Tm
);
642 // URI::CopyFrom - Copy from an object /*{{{*/
643 // ---------------------------------------------------------------------
644 /* This parses the URI into all of its components */
645 void URI::CopyFrom(string U
)
647 string::const_iterator I
= U
.begin();
649 // Locate the first colon, this separates the scheme
650 for (; I
< U
.end() && *I
!= ':' ; I
++);
651 string::const_iterator FirstColon
= I
;
653 /* Determine if this is a host type URI with a leading double //
654 and then search for the first single / */
655 string::const_iterator SingleSlash
= I
;
656 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
658 for (; SingleSlash
< U
.end() && *SingleSlash
!= '/'; SingleSlash
++);
659 if (SingleSlash
> U
.end())
660 SingleSlash
= U
.end();
662 // We can now write the access and path specifiers
663 Access
= string(U
,0,FirstColon
- U
.begin());
664 if (SingleSlash
!= U
.end())
665 Path
= string(U
,SingleSlash
- U
.begin());
666 if (Path
.empty() == true)
669 // Now we attempt to locate a user:pass@host fragment
670 if (FirstColon
[1] == '/' && FirstColon
[2] == '/')
674 if (FirstColon
>= U
.end())
677 if (FirstColon
> SingleSlash
)
678 FirstColon
= SingleSlash
;
682 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
683 string::const_iterator At
= I
;
685 // Colon in the @ section
687 for (; I
< At
&& *I
!= ':'; I
++);
688 string::const_iterator SecondColon
= I
;
690 // Now write the host and user/pass
691 if (At
== SingleSlash
)
693 if (FirstColon
< SingleSlash
)
694 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
698 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
699 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
700 if (SecondColon
< At
)
701 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
704 // Now we parse off a port number from the hostname
706 string::size_type Pos
= Host
.rfind(':');
707 if (Pos
== string::npos
)
710 Port
= atoi(string(Host
,Pos
+1).c_str());
711 Host
= string(Host
,0,Pos
);
714 // URI::operator string - Convert the URI to a string /*{{{*/
715 // ---------------------------------------------------------------------
717 URI::operator string()
719 string Res
= Access
+ ':';
720 if (Host
.empty() == false)
723 if (User
.empty() == false)
726 if (Password
.empty() == false)
727 Res
+= ":" + Password
;
734 sprintf(S
,":%u",Port
);
739 if (Path
.empty() == false)