1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
/* Returns a copy of str without its leading and trailing whitespace, where
   whitespace is whatever isspace() accepts. An empty or all-whitespace
   input yields an empty string. */
std::string Strip(const std::string &str)
{
   // isspace() is only defined for EOF and values representable as
   // unsigned char, so every byte is converted before classification.
   // The scan is bounded by the length instead of relying on the
   // terminating NUL of c_str().
   size_t const length = str.length();
   size_t start = 0;
   while (start < length && isspace(static_cast<unsigned char>(str[start])) != 0)
      ++start; // find the first not-space

   // string is empty or contains only whitespaces
   if (start == length)
      return std::string();

   // str[start] is a non-space, so this loop stops at start at the latest
   size_t end = length - 1;
   while (isspace(static_cast<unsigned char>(str[end])) != 0)
      --end; // find the last not-space

   return str.substr(start, end - start + 1);
}
/* Returns true if s ends with the suffix end. An empty suffix matches
   every string. */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   // compare in place rather than building a temporary substring
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}
/* Returns true if s begins with the prefix start. An empty prefix matches
   every string. */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   // compare in place rather than building a temporary substring
   return s.compare(0, start.size(), start) == 0;
}
87 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This is handy to use before display some information for enduser */
90 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
95 size_t insize
, bufsize
;
98 cd
= iconv_open(codeset
, "UTF-8");
99 if (cd
== (iconv_t
)(-1)) {
100 // Something went wrong
102 _error
->Error("conversion from 'UTF-8' to '%s' not available",
105 perror("iconv_open");
110 insize
= bufsize
= orig
.size();
112 inptr
= (char *)inbuf
;
113 outbuf
= new char[bufsize
];
114 size_t lastError
= -1;
118 char *outptr
= outbuf
;
119 size_t outsize
= bufsize
;
120 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
121 dest
->append(outbuf
, outptr
- outbuf
);
122 if (err
== (size_t)(-1))
129 // replace a series of unknown multibytes with a single "?"
130 if (lastError
!= insize
) {
131 lastError
= insize
- 1;
139 if (outptr
== outbuf
)
143 outbuf
= new char[bufsize
];
157 // strstrip - Remove white space from the front and back of a string /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This is handy to use when parsing a file. It also removes \n's left
160 over from fgets and company */
161 char *_strstrip(char *String
)
163 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
167 return _strrstrip(String
);
170 // strrstrip - Remove white space from the back of a string /*{{{*/
171 // ---------------------------------------------------------------------
/* Cuts String off, in place, after its last character that is not a
   space, tab, newline or carriage return. Returns String. */
char *_strrstrip(char *String)
{
   // Walk back from the terminator looking at the character before End.
   // The previous loop compared against String - 1, i.e. it formed a
   // pointer in front of the buffer, which is undefined behaviour.
   char *End = String + strlen(String);
   while (End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'))
      --End;
   *End = 0;
   return String;
}
182 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
183 // ---------------------------------------------------------------------
185 char *_strtabexpand(char *String
,size_t Len
)
187 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
191 if (I
+ 8 > String
+ Len
)
197 /* Assume the start of the string is 0 and find the next 8 char
203 Len
= 8 - ((String
- I
) % 8);
211 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
212 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
217 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This grabs a single word, converts any % escaped characters to their
220 proper values and advances the pointer. Double quotes are understood
221 and stripped out as well. This is for URI/URL parsing. It also can
222 understand [] brackets.*/
/* Extracts the next word starting at String into Res and advances String
   past the word and the whitespace following it.

   A word ends at the first whitespace that is not enclosed in "" or [].
   Double quotes are dropped from the result, [] brackets are kept and
   %XX hex escapes are decoded. Returns false, leaving String and Res
   untouched, if only spaces remain or a quote/bracket is never closed. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; ++C)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   /* Now de-quote characters. The word is collected in a std::string: the
      former fixed 1024 byte buffer silently truncated longer words and
      wrote its terminator one byte past the end when it filled up.
      As before, copying starts at String, so skipped leading spaces end
      up in the result. */
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && C - Start > 2 &&
          isxdigit(static_cast<unsigned char>(Start[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(Start[2])) != 0)
      {
         char const Tmp[3] = {Start[1], Start[2], 0};
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }
   // assigning the C string keeps the old behaviour of ending the result
   // at a decoded %00
   Res = Word.c_str();

   // Skip ending white space
   while (*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0)
      ++C;
   String = C;
   return true;
}
280 // ParseCWord - Parses a string like a C "" expression /*{{{*/
281 // ---------------------------------------------------------------------
282 /* This expects a series of space separated strings enclosed in ""'s.
283 It concatenates the ""'s into a single string. */
284 bool ParseCWord(const char *&String
,string
&Res
)
286 // Skip leading whitespace
287 const char *C
= String
;
288 for (;*C
!= 0 && *C
== ' '; C
++);
294 if (strlen(String
) >= sizeof(Buffer
))
301 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
310 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
312 if (isspace(*C
) == 0)
322 // QuoteString - Convert a string into quoted form /*{{{*/
323 // ---------------------------------------------------------------------
/* Returns Str with every unsafe byte replaced by a %xx escape (two
   lowercase hex digits). Unsafe are: bytes listed in Bad, the percent
   sign, everything up to and including space, everything from 0x7F
   upwards and anything isprint() rejects. */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   static char const Hex[] = "0123456789abcdef";

   std::string Res;
   Res.reserve(Str.length());
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // Work on the unsigned byte value: formatting the (possibly
      // negative) char as int produced escapes like "%ffffffc3" for
      // bytes >= 0x80, and isprint() is undefined for negative values.
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (Ch <= 0x20 || Ch >= 0x7F ||   // control chars, space, non-ascii
          Ch == 0x25 ||                 // percent '%' char
          isprint(Ch) == 0 ||
          strchr(Bad,Ch) != 0)          // never sees NUL, handled above
      {
         Res += '%';
         Res += Hex[Ch >> 4];
         Res += Hex[Ch & 0x0F];
      }
      else
         Res += *I;
   }
   return Res;
}
342 // DeQuoteString - Convert a string from quoted form /*{{{*/
343 // ---------------------------------------------------------------------
344 /* This undoes QuoteString */
/* Decodes the %XX escapes in the range [begin,end) and returns the
   result. A '%' that is not followed by two hex digits inside the range
   is copied unchanged. */
std::string DeQuoteString(std::string::const_iterator const &begin,
                          std::string::const_iterator const &end)
{
   std::string Res;
   for (std::string::const_iterator I = begin; I != end; ++I)
   {
      // Check the remaining distance instead of computing I + 2, which
      // would form an iterator beyond end for a '%' in the last position.
      if (*I == '%' && end - I > 2 &&
          isxdigit(static_cast<unsigned char>(I[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(I[2])) != 0)
      {
         char const Tmp[3] = {I[1], I[2], 0};
         Res += (char)strtol(Tmp,0,16);
         I += 2; // the loop increment steps over the third character
         continue;
      }
      Res += *I;
   }
   return Res;
}

/* Decodes the %XX escapes in Str; this undoes QuoteString. */
std::string DeQuoteString(const std::string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
373 // SizeToStr - Convert a long into a human readable size /*{{{*/
374 // ---------------------------------------------------------------------
375 /* A max of 4 digits are shown before conversion to the next highest unit.
376 The max length of the string will be 5 chars unless the size is > 10
378 string
SizeToStr(double Size
)
386 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
387 ExaBytes, ZettaBytes, YottaBytes */
388 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
392 if (ASize
< 100 && I
!= 0)
395 strprintf(S
, "%'.1f %c", ASize
, Ext
[I
]);
402 strprintf(S
, "%'.0f %c", ASize
, Ext
[I
]);
411 // TimeToStr - Convert the time into a string /*{{{*/
412 // ---------------------------------------------------------------------
413 /* Converts a number of seconds to a hms format */
414 string
TimeToStr(unsigned long Sec
)
419 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
420 strprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
422 else if (Sec
> 60*60)
424 //TRANSLATOR: h means hours, min means minutes, s means seconds
425 strprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
429 //TRANSLATOR: min means minutes, s means seconds
430 strprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
434 //TRANSLATOR: s means seconds
435 strprintf(S
,_("%lis"),Sec
);
440 // SubstVar - Substitute a string for another string /*{{{*/
441 // ---------------------------------------------------------------------
442 /* This replaces all occurrences of Subst with Contents in Str. */
443 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
445 if (Subst
.empty() == true)
448 string::size_type Pos
= 0;
449 string::size_type OldPos
= 0;
452 while (OldPos
< Str
.length() &&
453 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
456 Temp
.append(Str
, OldPos
, Pos
- OldPos
);
457 if (Contents
.empty() == false)
458 Temp
.append(Contents
);
459 OldPos
= Pos
+ Subst
.length();
465 if (OldPos
>= Str
.length())
467 return Temp
+ string(Str
,OldPos
);
469 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
471 for (; Vars
->Subst
!= 0; Vars
++)
472 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
476 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
477 // ---------------------------------------------------------------------
478 /* Returns a string with the supplied separator depth + 1 times in it */
479 std::string
OutputInDepth(const unsigned long Depth
, const char* Separator
)
481 std::string output
= "";
482 for(unsigned long d
=Depth
+1; d
> 0; d
--)
483 output
.append(Separator
);
487 // URItoFileName - Convert the uri into a unique file name /*{{{*/
488 // ---------------------------------------------------------------------
489 /* This converts a URI into a safe filename. It quotes all unsafe characters
490 and converts / to _ and removes the scheme identifier. The resulting
491 file name should be unique and never occur again for a different file */
492 string
URItoFileName(const string
&URI
)
494 // Nuke 'sensitive' items
500 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
501 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
502 replace(NewURI
.begin(),NewURI
.end(),'/','_');
506 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
507 // ---------------------------------------------------------------------
508 /* This routine performs a base64 transformation on a string. It was ripped
509 from wget and then patched and bug fixed.
511 This spec can be found in rfc2045 */
/* Returns the base64 (rfc2045) encoding of S, padded with '=' to a
   multiple of four characters. No line breaks are inserted. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static char const tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   std::string::size_type const Len = S.length();
   std::string Final;
   Final.reserve(4 * ((Len + 2) / 3));

   /* Transform the 3x8 bits to 4x6 bits, as required by base64.
      The input is walked by index so nothing steps beyond the end of the
      string, and the bytes are held as unsigned char: with plain (signed)
      char a byte >= 0x80 shifted to a negative table index. */
   for (std::string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      std::string::size_type const Avail = Len - Pos;
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[Pos];
      if (Avail > 1)
         Bits[1] = S[Pos + 1];
      if (Avail > 2)
         Bits[2] = S[Pos + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      if (Avail == 1)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      if (Avail == 2)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
563 // stringcmp - Arbitrary string compare /*{{{*/
564 // ---------------------------------------------------------------------
565 /* This safely compares two non-null terminated strings of arbitrary
567 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
569 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
573 if (A
== AEnd
&& B
== BEnd
)
585 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
586 const char *B
,const char *BEnd
)
588 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
592 if (A
== AEnd
&& B
== BEnd
)
602 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
603 string::const_iterator B
,string::const_iterator BEnd
)
605 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
609 if (A
== AEnd
&& B
== BEnd
)
621 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
622 // ---------------------------------------------------------------------
624 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
626 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
627 if (tolower_ascii(*A
) != tolower_ascii(*B
))
630 if (A
== AEnd
&& B
== BEnd
)
636 if (tolower_ascii(*A
) < tolower_ascii(*B
))
641 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
642 const char *B
,const char *BEnd
)
644 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
645 if (tolower_ascii(*A
) != tolower_ascii(*B
))
648 if (A
== AEnd
&& B
== BEnd
)
654 if (tolower_ascii(*A
) < tolower_ascii(*B
))
658 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
659 string::const_iterator B
,string::const_iterator BEnd
)
661 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
662 if (tolower_ascii(*A
) != tolower_ascii(*B
))
665 if (A
== AEnd
&& B
== BEnd
)
671 if (tolower_ascii(*A
) < tolower_ascii(*B
))
677 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
678 // ---------------------------------------------------------------------
679 /* The format is like those used in package files and the method
680 communication system */
681 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
683 // Look for a matching tag.
684 int Length
= strlen(Tag
);
685 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); ++I
)
688 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
690 // Find the end of line and strip the leading/trailing spaces
691 string::const_iterator J
;
693 for (; isspace(*I
) != 0 && I
< Message
.end(); ++I
);
694 for (J
= I
; *J
!= '\n' && J
< Message
.end(); ++J
);
695 for (; J
> I
&& isspace(J
[-1]) != 0; --J
);
700 for (; *I
!= '\n' && I
< Message
.end(); ++I
);
703 // Failed to find a match
709 // StringToBool - Converts a string into a boolean /*{{{*/
710 // ---------------------------------------------------------------------
711 /* This inspects the string to see if it is true or if it is false and
712 then returns the result. Several variants of true/false are checked. */
/* Interprets Text as a boolean: returns 1 for "1", yes, true, with, on,
   enable; 0 for "0", no, false, without, off, disable (words compared
   case-insensitively); Default for everything else. */
int StringToBool(const std::string &Text,int Default)
{
   char *ParseEnd;
   // keep the full long: narrowing to int first turned out-of-range
   // numbers such as 4294967297 into 1
   long const Res = strtol(Text.c_str(),&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   // (an empty Text converts "entirely" as well and yields 0)
   const char *TextEnd = Text.c_str()+Text.size();
   if (ParseEnd == TextEnd && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"off") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"on") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
742 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
743 // ---------------------------------------------------------------------
744 /* This converts a time_t into a string time representation that is
745 year 2000 compliant and timezone neutral */
746 string
TimeRFC1123(time_t Date
)
749 if (gmtime_r(&Date
, &Conv
) == NULL
)
753 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
754 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
755 "Aug","Sep","Oct","Nov","Dec"};
757 snprintf(Buf
, sizeof(Buf
), "%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
758 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
759 Conv
.tm_min
,Conv
.tm_sec
);
763 // ReadMessages - Read messages from the FD /*{{{*/
764 // ---------------------------------------------------------------------
765 /* This pulls full messages from the input FD into the message buffer.
766 It assumes that messages will not pause during transit so no
767 fancy buffering is used.
769 In particular: this reads blocks from the input until it believes
770 that it's run out of input text. Each block is terminated by a
771 double newline ('\n' followed by '\n').
773 bool ReadMessages(int Fd
, vector
<string
> &List
)
776 // Represents any left-over from the previous iteration of the
777 // parse loop. (i.e., if a message is split across the end
778 // of the buffer, it goes here)
779 string PartialMessage
;
782 int const Res
= read(Fd
, Buffer
, sizeof(Buffer
));
783 if (Res
< 0 && errno
== EINTR
)
786 // process we read from has died
791 if (Res
< 0 && (errno
== EAGAIN
|| errno
== EWOULDBLOCK
))
796 // extract the message(s) from the buffer
797 char const *Start
= Buffer
;
798 char const * const End
= Buffer
+ Res
;
800 char const * NL
= (char const *) memchr(Start
, '\n', End
- Start
);
803 // end of buffer: store what we have so far and read new data in
804 PartialMessage
.append(Start
, End
- Start
);
810 if (PartialMessage
.empty() == false && Start
< End
)
812 // if we start with a new line, see if the partial message we have ended with one
813 // so that we properly detect records ending between two read() runs
814 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
815 // the case \r|\n\r\n is handled by the usual double-newline handling
816 if ((NL
- Start
) == 1 || ((NL
- Start
) == 2 && *Start
== '\r'))
818 if (APT::String::Endswith(PartialMessage
, "\n") || APT::String::Endswith(PartialMessage
, "\r\n\r"))
820 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
821 List
.push_back(PartialMessage
);
822 PartialMessage
.clear();
823 while (NL
< End
&& (*NL
== '\n' || *NL
== '\r')) ++NL
;
829 while (Start
< End
) {
830 char const * NL2
= (char const *) memchr(NL
, '\n', End
- NL
);
833 // end of buffer: store what we have so far and read new data in
834 PartialMessage
.append(Start
, End
- Start
);
839 // did we find a double newline?
840 if ((NL2
- NL
) == 1 || ((NL2
- NL
) == 2 && *NL
== '\r'))
842 PartialMessage
.append(Start
, NL2
- Start
);
843 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
844 List
.push_back(PartialMessage
);
845 PartialMessage
.clear();
846 while (NL2
< End
&& (*NL2
== '\n' || *NL2
== '\r')) ++NL2
;
852 // we have read at least one complete message and nothing left
853 if (PartialMessage
.empty() == true)
856 if (WaitFd(Fd
) == false)
861 // MonthConv - Converts a month string into a number /*{{{*/
862 // ---------------------------------------------------------------------
863 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
864 Made it a bit more robust with a few tolower_ascii though. */
865 static int MonthConv(char *Month
)
867 switch (tolower_ascii(*Month
))
870 return tolower_ascii(Month
[1]) == 'p'?3:7;
876 if (tolower_ascii(Month
[1]) == 'a')
878 return tolower_ascii(Month
[2]) == 'n'?5:6;
880 return tolower_ascii(Month
[2]) == 'r'?2:4;
888 // Pretend it is January..
894 // timegm - Internal timegm if the gnu version is not available /*{{{*/
895 // ---------------------------------------------------------------------
896 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
897 than local timezone (mktime assumes the latter).
899 This function is a nonstandard GNU extension that is also present on
900 the BSDs and maybe other systems. For others we follow the advice of
901 the manpage of timegm and use his portable replacement. */
903 static time_t timegm(struct tm
*t
)
905 char *tz
= getenv("TZ");
908 time_t ret
= mktime(t
);
918 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
919 // ---------------------------------------------------------------------
920 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
921 with one exception: All timezones (%Z) are accepted but the protocol
922 says that it MUST be GMT, but this one is equal to UTC which we will
923 encounter from time to time (e.g. in Release files) so we accept all
924 here and just assume it is GMT (or UTC) later on */
/* Parses str in one of the three date formats tried below and, on
   success, stores the result of timegm() for it in time and returns true.
   Returns false and leaves time untouched if no format matches. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   // start from a defined state, strptime only fills the fields it parses
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   /* Day and month names have to be matched in the C locale. Remember the
      locale in effect and put exactly that one back afterwards: restoring
      with "" switched the process to the environment's locale even if the
      caller had never selected it. The name is copied because the buffer
      returned by setlocale may be overwritten by the next call. */
   char const * const Current = setlocale(LC_ALL, NULL);
   std::string const OldLocale = (Current != NULL) ? Current : "";
   setlocale (LC_ALL,"C");
   bool const invalid =
   // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
   // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
   // Sun Nov  6 08:49:37 1994 ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   setlocale (LC_ALL,OldLocale.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
944 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
945 // ---------------------------------------------------------------------
947 bool FTPMDTMStrToTime(const char* const str
,time_t &time
)
950 // MDTM includes no whitespaces but recommend and ignored by strptime
951 if (strptime(str
, "%Y %m %d %H %M %S", &Tm
) == NULL
)
958 // StrToTime - Converts a string into a time_t /*{{{*/
959 // ---------------------------------------------------------------------
960 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
961 and the C library asctime format. It requires the GNU library function
962 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
963 reason the C library does not provide any such function :< This also
964 handles the weird, but unambiguous FTP time format*/
965 bool StrToTime(const string
&Val
,time_t &Result
)
970 // Skip the day of the week
971 const char *I
= strchr(Val
.c_str(), ' ');
973 // Handle RFC 1123 time
975 if (sscanf(I
," %2d %3s %4d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
976 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
978 // Handle RFC 1036 time
979 if (sscanf(I
," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,
980 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
985 if (sscanf(I
," %3s %2d %2d:%2d:%2d %4d",Month
,&Tm
.tm_mday
,
986 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
989 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
990 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
999 Tm
.tm_mon
= MonthConv(Month
);
1001 Tm
.tm_mon
= 0; // we don't have a month, so pick something
1004 // Convert to local time and then to GMT
1005 Result
= timegm(&Tm
);
1009 // StrToNum - Convert a fixed length string to a number /*{{{*/
1010 // ---------------------------------------------------------------------
1011 /* This is used in decoding the crazy fixed length string headers in
1012 tar and ar files. */
1013 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
1016 if (Len
>= sizeof(S
))
1021 // All spaces is a zero
1024 for (I
= 0; S
[I
] == ' '; I
++);
1029 Res
= strtoul(S
,&End
,Base
);
1036 // StrToNum - Convert a fixed length string to a number /*{{{*/
1037 // ---------------------------------------------------------------------
1038 /* This is used in decoding the crazy fixed length string headers in
1039 tar and ar files. */
1040 bool StrToNum(const char *Str
,unsigned long long &Res
,unsigned Len
,unsigned Base
)
1043 if (Len
>= sizeof(S
))
1048 // All spaces is a zero
1051 for (I
= 0; S
[I
] == ' '; I
++);
1056 Res
= strtoull(S
,&End
,Base
);
1064 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1065 // ---------------------------------------------------------------------
1066 /* This is used in decoding the 256bit encoded fixed length fields in
/* Decodes a big-endian base-256 field of Len bytes into Res. The top bit
   of the first byte must be set and is not part of the value. Returns
   false, leaving Res untouched, if that bit is clear or Len is 0. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // add each byte as an unsigned value: a plain (signed) char >= 0x80 was
   // sign-extended and corrupted the sum
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res<<8) + static_cast<unsigned char>(Str[i]);
   return true;
}

// Base256ToNum - Convert a fixed length binary to a number             /*{{{*/
// ---------------------------------------------------------------------
/* Variant storing into an unsigned long. Additionally returns false if
   the decoded value does not fit; Res then holds the truncated value.
   Res is left untouched if the field itself is rejected. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   unsigned long long Num = 0;
   if (Base256ToNum(Str, Num, Len) == false)
      return false;

   Res = Num;
   return Res == Num;
}
1098 // HexDigit - Convert a hex character into an integer /*{{{*/
1099 // ---------------------------------------------------------------------
1100 /* Helper for Hex2Num */
/* Returns the value of the hex digit c, or 0 if c is not a hex digit. */
static int HexDigit(int c)
{
   if (c >= '0' && c <= '9')
      return c - '0';
   if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;
   if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;
   return 0;
}

// Hex2Num - Convert a long hex number into a buffer                    /*{{{*/
// ---------------------------------------------------------------------
/* Converts the hex digits in Str into bytes stored in Num, two digits per
   byte in string order. Str must consist of exactly Length*2 hex digits,
   otherwise false is returned; Num may be partially written by then. */
bool Hex2Num(const std::string &Str,unsigned char *Num,unsigned int Length)
{
   if (Str.length() != Length*2)
      return false;

   // Convert each digit. We store it in the same order as the string
   unsigned int J = 0;
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++J, I += 2)
   {
      // isxdigit() is undefined for negative values, convert the bytes first
      if (isxdigit(static_cast<unsigned char>(I[0])) == 0 ||
          isxdigit(static_cast<unsigned char>(I[1])) == 0)
         return false;

      Num[J] = (HexDigit(I[0]) << 4) + HexDigit(I[1]);
   }

   return true;
}
1134 // TokSplitString - Split a string up by a given token /*{{{*/
1135 // ---------------------------------------------------------------------
1136 /* This is intended to be a faster splitter, it does not use dynamic
1137 memories. Input is changed to insert nulls at each token location. */
1138 bool TokSplitString(char Tok
,char *Input
,char **List
,
1139 unsigned long ListMax
)
1141 // Strip any leading spaces
1142 char *Start
= Input
;
1143 char *Stop
= Start
+ strlen(Start
);
1144 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
1146 unsigned long Count
= 0;
1150 // Skip to the next Token
1151 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
1153 // Back remove spaces
1155 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
1158 List
[Count
++] = Start
;
1159 if (Count
>= ListMax
)
1166 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1174 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1175 // ---------------------------------------------------------------------
1176 /* This can be used to split a given string up into a vector, so the
1177 purpose is the same as in the method above and this one is a bit slower
1178 also, but the advantage is that we have an iteratable vector */
/* Splits haystack at every occurrence of split and returns the pieces.
   Empty pieces are kept, except that a separator in the very last
   position does not add a trailing empty piece. An empty haystack
   yields an empty vector. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   std::string::const_iterator const last = haystack.end();
   std::string::const_iterator start = haystack.begin();
   while (true)
   {
      std::string::const_iterator end = start;
      for (; end != last && *end != split; ++end);
      exploded.push_back(std::string(start, end));

      // Stop at the end of the input or at a trailing separator. Testing
      // this before advancing keeps start from being set to end() + 1,
      // which the previous do/while loop did on its final round.
      if (end == last || end + 1 == last)
         break;
      start = end + 1;
   }
   return exploded;
}
1194 // StringSplit - split a string into a string vector by token /*{{{*/
1195 // ---------------------------------------------------------------------
1196 /* See header for details.
1198 vector
<string
> StringSplit(std::string
const &s
, std::string
const &sep
,
1199 unsigned int maxsplit
)
1201 vector
<string
> split
;
1204 // no seperator given, this is bogus
1209 while (pos
!= string::npos
)
1211 pos
= s
.find(sep
, start
);
1212 split
.push_back(s
.substr(start
, pos
-start
));
1214 // if maxsplit is reached, the remaining string is the last item
1215 if(split
.size() >= maxsplit
)
1217 split
[split
.size()-1] = s
.substr(start
);
1220 start
= pos
+sep
.size();
1225 // RegexChoice - Simple regex list/list matcher /*{{{*/
1226 // ---------------------------------------------------------------------
1228 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1229 const char **ListEnd
)
1231 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1234 unsigned long Hits
= 0;
1235 for (; ListBegin
< ListEnd
; ++ListBegin
)
1237 // Check if the name is a regex
1240 for (I
= *ListBegin
; *I
!= 0; I
++)
1241 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1246 // Compile the regex pattern
1249 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1255 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1260 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1264 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1269 if (R
->Hit
== false)
1279 _error
->Warning(_("Selection %s not found"),*ListBegin
);
1285 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1286 // ---------------------------------------------------------------------
1287 /* This is used to make the internationalization strings easier to translate
1288 and to allow reordering of parameters */
// iovprintf makes one formatting attempt into a heap buffer of size bytes.
// size is taken by reference, presumably so that a failed attempt can tell
// the caller how large the next buffer should be - confirm.
1289 static bool iovprintf(ostream
&out
, const char *format
,
1290 va_list &args
, ssize_t
&size
) {
// NOTE(review): the malloc result is handed to vsnprintf without a visible
// NULL check
1291 char *S
= (char*)malloc(size
);
1292 ssize_t
const n
= vsnprintf(S
, size
, format
, args
);
// n is the length the complete output needs (negative on error); the text
// fit, terminator included, only if 0 <= n < size
1293 if (n
> -1 && n
< size
) {
// ioprintf - printf-style formatting whose output goes to the stream out;
// the actual formatting is delegated to iovprintf.
1306 void ioprintf(ostream
&out
,const char *format
,...)
// args covers the variadic arguments that follow format
1312 va_start(args
,format
);
// ret is iovprintf's verdict for a buffer of size bytes (presumably true
// once the output fit - confirm)
1313 ret
= iovprintf(out
, format
, args
, size
);
// strprintf - printf-style formatting with a string as destination. The text
// is produced by iovprintf into the local stream outstr; out presumably
// receives the contents of outstr afterwards - confirm.
1319 void strprintf(string
&out
,const char *format
,...)
1323 std::ostringstream outstr
;
// args covers the variadic arguments that follow format
1326 va_start(args
,format
);
// ret is iovprintf's verdict for a buffer of size bytes (presumably true
// once the output fit - confirm)
1327 ret
= iovprintf(outstr
, format
, args
, size
);
1335 // safe_snprintf - Safer snprintf /*{{{*/
1336 // ---------------------------------------------------------------------
1337 /* This is a snprintf that will never (ever) go past 'End' and returns a
1338 pointer to the end of the new string. The returned string is always null
1339 terminated unless Buffer == End. This is a better alternative to using
1340 consecutive snprintfs. */
1341 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
1348 va_start(args
,Format
);
// the space handed to vsnprintf is exactly the distance from Buffer to End;
// Did is the length the complete output needs, or negative on error
1349 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
// formatting failed, or the complete output would reach beyond End
1352 if (Did
< 0 || Buffer
+ Did
> End
)
// otherwise the new end of string lies Did characters behind Buffer
1354 return Buffer
+ Did
;
1357 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1358 // ---------------------------------------------------------------------
// The epoch is taken to be everything up to and including the first ':' of
// VerStr; the result is the text following that colon.
1359 string
StripEpoch(const string
&VerStr
)
// position of the first colon, npos if there is none
1361 size_t i
= VerStr
.find(":");
// no colon, hence no epoch (presumably VerStr is returned as is - confirm)
1362 if (i
== string::npos
)
1364 return VerStr
.substr(i
+1);
1367 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1368 // ---------------------------------------------------------------------
1369 /* This little function is the most called method we have and tries
1370 therefore to do the absolute minimum - and is notably faster than
1371 standard tolower/toupper and as a bonus avoids problems with different
1372 locales - we only operate on ascii chars anyway. */
1373 int tolower_ascii(int const c
)
// only the range 'A'..'Z' is singled out; presumably these are mapped to
// lower case and every other value is handed back unchanged - confirm
1375 if (c
>= 'A' && c
<= 'Z')
1381 // CheckDomainList - See if Host is in a comma separated list /*{{{*/
1382 // ---------------------------------------------------------------------
1383 /* The domain list is a comma separated list of domains that are suffix
1384 matched against the argument */
1385 bool CheckDomainList(const string
&Host
,const string
&List
)
// Start is the beginning of the entry being scanned (presumably moved past
// each comma once an entry has been checked - confirm)
1387 string::const_iterator Start
= List
.begin();
// Cur deliberately runs up to and including List.end(), so the entry behind
// the last comma is looked at as well
1388 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); ++Cur
)
// still inside an entry; the Cur < List.end() test keeps *Cur from being
// evaluated at the end position
1390 if (Cur
< List
.end() && *Cur
!= ',')
1393 // Match the end of the string..
// [Start,Cur) is the entry: it can only be a suffix if Host is at least as
// long, and then the last Cur - Start characters of Host are compared
1394 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1396 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1404 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1405 // ---------------------------------------------------------------------
// str_array has to end with a NULL entry, otherwise the scan never stops.
1407 size_t strv_length(const char **str_array
)
// advance i to the index of the first NULL entry
1410 for (i
=0; str_array
[i
] != NULL
; i
++)
1416 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1417 // ---------------------------------------------------------------------
// Walks input once and builds output: the digits of an escape are converted
// with strtol (base 8 or base 16) and appended as a single char.
1419 string
DeEscapeString(const string
&input
)
1422 string::const_iterator it
;
1424 for (it
= input
.begin(); it
!= input
.end(); ++it
)
1426 // just copy non-escape chars
1433 // deal with double escape
// the bounds test comes first so that it[1] is only read inside the string
1435 (it
+ 1 < input
.end()) && it
[1] == '\\')
1439 // advance iterator one step further
1444 // ensure we have a char to read
1445 if (it
+ 1 == input
.end())
// NOTE(review): this bound also admits it + 2 == input.end(); if it[2] is
// read for the second digit, that is one position past the last character -
// confirm whether a strict < was intended
1453 if (it
+ 2 <= input
.end()) {
// tmp is parsed as an octal number
1457 output
+= (char)strtol(tmp
, 0, 8);
// NOTE(review): same bound as in the octal case above - confirm
1462 if (it
+ 2 <= input
.end()) {
// tmp is parsed as a hexadecimal number
1466 output
+= (char)strtol(tmp
, 0, 16);
1471 // FIXME: raise exception here?
1478 // URI::CopyFrom - Copy from an object /*{{{*/
1479 // ---------------------------------------------------------------------
1480 /* This parses the URI into all of its components */
// Fills Access, Path, Host, User, Password and Port from the text in U.
1481 void URI::CopyFrom(const string
&U
)
1483 string::const_iterator I
= U
.begin();
1485 // Locate the first colon, this separates the scheme
1486 for (; I
< U
.end() && *I
!= ':' ; ++I
);
1487 string::const_iterator FirstColon
= I
;
1489 /* Determine if this is a host type URI with a leading double //
1490 and then search for the first single / */
1491 string::const_iterator SingleSlash
= I
;
// the I + 3 bound keeps the reads of I[1] and I[2] inside the string
1492 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1495 /* Find the / indicating the end of the hostname, ignoring /'s in the
1497 bool InBracket
= false;
// a '/' only ends the search while InBracket is false
1498 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); ++SingleSlash
)
1500 if (*SingleSlash
== '[')
1502 if (InBracket
== true && *SingleSlash
== ']')
// clamp a search position that ended up beyond the string
1506 if (SingleSlash
> U
.end())
1507 SingleSlash
= U
.end();
1509 // We can now write the access and path specifiers
// Access is everything in front of the first colon, Path everything from
// SingleSlash to the end of U
1510 Access
.assign(U
.begin(),FirstColon
);
1511 if (SingleSlash
!= U
.end())
1512 Path
.assign(SingleSlash
,U
.end());
1513 if (Path
.empty() == true)
1516 // Now we attempt to locate a user:pass@host fragment
// NOTE(review): this bound admits FirstColon + 2 == U.end(), in which case
// FirstColon[2] reads the end position (e.g. for "a:/"); the test further up
// uses the stricter I + 3 < U.end() - confirm which bound is intended
1517 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1521 if (FirstColon
>= U
.end())
// never let the start of the authority part lie behind the path start
1524 if (FirstColon
> SingleSlash
)
1525 FirstColon
= SingleSlash
;
1527 // Find the colon...
1529 if (I
> SingleSlash
)
// both searches below stop at SingleSlash, i.e. never enter the path
1531 for (; I
< SingleSlash
&& *I
!= ':'; ++I
);
1532 string::const_iterator SecondColon
= I
;
1534 // Search for the @ after the colon
1535 for (; I
< SingleSlash
&& *I
!= '@'; ++I
);
1536 string::const_iterator At
= I
;
1538 // Now write the host and user/pass
// At == SingleSlash means no '@' was found in front of the path
1539 if (At
== SingleSlash
)
1541 if (FirstColon
< SingleSlash
)
1542 Host
.assign(FirstColon
,SingleSlash
);
// with an '@': the host follows it, user and password precede it
1546 Host
.assign(At
+1,SingleSlash
);
1547 // username and password must be encoded (RFC 3986)
1548 User
.assign(DeQuoteString(FirstColon
,SecondColon
));
// a password exists only if the colon was found in front of the '@'
1549 if (SecondColon
< At
)
1550 Password
.assign(DeQuoteString(SecondColon
+1,At
));
1553 // Now we parse the RFC 2732 [] hostnames.
1554 unsigned long PortEnd
= 0;
// no increment in the loop header: I is advanced inside the loop body
1556 for (unsigned I
= 0; I
!= Host
.length();)
1565 if (InBracket
== true && Host
[I
] == ']')
1576 if (InBracket
== true)
1582 // Now we parse off a port number from the hostname
// the last ':' of Host introduces the port, unless there is none or it lies
// in front of PortEnd
1584 string::size_type Pos
= Host
.rfind(':');
1585 if (Pos
== string::npos
|| Pos
< PortEnd
)
// the text behind the colon is converted with atoi; Host keeps only the
// part in front of the colon
1588 Port
= atoi(string(Host
,Pos
+1).c_str());
1589 Host
.assign(Host
,0,Pos
);
1592 // URI::operator string - Convert the URI to a string /*{{{*/
1593 // ---------------------------------------------------------------------
// The text is assembled in the stream Res from the components that are not
// empty. User and Password go through QuoteString with the character set
// ":/?#[]@"; a Host containing '/' or ':' is wrapped in [] if Access is set.
1595 URI::operator string()
1597 std::stringstream Res
;
// the scheme is followed by a colon
1599 if (Access
.empty() == false)
1600 Res
<< Access
<< ':';
1602 if (Host
.empty() == false)
1604 if (Access
.empty() == false)
1607 if (User
.empty() == false)
1609 // FIXME: Technically userinfo is permitted even less
1610 // characters than these, but this is not conveniently
1611 // expressed with a blacklist.
1612 Res
<< QuoteString(User
, ":/?#[]@");
// the password is separated from the user by a colon
1613 if (Password
.empty() == false)
1614 Res
<< ":" << QuoteString(Password
, ":/?#[]@");
1618 // Add RFC 2732 escaping characters
1619 if (Access
.empty() == false && Host
.find_first_of("/:") != string::npos
)
1620 Res
<< '[' << Host
<< ']';
1628 if (Path
.empty() == false)
1639 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1640 string
URI::SiteOnly(const string
&URI
)
1649 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
// Part of the path cleaning visible here: one trailing '/' is removed.
1650 string
URI::ArchiveOnly(const string
&URI
)
// the emptiness test comes first so that length() - 1 is a valid index
1655 if (U
.Path
.empty() == false && U
.Path
[U
.Path
.length() - 1] == '/')
// erase from the last character on, i.e. drop exactly the trailing slash
1656 U
.Path
.erase(U
.Path
.length() - 1);
1660 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1661 string
URI::NoUserPassword(const string
&URI
)