1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
48 std::string
Strip(const std::string
&str
)
50 // ensure we have at least one character
51 if (str
.empty() == true)
54 char const * const s
= str
.c_str();
56 for (; isspace(s
[start
]) != 0; ++start
)
57 ; // find the first not-space
59 // string contains only whitespaces
63 size_t end
= str
.length() - 1;
64 for (; isspace(s
[end
]) != 0; --end
)
65 ; // find the last not-space
67 return str
.substr(start
, end
- start
+ 1);
70 bool Endswith(const std::string
&s
, const std::string
&end
)
72 if (end
.size() > s
.size())
74 return (s
.substr(s
.size() - end
.size(), s
.size()) == end
);
77 bool Startswith(const std::string
&s
, const std::string
&start
)
79 if (start
.size() > s
.size())
81 return (s
.substr(0, start
.size()) == start
);
87 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This is handy to use before displaying some information to the end user */
90 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
95 size_t insize
, bufsize
;
98 cd
= iconv_open(codeset
, "UTF-8");
99 if (cd
== (iconv_t
)(-1)) {
100 // Something went wrong
102 _error
->Error("conversion from 'UTF-8' to '%s' not available",
105 perror("iconv_open");
110 insize
= bufsize
= orig
.size();
112 inptr
= (char *)inbuf
;
113 outbuf
= new char[bufsize
];
114 size_t lastError
= -1;
118 char *outptr
= outbuf
;
119 size_t outsize
= bufsize
;
120 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
121 dest
->append(outbuf
, outptr
- outbuf
);
122 if (err
== (size_t)(-1))
129 // replace a series of unknown multibytes with a single "?"
130 if (lastError
!= insize
) {
131 lastError
= insize
- 1;
139 if (outptr
== outbuf
)
143 outbuf
= new char[bufsize
];
157 // strstrip - Remove white space from the front and back of a string /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This is handy to use when parsing a file. It also removes \n's left
160 over from fgets and company */
161 char *_strstrip(char *String
)
163 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
167 return _strrstrip(String
);
170 // strrstrip - Remove white space from the back of a string /*{{{*/
171 // ---------------------------------------------------------------------
172 char *_strrstrip(char *String
)
174 char *End
= String
+ strlen(String
) - 1;
175 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
176 *End
== '\r'); End
--);
182 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
183 // ---------------------------------------------------------------------
185 char *_strtabexpand(char *String
,size_t Len
)
187 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
191 if (I
+ 8 > String
+ Len
)
197 /* Assume the start of the string is 0 and find the next 8 char
203 Len
= 8 - ((String
- I
) % 8);
211 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
212 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
217 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This grabs a single word, converts any % escaped characters to their
220 proper values and advances the pointer. Double quotes are understood
221 and stripped out as well. This is for URI/URL parsing. It also can
222 understand [] brackets.*/
223 bool ParseQuoteWord(const char *&String
,string
&Res
)
225 // Skip leading whitespace
226 const char *C
= String
;
227 for (;*C
!= 0 && *C
== ' '; C
++);
231 // Jump to the next word
232 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
236 C
= strchr(C
+ 1, '"');
242 C
= strchr(C
+ 1, ']');
248 // Now de-quote characters
251 const char *Start
= String
;
253 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
255 if (*Start
== '%' && Start
+ 2 < C
&&
256 isxdigit(Start
[1]) && isxdigit(Start
[2]))
261 *I
= (char)strtol(Tmp
,0,16);
274 // Skip ending white space
275 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
280 // ParseCWord - Parses a string like a C "" expression /*{{{*/
281 // ---------------------------------------------------------------------
282 /* This expects a series of space separated strings enclosed in ""'s.
283 It concatenates the ""'s into a single string. */
284 bool ParseCWord(const char *&String
,string
&Res
)
286 // Skip leading whitespace
287 const char *C
= String
;
288 for (;*C
!= 0 && *C
== ' '; C
++);
294 if (strlen(String
) >= sizeof(Buffer
))
301 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
310 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
312 if (isspace(*C
) == 0)
322 // QuoteString - Convert a string into quoted form /*{{{*/
323 // ---------------------------------------------------------------------
325 string
QuoteString(const string
&Str
, const char *Bad
)
328 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); ++I
)
330 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
331 *I
== 0x25 || // percent '%' char
332 *I
<= 0x20 || *I
>= 0x7F) // control chars
335 sprintf(Buf
,"%%%02x",(int)*I
);
344 // DeQuoteString - Convert a string from quoted form /*{{{*/
345 // ---------------------------------------------------------------------
346 /* This undoes QuoteString */
347 string
DeQuoteString(const string
&Str
)
349 return DeQuoteString(Str
.begin(),Str
.end());
351 string
DeQuoteString(string::const_iterator
const &begin
,
352 string::const_iterator
const &end
)
355 for (string::const_iterator I
= begin
; I
!= end
; ++I
)
357 if (*I
== '%' && I
+ 2 < end
&&
358 isxdigit(I
[1]) && isxdigit(I
[2]))
364 Res
+= (char)strtol(Tmp
,0,16);
375 // SizeToStr - Convert a long into a human readable size /*{{{*/
376 // ---------------------------------------------------------------------
377 /* A max of 4 digits are shown before conversion to the next highest unit.
378 The max length of the string will be 5 chars unless the size is > 10
380 string
SizeToStr(double Size
)
389 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
390 ExaBytes, ZettaBytes, YottaBytes */
391 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
395 if (ASize
< 100 && I
!= 0)
397 sprintf(S
,"%'.1f %c",ASize
,Ext
[I
]);
403 sprintf(S
,"%'.0f %c",ASize
,Ext
[I
]);
413 // TimeToStr - Convert the time into a string /*{{{*/
414 // ---------------------------------------------------------------------
415 /* Converts a number of seconds to a hms format */
416 string
TimeToStr(unsigned long Sec
)
424 //d means days, h means hours, min means minutes, s means seconds
425 sprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
431 //h means hours, min means minutes, s means seconds
432 sprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
438 //min means minutes, s means seconds
439 sprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
444 sprintf(S
,_("%lis"),Sec
);
451 // SubstVar - Substitute a string for another string /*{{{*/
452 // ---------------------------------------------------------------------
453 /* This replaces all occurrences of Subst with Contents in Str. */
454 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
456 if (Subst
.empty() == true)
459 string::size_type Pos
= 0;
460 string::size_type OldPos
= 0;
463 while (OldPos
< Str
.length() &&
464 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
467 Temp
.append(Str
, OldPos
, Pos
- OldPos
);
468 if (Contents
.empty() == false)
469 Temp
.append(Contents
);
470 OldPos
= Pos
+ Subst
.length();
476 if (OldPos
>= Str
.length())
478 return Temp
+ string(Str
,OldPos
);
480 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
482 for (; Vars
->Subst
!= 0; Vars
++)
483 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
487 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
488 // ---------------------------------------------------------------------
489 /* Returns a string with the supplied separator depth + 1 times in it */
490 std::string
OutputInDepth(const unsigned long Depth
, const char* Separator
)
492 std::string output
= "";
493 for(unsigned long d
=Depth
+1; d
> 0; d
--)
494 output
.append(Separator
);
498 // URItoFileName - Convert the uri into a unique file name /*{{{*/
499 // ---------------------------------------------------------------------
500 /* This converts a URI into a safe filename. It quotes all unsafe characters
501 and converts / to _ and removes the scheme identifier. The resulting
502 file name should be unique and never occur again for a different file */
503 string
URItoFileName(const string
&URI
)
505 // Nuke 'sensitive' items
511 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
512 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
513 replace(NewURI
.begin(),NewURI
.end(),'/','_');
517 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
518 // ---------------------------------------------------------------------
519 /* This routine performs a base64 transformation on a string. It was ripped
520 from wget and then patched and bug fixed.
522 This spec can be found in rfc2045 */
523 string
Base64Encode(const string
&S
)
526 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
527 'I','J','K','L','M','N','O','P',
528 'Q','R','S','T','U','V','W','X',
529 'Y','Z','a','b','c','d','e','f',
530 'g','h','i','j','k','l','m','n',
531 'o','p','q','r','s','t','u','v',
532 'w','x','y','z','0','1','2','3',
533 '4','5','6','7','8','9','+','/'};
535 // Pre-allocate some space
537 Final
.reserve((4*S
.length() + 2)/3 + 2);
539 /* Transform the 3x8 bits to 4x6 bits, as required by
541 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
543 char Bits
[3] = {0,0,0};
550 Final
+= tbl
[Bits
[0] >> 2];
551 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
553 if (I
+ 1 >= S
.end())
556 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
558 if (I
+ 2 >= S
.end())
561 Final
+= tbl
[Bits
[2] & 0x3f];
564 /* Apply the padding elements, this tells how many bytes the remote
565 end should discard */
566 if (S
.length() % 3 == 2)
568 if (S
.length() % 3 == 1)
574 // stringcmp - Arbitrary string compare /*{{{*/
575 // ---------------------------------------------------------------------
576 /* This safely compares two non-null terminated strings of arbitrary
578 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
580 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
584 if (A
== AEnd
&& B
== BEnd
)
596 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
597 const char *B
,const char *BEnd
)
599 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
603 if (A
== AEnd
&& B
== BEnd
)
613 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
614 string::const_iterator B
,string::const_iterator BEnd
)
616 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
620 if (A
== AEnd
&& B
== BEnd
)
632 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
633 // ---------------------------------------------------------------------
635 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
637 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
638 if (tolower_ascii(*A
) != tolower_ascii(*B
))
641 if (A
== AEnd
&& B
== BEnd
)
647 if (tolower_ascii(*A
) < tolower_ascii(*B
))
652 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
653 const char *B
,const char *BEnd
)
655 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
656 if (tolower_ascii(*A
) != tolower_ascii(*B
))
659 if (A
== AEnd
&& B
== BEnd
)
665 if (tolower_ascii(*A
) < tolower_ascii(*B
))
669 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
670 string::const_iterator B
,string::const_iterator BEnd
)
672 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
673 if (tolower_ascii(*A
) != tolower_ascii(*B
))
676 if (A
== AEnd
&& B
== BEnd
)
682 if (tolower_ascii(*A
) < tolower_ascii(*B
))
688 // LookupTag - Look up the value of a tag in a tagged string /*{{{*/
689 // ---------------------------------------------------------------------
690 /* The format is like those used in package files and the method
691 communication system */
692 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
694 // Look for a matching tag.
695 int Length
= strlen(Tag
);
696 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); ++I
)
699 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
701 // Find the end of line and strip the leading/trailing spaces
702 string::const_iterator J
;
704 for (; isspace(*I
) != 0 && I
< Message
.end(); ++I
);
705 for (J
= I
; *J
!= '\n' && J
< Message
.end(); ++J
);
706 for (; J
> I
&& isspace(J
[-1]) != 0; --J
);
711 for (; *I
!= '\n' && I
< Message
.end(); ++I
);
714 // Failed to find a match
720 // StringToBool - Converts a string into a boolean /*{{{*/
721 // ---------------------------------------------------------------------
722 /* This inspects the string to see if it is true or if it is false and
723 then returns the result. Several variants of true/false are checked. */
724 int StringToBool(const string
&Text
,int Default
)
727 int Res
= strtol(Text
.c_str(),&ParseEnd
,0);
728 // ensure that the entire string was converted by strtol to avoid
729 // failures on "apt-cache show -a 0ad" where the "0" is converted
730 const char *TextEnd
= Text
.c_str()+Text
.size();
731 if (ParseEnd
== TextEnd
&& Res
>= 0 && Res
<= 1)
734 // Check for negatives (no, false, without, off, disable)
735 if (strcasecmp(Text
.c_str(),"no") == 0 ||
736 strcasecmp(Text
.c_str(),"false") == 0 ||
737 strcasecmp(Text
.c_str(),"without") == 0 ||
738 strcasecmp(Text
.c_str(),"off") == 0 ||
739 strcasecmp(Text
.c_str(),"disable") == 0)
742 // Check for positives (yes, true, with, on, enable)
743 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
744 strcasecmp(Text
.c_str(),"true") == 0 ||
745 strcasecmp(Text
.c_str(),"with") == 0 ||
746 strcasecmp(Text
.c_str(),"on") == 0 ||
747 strcasecmp(Text
.c_str(),"enable") == 0)
753 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
754 // ---------------------------------------------------------------------
755 /* This converts a time_t into a string time representation that is
756 year 2000 compliant and timezone neutral */
757 string
TimeRFC1123(time_t Date
)
760 if (gmtime_r(&Date
, &Conv
) == NULL
)
764 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
765 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
766 "Aug","Sep","Oct","Nov","Dec"};
768 snprintf(Buf
, sizeof(Buf
), "%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
769 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
770 Conv
.tm_min
,Conv
.tm_sec
);
774 // ReadMessages - Read messages from the FD /*{{{*/
775 // ---------------------------------------------------------------------
776 /* This pulls full messages from the input FD into the message buffer.
777 It assumes that messages will not pause during transit so no
778 fancy buffering is used.
780 In particular: this reads blocks from the input until it believes
781 that it's run out of input text. Each block is terminated by a
782 double newline ('\n' followed by '\n').
784 bool ReadMessages(int Fd
, vector
<string
> &List
)
787 // Represents any left-over from the previous iteration of the
788 // parse loop. (i.e., if a message is split across the end
789 // of the buffer, it goes here)
790 string PartialMessage
;
793 int const Res
= read(Fd
, Buffer
, sizeof(Buffer
));
794 if (Res
< 0 && errno
== EINTR
)
797 // process we read from has died
802 if (Res
< 0 && (errno
== EAGAIN
|| errno
== EWOULDBLOCK
))
807 // extract the message(s) from the buffer
808 char const *Start
= Buffer
;
809 char const * const End
= Buffer
+ Res
;
811 char const * NL
= (char const *) memchr(Start
, '\n', End
- Start
);
814 // end of buffer: store what we have so far and read new data in
815 PartialMessage
.append(Start
, End
- Start
);
821 if (PartialMessage
.empty() == false && Start
< End
)
823 // if we start with a new line, see if the partial message we have ended with one
824 // so that we properly detect records ending between two read() runs
825 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
826 // the case \r|\n\r\n is handled by the usual double-newline handling
827 if ((NL
- Start
) == 1 || ((NL
- Start
) == 2 && *Start
== '\r'))
829 if (APT::String::Endswith(PartialMessage
, "\n") || APT::String::Endswith(PartialMessage
, "\r\n\r"))
831 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
832 List
.push_back(PartialMessage
);
833 PartialMessage
.clear();
834 while (NL
< End
&& (*NL
== '\n' || *NL
== '\r')) ++NL
;
840 while (Start
< End
) {
841 char const * NL2
= (char const *) memchr(NL
, '\n', End
- NL
);
844 // end of buffer: store what we have so far and read new data in
845 PartialMessage
.append(Start
, End
- Start
);
850 // did we find a double newline?
851 if ((NL2
- NL
) == 1 || ((NL2
- NL
) == 2 && *NL
== '\r'))
853 PartialMessage
.append(Start
, NL2
- Start
);
854 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
855 List
.push_back(PartialMessage
);
856 PartialMessage
.clear();
857 while (NL2
< End
&& (*NL2
== '\n' || *NL2
== '\r')) ++NL2
;
863 // we have read at least one complete message and nothing left
864 if (PartialMessage
.empty() == true)
867 if (WaitFd(Fd
) == false)
872 // MonthConv - Converts a month string into a number /*{{{*/
873 // ---------------------------------------------------------------------
874 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
875 Made it a bit more robust with a few tolower_ascii though. */
876 static int MonthConv(char *Month
)
878 switch (tolower_ascii(*Month
))
881 return tolower_ascii(Month
[1]) == 'p'?3:7;
887 if (tolower_ascii(Month
[1]) == 'a')
889 return tolower_ascii(Month
[2]) == 'n'?5:6;
891 return tolower_ascii(Month
[2]) == 'r'?2:4;
899 // Pretend it is January..
905 // timegm - Internal timegm if the gnu version is not available /*{{{*/
906 // ---------------------------------------------------------------------
907 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
908 than local timezone (mktime assumes the latter).
910 This function is a nonstandard GNU extension that is also present on
911 the BSDs and maybe other systems. For others we follow the advice of
912 the manpage of timegm and use its portable replacement. */
914 static time_t timegm(struct tm
*t
)
916 char *tz
= getenv("TZ");
919 time_t ret
= mktime(t
);
929 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
930 // ---------------------------------------------------------------------
931 /* Tries to parse a full date as specified in RFC2616 Section 3.3.1
932 with one exception: All timezones (%Z) are accepted but the protocol
933 says that it MUST be GMT, but this one is equal to UTC which we will
934 encounter from time to time (e.g. in Release files) so we accept all
935 here and just assume it is GMT (or UTC) later on */
936 bool RFC1123StrToTime(const char* const str
,time_t &time
)
939 setlocale (LC_ALL
,"C");
941 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
942 (strptime(str
, "%a, %d %b %Y %H:%M:%S %Z", &Tm
) == NULL
&&
943 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
944 strptime(str
, "%A, %d-%b-%y %H:%M:%S %Z", &Tm
) == NULL
&&
945 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
946 strptime(str
, "%a %b %d %H:%M:%S %Y", &Tm
) == NULL
);
947 setlocale (LC_ALL
,"");
955 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
956 // ---------------------------------------------------------------------
958 bool FTPMDTMStrToTime(const char* const str
,time_t &time
)
961 // MDTM includes no whitespace, but whitespace in the format is recommended and is ignored by strptime
962 if (strptime(str
, "%Y %m %d %H %M %S", &Tm
) == NULL
)
969 // StrToTime - Converts a string into a time_t /*{{{*/
970 // ---------------------------------------------------------------------
971 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
972 and the C library asctime format. It requires the GNU library function
973 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
974 reason the C library does not provide any such function :< This also
975 handles the weird, but unambiguous FTP time format*/
976 bool StrToTime(const string
&Val
,time_t &Result
)
981 // Skip the day of the week
982 const char *I
= strchr(Val
.c_str(), ' ');
984 // Handle RFC 1123 time
986 if (sscanf(I
," %2d %3s %4d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
987 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
989 // Handle RFC 1036 time
990 if (sscanf(I
," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,
991 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
996 if (sscanf(I
," %3s %2d %2d:%2d:%2d %4d",Month
,&Tm
.tm_mday
,
997 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
1000 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
1001 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
1010 Tm
.tm_mon
= MonthConv(Month
);
1012 Tm
.tm_mon
= 0; // we don't have a month, so pick something
1015 // Convert to local time and then to GMT
1016 Result
= timegm(&Tm
);
1020 // StrToNum - Convert a fixed length string to a number /*{{{*/
1021 // ---------------------------------------------------------------------
1022 /* This is used in decoding the crazy fixed length string headers in
1023 tar and ar files. */
1024 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
1027 if (Len
>= sizeof(S
))
1032 // All spaces is a zero
1035 for (I
= 0; S
[I
] == ' '; I
++);
1040 Res
= strtoul(S
,&End
,Base
);
1047 // StrToNum - Convert a fixed length string to a number /*{{{*/
1048 // ---------------------------------------------------------------------
1049 /* This is used in decoding the crazy fixed length string headers in
1050 tar and ar files. */
1051 bool StrToNum(const char *Str
,unsigned long long &Res
,unsigned Len
,unsigned Base
)
1054 if (Len
>= sizeof(S
))
1059 // All spaces is a zero
1062 for (I
= 0; S
[I
] == ' '; I
++);
1067 Res
= strtoull(S
,&End
,Base
);
1075 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1076 // ---------------------------------------------------------------------
1077 /* This is used in decoding the 256bit encoded fixed length fields in
1079 bool Base256ToNum(const char *Str
,unsigned long long &Res
,unsigned int Len
)
1081 if ((Str
[0] & 0x80) == 0)
1085 Res
= Str
[0] & 0x7F;
1086 for(unsigned int i
= 1; i
< Len
; ++i
)
1087 Res
= (Res
<<8) + Str
[i
];
1092 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1093 // ---------------------------------------------------------------------
1094 /* This is used in decoding the 256bit encoded fixed length fields in
1096 bool Base256ToNum(const char *Str
,unsigned long &Res
,unsigned int Len
)
1098 unsigned long long Num
;
1101 rc
= Base256ToNum(Str
, Num
, Len
);
1109 // HexDigit - Convert a hex character into an integer /*{{{*/
1110 // ---------------------------------------------------------------------
1111 /* Helper for Hex2Num */
1112 static int HexDigit(int c
)
1114 if (c
>= '0' && c
<= '9')
1116 if (c
>= 'a' && c
<= 'f')
1117 return c
- 'a' + 10;
1118 if (c
>= 'A' && c
<= 'F')
1119 return c
- 'A' + 10;
1123 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1124 // ---------------------------------------------------------------------
1125 /* The length of the buffer must be exactly 1/2 the length of the string. */
1126 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
1128 if (Str
.length() != Length
*2)
1131 // Convert each digit. We store it in the same order as the string
1133 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
1135 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
1138 Num
[J
] = HexDigit(I
[0]) << 4;
1139 Num
[J
] += HexDigit(I
[1]);
1145 // TokSplitString - Split a string up by a given token /*{{{*/
1146 // ---------------------------------------------------------------------
1147 /* This is intended to be a faster splitter; it does not use dynamic
1148 memory. Input is changed to insert nulls at each token location. */
1149 bool TokSplitString(char Tok
,char *Input
,char **List
,
1150 unsigned long ListMax
)
1152 // Strip any leading spaces
1153 char *Start
= Input
;
1154 char *Stop
= Start
+ strlen(Start
);
1155 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
1157 unsigned long Count
= 0;
1161 // Skip to the next Token
1162 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
1164 // Back remove spaces
1166 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
1169 List
[Count
++] = Start
;
1170 if (Count
>= ListMax
)
1177 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1185 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1186 // ---------------------------------------------------------------------
1187 /* This can be used to split a given string up into a vector, so the
1188 purpose is the same as in the method above. This one is a bit slower,
1189 but the advantage is that we get an iterable vector */
1190 vector
<string
> VectorizeString(string
const &haystack
, char const &split
)
1192 vector
<string
> exploded
;
1193 if (haystack
.empty() == true)
1195 string::const_iterator start
= haystack
.begin();
1196 string::const_iterator end
= start
;
1198 for (; end
!= haystack
.end() && *end
!= split
; ++end
);
1199 exploded
.push_back(string(start
, end
));
1201 } while (end
!= haystack
.end() && (++end
) != haystack
.end());
1205 // StringSplit - split a string into a string vector by token /*{{{*/
1206 // ---------------------------------------------------------------------
1207 /* See header for details.
1209 vector
<string
> StringSplit(std::string
const &s
, std::string
const &sep
,
1210 unsigned int maxsplit
)
1212 vector
<string
> split
;
1215 // no separator given, this is bogus
1220 while (pos
!= string::npos
)
1222 pos
= s
.find(sep
, start
);
1223 split
.push_back(s
.substr(start
, pos
-start
));
1225 // if maxsplit is reached, the remaining string is the last item
1226 if(split
.size() >= maxsplit
)
1228 split
[split
.size()-1] = s
.substr(start
);
1231 start
= pos
+sep
.size();
1236 // RegexChoice - Simple regex list/list matcher /*{{{*/
1237 // ---------------------------------------------------------------------
1239 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1240 const char **ListEnd
)
1242 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1245 unsigned long Hits
= 0;
1246 for (; ListBegin
< ListEnd
; ++ListBegin
)
1248 // Check if the name is a regex
1251 for (I
= *ListBegin
; *I
!= 0; I
++)
1252 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1257 // Compile the regex pattern
1260 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1266 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1271 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1275 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1280 if (R
->Hit
== false)
1290 _error
->Warning(_("Selection %s not found"),*ListBegin
);
1296 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1297 // ---------------------------------------------------------------------
1298 /* This is used to make the internationalization strings easier to translate
1299 and to allow reordering of parameters */
1300 static bool iovprintf(ostream
&out
, const char *format
,
1301 va_list &args
, ssize_t
&size
) {
1302 char *S
= (char*)malloc(size
);
1303 ssize_t
const n
= vsnprintf(S
, size
, format
, args
);
1304 if (n
> -1 && n
< size
) {
1317 void ioprintf(ostream
&out
,const char *format
,...)
1323 va_start(args
,format
);
1324 ret
= iovprintf(out
, format
, args
, size
);
1330 void strprintf(string
&out
,const char *format
,...)
1334 std::ostringstream outstr
;
1337 va_start(args
,format
);
1338 ret
= iovprintf(outstr
, format
, args
, size
);
1346 // safe_snprintf - Safer snprintf /*{{{*/
1347 // ---------------------------------------------------------------------
1348 /* This is a snprintf that will never (ever) go past 'End' and returns a
1349 pointer to the end of the new string. The returned string is always null
1350 terminated unless Buffer == end. This is a better alternative to using
1351 consecutive snprintfs. */
1352 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
1359 va_start(args
,Format
);
1360 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
1363 if (Did
< 0 || Buffer
+ Did
> End
)
1365 return Buffer
+ Did
;
1368 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1369 // ---------------------------------------------------------------------
1370 string
StripEpoch(const string
&VerStr
)
1372 size_t i
= VerStr
.find(":");
1373 if (i
== string::npos
)
1375 return VerStr
.substr(i
+1);
1378 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1379 // ---------------------------------------------------------------------
1380 /* This little function is the most called method we have and tries
1381 therefore to do the absolute minimum - and is notably faster than
1382 standard tolower/toupper and as a bonus avoids problems with different
1383 locales - we only operate on ascii chars anyway. */
1384 int tolower_ascii(int const c
)
1386 if (c
>= 'A' && c
<= 'Z')
1392 // CheckDomainList - See if Host is in a , separated list /*{{{*/
1393 // ---------------------------------------------------------------------
1394 /* The domain list is a comma separated list of domains that are suffix
1395 matched against the argument */
1396 bool CheckDomainList(const string
&Host
,const string
&List
)
1398 string::const_iterator Start
= List
.begin();
1399 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); ++Cur
)
1401 if (Cur
< List
.end() && *Cur
!= ',')
1404 // Match the end of the string..
1405 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1407 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1415 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1416 // ---------------------------------------------------------------------
1418 size_t strv_length(const char **str_array
)
1421 for (i
=0; str_array
[i
] != NULL
; i
++)
1427 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1428 // ---------------------------------------------------------------------
1430 string
DeEscapeString(const string
&input
)
1433 string::const_iterator it
;
1435 for (it
= input
.begin(); it
!= input
.end(); ++it
)
1437 // just copy non-escape chars
1444 // deal with double escape
1446 (it
+ 1 < input
.end()) && it
[1] == '\\')
1450 // advance iterator one step further
1455 // ensure we have a char to read
1456 if (it
+ 1 == input
.end())
1464 if (it
+ 2 <= input
.end()) {
1468 output
+= (char)strtol(tmp
, 0, 8);
1473 if (it
+ 2 <= input
.end()) {
1477 output
+= (char)strtol(tmp
, 0, 16);
1482 // FIXME: raise exception here?
1489 // URI::CopyFrom - Copy from an object /*{{{*/
1490 // ---------------------------------------------------------------------
1491 /* This parses the URI into all of its components */
1492 void URI::CopyFrom(const string
&U
)
1494 string::const_iterator I
= U
.begin();
1496 // Locate the first colon, this separates the scheme
1497 for (; I
< U
.end() && *I
!= ':' ; ++I
);
1498 string::const_iterator FirstColon
= I
;
1500 /* Determine if this is a host type URI with a leading double //
1501 and then search for the first single / */
1502 string::const_iterator SingleSlash
= I
;
1503 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1506 /* Find the / indicating the end of the hostname, ignoring /'s in the
1508 bool InBracket
= false;
1509 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); ++SingleSlash
)
1511 if (*SingleSlash
== '[')
1513 if (InBracket
== true && *SingleSlash
== ']')
1517 if (SingleSlash
> U
.end())
1518 SingleSlash
= U
.end();
1520 // We can now write the access and path specifiers
1521 Access
.assign(U
.begin(),FirstColon
);
1522 if (SingleSlash
!= U
.end())
1523 Path
.assign(SingleSlash
,U
.end());
1524 if (Path
.empty() == true)
1527 // Now we attempt to locate a user:pass@host fragment
1528 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1532 if (FirstColon
>= U
.end())
1535 if (FirstColon
> SingleSlash
)
1536 FirstColon
= SingleSlash
;
1538 // Find the colon...
1540 if (I
> SingleSlash
)
1542 for (; I
< SingleSlash
&& *I
!= ':'; ++I
);
1543 string::const_iterator SecondColon
= I
;
1545 // Search for the @ after the colon
1546 for (; I
< SingleSlash
&& *I
!= '@'; ++I
);
1547 string::const_iterator At
= I
;
1549 // Now write the host and user/pass
1550 if (At
== SingleSlash
)
1552 if (FirstColon
< SingleSlash
)
1553 Host
.assign(FirstColon
,SingleSlash
);
1557 Host
.assign(At
+1,SingleSlash
);
1558 // username and password must be encoded (RFC 3986)
1559 User
.assign(DeQuoteString(FirstColon
,SecondColon
));
1560 if (SecondColon
< At
)
1561 Password
.assign(DeQuoteString(SecondColon
+1,At
));
1564 // Now we parse the RFC 2732 [] hostnames.
1565 unsigned long PortEnd
= 0;
1567 for (unsigned I
= 0; I
!= Host
.length();)
1576 if (InBracket
== true && Host
[I
] == ']')
1587 if (InBracket
== true)
1593 // Now we parse off a port number from the hostname
1595 string::size_type Pos
= Host
.rfind(':');
1596 if (Pos
== string::npos
|| Pos
< PortEnd
)
1599 Port
= atoi(string(Host
,Pos
+1).c_str());
1600 Host
.assign(Host
,0,Pos
);
1603 // URI::operator string - Convert the URI to a string /*{{{*/
1604 // ---------------------------------------------------------------------
1606 URI::operator string()
1610 if (Access
.empty() == false)
1613 if (Host
.empty() == false)
1615 if (Access
.empty() == false)
1618 if (User
.empty() == false)
1620 // FIXME: Technically userinfo is permitted even less
1621 // characters than these, but this is not conveniently
1622 // expressed with a blacklist.
1623 Res
+= QuoteString(User
, ":/?#[]@");
1624 if (Password
.empty() == false)
1625 Res
+= ":" + QuoteString(Password
, ":/?#[]@");
1629 // Add RFC 2732 escaping characters
1630 if (Access
.empty() == false &&
1631 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
1632 Res
+= '[' + Host
+ ']';
1639 sprintf(S
,":%u",Port
);
1644 if (Path
.empty() == false)
1655 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1656 // ---------------------------------------------------------------------
1658 string
URI::SiteOnly(const string
&URI
)
1667 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1668 // ---------------------------------------------------------------------
1670 string
URI::NoUserPassword(const string
&URI
)