1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
/* Returns a copy of str with leading and trailing whitespace removed.
   An empty input is returned unchanged; an input consisting only of
   whitespace yields an empty string. The scan works on the c_str() view,
   so an embedded NUL reached while skipping leading whitespace is treated
   like the end of the string. */
std::string Strip(const std::string &str)
{
   // ensure we have at least one character
   if (str.empty() == true)
      return str;

   char const * const s = str.c_str();

   // isspace() is only defined for values representable as unsigned char,
   // so bytes >= 0x80 (e.g. UTF-8 sequences) must be converted first.
   size_t start = 0;
   while (isspace(static_cast<unsigned char>(s[start])) != 0)
      ++start; // stops at the first not-space, at the latest on the NUL

   // string contains only whitespaces
   if (s[start] == '\0')
      return "";

   // s[start] is a non-space character, so this loop cannot run below it
   size_t end = str.length() - 1;
   while (isspace(static_cast<unsigned char>(s[end])) != 0)
      --end;

   return str.substr(start, end - start + 1);
}
70 bool Endswith(const std::string
&s
, const std::string
&end
)
72 if (end
.size() > s
.size())
74 return (s
.compare(s
.size() - end
.size(), end
.size(), end
) == 0);
77 bool Startswith(const std::string
&s
, const std::string
&start
)
79 if (start
.size() > s
.size())
81 return (s
.compare(0, start
.size(), start
) == 0);
87 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This is handy to use before displaying information to the end user */
90 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
95 size_t insize
, bufsize
;
98 cd
= iconv_open(codeset
, "UTF-8");
99 if (cd
== (iconv_t
)(-1)) {
100 // Something went wrong
102 _error
->Error("conversion from 'UTF-8' to '%s' not available",
105 perror("iconv_open");
110 insize
= bufsize
= orig
.size();
112 inptr
= (char *)inbuf
;
113 outbuf
= new char[bufsize
];
114 size_t lastError
= -1;
118 char *outptr
= outbuf
;
119 size_t outsize
= bufsize
;
120 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
121 dest
->append(outbuf
, outptr
- outbuf
);
122 if (err
== (size_t)(-1))
129 // replace a series of unknown multibytes with a single "?"
130 if (lastError
!= insize
) {
131 lastError
= insize
- 1;
139 if (outptr
== outbuf
)
143 outbuf
= new char[bufsize
];
157 // strstrip - Remove white space from the front and back of a string /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This is handy to use when parsing a file. It also removes \n's left
160 over from fgets and company */
161 char *_strstrip(char *String
)
163 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
167 return _strrstrip(String
);
170 // strrstrip - Remove white space from the back of a string /*{{{*/
171 // ---------------------------------------------------------------------
/* Cuts trailing ' ', '\t', '\n' and '\r' characters off String in place by
   writing a new terminating NUL, and returns String itself. */
char *_strrstrip(char *String)
{
   // Count backwards with a length rather than a pointer: the previous
   // form compared against String - 1, which is not a valid pointer value
   // (and was always formed for an empty string).
   size_t Keep = strlen(String);
   while (Keep != 0)
   {
      char const Last = String[Keep - 1];
      if (Last != ' ' && Last != '\t' && Last != '\n' && Last != '\r')
         break;
      --Keep;
   }
   String[Keep] = 0;
   return String;
}
182 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
183 // ---------------------------------------------------------------------
/* Expands every tab in String, in place, into the number of spaces needed
   to reach the next 8 column division (column 0 is the start of String).
   Len is the size of the buffer String lives in. If an expansion would not
   fit into Len bytes (including the terminating NUL) the string is cut at
   that tab. Returns String.

   NOTE(review): the previous loop condition was `I != I + Len`, which never
   bounded anything, and the tab width was derived from the negative offset
   `String - I`, so most tabs were expanded to the wrong width (a leading
   tab became a single space). Confirm no caller relies on those widths. */
char *_strtabexpand(char *String,size_t Len)
{
   size_t Used = strlen(String);   // characters currently in front of the NUL

   for (size_t Pos = 0; Pos < Len && String[Pos] != 0; ++Pos)
   {
      if (String[Pos] != '\t')
         continue;

      // 1..8 spaces take us to the next multiple of 8
      size_t const Pad = 8 - (Pos % 8);

      // The tab is replaced by Pad spaces: Used - 1 + Pad characters + NUL
      if (Used + Pad > Len)
      {
         String[Pos] = 0;
         return String;
      }

      // Shift the tail behind the tab (Used - Pos - 1 chars and the NUL)
      memmove(String + Pos + Pad, String + Pos + 1, Used - Pos);
      for (size_t Fill = 0; Fill != Pad; ++Fill)
         String[Pos + Fill] = ' ';

      Used += Pad - 1;
      Pos += Pad - 1;   // the loop increment then steps past the last space
   }
   return String;
}
217 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This grabs a single word, converts any % escaped characters to their
220 proper values and advances the pointer. Double quotes are understood
221 and stripped out as well. This is for URI/URL parsing. It also can
222 understand [] brackets.*/
223 bool ParseQuoteWord(const char *&String
,string
&Res
)
225 // Skip leading whitespace
226 const char *C
= String
;
227 for (;*C
!= 0 && *C
== ' '; C
++);
231 // Jump to the next word
232 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
236 C
= strchr(C
+ 1, '"');
242 C
= strchr(C
+ 1, ']');
248 // Now de-quote characters
251 const char *Start
= String
;
253 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
255 if (*Start
== '%' && Start
+ 2 < C
&&
256 isxdigit(Start
[1]) && isxdigit(Start
[2]))
261 *I
= (char)strtol(Tmp
,0,16);
274 // Skip ending white space
275 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
280 // ParseCWord - Parses a string like a C "" expression /*{{{*/
281 // ---------------------------------------------------------------------
282 /* This expects a series of space separated strings enclosed in ""'s.
283 It concatenates the ""'s into a single string. */
284 bool ParseCWord(const char *&String
,string
&Res
)
286 // Skip leading whitespace
287 const char *C
= String
;
288 for (;*C
!= 0 && *C
== ' '; C
++);
294 if (strlen(String
) >= sizeof(Buffer
))
301 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
310 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
312 if (isspace(*C
) == 0)
322 // QuoteString - Convert a string into quoted form /*{{{*/
323 // ---------------------------------------------------------------------
/* Returns Str with every "unsafe" byte replaced by %xx (two lower case hex
   digits). A byte is unsafe if it occurs in Bad, is not printable, is the
   percent sign itself, or lies outside 0x21..0x7E. */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   std::string Res;
   Res.reserve(Str.size());
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // The classification functions and the range checks need the byte
      // value, not a possibly negative plain char.
      unsigned char const Byte = static_cast<unsigned char>(*I);

      bool const Quote = strchr(Bad,*I) != 0 || isprint(Byte) == 0 ||
                         Byte == 0x25 || // percent '%' char
                         Byte <= 0x20 || Byte >= 0x7F; // control chars
      if (Quote == false)
      {
         Res += *I;
         continue;
      }

      char Hex[4];
      snprintf(Hex, sizeof(Hex), "%%%02x", static_cast<unsigned int>(Byte));
      Res += Hex;
   }
   return Res;
}
342 // DeQuoteString - Convert a string from quoted form /*{{{*/
343 // ---------------------------------------------------------------------
344 /* This undoes QuoteString */
345 string
DeQuoteString(const string
&Str
)
347 return DeQuoteString(Str
.begin(),Str
.end());
/* Decodes the range [begin,end): every '%' followed by two hex digits is
   replaced by the byte with that value, everything else (including a '%'
   that is not followed by two hex digits) is copied unchanged. */
std::string DeQuoteString(std::string::const_iterator const &begin,
			  std::string::const_iterator const &end)
{
   std::string Res;
   for (std::string::const_iterator I = begin; I != end; ++I)
   {
      // Compare distances instead of forming I + 2, which would be an
      // iterator beyond end for a '%' in the last two positions.
      bool const Escape = *I == '%' && end - I > 2 &&
                          isxdigit(static_cast<unsigned char>(I[1])) != 0 &&
                          isxdigit(static_cast<unsigned char>(I[2])) != 0;
      if (Escape == false)
      {
         Res += *I;
         continue;
      }

      char const Tmp[3] = {I[1], I[2], 0};
      Res += static_cast<char>(strtol(Tmp,0,16));
      I += 2; // the loop increment skips the '%' itself
   }
   return Res;
}
373 // SizeToStr - Convert a long into a human readable size /*{{{*/
374 // ---------------------------------------------------------------------
375 /* A max of 4 digits are shown before conversion to the next highest unit.
376 The max length of the string will be 5 chars unless the size is > 10
378 string
SizeToStr(double Size
)
386 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
387 ExaBytes, ZettaBytes, YottaBytes */
388 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
392 if (ASize
< 100 && I
!= 0)
395 strprintf(S
, "%'.1f %c", ASize
, Ext
[I
]);
402 strprintf(S
, "%'.0f %c", ASize
, Ext
[I
]);
411 // TimeToStr - Convert the time into a string /*{{{*/
412 // ---------------------------------------------------------------------
413 /* Converts a number of seconds to a hms format */
414 string
TimeToStr(unsigned long Sec
)
419 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
420 strprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
422 else if (Sec
> 60*60)
424 //TRANSLATOR: h means hours, min means minutes, s means seconds
425 strprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
429 //TRANSLATOR: min means minutes, s means seconds
430 strprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
434 //TRANSLATOR: s means seconds
435 strprintf(S
,_("%lis"),Sec
);
440 // SubstVar - Substitute a string for another string /*{{{*/
441 // ---------------------------------------------------------------------
442 /* This replaces all occurrences of Subst with Contents in Str. */
443 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
445 if (Subst
.empty() == true)
448 string::size_type Pos
= 0;
449 string::size_type OldPos
= 0;
452 while (OldPos
< Str
.length() &&
453 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
456 Temp
.append(Str
, OldPos
, Pos
- OldPos
);
457 if (Contents
.empty() == false)
458 Temp
.append(Contents
);
459 OldPos
= Pos
+ Subst
.length();
465 if (OldPos
>= Str
.length())
468 Temp
.append(Str
, OldPos
, string::npos
);
471 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
473 for (; Vars
->Subst
!= 0; Vars
++)
474 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
478 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
479 // ---------------------------------------------------------------------
480 /* Returns a string with the supplied separator depth + 1 times in it */
481 std::string
OutputInDepth(const unsigned long Depth
, const char* Separator
)
483 std::string output
= "";
484 for(unsigned long d
=Depth
+1; d
> 0; d
--)
485 output
.append(Separator
);
489 // URItoFileName - Convert the uri into a unique file name /*{{{*/
490 // ---------------------------------------------------------------------
491 /* This converts a URI into a safe filename. It quotes all unsafe characters
492 and converts / to _ and removes the scheme identifier. The resulting
493 file name should be unique and never occur again for a different file */
494 string
URItoFileName(const string
&URI
)
496 // Nuke 'sensitive' items
502 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
503 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
504 replace(NewURI
.begin(),NewURI
.end(),'/','_');
508 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
509 // ---------------------------------------------------------------------
510 /* This routine performs a base64 transformation on a string. It was ripped
511 from wget and then patched and bug fixed.
513 This spec can be found in rfc2045 */
/* Returns the base64 representation of S (alphabet A-Z a-z 0-9 + /),
   padded with '=' to a multiple of four characters. No line breaks are
   inserted. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static char const tbl[64] = {'A','B','C','D','E','F','G','H',
				'I','J','K','L','M','N','O','P',
				'Q','R','S','T','U','V','W','X',
				'Y','Z','a','b','c','d','e','f',
				'g','h','i','j','k','l','m','n',
				'o','p','q','r','s','t','u','v',
				'w','x','y','z','0','1','2','3',
				'4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits. Indexes are used instead of
      iterators so that nothing is ever advanced beyond the end, and the
      bytes are held as unsigned char so that values >= 0x80 cannot turn
      into negative table indexes. */
   size_t const Len = S.length();
   for (size_t Pos = 0; Pos < Len; Pos += 3)
   {
      size_t const Avail = Len - Pos;   // input bytes left, at least 1

      unsigned char Bits[3] = {0,0,0};
      Bits[0] = static_cast<unsigned char>(S[Pos]);
      if (Avail > 1)
	 Bits[1] = static_cast<unsigned char>(S[Pos + 1]);
      if (Avail > 2)
	 Bits[2] = static_cast<unsigned char>(S[Pos + 2]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      if (Avail < 2)
	 break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      if (Avail < 3)
	 break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
565 // stringcmp - Arbitrary string compare /*{{{*/
566 // ---------------------------------------------------------------------
567 /* This safely compares two non-null terminated strings of arbitrary
569 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
571 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
575 if (A
== AEnd
&& B
== BEnd
)
587 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
588 const char *B
,const char *BEnd
)
590 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
594 if (A
== AEnd
&& B
== BEnd
)
604 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
605 string::const_iterator B
,string::const_iterator BEnd
)
607 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
611 if (A
== AEnd
&& B
== BEnd
)
623 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
624 // ---------------------------------------------------------------------
626 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
628 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
629 if (tolower_ascii(*A
) != tolower_ascii(*B
))
632 if (A
== AEnd
&& B
== BEnd
)
638 if (tolower_ascii(*A
) < tolower_ascii(*B
))
643 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
644 const char *B
,const char *BEnd
)
646 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
647 if (tolower_ascii(*A
) != tolower_ascii(*B
))
650 if (A
== AEnd
&& B
== BEnd
)
656 if (tolower_ascii(*A
) < tolower_ascii(*B
))
660 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
661 string::const_iterator B
,string::const_iterator BEnd
)
663 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
664 if (tolower_ascii(*A
) != tolower_ascii(*B
))
667 if (A
== AEnd
&& B
== BEnd
)
673 if (tolower_ascii(*A
) < tolower_ascii(*B
))
679 // LookupTag - Look up the value of a tag in a tagged string /*{{{*/
680 // ---------------------------------------------------------------------
681 /* The format is like those used in package files and the method
682 communication system */
683 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
685 // Look for a matching tag.
686 int Length
= strlen(Tag
);
687 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); ++I
)
690 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
692 // Find the end of line and strip the leading/trailing spaces
693 string::const_iterator J
;
695 for (; isspace_ascii(*I
) != 0 && I
< Message
.end(); ++I
);
696 for (J
= I
; *J
!= '\n' && J
< Message
.end(); ++J
);
697 for (; J
> I
&& isspace_ascii(J
[-1]) != 0; --J
);
702 for (; *I
!= '\n' && I
< Message
.end(); ++I
);
705 // Failed to find a match
711 // StringToBool - Converts a string into a boolean /*{{{*/
712 // ---------------------------------------------------------------------
713 /* This inspects the string to see if it is true or if it is false and
714 then returns the result. Several variants of true/false are checked. */
/* Returns 1 if Text spells a true value, 0 if it spells a false value and
   Default otherwise. Accepted are the numbers 0 and 1 (in any base strtol
   understands with base 0) and, ignoring case, no/false/without/off/disable
   as well as yes/true/with/on/enable.
   Note that an empty Text takes the numeric path: strtol consumes nothing,
   the end pointer equals the end of the text and the result is 0. */
int StringToBool(const std::string &Text,int Default)
{
   char const * const Str = Text.c_str();

   // Keep the full long result: narrowing it to int before the range check
   // lets values such as 4294967296 wrap around to 0.
   char *ParseEnd;
   long const Res = strtol(Str,&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   const char * const TextEnd = Str + Text.size();
   if (ParseEnd == TextEnd && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   if (strcasecmp(Str,"no") == 0 ||
       strcasecmp(Str,"false") == 0 ||
       strcasecmp(Str,"without") == 0 ||
       strcasecmp(Str,"off") == 0 ||
       strcasecmp(Str,"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Str,"yes") == 0 ||
       strcasecmp(Str,"true") == 0 ||
       strcasecmp(Str,"with") == 0 ||
       strcasecmp(Str,"on") == 0 ||
       strcasecmp(Str,"enable") == 0)
      return 1;

   return Default;
}
744 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
745 // ---------------------------------------------------------------------
746 /* This converts a time_t into a string time representation that is
747 year 2000 compliant and timezone neutral */
748 string
TimeRFC1123(time_t Date
)
751 if (gmtime_r(&Date
, &Conv
) == NULL
)
755 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
756 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
757 "Aug","Sep","Oct","Nov","Dec"};
759 snprintf(Buf
, sizeof(Buf
), "%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
760 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
761 Conv
.tm_min
,Conv
.tm_sec
);
765 // ReadMessages - Read messages from the FD /*{{{*/
766 // ---------------------------------------------------------------------
767 /* This pulls full messages from the input FD into the message buffer.
768 It assumes that messages will not pause during transit so no
769 fancy buffering is used.
771 In particular: this reads blocks from the input until it believes
772 that it's run out of input text. Each block is terminated by a
773 double newline ('\n' followed by '\n').
775 bool ReadMessages(int Fd
, vector
<string
> &List
)
778 // Represents any left-over from the previous iteration of the
779 // parse loop. (i.e., if a message is split across the end
780 // of the buffer, it goes here)
781 string PartialMessage
;
784 int const Res
= read(Fd
, Buffer
, sizeof(Buffer
));
785 if (Res
< 0 && errno
== EINTR
)
788 // process we read from has died
793 #if EAGAIN != EWOULDBLOCK
794 if (Res
< 0 && (errno
== EAGAIN
|| errno
== EWOULDBLOCK
))
796 if (Res
< 0 && errno
== EAGAIN
)
802 // extract the message(s) from the buffer
803 char const *Start
= Buffer
;
804 char const * const End
= Buffer
+ Res
;
806 char const * NL
= (char const *) memchr(Start
, '\n', End
- Start
);
809 // end of buffer: store what we have so far and read new data in
810 PartialMessage
.append(Start
, End
- Start
);
816 if (PartialMessage
.empty() == false && Start
< End
)
818 // if we start with a new line, see if the partial message we have ended with one
819 // so that we properly detect records ending between two read() runs
820 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
821 // the case \r|\n\r\n is handled by the usual double-newline handling
822 if ((NL
- Start
) == 1 || ((NL
- Start
) == 2 && *Start
== '\r'))
824 if (APT::String::Endswith(PartialMessage
, "\n") || APT::String::Endswith(PartialMessage
, "\r\n\r"))
826 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
827 List
.push_back(PartialMessage
);
828 PartialMessage
.clear();
829 while (NL
< End
&& (*NL
== '\n' || *NL
== '\r')) ++NL
;
835 while (Start
< End
) {
836 char const * NL2
= (char const *) memchr(NL
, '\n', End
- NL
);
839 // end of buffer: store what we have so far and read new data in
840 PartialMessage
.append(Start
, End
- Start
);
845 // did we find a double newline?
846 if ((NL2
- NL
) == 1 || ((NL2
- NL
) == 2 && *NL
== '\r'))
848 PartialMessage
.append(Start
, NL2
- Start
);
849 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
850 List
.push_back(PartialMessage
);
851 PartialMessage
.clear();
852 while (NL2
< End
&& (*NL2
== '\n' || *NL2
== '\r')) ++NL2
;
858 // we have read at least one complete message and nothing left
859 if (PartialMessage
.empty() == true)
862 if (WaitFd(Fd
) == false)
867 // MonthConv - Converts a month string into a number /*{{{*/
868 // ---------------------------------------------------------------------
869 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
870 Made it a bit more robust with a few tolower_ascii though. */
871 static int MonthConv(char *Month
)
873 switch (tolower_ascii(*Month
))
876 return tolower_ascii(Month
[1]) == 'p'?3:7;
882 if (tolower_ascii(Month
[1]) == 'a')
884 return tolower_ascii(Month
[2]) == 'n'?5:6;
886 return tolower_ascii(Month
[2]) == 'r'?2:4;
894 // Pretend it is January..
900 // timegm - Internal timegm if the gnu version is not available /*{{{*/
901 // ---------------------------------------------------------------------
902 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
903 than local timezone (mktime assumes the latter).
905 This function is a nonstandard GNU extension that is also present on
906 the BSDs and maybe other systems. For others we follow the advice of
907 the manpage of timegm and use his portable replacement. */
909 static time_t timegm(struct tm
*t
)
911 char *tz
= getenv("TZ");
914 time_t ret
= mktime(t
);
924 // RFC1123StrToTime - Converts an HTTP/1.1 full date string into a time_t /*{{{*/
925 // ---------------------------------------------------------------------
926 /* Tries to parse a full date as specified in RFC2616 Section 3.3.1
927 with one exception: All timezones (%Z) are accepted but the protocol
928 says that it MUST be GMT, but this one is equal to UTC which we will
929 encounter from time to time (e.g. in Release files) so we accept all
930 here and just assume it is GMT (or UTC) later on */
931 bool RFC1123StrToTime(const char* const str
,time_t &time
)
934 setlocale (LC_ALL
,"C");
936 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
937 (strptime(str
, "%a, %d %b %Y %H:%M:%S %Z", &Tm
) == NULL
&&
938 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
939 strptime(str
, "%A, %d-%b-%y %H:%M:%S %Z", &Tm
) == NULL
&&
940 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
941 strptime(str
, "%a %b %d %H:%M:%S %Y", &Tm
) == NULL
);
942 setlocale (LC_ALL
,"");
950 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
951 // ---------------------------------------------------------------------
953 bool FTPMDTMStrToTime(const char* const str
,time_t &time
)
956 // MDTM includes no whitespaces but recommend and ignored by strptime
957 if (strptime(str
, "%Y %m %d %H %M %S", &Tm
) == NULL
)
964 // StrToTime - Converts a string into a time_t /*{{{*/
965 // ---------------------------------------------------------------------
966 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
967 and the C library asctime format. It requires the GNU library function
968 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
969 reason the C library does not provide any such function :< This also
970 handles the weird, but unambiguous FTP time format*/
971 bool StrToTime(const string
&Val
,time_t &Result
)
976 // Skip the day of the week
977 const char *I
= strchr(Val
.c_str(), ' ');
979 // Handle RFC 1123 time
981 if (sscanf(I
," %2d %3s %4d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
982 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
984 // Handle RFC 1036 time
985 if (sscanf(I
," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,
986 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
991 if (sscanf(I
," %3s %2d %2d:%2d:%2d %4d",Month
,&Tm
.tm_mday
,
992 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
995 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
996 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
1005 Tm
.tm_mon
= MonthConv(Month
);
1007 Tm
.tm_mon
= 0; // we don't have a month, so pick something
1010 // Convert to local time and then to GMT
1011 Result
= timegm(&Tm
);
1015 // StrToNum - Convert a fixed length string to a number /*{{{*/
1016 // ---------------------------------------------------------------------
1017 /* This is used in decoding the crazy fixed length string headers in
1018 tar and ar files. */
1019 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
1022 if (Len
>= sizeof(S
))
1027 // All spaces is a zero
1030 for (I
= 0; S
[I
] == ' '; I
++);
1035 Res
= strtoul(S
,&End
,Base
);
1042 // StrToNum - Convert a fixed length string to a number /*{{{*/
1043 // ---------------------------------------------------------------------
1044 /* This is used in decoding the crazy fixed length string headers in
1045 tar and ar files. */
1046 bool StrToNum(const char *Str
,unsigned long long &Res
,unsigned Len
,unsigned Base
)
1049 if (Len
>= sizeof(S
))
1054 // All spaces is a zero
1057 for (I
= 0; S
[I
] == ' '; I
++);
1062 Res
= strtoull(S
,&End
,Base
);
1070 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1071 // ---------------------------------------------------------------------
1072 /* This is used in decoding the 256bit encoded fixed length fields in
/* Decodes a big-endian binary field of Len bytes into Res. The field is
   only accepted if the top bit of the first byte is set; that bit is not
   part of the value. Returns false (Res untouched) otherwise.
   NOTE(review): Str[0] is read even for Len == 0, as before. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Add each byte by value: a plain (signed) char >= 0x80 would be
   // sign-extended and subtract from the result instead.
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}

// Base256ToNum - Convert a fixed length binary to a number
// ---------------------------------------------------------------------
/* Same decoding as the unsigned long long variant, but additionally fails
   if the decoded value does not fit into an unsigned long. Res is
   overwritten in every case (with 0 if the field is rejected). */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   // Initialised so that a rejected field never copies an indeterminate value
   unsigned long long Num = 0;
   bool const rc = Base256ToNum(Str, Num, Len);

   Res = Num;
   if (Res != Num)
      return false;

   return rc;
}
1104 // HexDigit - Convert a hex character into an integer /*{{{*/
1105 // ---------------------------------------------------------------------
1106 /* Helper for Hex2Num */
1107 static int HexDigit(int c
)
1109 if (c
>= '0' && c
<= '9')
1111 if (c
>= 'a' && c
<= 'f')
1112 return c
- 'a' + 10;
1113 if (c
>= 'A' && c
<= 'F')
1114 return c
- 'A' + 10;
1118 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1119 // ---------------------------------------------------------------------
1120 /* The length of the buffer must be exactly 1/2 the length of the string. */
1121 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
1123 return Hex2Num(APT::StringView(Str
), Num
, Length
);
1126 bool Hex2Num(const APT::StringView Str
,unsigned char *Num
,unsigned int Length
)
1128 if (Str
.length() != Length
*2)
1131 // Convert each digit. We store it in the same order as the string
1133 for (auto I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
1135 int first_half
= HexDigit(I
[0]);
1140 second_half
= HexDigit(I
[1]);
1141 if (second_half
< 0)
1143 Num
[J
] = first_half
<< 4;
1144 Num
[J
] += second_half
;
1150 // TokSplitString - Split a string up by a given token /*{{{*/
1151 // ---------------------------------------------------------------------
1152 /* This is intended to be a faster splitter, it does not use dynamic
1153 memory. Input is changed to insert nulls at each token location. */
/* Splits Input at every Tok character, in place: each piece is terminated
   by writing a NUL into Input, and pointers to the pieces (without
   surrounding whitespace) are stored in List, followed by a null pointer.
   ListMax is the number of slots in List including the one for that
   terminator. Returns false if List is too small; List then holds the
   pieces that fit, still null terminated. */
bool TokSplitString(char Tok,char *Input,char **List,
		    unsigned long ListMax)
{
   // Without a single slot not even the terminator can be stored
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. isspace() needs the byte value, a negative
   // plain char is outside its domain.
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
	     isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
	 // The last slot is needed for the terminator
	 List[Count-1] = 0;
	 return false;
      }

      // Advance pos over separators, whitespace and the NULs written above
      for (; Pos != Stop && (*Pos == Tok ||
	     isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1190 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1191 // ---------------------------------------------------------------------
1192 /* This can be used to split a given string up into a vector, so the
1193 purpose is the same as in the method above and this one is a bit slower
1194 also, but the advantage is that we have an iterable vector */
/* Splits haystack at every occurrence of split and returns the pieces in
   order. Empty pieces are kept, with one exception: a separator at the very
   end of haystack does not produce a trailing empty piece. An empty
   haystack yields an empty vector. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   std::string::const_iterator const last = haystack.end();
   std::string::const_iterator start = haystack.begin();
   while (true)
   {
      // find the end of the current piece
      std::string::const_iterator end = start;
      for (; end != last && *end != split; ++end);
      exploded.push_back(std::string(start, end));

      // Only step over a separator that is really there; the old code
      // computed end + 1 even when end was already the end of the string.
      if (end == last)
	 break;
      start = end + 1;
      if (start == last)
	 break;
   }
   return exploded;
}
1210 // StringSplit - split a string into a string vector by token /*{{{*/
1211 // ---------------------------------------------------------------------
1212 /* See header for details.
1214 vector
<string
> StringSplit(std::string
const &s
, std::string
const &sep
,
1215 unsigned int maxsplit
)
1217 vector
<string
> split
;
1220 // no separator given, this is bogus
1225 while (pos
!= string::npos
)
1227 pos
= s
.find(sep
, start
);
1228 split
.push_back(s
.substr(start
, pos
-start
));
1230 // if maxsplit is reached, the remaining string is the last item
1231 if(split
.size() >= maxsplit
)
1233 split
[split
.size()-1] = s
.substr(start
);
1236 start
= pos
+sep
.size();
1241 // RegexChoice - Simple regex list/list matcher /*{{{*/
1242 // ---------------------------------------------------------------------
1244 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1245 const char **ListEnd
)
1247 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1250 unsigned long Hits
= 0;
1251 for (; ListBegin
< ListEnd
; ++ListBegin
)
1253 // Check if the name is a regex
1256 for (I
= *ListBegin
; *I
!= 0; I
++)
1257 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1262 // Compile the regex pattern
1265 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1271 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1276 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1280 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1285 if (R
->Hit
== false)
1295 _error
->Warning(_("Selection %s not found"),*ListBegin
);
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* iovprintf - one formatting attempt with a scratch buffer of size bytes.
   On success the formatted text is streamed to out and true is returned.
   If the text did not fit, nothing is written, size is updated to the size
   to try next (the exact need when vsnprintf reports it, otherwise twice
   the previous size) and false is returned. args is consumed by the call,
   so the caller has to restart its va_list before trying again. */
static bool iovprintf(std::ostream &out, const char *format,
		      va_list &args, ssize_t &size) {
   // A vector owns the scratch space: a failed allocation throws instead of
   // handing a null pointer to vsnprintf, and the memory is released on
   // every path, including when the stream insertion throws.
   std::vector<char> buffer(size > 0 ? static_cast<size_t>(size) : 0);
   ssize_t const n = vsnprintf(buffer.data(), buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << buffer.data();
      return true;
   }

   if (n > -1)
      size = n + 1;	// vsnprintf told us exactly how much is needed
   else
      size *= 2;	// no usable length reported, just grow
   return false;
}

/* ioprintf - printf-style formatting written to an output stream.
   Retries with a larger buffer until the whole text fits. */
void ioprintf(std::ostream &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   while (true) {
      // the argument list must be restarted for every attempt
      va_start(args,format);
      bool const ret = iovprintf(out, format, args, size);
      va_end(args);
      if (ret == true)
	 return;
   }
}

/* strprintf - printf-style formatting stored into a string.
   out is replaced by the formatted text. */
void strprintf(std::string &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   std::ostringstream outstr;
   while (true) {
      // the argument list must be restarted for every attempt
      va_start(args,format);
      bool const ret = iovprintf(outstr, format, args, size);
      va_end(args);
      if (ret == true)
	 break;
   }
   out = outstr.str();
}
									/*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. When the text had to be truncated, or
   formatting failed, End is returned, so a following call that continues
   at the returned pointer writes nothing. This is a better alternative to
   using consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room at all (or an inverted range): nothing can be written.
   if (End <= Buffer)
      return End;

   std::ptrdiff_t const Room = End - Buffer;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,static_cast<size_t>(Room),Format,args);
   va_end(args);

   /* Compare lengths rather than pointers: Buffer + Did may lie far beyond
      End, and forming such a pointer is undefined behaviour. */
   if (Did < 0 || Did >= Room)
      return End;
   return Buffer + Did;
}
									/*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Everything up to and including the first colon is dropped. A version
   without a colon is returned unchanged. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const colon = VerStr.find(":");
   return (colon == std::string::npos) ? VerStr : VerStr.substr(colon + 1);
}
									/*}}}*/
1384 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1385 // ---------------------------------------------------------------------
1386 /* This little function is the most called method we have and tries
1387 therefore to do the absolut minimum - and is notable faster than
1388 standard tolower/toupper and as a bonus avoids problems with different
1389 locales - we only operate on ascii chars anyway. */
1390 #undef tolower_ascii
1391 int tolower_ascii(int const c
) APT_CONST APT_COLD
;
1392 int tolower_ascii(int const c
)
1394 return tolower_ascii_inline(c
);
1398 // isspace_ascii - isspace() function that ignores the locale /*{{{*/
1399 // ---------------------------------------------------------------------
1400 /* This little function is one of the most called methods we have and tries
1401 therefore to do the absolut minimum - and is notable faster than
1402 standard isspace() and as a bonus avoids problems with different
1403 locales - we only operate on ascii chars anyway. */
1404 #undef isspace_ascii
1405 int isspace_ascii(int const c
) APT_CONST APT_COLD
;
1406 int isspace_ascii(int const c
)
1408 return isspace_ascii_inline(c
);
1412 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1413 // ---------------------------------------------------------------------
1414 /* The domain list is a comma separate list of domains that are suffix
1415 matched against the argument */
1416 bool CheckDomainList(const string
&Host
,const string
&List
)
1418 string::const_iterator Start
= List
.begin();
1419 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); ++Cur
)
1421 if (Cur
< List
.end() && *Cur
!= ',')
1424 // Match the end of the string..
1425 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1427 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries in front of the terminating NULL pointer. The array
   must be NULL-terminated. */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return static_cast<size_t>(cursor - str_array);
}
									/*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input with the escapes resolved:
     \\    becomes a single backslash
     \0XX  becomes the character with the octal value XX
     \xXX  becomes the character with the hexadecimal value XX
   Any other escape is dropped together with its backslash, and so is a
   lone backslash at the end of the input. A numeric escape that is not
   followed by two characters is dropped as well (the characters after it
   are copied normally); it is never read beyond the end of the input. */
std::string DeEscapeString(const std::string &input)
{
   std::string output;
   std::string::size_type const len = input.size();

   for (std::string::size_type i = 0; i < len; ++i)
   {
      // just copy non-escape chars
      if (input[i] != '\\')
      {
         output += input[i];
         continue;
      }

      // ensure we have a char to read
      if (i + 1 == len)
         break;

      // read it
      ++i;
      switch (input[i])
      {
         // deal with double escape
         case '\\':
            output += '\\';
            break;

         case '0':
         case 'x':
         {
            // both digits must really exist (two more characters after
            // the escape letter), otherwise the escape is incomplete
            if (len - i <= 2)
               break;
            char const digits[3] = { input[i + 1], input[i + 2], '\0' };
            int const base = (input[i] == '0') ? 8 : 16;
            output += static_cast<char>(std::strtol(digits, NULL, base));
            i += 2;
            break;
         }

         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
									/*}}}*/
1509 // URI::CopyFrom - Copy from an object /*{{{*/
1510 // ---------------------------------------------------------------------
1511 /* This parses the URI into all of its components */
// NOTE(review): this listing is a lossy extract. Braces, else branches,
// several assignments and the return statements of the original are not
// visible, so the notes below describe only what the shown lines establish.
//
// Visible steps, in order:
//  1. Scan U for the first colon (FirstColon); the text before it is
//     assigned to Access.
//  2. Locate SingleSlash, the first slash that is not enclosed in square
//     brackets, after testing for a double slash behind the colon.
//     Everything from SingleSlash to the end is assigned to Path.
//  3. Between the scheme separator and SingleSlash look for a second colon
//     and an at sign; when an at sign exists the part after it becomes Host
//     and the parts before it become User and Password, both passed through
//     DeQuoteString.
//  4. Walk Host for RFC 2732 square brackets; PortEnd and InBracket track
//     the bracket state.
//  5. Take the text after the last colon in Host as Port (via atoi) and cut
//     Host at that colon, unless no colon exists or it lies before PortEnd.
1512 void URI::CopyFrom(const string
&U
)
1514 string::const_iterator I
= U
.begin();
1516 // Locate the first colon, this separates the scheme
1517 for (; I
< U
.end() && *I
!= ':' ; ++I
);
1518 string::const_iterator FirstColon
= I
;
1520 /* Determine if this is a host type URI with a leading double //
1521 and then search for the first single / */
1522 string::const_iterator SingleSlash
= I
;
// The statement guarded by this test is not visible
// (presumably it moves SingleSlash past the double slash - confirm).
1523 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1526 /* Find the / indicating the end of the hostname, ignoring /'s in the
1528 bool InBracket
= false;
1529 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); ++SingleSlash
)
1531 if (*SingleSlash
== '[')
1533 if (InBracket
== true && *SingleSlash
== ']')
// Clamp to the end of the input.
1537 if (SingleSlash
> U
.end())
1538 SingleSlash
= U
.end();
1540 // We can now write the access and path specifiers
1541 Access
.assign(U
.begin(),FirstColon
);
1542 if (SingleSlash
!= U
.end())
1543 Path
.assign(SingleSlash
,U
.end());
// The fallback applied to an empty Path is not visible in this extract.
1544 if (Path
.empty() == true)
1547 // Now we attempt to locate a user:pass@host fragment
1548 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1552 if (FirstColon
>= U
.end())
1555 if (FirstColon
> SingleSlash
)
1556 FirstColon
= SingleSlash
;
1558 // Find the colon...
1560 if (I
> SingleSlash
)
1562 for (; I
< SingleSlash
&& *I
!= ':'; ++I
);
1563 string::const_iterator SecondColon
= I
;
1565 // Search for the @ after the colon
1566 for (; I
< SingleSlash
&& *I
!= '@'; ++I
);
1567 string::const_iterator At
= I
;
1569 // Now write the host and user/pass
// At == SingleSlash means that no at sign was found before the path.
1570 if (At
== SingleSlash
)
1572 if (FirstColon
< SingleSlash
)
1573 Host
.assign(FirstColon
,SingleSlash
);
1577 Host
.assign(At
+1,SingleSlash
);
1578 // username and password must be encoded (RFC 3986)
1579 User
.assign(DeQuoteString(FirstColon
,SecondColon
));
1580 if (SecondColon
< At
)
1581 Password
.assign(DeQuoteString(SecondColon
+1,At
));
1584 // Now we parse the RFC 2732 [] hostnames.
1585 unsigned long PortEnd
= 0;
// Note: no increment in the loop header; the loop body (not visible)
// has to advance I itself.
1587 for (unsigned I
= 0; I
!= Host
.length();)
1596 if (InBracket
== true && Host
[I
] == ']')
// Still inside a bracket after the scan: the handling is not visible here.
1607 if (InBracket
== true)
1613 // Now we parse off a port number from the hostname
1615 string::size_type Pos
= Host
.rfind(':');
1616 if (Pos
== string::npos
|| Pos
< PortEnd
)
1619 Port
= atoi(string(Host
,Pos
+1).c_str());
1620 Host
.assign(Host
,0,Pos
);
1623 // URI::operator string - Convert the URI to a string /*{{{*/
1624 // ---------------------------------------------------------------------
// NOTE(review): this listing is a lossy extract. Braces, else branches,
// some output statements and the return statement are not visible.
//
// Visible behaviour: the text is built in a stringstream.
//  - A non-empty Access is written followed by a colon.
//  - Inside the non-empty Host branch, a non-empty User is written through
//    QuoteString with the set :/?#[]@ escaped, followed by a colon and the
//    equally quoted Password when one is set.
//  - When Access is non-empty and Host contains a slash or a colon, Host is
//    written enclosed in square brackets (RFC 2732).
//  - A non-empty Path is handled last; how it is written is not visible.
1626 URI::operator string()
1628 std::stringstream Res
;
1630 if (Access
.empty() == false)
1631 Res
<< Access
<< ':';
1633 if (Host
.empty() == false)
1635 if (Access
.empty() == false)
1638 if (User
.empty() == false)
1640 // FIXME: Technically userinfo is permitted even less
1641 // characters than these, but this is not conveniently
1642 // expressed with a blacklist.
1643 Res
<< QuoteString(User
, ":/?#[]@");
1644 if (Password
.empty() == false)
1645 Res
<< ":" << QuoteString(Password
, ":/?#[]@");
1649 // Add RFC 2732 escaping characters
1650 if (Access
.empty() == false && Host
.find_first_of("/:") != string::npos
)
1651 Res
<< '[' << Host
<< ']';
1659 if (Path
.empty() == false)
1670 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
// NOTE(review): only the signature is visible in this extract; the body is
// missing. Takes the URI as a string and returns a string. Per the header
// line above the result is limited to schema and site - confirm against the
// full source.
1671 string
URI::SiteOnly(const string
&URI
)
1680 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
// NOTE(review): the body is only partly visible in this extract; the
// declaration of U and the return statement are missing.
// Takes the URI as a string and returns a string.
1681 string
URI::ArchiveOnly(const string
&URI
)
// Path cleaning as far as visible: exactly one trailing slash is removed
// from a non-empty Path.
1686 if (U
.Path
.empty() == false && U
.Path
[U
.Path
.length() - 1] == '/')
1687 U
.Path
.erase(U
.Path
.length() - 1);
1691 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1692 string
URI::NoUserPassword(const string
&URI
)