1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
47 // Strip - Remove white space from the front and back of a string /*{{{*/
48 // ---------------------------------------------------------------------
51 std::string
Strip(const std::string
&str
)
53 // ensure we have at least one character
54 if (str
.empty() == true)
57 char const * const s
= str
.c_str();
59 for (; isspace(s
[start
]) != 0; ++start
)
60 ; // find the first not-space
62 // string contains only whitespaces
66 size_t end
= str
.length() - 1;
67 for (; isspace(s
[end
]) != 0; --end
)
68 ; // find the last not-space
70 return str
.substr(start
, end
- start
+ 1);
73 bool Endswith(const std::string
&s
, const std::string
&end
)
75 if (end
.size() > s
.size())
77 return (s
.compare(s
.size() - end
.size(), end
.size(), end
) == 0);
80 bool Startswith(const std::string
&s
, const std::string
&start
)
82 if (start
.size() > s
.size())
84 return (s
.compare(0, start
.size(), start
) == 0);
90 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
91 // ---------------------------------------------------------------------
92 /* This is handy to use before displaying some information to the end user */
93 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
98 size_t insize
, bufsize
;
101 cd
= iconv_open(codeset
, "UTF-8");
102 if (cd
== (iconv_t
)(-1)) {
103 // Something went wrong
105 _error
->Error("conversion from 'UTF-8' to '%s' not available",
108 perror("iconv_open");
113 insize
= bufsize
= orig
.size();
115 inptr
= (char *)inbuf
;
116 outbuf
= new char[bufsize
];
117 size_t lastError
= -1;
121 char *outptr
= outbuf
;
122 size_t outsize
= bufsize
;
123 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
124 dest
->append(outbuf
, outptr
- outbuf
);
125 if (err
== (size_t)(-1))
132 // replace a series of unknown multibytes with a single "?"
133 if (lastError
!= insize
) {
134 lastError
= insize
- 1;
142 if (outptr
== outbuf
)
146 outbuf
= new char[bufsize
];
160 // strstrip - Remove white space from the front and back of a string /*{{{*/
161 // ---------------------------------------------------------------------
162 /* This is handy to use when parsing a file. It also removes \n's left
163 over from fgets and company */
164 char *_strstrip(char *String
)
166 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
170 return _strrstrip(String
);
173 // strrstrip - Remove white space from the back of a string /*{{{*/
174 // ---------------------------------------------------------------------
175 char *_strrstrip(char *String
)
177 char *End
= String
+ strlen(String
) - 1;
178 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
179 *End
== '\r'); End
--);
185 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
186 // ---------------------------------------------------------------------
188 char *_strtabexpand(char *String
,size_t Len
)
190 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
194 if (I
+ 8 > String
+ Len
)
200 /* Assume the start of the string is 0 and find the next 8 char
206 Len
= 8 - ((String
- I
) % 8);
214 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
215 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
220 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
221 // ---------------------------------------------------------------------
222 /* This grabs a single word, converts any % escaped characters to their
223 proper values and advances the pointer. Double quotes are understood
224 and stripped out as well. This is for URI/URL parsing. It also can
225 understand [] brackets.*/
226 bool ParseQuoteWord(const char *&String
,string
&Res
)
228 // Skip leading whitespace
229 const char *C
= String
;
230 for (;*C
!= 0 && *C
== ' '; C
++);
234 // Jump to the next word
235 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
239 C
= strchr(C
+ 1, '"');
245 C
= strchr(C
+ 1, ']');
251 // Now de-quote characters
254 const char *Start
= String
;
256 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
258 if (*Start
== '%' && Start
+ 2 < C
&&
259 isxdigit(Start
[1]) && isxdigit(Start
[2]))
264 *I
= (char)strtol(Tmp
,0,16);
277 // Skip ending white space
278 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
283 // ParseCWord - Parses a string like a C "" expression /*{{{*/
284 // ---------------------------------------------------------------------
285 /* This expects a series of space separated strings enclosed in ""'s.
286 It concatenates the ""'s into a single string. */
287 bool ParseCWord(const char *&String
,string
&Res
)
289 // Skip leading whitespace
290 const char *C
= String
;
291 for (;*C
!= 0 && *C
== ' '; C
++);
297 if (strlen(String
) >= sizeof(Buffer
))
304 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
313 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
315 if (isspace(*C
) == 0)
325 // QuoteString - Convert a string into quoted form /*{{{*/
326 // ---------------------------------------------------------------------
328 string
QuoteString(const string
&Str
, const char *Bad
)
330 std::stringstream Res
;
331 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); ++I
)
333 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
334 *I
== 0x25 || // percent '%' char
335 *I
<= 0x20 || *I
>= 0x7F) // control chars
337 ioprintf(Res
, "%%%02hhx", *I
);
345 // DeQuoteString - Convert a string from quoted form /*{{{*/
346 // ---------------------------------------------------------------------
347 /* This undoes QuoteString */
348 string
DeQuoteString(const string
&Str
)
350 return DeQuoteString(Str
.begin(),Str
.end());
352 string
DeQuoteString(string::const_iterator
const &begin
,
353 string::const_iterator
const &end
)
356 for (string::const_iterator I
= begin
; I
!= end
; ++I
)
358 if (*I
== '%' && I
+ 2 < end
&&
359 isxdigit(I
[1]) && isxdigit(I
[2]))
365 Res
+= (char)strtol(Tmp
,0,16);
376 // SizeToStr - Convert a long into a human readable size /*{{{*/
377 // ---------------------------------------------------------------------
378 /* A max of 4 digits are shown before conversion to the next highest unit.
379 The max length of the string will be 5 chars unless the size is > 10
381 string
SizeToStr(double Size
)
389 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
390 ExaBytes, ZettaBytes, YottaBytes */
391 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
395 if (ASize
< 100 && I
!= 0)
398 strprintf(S
, "%'.1f %c", ASize
, Ext
[I
]);
405 strprintf(S
, "%'.0f %c", ASize
, Ext
[I
]);
414 // TimeToStr - Convert the time into a string /*{{{*/
415 // ---------------------------------------------------------------------
416 /* Converts a number of seconds to a hms format */
417 string
TimeToStr(unsigned long Sec
)
422 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
423 strprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
425 else if (Sec
> 60*60)
427 //TRANSLATOR: h means hours, min means minutes, s means seconds
428 strprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
432 //TRANSLATOR: min means minutes, s means seconds
433 strprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
437 //TRANSLATOR: s means seconds
438 strprintf(S
,_("%lis"),Sec
);
443 // SubstVar - Substitute a string for another string /*{{{*/
444 // ---------------------------------------------------------------------
445 /* This replaces all occurrences of Subst with Contents in Str. */
446 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
448 if (Subst
.empty() == true)
451 string::size_type Pos
= 0;
452 string::size_type OldPos
= 0;
455 while (OldPos
< Str
.length() &&
456 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
459 Temp
.append(Str
, OldPos
, Pos
- OldPos
);
460 if (Contents
.empty() == false)
461 Temp
.append(Contents
);
462 OldPos
= Pos
+ Subst
.length();
468 if (OldPos
>= Str
.length())
471 Temp
.append(Str
, OldPos
, string::npos
);
474 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
476 for (; Vars
->Subst
!= 0; Vars
++)
477 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
481 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
482 // ---------------------------------------------------------------------
483 /* Returns a string with the supplied separator depth + 1 times in it */
484 std::string
OutputInDepth(const unsigned long Depth
, const char* Separator
)
486 std::string output
= "";
487 for(unsigned long d
=Depth
+1; d
> 0; d
--)
488 output
.append(Separator
);
492 // URItoFileName - Convert the uri into a unique file name /*{{{*/
493 // ---------------------------------------------------------------------
494 /* This converts a URI into a safe filename. It quotes all unsafe characters
495 and converts / to _ and removes the scheme identifier. The resulting
496 file name should be unique and never occur again for a different file */
497 string
URItoFileName(const string
&URI
)
499 // Nuke 'sensitive' items
505 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
506 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
507 replace(NewURI
.begin(),NewURI
.end(),'/','_');
511 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
512 // ---------------------------------------------------------------------
513 /* This routine performs a base64 transformation on a string. It was ripped
514 from wget and then patched and bug fixed.
516 This spec can be found in rfc2045 */
517 string
Base64Encode(const string
&S
)
520 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
521 'I','J','K','L','M','N','O','P',
522 'Q','R','S','T','U','V','W','X',
523 'Y','Z','a','b','c','d','e','f',
524 'g','h','i','j','k','l','m','n',
525 'o','p','q','r','s','t','u','v',
526 'w','x','y','z','0','1','2','3',
527 '4','5','6','7','8','9','+','/'};
529 // Pre-allocate some space
531 Final
.reserve((4*S
.length() + 2)/3 + 2);
533 /* Transform the 3x8 bits to 4x6 bits, as required by
535 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
537 char Bits
[3] = {0,0,0};
544 Final
+= tbl
[Bits
[0] >> 2];
545 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
547 if (I
+ 1 >= S
.end())
550 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
552 if (I
+ 2 >= S
.end())
555 Final
+= tbl
[Bits
[2] & 0x3f];
558 /* Apply the padding elements, this tells how many bytes the remote
559 end should discard */
560 if (S
.length() % 3 == 2)
562 if (S
.length() % 3 == 1)
568 // stringcmp - Arbitrary string compare /*{{{*/
569 // ---------------------------------------------------------------------
570 /* This safely compares two non-null terminated strings of arbitrary
572 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
574 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
578 if (A
== AEnd
&& B
== BEnd
)
590 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
591 const char *B
,const char *BEnd
)
593 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
597 if (A
== AEnd
&& B
== BEnd
)
607 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
608 string::const_iterator B
,string::const_iterator BEnd
)
610 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
614 if (A
== AEnd
&& B
== BEnd
)
626 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
627 // ---------------------------------------------------------------------
629 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
631 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
632 if (tolower_ascii(*A
) != tolower_ascii(*B
))
635 if (A
== AEnd
&& B
== BEnd
)
641 if (tolower_ascii(*A
) < tolower_ascii(*B
))
646 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
647 const char *B
,const char *BEnd
)
649 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
650 if (tolower_ascii(*A
) != tolower_ascii(*B
))
653 if (A
== AEnd
&& B
== BEnd
)
659 if (tolower_ascii(*A
) < tolower_ascii(*B
))
663 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
664 string::const_iterator B
,string::const_iterator BEnd
)
666 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
667 if (tolower_ascii(*A
) != tolower_ascii(*B
))
670 if (A
== AEnd
&& B
== BEnd
)
676 if (tolower_ascii(*A
) < tolower_ascii(*B
))
682 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
683 // ---------------------------------------------------------------------
684 /* The format is like those used in package files and the method
685 communication system */
686 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
688 // Look for a matching tag.
689 int Length
= strlen(Tag
);
690 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); ++I
)
693 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
695 // Find the end of line and strip the leading/trailing spaces
696 string::const_iterator J
;
698 for (; isspace_ascii(*I
) != 0 && I
< Message
.end(); ++I
);
699 for (J
= I
; *J
!= '\n' && J
< Message
.end(); ++J
);
700 for (; J
> I
&& isspace_ascii(J
[-1]) != 0; --J
);
705 for (; *I
!= '\n' && I
< Message
.end(); ++I
);
708 // Failed to find a match
714 // StringToBool - Converts a string into a boolean /*{{{*/
715 // ---------------------------------------------------------------------
716 /* This inspects the string to see if it is true or if it is false and
717 then returns the result. Several variants on true/false are checked. */
718 int StringToBool(const string
&Text
,int Default
)
721 int Res
= strtol(Text
.c_str(),&ParseEnd
,0);
722 // ensure that the entire string was converted by strtol to avoid
723 // failures on "apt-cache show -a 0ad" where the "0" is converted
724 const char *TextEnd
= Text
.c_str()+Text
.size();
725 if (ParseEnd
== TextEnd
&& Res
>= 0 && Res
<= 1)
728 // Check for negatives
729 if (strcasecmp(Text
.c_str(),"no") == 0 ||
730 strcasecmp(Text
.c_str(),"false") == 0 ||
731 strcasecmp(Text
.c_str(),"without") == 0 ||
732 strcasecmp(Text
.c_str(),"off") == 0 ||
733 strcasecmp(Text
.c_str(),"disable") == 0)
736 // Check for positives
737 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
738 strcasecmp(Text
.c_str(),"true") == 0 ||
739 strcasecmp(Text
.c_str(),"with") == 0 ||
740 strcasecmp(Text
.c_str(),"on") == 0 ||
741 strcasecmp(Text
.c_str(),"enable") == 0)
747 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
748 // ---------------------------------------------------------------------
749 /* This converts a time_t into a string time representation that is
750 year 2000 compliant and timezone neutral */
751 string
TimeRFC1123(time_t Date
)
754 if (gmtime_r(&Date
, &Conv
) == NULL
)
757 auto const posix
= std::locale("C.UTF-8");
758 std::ostringstream datestr
;
759 datestr
.imbue(posix
);
760 APT::StringView
const fmt("%a, %d %b %Y %H:%M:%S GMT");
761 std::use_facet
<std::time_put
<char>>(posix
).put(
762 std::ostreambuf_iterator
<char>(datestr
),
763 datestr
, ' ', &Conv
, fmt
.data(), fmt
.data() + fmt
.size());
764 return datestr
.str();
767 // ReadMessages - Read messages from the FD /*{{{*/
768 // ---------------------------------------------------------------------
769 /* This pulls full messages from the input FD into the message buffer.
770 It assumes that messages will not pause during transit so no
771 fancy buffering is used.
773 In particular: this reads blocks from the input until it believes
774 that it's run out of input text. Each block is terminated by a
775 double newline ('\n' followed by '\n').
777 bool ReadMessages(int Fd
, vector
<string
> &List
)
780 // Represents any left-over from the previous iteration of the
781 // parse loop. (i.e., if a message is split across the end
782 // of the buffer, it goes here)
783 string PartialMessage
;
786 int const Res
= read(Fd
, Buffer
, sizeof(Buffer
));
787 if (Res
< 0 && errno
== EINTR
)
790 // process we read from has died
795 #if EAGAIN != EWOULDBLOCK
796 if (Res
< 0 && (errno
== EAGAIN
|| errno
== EWOULDBLOCK
))
798 if (Res
< 0 && errno
== EAGAIN
)
804 // extract the message(s) from the buffer
805 char const *Start
= Buffer
;
806 char const * const End
= Buffer
+ Res
;
808 char const * NL
= (char const *) memchr(Start
, '\n', End
- Start
);
811 // end of buffer: store what we have so far and read new data in
812 PartialMessage
.append(Start
, End
- Start
);
818 if (PartialMessage
.empty() == false && Start
< End
)
820 // if we start with a new line, see if the partial message we have ended with one
821 // so that we properly detect records ending between two read() runs
822 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
823 // the case \r|\n\r\n is handled by the usual double-newline handling
824 if ((NL
- Start
) == 1 || ((NL
- Start
) == 2 && *Start
== '\r'))
826 if (APT::String::Endswith(PartialMessage
, "\n") || APT::String::Endswith(PartialMessage
, "\r\n\r"))
828 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
829 List
.push_back(PartialMessage
);
830 PartialMessage
.clear();
831 while (NL
< End
&& (*NL
== '\n' || *NL
== '\r')) ++NL
;
837 while (Start
< End
) {
838 char const * NL2
= (char const *) memchr(NL
, '\n', End
- NL
);
841 // end of buffer: store what we have so far and read new data in
842 PartialMessage
.append(Start
, End
- Start
);
847 // did we find a double newline?
848 if ((NL2
- NL
) == 1 || ((NL2
- NL
) == 2 && *NL
== '\r'))
850 PartialMessage
.append(Start
, NL2
- Start
);
851 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
852 List
.push_back(PartialMessage
);
853 PartialMessage
.clear();
854 while (NL2
< End
&& (*NL2
== '\n' || *NL2
== '\r')) ++NL2
;
860 // we have read at least one complete message and nothing left
861 if (PartialMessage
.empty() == true)
864 if (WaitFd(Fd
) == false)
869 // MonthConv - Converts a month string into a number /*{{{*/
870 // ---------------------------------------------------------------------
871 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
872 Made it a bit more robust with a few tolower_ascii though. */
873 static int MonthConv(char const * const Month
)
875 switch (tolower_ascii(*Month
))
878 return tolower_ascii(Month
[1]) == 'p'?3:7;
884 if (tolower_ascii(Month
[1]) == 'a')
886 return tolower_ascii(Month
[2]) == 'n'?5:6;
888 return tolower_ascii(Month
[2]) == 'r'?2:4;
896 // Pretend it is January..
902 // timegm - Internal timegm if the gnu version is not available /*{{{*/
903 // ---------------------------------------------------------------------
904 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
905 than local timezone (mktime assumes the latter).
907 This function is a nonstandard GNU extension that is also present on
908 the BSDs and maybe other systems. For others we follow the advice of
909 the manpage of timegm and use its portable replacement. */
911 static time_t timegm(struct tm
*t
)
913 char *tz
= getenv("TZ");
916 time_t ret
= mktime(t
);
926 // RFC1123StrToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
927 // ---------------------------------------------------------------------
928 /* tries to parse a full date as specified in RFC7231 §7.1.1.1
929 with one exception: HTTP/1.1 valid dates need to have GMT as timezone.
930 As we encounter dates from UTC or with a numeric timezone in other places,
931 we allow them here to be able to reuse the method. Either way, a date
932 must be in UTC or parsing will fail. Previous implementations of this
933 method used to ignore the timezone and assume always UTC. */
934 bool RFC1123StrToTime(const char* const str
,time_t &time
)
936 unsigned short day
= 0;
937 signed int year
= 0; // yes, Y23K problem – we gonna worry then…
938 std::string weekday
, month
, datespec
, timespec
, zone
;
939 std::istringstream
ss(str
);
940 auto const &posix
= std::locale("C.UTF-8");
943 // we only superficially check weekday, mostly to avoid accepting localized
944 // weekdays here and take only its length to decide which datetime format we
945 // encounter here. The date isn't stored.
946 std::transform(weekday
.begin(), weekday
.end(), weekday
.begin(), ::tolower
);
947 std::array
<char const * const, 7> c_weekdays
= {{ "sun", "mon", "tue", "wed", "thu", "fri", "sat" }};
948 if (std::find(c_weekdays
.begin(), c_weekdays
.end(), weekday
.substr(0,3)) == c_weekdays
.end())
951 switch (weekday
.length())
954 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
955 if (weekday
[3] != ',')
957 ss
>> day
>> month
>> year
>> timespec
>> zone
;
960 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
961 ss
>> month
>> day
>> timespec
>> year
;
969 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
970 if (weekday
[weekday
.length() - 1] != ',')
972 ss
>> datespec
>> timespec
>> zone
;
973 auto const expldate
= VectorizeString(datespec
, '-');
974 if (expldate
.size() != 3)
978 day
= std::stoi(expldate
[0], &pos
);
979 if (pos
!= expldate
[0].length())
981 year
= 1900 + std::stoi(expldate
[2], &pos
);
982 if (pos
!= expldate
[2].length())
984 strprintf(datespec
, "%.4d-%.2d-%.2d", year
, MonthConv(expldate
[1].c_str()) + 1, day
);
991 if (ss
.fail() || ss
.bad() || !ss
.eof())
994 if (zone
!= "GMT" && zone
!= "UTC" && zone
!= "Z") // RFC 822
996 // numeric timezones are a SHOULD in RFC 1123 and generally preferred
999 auto const z
= std::stoi(zone
, &pos
);
1000 if (z
!= 0 || pos
!= zone
.length())
1007 if (datespec
.empty())
1011 strprintf(datespec
, "%.4d-%.2d-%.2d", year
, MonthConv(month
.c_str()) + 1, day
);
1014 std::string
const datetime
= datespec
+ ' ' + timespec
;
1016 if (strptime(datetime
.c_str(), "%Y-%m-%d %H:%M:%S", &Tm
) == nullptr)
1022 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
1023 // ---------------------------------------------------------------------
1025 bool FTPMDTMStrToTime(const char* const str
,time_t &time
)
1028 // MDTM includes no whitespace, but whitespace in the format is recommended and is ignored by strptime
1029 if (strptime(str
, "%Y %m %d %H %M %S", &Tm
) == NULL
)
1036 // StrToTime - Converts a string into a time_t /*{{{*/
1037 // ---------------------------------------------------------------------
1038 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
1039 and the C library asctime format. It requires the GNU library function
1040 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
1041 reason the C library does not provide any such function :< This also
1042 handles the weird, but unambiguous FTP time format*/
1043 bool StrToTime(const string
&Val
,time_t &Result
)
1048 // Skip the day of the week
1049 const char *I
= strchr(Val
.c_str(), ' ');
1051 // Handle RFC 1123 time
1053 if (sscanf(I
," %2d %3s %4d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
1054 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
1056 // Handle RFC 1036 time
1057 if (sscanf(I
," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,
1058 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
1063 if (sscanf(I
," %3s %2d %2d:%2d:%2d %4d",Month
,&Tm
.tm_mday
,
1064 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
1067 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
1068 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
1077 Tm
.tm_mon
= MonthConv(Month
);
1079 Tm
.tm_mon
= 0; // we don't have a month, so pick something
1082 // Convert to local time and then to GMT
1083 Result
= timegm(&Tm
);
1087 // StrToNum - Convert a fixed length string to a number /*{{{*/
1088 // ---------------------------------------------------------------------
1089 /* This is used in decoding the crazy fixed length string headers in
1090 tar and ar files. */
1091 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
1094 if (Len
>= sizeof(S
))
1099 // All spaces is a zero
1102 for (I
= 0; S
[I
] == ' '; I
++);
1107 Res
= strtoul(S
,&End
,Base
);
1114 // StrToNum - Convert a fixed length string to a number /*{{{*/
1115 // ---------------------------------------------------------------------
1116 /* This is used in decoding the crazy fixed length string headers in
1117 tar and ar files. */
1118 bool StrToNum(const char *Str
,unsigned long long &Res
,unsigned Len
,unsigned Base
)
1121 if (Len
>= sizeof(S
))
1126 // All spaces is a zero
1129 for (I
= 0; S
[I
] == ' '; I
++);
1134 Res
= strtoull(S
,&End
,Base
);
1142 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1143 // ---------------------------------------------------------------------
1144 /* This is used in decoding the base-256 encoded fixed length fields in
1146 bool Base256ToNum(const char *Str
,unsigned long long &Res
,unsigned int Len
)
1148 if ((Str
[0] & 0x80) == 0)
1152 Res
= Str
[0] & 0x7F;
1153 for(unsigned int i
= 1; i
< Len
; ++i
)
1154 Res
= (Res
<<8) + Str
[i
];
1159 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1160 // ---------------------------------------------------------------------
1161 /* This is used in decoding the base-256 encoded fixed length fields in
1163 bool Base256ToNum(const char *Str
,unsigned long &Res
,unsigned int Len
)
1165 unsigned long long Num
;
1168 rc
= Base256ToNum(Str
, Num
, Len
);
1176 // HexDigit - Convert a hex character into an integer /*{{{*/
1177 // ---------------------------------------------------------------------
1178 /* Helper for Hex2Num */
1179 static int HexDigit(int c
)
1181 if (c
>= '0' && c
<= '9')
1183 if (c
>= 'a' && c
<= 'f')
1184 return c
- 'a' + 10;
1185 if (c
>= 'A' && c
<= 'F')
1186 return c
- 'A' + 10;
1190 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1191 // ---------------------------------------------------------------------
1192 /* The length of the buffer must be exactly 1/2 the length of the string. */
1193 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
1195 return Hex2Num(APT::StringView(Str
), Num
, Length
);
1198 bool Hex2Num(const APT::StringView Str
,unsigned char *Num
,unsigned int Length
)
1200 if (Str
.length() != Length
*2)
1203 // Convert each digit. We store it in the same order as the string
1205 for (auto I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
1207 int first_half
= HexDigit(I
[0]);
1212 second_half
= HexDigit(I
[1]);
1213 if (second_half
< 0)
1215 Num
[J
] = first_half
<< 4;
1216 Num
[J
] += second_half
;
1222 // TokSplitString - Split a string up by a given token /*{{{*/
1223 // ---------------------------------------------------------------------
1224 /* This is intended to be a faster splitter, it does not use dynamic
1225 memory. Input is changed to insert nulls at each token location. */
1226 bool TokSplitString(char Tok
,char *Input
,char **List
,
1227 unsigned long ListMax
)
1229 // Strip any leading spaces
1230 char *Start
= Input
;
1231 char *Stop
= Start
+ strlen(Start
);
1232 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
1234 unsigned long Count
= 0;
1238 // Skip to the next Token
1239 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
1241 // Back remove spaces
1243 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
1246 List
[Count
++] = Start
;
1247 if (Count
>= ListMax
)
1254 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1262 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1263 // ---------------------------------------------------------------------
1264 /* This can be used to split a given string up into a vector, so the
1265 purpose is the same as in the method above and this one is a bit slower
1266 also, but the advantage is that we have an iterable vector */
1267 vector
<string
> VectorizeString(string
const &haystack
, char const &split
)
1269 vector
<string
> exploded
;
1270 if (haystack
.empty() == true)
1272 string::const_iterator start
= haystack
.begin();
1273 string::const_iterator end
= start
;
1275 for (; end
!= haystack
.end() && *end
!= split
; ++end
);
1276 exploded
.push_back(string(start
, end
));
1278 } while (end
!= haystack
.end() && (++end
) != haystack
.end());
1282 // StringSplit - split a string into a string vector by token /*{{{*/
1283 // ---------------------------------------------------------------------
1284 /* See header for details.
1286 vector
<string
> StringSplit(std::string
const &s
, std::string
const &sep
,
1287 unsigned int maxsplit
)
1289 vector
<string
> split
;
1292 // no separator given, this is bogus
1297 while (pos
!= string::npos
)
1299 pos
= s
.find(sep
, start
);
1300 split
.push_back(s
.substr(start
, pos
-start
));
1302 // if maxsplit is reached, the remaining string is the last item
1303 if(split
.size() >= maxsplit
)
1305 split
[split
.size()-1] = s
.substr(start
);
1308 start
= pos
+sep
.size();
1313 // RegexChoice - Simple regex list/list matcher /*{{{*/
1314 // ---------------------------------------------------------------------
1316 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1317 const char **ListEnd
)
1319 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1322 unsigned long Hits
= 0;
1323 for (; ListBegin
< ListEnd
; ++ListBegin
)
1325 // Check if the name is a regex
1328 for (I
= *ListBegin
; *I
!= 0; I
++)
1329 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1334 // Compile the regex pattern
1337 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1343 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1348 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1352 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1357 if (R
->Hit
== false)
1367 _error
->Warning(_("Selection %s not found"),*ListBegin
);
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* Formats into a scratch buffer of 'size' bytes. On success the text is
   written to out and true is returned; otherwise size is updated to the
   capacity the next attempt should use and false is returned. */
static bool iovprintf(std::ostream &out, const char *format,
		      va_list &args, ssize_t &size) {
   char * const buffer = (char*)malloc(size);
   ssize_t const written = vsnprintf(buffer, size, format, args);
   bool const fits = (written > -1 && written < size);

   if (fits)
      out << buffer;
   else if (written > -1)
      size = written + 1;	// vsnprintf told us the exact need
   else
      size *= 2;		// no length reported, just grow

   free(buffer);
   return fits;
}

/* printf-style output to an ostream; retries with a larger buffer until
   the formatted text fits. */
void ioprintf(std::ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      // the argument list has to be restarted for every attempt
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}

/* printf-style formatting into a std::string; out is replaced by the
   formatted text. */
void strprintf(std::string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
									/*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs: feed the returned pointer back in as Buffer to
   append. End is returned when there is no room at all, when formatting
   fails, or when the output had to be truncated. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // no room, not even for the terminator
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* vsnprintf reports the length it would have needed. Compare lengths
      instead of forming Buffer + Did, which would point outside of the
      buffer (undefined behaviour) whenever the output was truncated. */
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
									/*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Returns everything after the first ':' of VerStr, or VerStr itself if
   it contains no ':'. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const colon = VerStr.find(":");
   return (colon == std::string::npos) ? VerStr : VerStr.substr(colon + 1);
}
									/*}}}*/
1456 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1457 // ---------------------------------------------------------------------
1458 /* This little function is the most called method we have and tries
1459 therefore to do the absolut minimum - and is notable faster than
1460 standard tolower/toupper and as a bonus avoids problems with different
1461 locales - we only operate on ascii chars anyway. */
1462 #undef tolower_ascii
1463 int tolower_ascii(int const c
) APT_CONST APT_COLD
;
1464 int tolower_ascii(int const c
)
1466 return tolower_ascii_inline(c
);
1470 // isspace_ascii - isspace() function that ignores the locale /*{{{*/
1471 // ---------------------------------------------------------------------
1472 /* This little function is one of the most called methods we have and tries
1473 therefore to do the absolut minimum - and is notable faster than
1474 standard isspace() and as a bonus avoids problems with different
1475 locales - we only operate on ascii chars anyway. */
1476 #undef isspace_ascii
1477 int isspace_ascii(int const c
) APT_CONST APT_COLD
;
1478 int isspace_ascii(int const c
)
1480 return isspace_ascii_inline(c
);
1484 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1485 // ---------------------------------------------------------------------
1486 /* The domain list is a comma separate list of domains that are suffix
1487 matched against the argument */
1488 bool CheckDomainList(const string
&Host
,const string
&List
)
1490 string::const_iterator Start
= List
.begin();
1491 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); ++Cur
)
1493 if (Cur
< List
.end() && *Cur
!= ',')
1496 // Match the end of the string..
1497 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1499 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries of str_array that precede the terminating NULL
   pointer. */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return cursor - str_array;
}
									/*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Decodes the backslash escapes of input and returns the result:
     \\    a single backslash
     \0XX  the character with the octal value XX (two digits after the 0)
     \xXX  the character with the hexadecimal value XX
   Any other escaped character is dropped together with its backslash, as
   is a lone backslash at the very end. If fewer than two characters follow
   a \0 or \x the selector is dropped and the remaining characters are
   copied literally. */
std::string DeEscapeString(const std::string &input)
{
   std::string output;
   for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (it + 1 == input.end())
         break;

      // look at the escape selector
      ++it;
      switch (*it)
      {
         case '\\':
            // deal with double escape
            output += '\\';
            break;
         case '0':
         case 'x':
            /* Both digits must exist: reading it[2] at end() and then
               advancing onto end() would let the loop increment run past
               the string without ever comparing equal to end(). */
            if (input.end() - it > 2)
            {
               char const tmp[3] = { it[1], it[2], '\0' };
               output += (char)strtol(tmp, 0, (*it == '0') ? 8 : 16);
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
									/*}}}*/
1581 // URI::CopyFrom - Copy from an object /*{{{*/
1582 // ---------------------------------------------------------------------
1583 /* This parses the URI into all of its components */
1584 void URI::CopyFrom(const string
&U
)
1586 string::const_iterator I
= U
.begin();
1588 // Locate the first colon, this separates the scheme
1589 for (; I
< U
.end() && *I
!= ':' ; ++I
);
1590 string::const_iterator FirstColon
= I
;
1592 /* Determine if this is a host type URI with a leading double //
1593 and then search for the first single / */
1594 string::const_iterator SingleSlash
= I
;
1595 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1598 /* Find the / indicating the end of the hostname, ignoring /'s in the
1600 bool InBracket
= false;
1601 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); ++SingleSlash
)
1603 if (*SingleSlash
== '[')
1605 if (InBracket
== true && *SingleSlash
== ']')
1609 if (SingleSlash
> U
.end())
1610 SingleSlash
= U
.end();
1612 // We can now write the access and path specifiers
1613 Access
.assign(U
.begin(),FirstColon
);
1614 if (SingleSlash
!= U
.end())
1615 Path
.assign(SingleSlash
,U
.end());
1616 if (Path
.empty() == true)
1619 // Now we attempt to locate a user:pass@host fragment
1620 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1624 if (FirstColon
>= U
.end())
1627 if (FirstColon
> SingleSlash
)
1628 FirstColon
= SingleSlash
;
1630 // Find the colon...
1632 if (I
> SingleSlash
)
1634 for (; I
< SingleSlash
&& *I
!= ':'; ++I
);
1635 string::const_iterator SecondColon
= I
;
1637 // Search for the @ after the colon
1638 for (; I
< SingleSlash
&& *I
!= '@'; ++I
);
1639 string::const_iterator At
= I
;
1641 // Now write the host and user/pass
1642 if (At
== SingleSlash
)
1644 if (FirstColon
< SingleSlash
)
1645 Host
.assign(FirstColon
,SingleSlash
);
1649 Host
.assign(At
+1,SingleSlash
);
1650 // username and password must be encoded (RFC 3986)
1651 User
.assign(DeQuoteString(FirstColon
,SecondColon
));
1652 if (SecondColon
< At
)
1653 Password
.assign(DeQuoteString(SecondColon
+1,At
));
1656 // Now we parse the RFC 2732 [] hostnames.
1657 unsigned long PortEnd
= 0;
1659 for (unsigned I
= 0; I
!= Host
.length();)
1668 if (InBracket
== true && Host
[I
] == ']')
1679 if (InBracket
== true)
1685 // Now we parse off a port number from the hostname
1687 string::size_type Pos
= Host
.rfind(':');
1688 if (Pos
== string::npos
|| Pos
< PortEnd
)
1691 Port
= atoi(string(Host
,Pos
+1).c_str());
1692 Host
.assign(Host
,0,Pos
);
1695 // URI::operator string - Convert the URI to a string /*{{{*/
1696 // ---------------------------------------------------------------------
1698 URI::operator string()
1700 std::stringstream Res
;
1702 if (Access
.empty() == false)
1703 Res
<< Access
<< ':';
1705 if (Host
.empty() == false)
1707 if (Access
.empty() == false)
1710 if (User
.empty() == false)
1712 // FIXME: Technically userinfo is permitted even less
1713 // characters than these, but this is not conveniently
1714 // expressed with a blacklist.
1715 Res
<< QuoteString(User
, ":/?#[]@");
1716 if (Password
.empty() == false)
1717 Res
<< ":" << QuoteString(Password
, ":/?#[]@");
1721 // Add RFC 2732 escaping characters
1722 if (Access
.empty() == false && Host
.find_first_of("/:") != string::npos
)
1723 Res
<< '[' << Host
<< ']';
1728 Res
<< ':' << std::to_string(Port
);
1731 if (Path
.empty() == false)
1742 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1743 string
URI::SiteOnly(const string
&URI
)
1752 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1753 string
URI::ArchiveOnly(const string
&URI
)
1758 if (U
.Path
.empty() == false && U
.Path
[U
.Path
.length() - 1] == '/')
1759 U
.Path
.erase(U
.Path
.length() - 1);
1763 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1764 string
URI::NoUserPassword(const string
&URI
)