1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
48 // Strip - Remove white space from the front and back of a string /*{{{*/
49 // ---------------------------------------------------------------------
52 std::string
Strip(const std::string
&str
)
54 // ensure we have at least one character
55 if (str
.empty() == true)
58 char const * const s
= str
.c_str();
60 for (; isspace(s
[start
]) != 0; ++start
)
61 ; // find the first not-space
63 // string contains only whitespaces
67 size_t end
= str
.length() - 1;
68 for (; isspace(s
[end
]) != 0; --end
)
69 ; // find the last not-space
71 return str
.substr(start
, end
- start
+ 1);
74 bool Endswith(const std::string
&s
, const std::string
&end
)
76 if (end
.size() > s
.size())
78 return (s
.compare(s
.size() - end
.size(), end
.size(), end
) == 0);
81 bool Startswith(const std::string
&s
, const std::string
&start
)
83 if (start
.size() > s
.size())
85 return (s
.compare(0, start
.size(), start
) == 0);
91 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
92 // ---------------------------------------------------------------------
93 /* This is handy to use before displaying some information to the end user */
94 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
99 size_t insize
, bufsize
;
102 cd
= iconv_open(codeset
, "UTF-8");
103 if (cd
== (iconv_t
)(-1)) {
104 // Something went wrong
106 _error
->Error("conversion from 'UTF-8' to '%s' not available",
109 perror("iconv_open");
114 insize
= bufsize
= orig
.size();
116 inptr
= (char *)inbuf
;
117 outbuf
= new char[bufsize
];
118 size_t lastError
= -1;
122 char *outptr
= outbuf
;
123 size_t outsize
= bufsize
;
124 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
125 dest
->append(outbuf
, outptr
- outbuf
);
126 if (err
== (size_t)(-1))
133 // replace a series of unknown multibytes with a single "?"
134 if (lastError
!= insize
) {
135 lastError
= insize
- 1;
143 if (outptr
== outbuf
)
147 outbuf
= new char[bufsize
];
161 // strstrip - Remove white space from the front and back of a string /*{{{*/
162 // ---------------------------------------------------------------------
163 /* This is handy to use when parsing a file. It also removes \n's left
164 over from fgets and company */
165 char *_strstrip(char *String
)
167 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
171 return _strrstrip(String
);
174 // strrstrip - Remove white space from the back of a string /*{{{*/
175 // ---------------------------------------------------------------------
176 char *_strrstrip(char *String
)
178 char *End
= String
+ strlen(String
) - 1;
179 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
180 *End
== '\r'); End
--);
186 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
187 // ---------------------------------------------------------------------
189 char *_strtabexpand(char *String
,size_t Len
)
191 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
195 if (I
+ 8 > String
+ Len
)
201 /* Assume the start of the string is 0 and find the next 8 char
207 Len
= 8 - ((String
- I
) % 8);
215 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
216 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
221 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
222 // ---------------------------------------------------------------------
223 /* This grabs a single word, converts any % escaped characters to their
224 proper values and advances the pointer. Double quotes are understood
225 and stripped out as well. This is for URI/URL parsing. It also can
226 understand [] brackets.*/
227 bool ParseQuoteWord(const char *&String
,string
&Res
)
229 // Skip leading whitespace
230 const char *C
= String
;
231 for (;*C
!= 0 && *C
== ' '; C
++);
235 // Jump to the next word
236 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
240 C
= strchr(C
+ 1, '"');
246 C
= strchr(C
+ 1, ']');
252 // Now de-quote characters
255 const char *Start
= String
;
257 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
259 if (*Start
== '%' && Start
+ 2 < C
&&
260 isxdigit(Start
[1]) && isxdigit(Start
[2]))
265 *I
= (char)strtol(Tmp
,0,16);
278 // Skip ending white space
279 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
284 // ParseCWord - Parses a string like a C "" expression /*{{{*/
285 // ---------------------------------------------------------------------
286 /* This expects a series of space separated strings enclosed in ""'s.
287 It concatenates the ""'s into a single string. */
288 bool ParseCWord(const char *&String
,string
&Res
)
290 // Skip leading whitespace
291 const char *C
= String
;
292 for (;*C
!= 0 && *C
== ' '; C
++);
298 if (strlen(String
) >= sizeof(Buffer
))
305 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
314 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
316 if (isspace(*C
) == 0)
326 // QuoteString - Convert a string into quoted form /*{{{*/
327 // ---------------------------------------------------------------------
329 string
QuoteString(const string
&Str
, const char *Bad
)
331 std::stringstream Res
;
332 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); ++I
)
334 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
335 *I
== 0x25 || // percent '%' char
336 *I
<= 0x20 || *I
>= 0x7F) // control chars
338 ioprintf(Res
, "%%%02hhx", *I
);
346 // DeQuoteString - Convert a string from quoted form /*{{{*/
347 // ---------------------------------------------------------------------
348 /* This undoes QuoteString */
349 string
DeQuoteString(const string
&Str
)
351 return DeQuoteString(Str
.begin(),Str
.end());
353 string
DeQuoteString(string::const_iterator
const &begin
,
354 string::const_iterator
const &end
)
357 for (string::const_iterator I
= begin
; I
!= end
; ++I
)
359 if (*I
== '%' && I
+ 2 < end
&&
360 isxdigit(I
[1]) && isxdigit(I
[2]))
366 Res
+= (char)strtol(Tmp
,0,16);
377 // SizeToStr - Convert a long into a human readable size /*{{{*/
378 // ---------------------------------------------------------------------
379 /* A max of 4 digits are shown before conversion to the next highest unit.
380 The max length of the string will be 5 chars unless the size is > 10
382 string
SizeToStr(double Size
)
390 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
391 ExaBytes, ZettaBytes, YottaBytes */
392 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
396 if (ASize
< 100 && I
!= 0)
399 strprintf(S
, "%'.1f %c", ASize
, Ext
[I
]);
406 strprintf(S
, "%'.0f %c", ASize
, Ext
[I
]);
415 // TimeToStr - Convert the time into a string /*{{{*/
416 // ---------------------------------------------------------------------
417 /* Converts a number of seconds to a hms format */
418 string
TimeToStr(unsigned long Sec
)
423 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
424 strprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
426 else if (Sec
> 60*60)
428 //TRANSLATOR: h means hours, min means minutes, s means seconds
429 strprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
433 //TRANSLATOR: min means minutes, s means seconds
434 strprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
438 //TRANSLATOR: s means seconds
439 strprintf(S
,_("%lis"),Sec
);
444 // SubstVar - Substitute a string for another string /*{{{*/
445 // ---------------------------------------------------------------------
446 /* This replaces all occurrences of Subst with Contents in Str. */
447 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
449 if (Subst
.empty() == true)
452 string::size_type Pos
= 0;
453 string::size_type OldPos
= 0;
456 while (OldPos
< Str
.length() &&
457 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
460 Temp
.append(Str
, OldPos
, Pos
- OldPos
);
461 if (Contents
.empty() == false)
462 Temp
.append(Contents
);
463 OldPos
= Pos
+ Subst
.length();
469 if (OldPos
>= Str
.length())
472 Temp
.append(Str
, OldPos
, string::npos
);
475 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
477 for (; Vars
->Subst
!= 0; Vars
++)
478 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
482 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
483 // ---------------------------------------------------------------------
484 /* Returns a string with the supplied separator depth + 1 times in it */
485 std::string
OutputInDepth(const unsigned long Depth
, const char* Separator
)
487 std::string output
= "";
488 for(unsigned long d
=Depth
+1; d
> 0; d
--)
489 output
.append(Separator
);
493 // URItoFileName - Convert the uri into a unique file name /*{{{*/
494 // ---------------------------------------------------------------------
495 /* This converts a URI into a safe filename. It quotes all unsafe characters
496 and converts / to _ and removes the scheme identifier. The resulting
497 file name should be unique and never occur again for a different file */
498 string
URItoFileName(const string
&URI
)
500 // Nuke 'sensitive' items
506 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
507 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
508 replace(NewURI
.begin(),NewURI
.end(),'/','_');
512 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
513 // ---------------------------------------------------------------------
514 /* This routine performs a base64 transformation on a string. It was ripped
515 from wget and then patched and bug fixed.
517 This spec can be found in rfc2045 */
518 string
Base64Encode(const string
&S
)
521 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
522 'I','J','K','L','M','N','O','P',
523 'Q','R','S','T','U','V','W','X',
524 'Y','Z','a','b','c','d','e','f',
525 'g','h','i','j','k','l','m','n',
526 'o','p','q','r','s','t','u','v',
527 'w','x','y','z','0','1','2','3',
528 '4','5','6','7','8','9','+','/'};
530 // Pre-allocate some space
532 Final
.reserve((4*S
.length() + 2)/3 + 2);
534 /* Transform the 3x8 bits to 4x6 bits, as required by
536 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
538 char Bits
[3] = {0,0,0};
545 Final
+= tbl
[Bits
[0] >> 2];
546 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
548 if (I
+ 1 >= S
.end())
551 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
553 if (I
+ 2 >= S
.end())
556 Final
+= tbl
[Bits
[2] & 0x3f];
559 /* Apply the padding elements, this tells how many bytes the remote
560 end should discard */
561 if (S
.length() % 3 == 2)
563 if (S
.length() % 3 == 1)
569 // stringcmp - Arbitrary string compare /*{{{*/
570 // ---------------------------------------------------------------------
571 /* This safely compares two non-null terminated strings of arbitrary
573 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
575 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
579 if (A
== AEnd
&& B
== BEnd
)
591 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
592 const char *B
,const char *BEnd
)
594 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
598 if (A
== AEnd
&& B
== BEnd
)
608 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
609 string::const_iterator B
,string::const_iterator BEnd
)
611 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
615 if (A
== AEnd
&& B
== BEnd
)
627 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
628 // ---------------------------------------------------------------------
630 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
632 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
633 if (tolower_ascii(*A
) != tolower_ascii(*B
))
636 if (A
== AEnd
&& B
== BEnd
)
642 if (tolower_ascii(*A
) < tolower_ascii(*B
))
647 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
648 const char *B
,const char *BEnd
)
650 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
651 if (tolower_ascii(*A
) != tolower_ascii(*B
))
654 if (A
== AEnd
&& B
== BEnd
)
660 if (tolower_ascii(*A
) < tolower_ascii(*B
))
664 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
665 string::const_iterator B
,string::const_iterator BEnd
)
667 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
668 if (tolower_ascii(*A
) != tolower_ascii(*B
))
671 if (A
== AEnd
&& B
== BEnd
)
677 if (tolower_ascii(*A
) < tolower_ascii(*B
))
683 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
684 // ---------------------------------------------------------------------
685 /* The format is like those used in package files and the method
686 communication system */
687 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
689 // Look for a matching tag.
690 int Length
= strlen(Tag
);
691 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); ++I
)
694 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
696 // Find the end of line and strip the leading/trailing spaces
697 string::const_iterator J
;
699 for (; isspace_ascii(*I
) != 0 && I
< Message
.end(); ++I
);
700 for (J
= I
; *J
!= '\n' && J
< Message
.end(); ++J
);
701 for (; J
> I
&& isspace_ascii(J
[-1]) != 0; --J
);
706 for (; *I
!= '\n' && I
< Message
.end(); ++I
);
709 // Failed to find a match
715 // StringToBool - Converts a string into a boolean /*{{{*/
716 // ---------------------------------------------------------------------
717 /* This inspects the string to see if it is true or if it is false and
718 then returns the result. Several variants on true/false are checked. */
719 int StringToBool(const string
&Text
,int Default
)
722 int Res
= strtol(Text
.c_str(),&ParseEnd
,0);
723 // ensure that the entire string was converted by strtol to avoid
724 // failures on "apt-cache show -a 0ad" where the "0" is converted
725 const char *TextEnd
= Text
.c_str()+Text
.size();
726 if (ParseEnd
== TextEnd
&& Res
>= 0 && Res
<= 1)
729 // Check for negatives
730 if (strcasecmp(Text
.c_str(),"no") == 0 ||
731 strcasecmp(Text
.c_str(),"false") == 0 ||
732 strcasecmp(Text
.c_str(),"without") == 0 ||
733 strcasecmp(Text
.c_str(),"off") == 0 ||
734 strcasecmp(Text
.c_str(),"disable") == 0)
737 // Check for positives
738 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
739 strcasecmp(Text
.c_str(),"true") == 0 ||
740 strcasecmp(Text
.c_str(),"with") == 0 ||
741 strcasecmp(Text
.c_str(),"on") == 0 ||
742 strcasecmp(Text
.c_str(),"enable") == 0)
748 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
749 // ---------------------------------------------------------------------
750 /* This converts a time_t into a string time representation that is
751 year 2000 compliant and timezone neutral */
752 string
TimeRFC1123(time_t Date
)
754 return TimeRFC1123(Date
, false);
756 string
TimeRFC1123(time_t Date
, bool const NumericTimezone
)
759 if (gmtime_r(&Date
, &Conv
) == NULL
)
762 auto const posix
= std::locale::classic();
763 std::ostringstream datestr
;
764 datestr
.imbue(posix
);
765 APT::StringView
const fmt("%a, %d %b %Y %H:%M:%S");
766 std::use_facet
<std::time_put
<char>>(posix
).put(
767 std::ostreambuf_iterator
<char>(datestr
),
768 datestr
, ' ', &Conv
, fmt
.data(), fmt
.data() + fmt
.size());
773 return datestr
.str();
776 // ReadMessages - Read messages from the FD /*{{{*/
777 // ---------------------------------------------------------------------
778 /* This pulls full messages from the input FD into the message buffer.
779 It assumes that messages will not pause during transit so no
780 fancy buffering is used.
782 In particular: this reads blocks from the input until it believes
783 that it's run out of input text. Each block is terminated by a
784 double newline ('\n' followed by '\n').
786 bool ReadMessages(int Fd
, vector
<string
> &List
)
789 // Represents any left-over from the previous iteration of the
790 // parse loop. (i.e., if a message is split across the end
791 // of the buffer, it goes here)
792 string PartialMessage
;
795 int const Res
= read(Fd
, Buffer
, sizeof(Buffer
));
796 if (Res
< 0 && errno
== EINTR
)
799 // process we read from has died
804 #if EAGAIN != EWOULDBLOCK
805 if (Res
< 0 && (errno
== EAGAIN
|| errno
== EWOULDBLOCK
))
807 if (Res
< 0 && errno
== EAGAIN
)
813 // extract the message(s) from the buffer
814 char const *Start
= Buffer
;
815 char const * const End
= Buffer
+ Res
;
817 char const * NL
= (char const *) memchr(Start
, '\n', End
- Start
);
820 // end of buffer: store what we have so far and read new data in
821 PartialMessage
.append(Start
, End
- Start
);
827 if (PartialMessage
.empty() == false && Start
< End
)
829 // if we start with a new line, see if the partial message we have ended with one
830 // so that we properly detect records ending between two read() runs
831 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
832 // the case \r|\n\r\n is handled by the usual double-newline handling
833 if ((NL
- Start
) == 1 || ((NL
- Start
) == 2 && *Start
== '\r'))
835 if (APT::String::Endswith(PartialMessage
, "\n") || APT::String::Endswith(PartialMessage
, "\r\n\r"))
837 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
838 List
.push_back(PartialMessage
);
839 PartialMessage
.clear();
840 while (NL
< End
&& (*NL
== '\n' || *NL
== '\r')) ++NL
;
846 while (Start
< End
) {
847 char const * NL2
= (char const *) memchr(NL
, '\n', End
- NL
);
850 // end of buffer: store what we have so far and read new data in
851 PartialMessage
.append(Start
, End
- Start
);
856 // did we find a double newline?
857 if ((NL2
- NL
) == 1 || ((NL2
- NL
) == 2 && *NL
== '\r'))
859 PartialMessage
.append(Start
, NL2
- Start
);
860 PartialMessage
.erase(PartialMessage
.find_last_not_of("\r\n") + 1);
861 List
.push_back(PartialMessage
);
862 PartialMessage
.clear();
863 while (NL2
< End
&& (*NL2
== '\n' || *NL2
== '\r')) ++NL2
;
869 // we have read at least one complete message and nothing left
870 if (PartialMessage
.empty() == true)
873 if (WaitFd(Fd
) == false)
878 // MonthConv - Converts a month string into a number /*{{{*/
879 // ---------------------------------------------------------------------
880 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
881 Made it a bit more robust with a few tolower_ascii though. */
882 static int MonthConv(char const * const Month
)
884 switch (tolower_ascii(*Month
))
887 return tolower_ascii(Month
[1]) == 'p'?3:7;
893 if (tolower_ascii(Month
[1]) == 'a')
895 return tolower_ascii(Month
[2]) == 'n'?5:6;
897 return tolower_ascii(Month
[2]) == 'r'?2:4;
905 // Pretend it is January..
911 // timegm - Internal timegm if the gnu version is not available /*{{{*/
912 // ---------------------------------------------------------------------
913 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
914 than local timezone (mktime assumes the latter).
916 This function is a nonstandard GNU extension that is also present on
917 the BSDs and maybe other systems. For others we follow the advice of
918 the manpage of timegm and use its portable replacement. */
920 static time_t timegm(struct tm
*t
)
922 char *tz
= getenv("TZ");
925 time_t ret
= mktime(t
);
935 // RFC1123StrToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
936 // ---------------------------------------------------------------------
937 /* tries to parse a full date as specified in RFC7231 §7.1.1.1
938 with one exception: HTTP/1.1 valid dates need to have GMT as timezone.
939 As we encounter dates from UTC or with a numeric timezone in other places,
940 we allow them here to be able to reuse the method. Either way, a date
941 must be in UTC or parsing will fail. Previous implementations of this
942 method used to ignore the timezone and assume always UTC. */
943 bool RFC1123StrToTime(const char* const str
,time_t &time
)
945 unsigned short day
= 0;
946 signed int year
= 0; // yes, Y23K problem – we gonna worry then…
947 std::string weekday
, month
, datespec
, timespec
, zone
;
948 std::istringstream
ss(str
);
949 auto const &posix
= std::locale::classic();
952 // we only superficially check weekday, mostly to avoid accepting localized
953 // weekdays here and take only its length to decide which datetime format we
954 // encounter here. The date isn't stored.
955 std::transform(weekday
.begin(), weekday
.end(), weekday
.begin(), ::tolower
);
956 std::array
<char const * const, 7> c_weekdays
= {{ "sun", "mon", "tue", "wed", "thu", "fri", "sat" }};
957 if (std::find(c_weekdays
.begin(), c_weekdays
.end(), weekday
.substr(0,3)) == c_weekdays
.end())
960 switch (weekday
.length())
963 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
964 if (weekday
[3] != ',')
966 ss
>> day
>> month
>> year
>> timespec
>> zone
;
969 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
970 ss
>> month
>> day
>> timespec
>> year
;
978 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
979 if (weekday
[weekday
.length() - 1] != ',')
981 ss
>> datespec
>> timespec
>> zone
;
982 auto const expldate
= VectorizeString(datespec
, '-');
983 if (expldate
.size() != 3)
987 day
= std::stoi(expldate
[0], &pos
);
988 if (pos
!= expldate
[0].length())
990 year
= 1900 + std::stoi(expldate
[2], &pos
);
991 if (pos
!= expldate
[2].length())
993 strprintf(datespec
, "%.4d-%.2d-%.2d", year
, MonthConv(expldate
[1].c_str()) + 1, day
);
1000 if (ss
.fail() || ss
.bad() || !ss
.eof())
1003 if (zone
!= "GMT" && zone
!= "UTC" && zone
!= "Z") // RFC 822
1005 // numeric timezones as a should of RFC 1123 and generally preferred
1008 auto const z
= std::stoi(zone
, &pos
);
1009 if (z
!= 0 || pos
!= zone
.length())
1016 if (datespec
.empty())
1020 strprintf(datespec
, "%.4d-%.2d-%.2d", year
, MonthConv(month
.c_str()) + 1, day
);
1023 std::string
const datetime
= datespec
+ ' ' + timespec
;
1025 if (strptime(datetime
.c_str(), "%Y-%m-%d %H:%M:%S", &Tm
) == nullptr)
1031 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
1032 // ---------------------------------------------------------------------
1034 bool FTPMDTMStrToTime(const char* const str
,time_t &time
)
1037 // MDTM includes no whitespace, but whitespace in the format is recommended and ignored by strptime
1038 if (strptime(str
, "%Y %m %d %H %M %S", &Tm
) == NULL
)
1045 // StrToTime - Converts a string into a time_t /*{{{*/
1046 // ---------------------------------------------------------------------
1047 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
1048 and the C library asctime format. It requires the GNU library function
1049 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
1050 reason the C library does not provide any such function :< This also
1051 handles the weird, but unambiguous FTP time format*/
1052 bool StrToTime(const string
&Val
,time_t &Result
)
1057 // Skip the day of the week
1058 const char *I
= strchr(Val
.c_str(), ' ');
1060 // Handle RFC 1123 time
1062 if (sscanf(I
," %2d %3s %4d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
1063 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
1065 // Handle RFC 1036 time
1066 if (sscanf(I
," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,
1067 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
1072 if (sscanf(I
," %3s %2d %2d:%2d:%2d %4d",Month
,&Tm
.tm_mday
,
1073 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
1076 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
1077 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
1086 Tm
.tm_mon
= MonthConv(Month
);
1088 Tm
.tm_mon
= 0; // we don't have a month, so pick something
1091 // Convert to local time and then to GMT
1092 Result
= timegm(&Tm
);
1096 // StrToNum - Convert a fixed length string to a number /*{{{*/
1097 // ---------------------------------------------------------------------
1098 /* This is used in decoding the crazy fixed length string headers in
1099 tar and ar files. */
1100 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
1103 if (Len
>= sizeof(S
))
1108 // All spaces is a zero
1111 for (I
= 0; S
[I
] == ' '; I
++);
1116 Res
= strtoul(S
,&End
,Base
);
1123 // StrToNum - Convert a fixed length string to a number /*{{{*/
1124 // ---------------------------------------------------------------------
1125 /* This is used in decoding the crazy fixed length string headers in
1126 tar and ar files. */
1127 bool StrToNum(const char *Str
,unsigned long long &Res
,unsigned Len
,unsigned Base
)
1130 if (Len
>= sizeof(S
))
1135 // All spaces is a zero
1138 for (I
= 0; S
[I
] == ' '; I
++);
1143 Res
= strtoull(S
,&End
,Base
);
1151 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1152 // ---------------------------------------------------------------------
1153 /* This is used in decoding the base-256 encoded fixed length fields in
1155 bool Base256ToNum(const char *Str
,unsigned long long &Res
,unsigned int Len
)
1157 if ((Str
[0] & 0x80) == 0)
1161 Res
= Str
[0] & 0x7F;
1162 for(unsigned int i
= 1; i
< Len
; ++i
)
1163 Res
= (Res
<<8) + Str
[i
];
1168 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1169 // ---------------------------------------------------------------------
1170 /* This is used in decoding the base-256 encoded fixed length fields in
1172 bool Base256ToNum(const char *Str
,unsigned long &Res
,unsigned int Len
)
1174 unsigned long long Num
= 0;
1177 rc
= Base256ToNum(Str
, Num
, Len
);
1178 // rudimentary check for overflow (Res = ulong, Num = ulonglong)
1186 // HexDigit - Convert a hex character into an integer /*{{{*/
1187 // ---------------------------------------------------------------------
1188 /* Helper for Hex2Num */
1189 static int HexDigit(int c
)
1191 if (c
>= '0' && c
<= '9')
1193 if (c
>= 'a' && c
<= 'f')
1194 return c
- 'a' + 10;
1195 if (c
>= 'A' && c
<= 'F')
1196 return c
- 'A' + 10;
1200 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1201 // ---------------------------------------------------------------------
1202 /* The length of the buffer must be exactly 1/2 the length of the string. */
1203 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
1205 return Hex2Num(APT::StringView(Str
), Num
, Length
);
1208 bool Hex2Num(const APT::StringView Str
,unsigned char *Num
,unsigned int Length
)
1210 if (Str
.length() != Length
*2)
1213 // Convert each digit. We store it in the same order as the string
1215 for (auto I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
1217 int first_half
= HexDigit(I
[0]);
1222 second_half
= HexDigit(I
[1]);
1223 if (second_half
< 0)
1225 Num
[J
] = first_half
<< 4;
1226 Num
[J
] += second_half
;
1232 // TokSplitString - Split a string up by a given token /*{{{*/
1233 // ---------------------------------------------------------------------
1234 /* This is intended to be a faster splitter, it does not use dynamic
1235 memory. Input is changed to insert nulls at each token location. */
1236 bool TokSplitString(char Tok
,char *Input
,char **List
,
1237 unsigned long ListMax
)
1239 // Strip any leading spaces
1240 char *Start
= Input
;
1241 char *Stop
= Start
+ strlen(Start
);
1242 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
1244 unsigned long Count
= 0;
1248 // Skip to the next Token
1249 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
1251 // Back remove spaces
1253 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
1256 List
[Count
++] = Start
;
1257 if (Count
>= ListMax
)
1264 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1272 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1273 // ---------------------------------------------------------------------
1274 /* This can be used to split a given string up into a vector, so the
1275 purpose is the same as in the method above and this one is a bit slower
1276 also, but the advantage is that we have an iterable vector */
1277 vector
<string
> VectorizeString(string
const &haystack
, char const &split
)
1279 vector
<string
> exploded
;
1280 if (haystack
.empty() == true)
1282 string::const_iterator start
= haystack
.begin();
1283 string::const_iterator end
= start
;
1285 for (; end
!= haystack
.end() && *end
!= split
; ++end
);
1286 exploded
.push_back(string(start
, end
));
1288 } while (end
!= haystack
.end() && (++end
) != haystack
.end());
1292 // StringSplit - split a string into a string vector by token /*{{{*/
1293 // ---------------------------------------------------------------------
1294 /* See header for details.
1296 vector
<string
> StringSplit(std::string
const &s
, std::string
const &sep
,
1297 unsigned int maxsplit
)
1299 vector
<string
> split
;
1302 // no separator given, this is bogus
1307 while (pos
!= string::npos
)
1309 pos
= s
.find(sep
, start
);
1310 split
.push_back(s
.substr(start
, pos
-start
));
1312 // if maxsplit is reached, the remaining string is the last item
1313 if(split
.size() >= maxsplit
)
1315 split
[split
.size()-1] = s
.substr(start
);
1318 start
= pos
+sep
.size();
1323 // RegexChoice - Simple regex list/list matcher /*{{{*/
1324 // ---------------------------------------------------------------------
1326 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1327 const char **ListEnd
)
1329 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1332 unsigned long Hits
= 0;
1333 for (; ListBegin
< ListEnd
; ++ListBegin
)
1335 // Check if the name is a regex
1338 for (I
= *ListBegin
; *I
!= 0; I
++)
1339 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1344 // Compile the regex pattern
1347 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1353 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1358 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1362 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1367 if (R
->Hit
== false)
1377 _error
->Warning(_("Selection %s not found"),*ListBegin
);
1383 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1384 // ---------------------------------------------------------------------
1385 /* This is used to make the internationalization strings easier to translate
1386 and to allow reordering of parameters */
1387 static bool iovprintf(ostream
&out
, const char *format
,
1388 va_list &args
, ssize_t
&size
) {
1389 char *S
= (char*)malloc(size
);
1390 ssize_t
const n
= vsnprintf(S
, size
, format
, args
);
1391 if (n
> -1 && n
< size
) {
1404 void ioprintf(ostream
&out
,const char *format
,...)
1410 va_start(args
,format
);
1411 ret
= iovprintf(out
, format
, args
, size
);
1417 void strprintf(string
&out
,const char *format
,...)
1421 std::ostringstream outstr
;
1424 va_start(args
,format
);
1425 ret
= iovprintf(outstr
, format
, args
, size
);
1433 // safe_snprintf - Safer snprintf /*{{{*/
1434 // ---------------------------------------------------------------------
1435 /* This is a snprintf that will never (ever) go past 'End' and returns a
1436 pointer to the end of the new string. The returned string is always null
1437 terminated unless Buffer == End. This is a better alternative to using
1438 consecutive snprintfs. */
1439 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
1446 va_start(args
,Format
);
1447 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
1450 if (Did
< 0 || Buffer
+ Did
> End
)
1452 return Buffer
+ Did
;
1455 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1456 // ---------------------------------------------------------------------
1457 string
StripEpoch(const string
&VerStr
)
1459 size_t i
= VerStr
.find(":");
1460 if (i
== string::npos
)
1462 return VerStr
.substr(i
+1);
1466 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1467 // ---------------------------------------------------------------------
1468 /* This little function is the most called method we have and therefore
1469 tries to do the absolute minimum - it is notably faster than the
1470 standard tolower/toupper and as a bonus avoids problems with different
1471 locales - we only operate on ASCII chars anyway. */
1472 #undef tolower_ascii
1473 int tolower_ascii(int const c
) APT_CONST APT_COLD
;
1474 int tolower_ascii(int const c
)
1476 return tolower_ascii_inline(c
);
1480 // isspace_ascii - isspace() function that ignores the locale /*{{{*/
1481 // ---------------------------------------------------------------------
1482 /* This little function is one of the most called methods we have and
1483 therefore tries to do the absolute minimum - it is notably faster than the
1484 standard isspace() and as a bonus avoids problems with different
1485 locales - we only operate on ASCII chars anyway. */
1486 #undef isspace_ascii
1487 int isspace_ascii(int const c
) APT_CONST APT_COLD
;
1488 int isspace_ascii(int const c
)
1490 return isspace_ascii_inline(c
);
1494 // CheckDomainList - See if Host is in a comma-separated list /*{{{*/
1495 // ---------------------------------------------------------------------
1496 /* The domain list is a comma-separated list of domains that are suffix
1497 matched against the argument */
1498 bool CheckDomainList(const string
&Host
,const string
&List
)
1500 string::const_iterator Start
= List
.begin();
1501 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); ++Cur
)
1503 if (Cur
< List
.end() && *Cur
!= ',')
1506 // Match the end of the string..
1507 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1509 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1517 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1518 // ---------------------------------------------------------------------
1520 size_t strv_length(const char **str_array
)
1523 for (i
=0; str_array
[i
] != NULL
; i
++)
1529 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1530 // ---------------------------------------------------------------------
1532 string
DeEscapeString(const string
&input
)
1535 string::const_iterator it
;
1537 for (it
= input
.begin(); it
!= input
.end(); ++it
)
1539 // just copy non-escape chars
1546 // deal with double escape
1548 (it
+ 1 < input
.end()) && it
[1] == '\\')
1552 // advance iterator one step further
1557 // ensure we have a char to read
1558 if (it
+ 1 == input
.end())
1566 if (it
+ 2 <= input
.end()) {
1570 output
+= (char)strtol(tmp
, 0, 8);
1575 if (it
+ 2 <= input
.end()) {
1579 output
+= (char)strtol(tmp
, 0, 16);
1584 // FIXME: raise exception here?
1591 // URI::CopyFrom - Copy from an object /*{{{*/
1592 // ---------------------------------------------------------------------
1593 /* This parses the URI into all of its components */
1594 void URI::CopyFrom(const string
&U
)
1596 string::const_iterator I
= U
.begin();
1598 // Locate the first colon, this separates the scheme
1599 for (; I
< U
.end() && *I
!= ':' ; ++I
);
1600 string::const_iterator FirstColon
= I
;
1602 /* Determine if this is a host type URI with a leading double //
1603 and then search for the first single / */
1604 string::const_iterator SingleSlash
= I
;
1605 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1608 /* Find the / indicating the end of the hostname, ignoring /'s inside [] brackets */
1610 bool InBracket
= false;
1611 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); ++SingleSlash
)
1613 if (*SingleSlash
== '[')
1615 if (InBracket
== true && *SingleSlash
== ']')
1619 if (SingleSlash
> U
.end())
1620 SingleSlash
= U
.end();
1622 // We can now write the access and path specifiers
1623 Access
.assign(U
.begin(),FirstColon
);
1624 if (SingleSlash
!= U
.end())
1625 Path
.assign(SingleSlash
,U
.end());
1626 if (Path
.empty() == true)
1629 // Now we attempt to locate a user:pass@host fragment
1630 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1634 if (FirstColon
>= U
.end())
1637 if (FirstColon
> SingleSlash
)
1638 FirstColon
= SingleSlash
;
1640 // Find the colon...
1642 if (I
> SingleSlash
)
1645 // Search for the @ separating user:pass from host
1646 auto const RevAt
= std::find(
1647 std::string::const_reverse_iterator(SingleSlash
),
1648 std::string::const_reverse_iterator(I
), '@');
1649 string::const_iterator
const At
= RevAt
.base() == I
? SingleSlash
: std::prev(RevAt
.base());
1650 // and then look for the colon between user and pass
1651 string::const_iterator
const SecondColon
= std::find(I
, At
, ':');
1653 // Now write the host and user/pass
1654 if (At
== SingleSlash
)
1656 if (FirstColon
< SingleSlash
)
1657 Host
.assign(FirstColon
,SingleSlash
);
1661 Host
.assign(At
+1,SingleSlash
);
1662 // username and password must be encoded (RFC 3986)
1663 User
.assign(DeQuoteString(FirstColon
,SecondColon
));
1664 if (SecondColon
< At
)
1665 Password
.assign(DeQuoteString(SecondColon
+1,At
));
1668 // Now we parse the RFC 2732 [] hostnames.
1669 unsigned long PortEnd
= 0;
1671 for (unsigned I
= 0; I
!= Host
.length();)
1680 if (InBracket
== true && Host
[I
] == ']')
1691 if (InBracket
== true)
1697 // Now we parse off a port number from the hostname
1699 string::size_type Pos
= Host
.rfind(':');
1700 if (Pos
== string::npos
|| Pos
< PortEnd
)
1703 Port
= atoi(string(Host
,Pos
+1).c_str());
1704 Host
.assign(Host
,0,Pos
);
1707 // URI::operator string - Convert the URI to a string /*{{{*/
1708 // ---------------------------------------------------------------------
1710 URI::operator string()
1712 std::stringstream Res
;
1714 if (Access
.empty() == false)
1715 Res
<< Access
<< ':';
1717 if (Host
.empty() == false)
1719 if (Access
.empty() == false)
1722 if (User
.empty() == false)
1724 // FIXME: Technically userinfo permits even fewer
1725 // characters than these, but this is not conveniently
1726 // expressed with a blacklist.
1727 Res
<< QuoteString(User
, ":/?#[]@");
1728 if (Password
.empty() == false)
1729 Res
<< ":" << QuoteString(Password
, ":/?#[]@");
1733 // Add RFC 2732 escaping characters
1734 if (Access
.empty() == false && Host
.find_first_of("/:") != string::npos
)
1735 Res
<< '[' << Host
<< ']';
1740 Res
<< ':' << std::to_string(Port
);
1743 if (Path
.empty() == false)
1754 // URI::SiteOnly - Return the scheme and site for the URI /*{{{*/
1755 string
URI::SiteOnly(const string
&URI
)
1764 // URI::ArchiveOnly - Return the scheme, site and cleaned path for the URI /*{{{*/
1765 string
URI::ArchiveOnly(const string
&URI
)
1770 if (U
.Path
.empty() == false && U
.Path
[U
.Path
.length() - 1] == '/')
1771 U
.Path
.erase(U
.Path
.length() - 1);
1775 // URI::NoUserPassword - Return the scheme, site and path for the URI /*{{{*/
1776 string
URI::NoUserPassword(const string
&URI
)