1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
/* Returns a copy of s with every leading and trailing ' ', '\t' and '\n'
   removed. Characters in the middle of the string are left untouched. An
   empty string is returned when s is empty or holds nothing but those
   three characters. */
std::string Strip(const std::string &s)
{
   static const char Blanks[] = " \t\n";

   const std::string::size_type start = s.find_first_not_of(Blanks);
   // Nothing but blanks (or empty input): there is nothing to keep
   if (start == std::string::npos)
      return std::string();

   // At least one non-blank exists, so this search cannot fail
   const std::string::size_type end = s.find_last_not_of(Blanks);
   return s.substr(start, end - start + 1);
}
/* Returns true when the last end.size() characters of s are equal to end.
   An empty end matches every string; an end longer than s never matches. */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   // Compare the tail in place instead of building a temporary substring
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}
68 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
69 // ---------------------------------------------------------------------
70 /* This is handy to use before displaying some information to the end user */
71 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
76 size_t insize
, bufsize
;
79 cd
= iconv_open(codeset
, "UTF-8");
80 if (cd
== (iconv_t
)(-1)) {
81 // Something went wrong
83 _error
->Error("conversion from 'UTF-8' to '%s' not available",
91 insize
= bufsize
= orig
.size();
93 inptr
= (char *)inbuf
;
94 outbuf
= new char[bufsize
];
95 size_t lastError
= -1;
99 char *outptr
= outbuf
;
100 size_t outsize
= bufsize
;
101 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
102 dest
->append(outbuf
, outptr
- outbuf
);
103 if (err
== (size_t)(-1))
110 // replace a series of unknown multibytes with a single "?"
111 if (lastError
!= insize
) {
112 lastError
= insize
- 1;
120 if (outptr
== outbuf
)
124 outbuf
= new char[bufsize
];
138 // strstrip - Remove white space from the front and back of a string /*{{{*/
139 // ---------------------------------------------------------------------
140 /* This is handy to use when parsing a file. It also removes \n's left
141 over from fgets and company */
142 char *_strstrip(char *String
)
144 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
148 return _strrstrip(String
);
151 // strrstrip - Remove white space from the back of a string /*{{{*/
152 // ---------------------------------------------------------------------
153 char *_strrstrip(char *String
)
155 char *End
= String
+ strlen(String
) - 1;
156 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
157 *End
== '\r'); End
--);
163 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
164 // ---------------------------------------------------------------------
166 char *_strtabexpand(char *String
,size_t Len
)
168 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
172 if (I
+ 8 > String
+ Len
)
178 /* Assume the start of the string is 0 and find the next 8 char
184 Len
= 8 - ((String
- I
) % 8);
192 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
193 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
198 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
199 // ---------------------------------------------------------------------
200 /* This grabs a single word, converts any % escaped characters to their
201 proper values and advances the pointer. Double quotes are understood
202 and stripped out as well. This is for URI/URL parsing. It also can
203 understand [] brackets.*/
204 bool ParseQuoteWord(const char *&String
,string
&Res
)
206 // Skip leading whitespace
207 const char *C
= String
;
208 for (;*C
!= 0 && *C
== ' '; C
++);
212 // Jump to the next word
213 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
217 C
= strchr(C
+ 1, '"');
223 C
= strchr(C
+ 1, ']');
229 // Now de-quote characters
232 const char *Start
= String
;
234 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
236 if (*Start
== '%' && Start
+ 2 < C
&&
237 isxdigit(Start
[1]) && isxdigit(Start
[2]))
242 *I
= (char)strtol(Tmp
,0,16);
255 // Skip ending white space
256 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
261 // ParseCWord - Parses a string like a C "" expression /*{{{*/
262 // ---------------------------------------------------------------------
263 /* This expects a series of space separated strings enclosed in ""'s.
264 It concatenates the ""'s into a single string. */
265 bool ParseCWord(const char *&String
,string
&Res
)
267 // Skip leading whitespace
268 const char *C
= String
;
269 for (;*C
!= 0 && *C
== ' '; C
++);
275 if (strlen(String
) >= sizeof(Buffer
))
282 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
291 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
293 if (isspace(*C
) == 0)
303 // QuoteString - Convert a string into quoted form /*{{{*/
304 // ---------------------------------------------------------------------
306 string
QuoteString(const string
&Str
, const char *Bad
)
309 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); ++I
)
311 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
312 *I
== 0x25 || // percent '%' char
313 *I
<= 0x20 || *I
>= 0x7F) // control chars
316 sprintf(Buf
,"%%%02x",(int)*I
);
325 // DeQuoteString - Convert a string from quoted form /*{{{*/
326 // ---------------------------------------------------------------------
327 /* This undoes QuoteString */
328 string
DeQuoteString(const string
&Str
)
330 return DeQuoteString(Str
.begin(),Str
.end());
332 string
DeQuoteString(string::const_iterator
const &begin
,
333 string::const_iterator
const &end
)
336 for (string::const_iterator I
= begin
; I
!= end
; ++I
)
338 if (*I
== '%' && I
+ 2 < end
&&
339 isxdigit(I
[1]) && isxdigit(I
[2]))
345 Res
+= (char)strtol(Tmp
,0,16);
356 // SizeToStr - Convert a long into a human readable size /*{{{*/
357 // ---------------------------------------------------------------------
358 /* A max of 4 digits are shown before conversion to the next highest unit.
359 The max length of the string will be 5 chars unless the size is > 10
361 string
SizeToStr(double Size
)
370 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
371 ExaBytes, ZettaBytes, YottaBytes */
372 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
376 if (ASize
< 100 && I
!= 0)
378 sprintf(S
,"%'.1f %c",ASize
,Ext
[I
]);
384 sprintf(S
,"%'.0f %c",ASize
,Ext
[I
]);
394 // TimeToStr - Convert the time into a string /*{{{*/
395 // ---------------------------------------------------------------------
396 /* Converts a number of seconds to a hms format */
397 string
TimeToStr(unsigned long Sec
)
405 //d means days, h means hours, min means minutes, s means seconds
406 sprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
412 //h means hours, min means minutes, s means seconds
413 sprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
419 //min means minutes, s means seconds
420 sprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
425 sprintf(S
,_("%lis"),Sec
);
432 // SubstVar - Substitute a string for another string /*{{{*/
433 // ---------------------------------------------------------------------
434 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns a copy of Str in which every non-overlapping occurrence of Subst,
   scanning left to right, is replaced by Contents. Str is returned
   unchanged when Subst is empty or does not occur in it. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   // An empty pattern matches at every position without advancing, so the
   // scan below would never terminate; treat it as "nothing to replace"
   if (Subst.empty() == true)
      return Str;

   std::string::size_type Pos = 0;
   std::string::size_type OldPos = 0;
   std::string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != std::string::npos)
   {
      // Copy the text between the previous match and this one. The third
      // argument is a length, not an end position.
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp.append(Contents);
      OldPos = Pos + Subst.length();
   }

   // No match at all: hand back the input untouched
   if (OldPos == 0)
      return Str;

   // Append whatever follows the last match (may be empty)
   Temp.append(Str,OldPos,std::string::npos);
   return Temp;
}
454 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
456 for (; Vars
->Subst
!= 0; Vars
++)
457 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
461 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
462 // ---------------------------------------------------------------------
463 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string made of Separator repeated Depth + 1 times, so a Depth of
   zero still yields one copy of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string output;
   // Count down from Depth + 1 so that exactly Depth + 1 copies are appended
   for (unsigned long d = Depth + 1; d > 0; d--)
      output.append(Separator);
   return output;
}
472 // URItoFileName - Convert the uri into a unique file name /*{{{*/
473 // ---------------------------------------------------------------------
474 /* This converts a URI into a safe filename. It quotes all unsafe characters
475 and converts / to _ and removes the scheme identifier. The resulting
476 file name should be unique and never occur again for a different file */
477 string
URItoFileName(const string
&URI
)
479 // Nuke 'sensitive' items
485 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
486 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
487 replace(NewURI
.begin(),NewURI
.end(),'/','_');
491 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
492 // ---------------------------------------------------------------------
493 /* This routine performs a base64 transformation on a string. It was ripped
494 from wget and then patched and bug fixed.
496 This spec can be found in rfc2045 */
497 string
Base64Encode(const string
&S
)
500 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
501 'I','J','K','L','M','N','O','P',
502 'Q','R','S','T','U','V','W','X',
503 'Y','Z','a','b','c','d','e','f',
504 'g','h','i','j','k','l','m','n',
505 'o','p','q','r','s','t','u','v',
506 'w','x','y','z','0','1','2','3',
507 '4','5','6','7','8','9','+','/'};
509 // Pre-allocate some space
511 Final
.reserve((4*S
.length() + 2)/3 + 2);
513 /* Transform the 3x8 bits to 4x6 bits, as required by
515 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
517 char Bits
[3] = {0,0,0};
524 Final
+= tbl
[Bits
[0] >> 2];
525 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
527 if (I
+ 1 >= S
.end())
530 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
532 if (I
+ 2 >= S
.end())
535 Final
+= tbl
[Bits
[2] & 0x3f];
538 /* Apply the padding elements, this tells how many bytes the remote
539 end should discard */
540 if (S
.length() % 3 == 2)
542 if (S
.length() % 3 == 1)
548 // stringcmp - Arbitrary string compare /*{{{*/
549 // ---------------------------------------------------------------------
550 /* This safely compares two non-null terminated strings of arbitrary
552 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
554 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
558 if (A
== AEnd
&& B
== BEnd
)
570 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
571 const char *B
,const char *BEnd
)
573 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
577 if (A
== AEnd
&& B
== BEnd
)
587 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
588 string::const_iterator B
,string::const_iterator BEnd
)
590 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
594 if (A
== AEnd
&& B
== BEnd
)
606 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
607 // ---------------------------------------------------------------------
609 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
611 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
612 if (tolower_ascii(*A
) != tolower_ascii(*B
))
615 if (A
== AEnd
&& B
== BEnd
)
621 if (tolower_ascii(*A
) < tolower_ascii(*B
))
626 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
627 const char *B
,const char *BEnd
)
629 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
630 if (tolower_ascii(*A
) != tolower_ascii(*B
))
633 if (A
== AEnd
&& B
== BEnd
)
639 if (tolower_ascii(*A
) < tolower_ascii(*B
))
643 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
644 string::const_iterator B
,string::const_iterator BEnd
)
646 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
647 if (tolower_ascii(*A
) != tolower_ascii(*B
))
650 if (A
== AEnd
&& B
== BEnd
)
656 if (tolower_ascii(*A
) < tolower_ascii(*B
))
662 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
663 // ---------------------------------------------------------------------
664 /* The format is like those used in package files and the method
665 communication system */
666 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
668 // Look for a matching tag.
669 int Length
= strlen(Tag
);
670 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); ++I
)
673 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
675 // Find the end of line and strip the leading/trailing spaces
676 string::const_iterator J
;
678 for (; isspace(*I
) != 0 && I
< Message
.end(); ++I
);
679 for (J
= I
; *J
!= '\n' && J
< Message
.end(); ++J
);
680 for (; J
> I
&& isspace(J
[-1]) != 0; --J
);
685 for (; *I
!= '\n' && I
< Message
.end(); ++I
);
688 // Failed to find a match
694 // StringToBool - Converts a string into a boolean /*{{{*/
695 // ---------------------------------------------------------------------
696 /* This inspects the string to see if it is true or if it is false and
697 then returns the result. Several variants of true/false are checked. */
/* Returns 1 if Text spells a true value, 0 if it spells a false value and
   Default otherwise. The numbers 0 and 1 are accepted (in any base strtol
   auto-detects), as are the case-insensitive words no/false/without/off/
   disable and yes/true/with/on/enable. */
int StringToBool(const std::string &Text,int Default)
{
   char *End;
   // Keep the full long so that large values are not truncated into 0 or 1
   const long Res = strtol(Text.c_str(),&End,0);
   // Only trust the number if strtol consumed the whole, non-empty string;
   // otherwise something like "0ad" would be taken for false
   if (End != Text.c_str() && *End == '\0' && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"off") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"on") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
724 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
725 // ---------------------------------------------------------------------
726 /* This converts a time_t into a string time representation that is
727 year 2000 compliant and timezone neutral */
/* Formats Date as "Day, DD Mon YYYY HH:MM:SS GMT" using English day and
   month abbreviations. The broken-down time comes from gmtime_r; an empty
   string is returned if that conversion fails. */
std::string TimeRFC1123(time_t Date)
{
   struct tm Conv;
   if (gmtime_r(&Date, &Conv) == NULL)
      return "";

   // Fixed English names, indexed by tm_wday and tm_mon
   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   // tm_year counts from 1900
   snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
745 // ReadMessages - Read messages from the FD /*{{{*/
746 // ---------------------------------------------------------------------
747 /* This pulls full messages from the input FD into the message buffer.
748 It assumes that messages will not pause during transit so no
749 fancy buffering is used.
751 In particular: this reads blocks from the input until it believes
752 that it's run out of input text. Each block is terminated by a
753 double newline ('\n' followed by '\n'). As noted below, there is a
754 bug in this code: it assumes that all the blocks have been read if
755 it doesn't see additional text in the buffer after the last one is
756 parsed, which will cause it to lose blocks if the last block
757 coincides with the end of the buffer.
759 bool ReadMessages(int Fd
, vector
<string
> &List
)
763 // Represents any left-over from the previous iteration of the
764 // parse loop. (i.e., if a message is split across the end
765 // of the buffer, it goes here)
766 string PartialMessage
;
770 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
771 if (Res
< 0 && errno
== EINTR
)
774 // Process is dead, this is kind of bad..
779 if (Res
< 0 && errno
== EAGAIN
)
786 // Look for the end of the message
787 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
790 (I
[0] != '\n' && strncmp(I
, "\r\n\r\n", 4) != 0))
793 // Pull the message out
794 string
Message(Buffer
,I
-Buffer
);
795 PartialMessage
+= Message
;
798 for (; I
< End
&& (*I
== '\n' || *I
== '\r'); ++I
);
800 memmove(Buffer
,I
,End
-Buffer
);
803 List
.push_back(PartialMessage
);
804 PartialMessage
.clear();
808 // If there's text left in the buffer, store it
809 // in PartialMessage and throw the rest of the buffer
810 // away. This allows us to handle messages that
811 // are longer than the static buffer size.
812 PartialMessage
+= string(Buffer
, End
);
817 // BUG ALERT: if a message block happens to end at a
818 // multiple of 64000 characters, this will cause it to
819 // terminate early, leading to a badly formed block and
820 // probably crashing the method. However, this is the only
821 // way we have to find the end of the message block. I have
822 // an idea of how to fix this, but it will require changes
823 // to the protocol (essentially to mark the beginning and
824 // end of the block).
826 // -- dburrows 2008-04-02
830 if (WaitFd(Fd
) == false)
835 // MonthConv - Converts a month string into a number /*{{{*/
836 // ---------------------------------------------------------------------
837 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
838 Made it a bit more robust with a few tolower_ascii though. */
839 static int MonthConv(char *Month
)
841 switch (tolower_ascii(*Month
))
844 return tolower_ascii(Month
[1]) == 'p'?3:7;
850 if (tolower_ascii(Month
[1]) == 'a')
852 return tolower_ascii(Month
[2]) == 'n'?5:6;
854 return tolower_ascii(Month
[2]) == 'r'?2:4;
862 // Pretend it is January..
868 // timegm - Internal timegm if the gnu version is not available /*{{{*/
869 // ---------------------------------------------------------------------
870 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
871 than local timezone (mktime assumes the latter).
873 This function is a nonstandard GNU extension that is also present on
874 the BSDs and maybe other systems. For others we follow the advice of
875 the manpage of timegm and use his portable replacement. */
877 static time_t timegm(struct tm
*t
)
879 char *tz
= getenv("TZ");
882 time_t ret
= mktime(t
);
892 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
893 // ---------------------------------------------------------------------
894 /* tries to parse a full date as specified in RFC2616 Section 3.3.1
895 with one exception: All timezones (%Z) are accepted but the protocol
896 says that it MUST be GMT, but this one is equal to UTC which we will
897 encounter from time to time (e.g. in Release files) so we accept all
898 here and just assume it is GMT (or UTC) later on */
899 bool RFC1123StrToTime(const char* const str
,time_t &time
)
902 setlocale (LC_ALL
,"C");
904 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
905 (strptime(str
, "%a, %d %b %Y %H:%M:%S %Z", &Tm
) == NULL
&&
906 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
907 strptime(str
, "%A, %d-%b-%y %H:%M:%S %Z", &Tm
) == NULL
&&
908 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
909 strptime(str
, "%a %b %d %H:%M:%S %Y", &Tm
) == NULL
);
910 setlocale (LC_ALL
,"");
918 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
919 // ---------------------------------------------------------------------
921 bool FTPMDTMStrToTime(const char* const str
,time_t &time
)
924 // MDTM includes no whitespaces but recommend and ignored by strptime
925 if (strptime(str
, "%Y %m %d %H %M %S", &Tm
) == NULL
)
932 // StrToTime - Converts a string into a time_t /*{{{*/
933 // ---------------------------------------------------------------------
934 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
935 and the C library asctime format. It requires the GNU library function
936 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
937 reason the C library does not provide any such function :< This also
938 handles the weird, but unambiguous FTP time format*/
939 bool StrToTime(const string
&Val
,time_t &Result
)
944 // Skip the day of the week
945 const char *I
= strchr(Val
.c_str(), ' ');
947 // Handle RFC 1123 time
949 if (sscanf(I
," %2d %3s %4d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
950 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
952 // Handle RFC 1036 time
953 if (sscanf(I
," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,
954 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
959 if (sscanf(I
," %3s %2d %2d:%2d:%2d %4d",Month
,&Tm
.tm_mday
,
960 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
963 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
964 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
973 Tm
.tm_mon
= MonthConv(Month
);
975 Tm
.tm_mon
= 0; // we don't have a month, so pick something
978 // Convert to local time and then to GMT
979 Result
= timegm(&Tm
);
983 // StrToNum - Convert a fixed length string to a number /*{{{*/
984 // ---------------------------------------------------------------------
985 /* This is used in decoding the crazy fixed length string headers in
987 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
990 if (Len
>= sizeof(S
))
995 // All spaces is a zero
998 for (I
= 0; S
[I
] == ' '; I
++);
1003 Res
= strtoul(S
,&End
,Base
);
1010 // StrToNum - Convert a fixed length string to a number /*{{{*/
1011 // ---------------------------------------------------------------------
1012 /* This is used in decoding the crazy fixed length string headers in
1013 tar and ar files. */
1014 bool StrToNum(const char *Str
,unsigned long long &Res
,unsigned Len
,unsigned Base
)
1017 if (Len
>= sizeof(S
))
1022 // All spaces is a zero
1025 for (I
= 0; S
[I
] == ' '; I
++);
1030 Res
= strtoull(S
,&End
,Base
);
1038 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1039 // ---------------------------------------------------------------------
1040 /* This is used in decoding the 256bit encoded fixed length fields in
/* Decodes Len bytes at Str as a big-endian base-256 number into Res. The
   top bit of the first byte must be set, otherwise false is returned and
   Res is left untouched; that bit is masked off and the remaining seven
   bits form the most significant part of the value. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Go through unsigned char: where plain char is signed, a byte >= 0x80
   // would be sign-extended and corrupt the accumulated value
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1055 // HexDigit - Convert a hex character into an integer /*{{{*/
1056 // ---------------------------------------------------------------------
1057 /* Helper for Hex2Num */
1058 static int HexDigit(int c
)
1060 if (c
>= '0' && c
<= '9')
1062 if (c
>= 'a' && c
<= 'f')
1063 return c
- 'a' + 10;
1064 if (c
>= 'A' && c
<= 'F')
1065 return c
- 'A' + 10;
1069 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1070 // ---------------------------------------------------------------------
1071 /* The length of the buffer must be exactly 1/2 the length of the string. */
1072 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
1074 if (Str
.length() != Length
*2)
1077 // Convert each digit. We store it in the same order as the string
1079 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
1081 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
1084 Num
[J
] = HexDigit(I
[0]) << 4;
1085 Num
[J
] += HexDigit(I
[1]);
1091 // TokSplitString - Split a string up by a given token /*{{{*/
1092 // ---------------------------------------------------------------------
1093 /* This is intended to be a faster splitter, it does not use dynamic
1094 memory. Input is changed to insert nulls at each token location. */
1095 bool TokSplitString(char Tok
,char *Input
,char **List
,
1096 unsigned long ListMax
)
1098 // Strip any leading spaces
1099 char *Start
= Input
;
1100 char *Stop
= Start
+ strlen(Start
);
1101 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
1103 unsigned long Count
= 0;
1107 // Skip to the next Token
1108 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
1110 // Back remove spaces
1112 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
1115 List
[Count
++] = Start
;
1116 if (Count
>= ListMax
)
1123 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1131 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1132 // ---------------------------------------------------------------------
1133 /* This can be used to split a given string up into a vector, so the
1134 purpose is the same as in the method above and this one is a bit slower
1135 also, but the advantage is that we have an iterable vector */
/* Splits haystack at every occurrence of split and returns the pieces in
   order. Empty pieces between adjacent separators are kept. An empty
   haystack yields an empty vector, and a separator that is the very last
   character does not produce a trailing empty piece. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   // Work on indices so no iterator past end() is ever formed
   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const pos = haystack.find(split, start);
      if (pos == std::string::npos)
      {
         // Last piece: everything after the previous separator
         exploded.push_back(haystack.substr(start));
         break;
      }

      exploded.push_back(haystack.substr(start, pos - start));
      start = pos + 1;
      // The separator was the final character: stop without adding ""
      if (start == haystack.size())
         break;
   }
   return exploded;
}
1151 // StringSplit - split a string into a string vector by token /*{{{*/
1152 // ---------------------------------------------------------------------
1153 /* See header for details.
/* Splits s at every occurrence of sep and returns the pieces in order.
   At most maxsplit pieces are produced: once that limit is reached the
   whole remainder of s, separators included, becomes the final piece. An
   empty sep is rejected by returning an empty vector. */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
                                     unsigned int maxsplit)
{
   std::vector<std::string> split;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return split;

   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const pos = s.find(sep, start);

      // Either no further separator exists, or the next piece is the last
      // one allowed by maxsplit: the remaining string is the last item
      if (pos == std::string::npos || split.size() + 1 >= maxsplit)
      {
         split.push_back(s.substr(start));
         break;
      }

      split.push_back(s.substr(start, pos - start));
      start = pos + sep.size();
   }
   return split;
}
1182 // RegexChoice - Simple regex list/list matcher /*{{{*/
1183 // ---------------------------------------------------------------------
1185 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1186 const char **ListEnd
)
1188 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1191 unsigned long Hits
= 0;
1192 for (; ListBegin
< ListEnd
; ++ListBegin
)
1194 // Check if the name is a regex
1197 for (I
= *ListBegin
; *I
!= 0; I
++)
1198 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1203 // Compile the regex pattern
1206 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1212 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1217 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1221 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1226 if (R
->Hit
== false)
1236 _error
->Warning(_("Selection %s not found"),*ListBegin
);
1242 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1243 // ---------------------------------------------------------------------
1244 /* This is used to make the internationalization strings easier to translate
1245 and to allow reordering of parameters */
1246 static bool iovprintf(ostream
&out
, const char *format
,
1247 va_list &args
, ssize_t
&size
) {
1248 char *S
= (char*)malloc(size
);
1249 ssize_t
const n
= vsnprintf(S
, size
, format
, args
);
1250 if (n
> -1 && n
< size
) {
1263 void ioprintf(ostream
&out
,const char *format
,...)
1268 va_start(args
,format
);
1269 if (iovprintf(out
, format
, args
, size
) == true)
1274 void strprintf(string
&out
,const char *format
,...)
1278 std::ostringstream outstr
;
1280 va_start(args
,format
);
1281 if (iovprintf(outstr
, format
, args
, size
) == true)
1288 // safe_snprintf - Safer snprintf /*{{{*/
1289 // ---------------------------------------------------------------------
1290 /* This is a snprintf that will never (ever) go past 'End' and returns a
1291 pointer to the end of the new string. The returned string is always null
1292 terminated unless Buffer == end. This is a better alternative to using
1293 consecutive snprintfs. */
1294 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
1301 va_start(args
,Format
);
1302 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
1305 if (Did
< 0 || Buffer
+ Did
> End
)
1307 return Buffer
+ Did
;
1310 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1311 // ---------------------------------------------------------------------
/* Returns the part of VerStr that follows its first ':'. When VerStr has no
   ':' it is returned unchanged. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const i = VerStr.find(':');
   // No colon, nothing to strip
   if (i == std::string::npos)
      return VerStr;
   return VerStr.substr(i + 1);
}
1320 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1321 // ---------------------------------------------------------------------
1322 /* This little function is the most called method we have and therefore
1323 tries to do the absolute minimum - it is notably faster than the
1324 standard tolower/toupper and as a bonus avoids problems with different
1325 locales - we only operate on ascii chars anyway. */
/* Maps the ASCII upper-case letters 'A'..'Z' to 'a'..'z'. Every other value
   is returned unchanged; the current locale is never consulted. */
int tolower_ascii(int const c)
{
   if (c >= 'A' && c <= 'Z')
      return c - 'A' + 'a';
   return c;
}
1334 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1335 // ---------------------------------------------------------------------
1336 /* The domain list is a comma-separated list of domains that are suffix
1337 matched against the argument */
1338 bool CheckDomainList(const string
&Host
,const string
&List
)
1340 string::const_iterator Start
= List
.begin();
1341 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); ++Cur
)
1343 if (Cur
< List
.end() && *Cur
!= ',')
1346 // Match the end of the string..
1347 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1349 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1357 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1358 // ---------------------------------------------------------------------
/* Counts the entries of str_array that come before its first NULL entry.
   The array must contain a NULL entry, as the scan only stops there. */
size_t strv_length(const char **str_array)
{
   size_t i = 0;
   while (str_array[i] != NULL)
      ++i;
   return i;
}
1369 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1370 // ---------------------------------------------------------------------
1372 string
DeEscapeString(const string
&input
)
1375 string::const_iterator it
;
1377 for (it
= input
.begin(); it
!= input
.end(); ++it
)
1379 // just copy non-escape chars
1386 // deal with double escape
1388 (it
+ 1 < input
.end()) && it
[1] == '\\')
1392 // advance iterator one step further
1397 // ensure we have a char to read
1398 if (it
+ 1 == input
.end())
1406 if (it
+ 2 <= input
.end()) {
1410 output
+= (char)strtol(tmp
, 0, 8);
1415 if (it
+ 2 <= input
.end()) {
1419 output
+= (char)strtol(tmp
, 0, 16);
1424 // FIXME: raise exception here?
1431 // URI::CopyFrom - Copy from an object /*{{{*/
1432 // ---------------------------------------------------------------------
1433 /* This parses the URI into all of its components */
1434 void URI::CopyFrom(const string
&U
)
1436 string::const_iterator I
= U
.begin();
1438 // Locate the first colon, this separates the scheme
1439 for (; I
< U
.end() && *I
!= ':' ; ++I
);
1440 string::const_iterator FirstColon
= I
;
1442 /* Determine if this is a host type URI with a leading double //
1443 and then search for the first single / */
1444 string::const_iterator SingleSlash
= I
;
1445 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1448 /* Find the / indicating the end of the hostname, ignoring /'s in the
1450 bool InBracket
= false;
1451 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); ++SingleSlash
)
1453 if (*SingleSlash
== '[')
1455 if (InBracket
== true && *SingleSlash
== ']')
1459 if (SingleSlash
> U
.end())
1460 SingleSlash
= U
.end();
1462 // We can now write the access and path specifiers
1463 Access
.assign(U
.begin(),FirstColon
);
1464 if (SingleSlash
!= U
.end())
1465 Path
.assign(SingleSlash
,U
.end());
1466 if (Path
.empty() == true)
1469 // Now we attempt to locate a user:pass@host fragment
1470 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1474 if (FirstColon
>= U
.end())
1477 if (FirstColon
> SingleSlash
)
1478 FirstColon
= SingleSlash
;
1480 // Find the colon...
1482 if (I
> SingleSlash
)
1484 for (; I
< SingleSlash
&& *I
!= ':'; ++I
);
1485 string::const_iterator SecondColon
= I
;
1487 // Search for the @ after the colon
1488 for (; I
< SingleSlash
&& *I
!= '@'; ++I
);
1489 string::const_iterator At
= I
;
1491 // Now write the host and user/pass
1492 if (At
== SingleSlash
)
1494 if (FirstColon
< SingleSlash
)
1495 Host
.assign(FirstColon
,SingleSlash
);
1499 Host
.assign(At
+1,SingleSlash
);
1500 // username and password must be encoded (RFC 3986)
1501 User
.assign(DeQuoteString(FirstColon
,SecondColon
));
1502 if (SecondColon
< At
)
1503 Password
.assign(DeQuoteString(SecondColon
+1,At
));
1506 // Now we parse the RFC 2732 [] hostnames.
1507 unsigned long PortEnd
= 0;
1509 for (unsigned I
= 0; I
!= Host
.length();)
1518 if (InBracket
== true && Host
[I
] == ']')
1529 if (InBracket
== true)
1535 // Now we parse off a port number from the hostname
1537 string::size_type Pos
= Host
.rfind(':');
1538 if (Pos
== string::npos
|| Pos
< PortEnd
)
1541 Port
= atoi(string(Host
,Pos
+1).c_str());
1542 Host
.assign(Host
,0,Pos
);
1545 // URI::operator string - Convert the URI to a string /*{{{*/
1546 // ---------------------------------------------------------------------
1548 URI::operator string()
1552 if (Access
.empty() == false)
1555 if (Host
.empty() == false)
1557 if (Access
.empty() == false)
1560 if (User
.empty() == false)
1562 // FIXME: Technically userinfo is permitted even less
1563 // characters than these, but this is not conveniently
1564 // expressed with a blacklist.
1565 Res
+= QuoteString(User
, ":/?#[]@");
1566 if (Password
.empty() == false)
1567 Res
+= ":" + QuoteString(Password
, ":/?#[]@");
1571 // Add RFC 2732 escaping characters
1572 if (Access
.empty() == false &&
1573 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
1574 Res
+= '[' + Host
+ ']';
1581 sprintf(S
,":%u",Port
);
1586 if (Path
.empty() == false)
1597 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1598 // ---------------------------------------------------------------------
1600 string
URI::SiteOnly(const string
&URI
)
1609 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1610 // ---------------------------------------------------------------------
1612 string
URI::NoUserPassword(const string
&URI
)