1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
// Return a copy of s with leading and trailing spaces, tabs and newlines
// removed. A string that is empty or consists only of those characters
// yields the empty string.
std::string Strip(const std::string &s)
{
   std::string::size_type const start = s.find_first_not_of(" \t\n");
   // Nothing but whitespace: there is no character to keep
   if (start == std::string::npos)
      return std::string();

   // At least one non-whitespace character exists, so this cannot fail
   std::string::size_type const end = s.find_last_not_of(" \t\n");
   return s.substr(start, end - start + 1);
}
// True if s ends with the complete string end. An empty end matches any s.
bool Endswith(const std::string &s, const std::string &end)
{
   // A suffix longer than the string itself can never match
   if (end.size() > s.size())
      return false;
   // Compare in place rather than building a temporary substring
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}
// True if s begins with the complete string start. An empty start matches
// any s.
bool Startswith(const std::string &s, const std::string &start)
{
   // A prefix longer than the string itself can never match
   if (start.size() > s.size())
      return false;
   // Compare in place rather than building a temporary substring
   return s.compare(0, start.size(), start) == 0;
}
75 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
76 // ---------------------------------------------------------------------
77 /* This is handy to use before displaying information to the end user */
// Converts the UTF-8 text in orig to the character set named by codeset
// using iconv and appends the converted bytes to *dest.
// NOTE(review): several statements of this function are not present in this
// view; the comments below describe only the lines that are visible.
78 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
83 size_t insize
, bufsize
;
// Open a converter whose source encoding is UTF-8 and whose target is codeset
86 cd
= iconv_open(codeset
, "UTF-8");
// iconv_open signals failure with (iconv_t)(-1)
87 if (cd
== (iconv_t
)(-1)) {
88 // Something went wrong
// The failure is reported through the global _error object
90 _error
->Error("conversion from 'UTF-8' to '%s' not available",
// Input length and initial output buffer size are both the byte size of orig
98 insize
= bufsize
= orig
.size();
100 inptr
= (char *)inbuf
;
101 outbuf
= new char[bufsize
];
102 size_t lastError
= -1;
// Each pass starts writing at the beginning of outbuf
106 char *outptr
= outbuf
;
107 size_t outsize
= bufsize
;
108 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
// Whatever iconv produced in this pass is appended to the result
109 dest
->append(outbuf
, outptr
- outbuf
);
// iconv reports an error with (size_t)(-1)
110 if (err
== (size_t)(-1))
117 // replace a series of unknown multibytes with a single "?"
// lastError remembers an insize value so that adjacent failures are told apart
118 if (lastError
!= insize
) {
119 lastError
= insize
- 1;
// No output was produced in this pass
127 if (outptr
== outbuf
)
// NOTE(review): presumably bufsize is changed and the old buffer released on
// lines not visible here before this re-allocation; confirm
131 outbuf
= new char[bufsize
];
145 // strstrip - Remove white space from the front and back of a string /*{{{*/
146 // ---------------------------------------------------------------------
147 /* This is handy to use when parsing a file. It also removes \n's left
148 over from fgets and company */
149 char *_strstrip(char *String
)
151 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
155 return _strrstrip(String
);
158 // strrstrip - Remove white space from the back of a string /*{{{*/
159 // ---------------------------------------------------------------------
// Removes trailing spaces, tabs, newlines and carriage returns from String
// in place by writing a new terminator. Returns String itself.
char *_strrstrip(char *String)
{
   // End is kept one past the last retained character, so the scan never
   // forms a pointer before the start of the buffer (not even for "")
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' || End[-1] == '\n' ||
           End[-1] == '\r'))
      --End;
   *End = 0;
   return String;
}
170 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
171 // ---------------------------------------------------------------------
// Replaces tab characters in String with spaces, working inside the buffer
// whose size is given by Len.
// NOTE(review): several statements of this function are not present in this
// view; the comments below describe only the lines that are visible.
173 char *_strtabexpand(char *String
,size_t Len
)
// NOTE(review): "I != I + Len" compares I with itself offset by Len, so it is
// only false when Len is 0; presumably "String + Len" was intended - confirm
175 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
// Fewer than 8 bytes are left between I and the end of the buffer
179 if (I
+ 8 > String
+ Len
)
185 /* Assume the start of the string is 0 and find the next 8 char
// NOTE(review): I starts at String and is only incremented in the visible
// code, so (String - I) is zero or negative here; confirm the intended
// operand order. This Len may also be a local declared on a hidden line.
191 Len
= 8 - ((String
- I
) % 8);
// Moves the text after the tab (including its terminator) to I + Len
199 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
// Fills with spaces, advancing I until it reaches J + Len
200 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
205 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
206 // ---------------------------------------------------------------------
207 /* This grabs a single word, converts any % escaped characters to their
208 proper values and advances the pointer. Double quotes are understood
209 and stripped out as well. This is for URI/URL parsing. It also can
210 understand [] brackets.*/
// Extracts one word from String into Res, decoding %XX escapes, and advances
// String (passed by reference) past the word.
// NOTE(review): several statements of this function are not present in this
// view; the comments below describe only the lines that are visible.
211 bool ParseQuoteWord(const char *&String
,string
&Res
)
213 // Skip leading whitespace
214 const char *C
= String
;
// Only the space character is skipped here, not every isspace() character
215 for (;*C
!= 0 && *C
== ' '; C
++);
219 // Jump to the next word
220 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
// Jump to the next double quote after C; strchr returns NULL if there is none
224 C
= strchr(C
+ 1, '"');
// Jump to the next closing bracket after C; strchr returns NULL if none
230 C
= strchr(C
+ 1, ']');
236 // Now de-quote characters
239 const char *Start
= String
;
// Copy loop bounded both by the size of Buffer and by the word end C
241 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
// A '%' followed by two hex digits that both lie before C is an escape
243 if (*Start
== '%' && Start
+ 2 < C
&&
244 isxdigit(Start
[1]) && isxdigit(Start
[2]))
// The escape is decoded as a base-16 number
249 *I
= (char)strtol(Tmp
,0,16);
262 // Skip ending white space
263 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
268 // ParseCWord - Parses a string like a C "" expression /*{{{*/
269 // ---------------------------------------------------------------------
270 /* This expects a series of space separated strings enclosed in ""'s.
271 It concatenates the ""'s into a single string. */
// Parses a sequence of double-quoted strings from String into Res.
// NOTE(review): several statements of this function are not present in this
// view; the comments below describe only the lines that are visible.
272 bool ParseCWord(const char *&String
,string
&Res
)
274 // Skip leading whitespace
275 const char *C
= String
;
// Only the space character is skipped here, not every isspace() character
276 for (;*C
!= 0 && *C
== ' '; C
++);
// The whole input is measured against the size of the local Buffer
282 if (strlen(String
) >= sizeof(Buffer
))
// Walk the characters after the current one up to the next double quote
289 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
// Current and previous characters are both whitespace
298 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
// Current character is not whitespace
300 if (isspace(*C
) == 0)
310 // QuoteString - Convert a string into quoted form /*{{{*/
311 // ---------------------------------------------------------------------
// Returns Str with every unsafe byte replaced by a %XX escape (two lower
// case hex digits). A byte is escaped when it appears in Bad, is not
// printable, is the percent sign, or lies outside 0x21..0x7E.
std::string QuoteString(const std::string &Str, const char *Bad)
{
   static const char Hex[] = "0123456789abcdef";

   std::string Res;
   Res.reserve(Str.size());
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // Work on the unsigned value: passing a negative char to isprint is
      // undefined, and formatting it as int would sign-extend bytes >= 0x80
      // into an escape that DeQuoteString cannot decode
      unsigned char const C = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 || isprint(C) == 0 ||
          C == 0x25 || // percent '%' char
          C <= 0x20 || C >= 0x7F) // control chars and non-ASCII
      {
         Res += '%';
         Res += Hex[C >> 4];
         Res += Hex[C & 0x0F];
      }
      else
         Res += *I;
   }
   return Res;
}
332 // DeQuoteString - Convert a string from quoted form /*{{{*/
333 // ---------------------------------------------------------------------
334 /* This undoes QuoteString */
// Decodes %XX escapes in the range [begin,end) and returns the result.
// A '%' that is not followed by two hex digits inside the range is copied
// through unchanged.
std::string DeQuoteString(std::string::const_iterator const &begin,
                          std::string::const_iterator const &end)
{
   std::string Res;
   for (std::string::const_iterator I = begin; I != end; ++I)
   {
      // "end - I > 2" means both digits lie inside the range; it avoids
      // forming I + 2 when that would be beyond end
      if (*I == '%' && end - I > 2 &&
          isxdigit(static_cast<unsigned char>(I[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(I[2])) != 0)
      {
         char const Tmp[3] = {I[1], I[2], 0};
         Res += static_cast<char>(strtol(Tmp,0,16));
         // Skip the two digits; the loop increment skips the '%'
         I += 2;
      }
      else
         Res += *I;
   }
   return Res;
}

// Decodes %XX escapes in the whole of Str; this undoes QuoteString.
std::string DeQuoteString(const std::string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
363 // SizeToStr - Convert a long into a human readable size /*{{{*/
364 // ---------------------------------------------------------------------
365 /* A max of 4 digits are shown before conversion to the next highest unit.
366 The max length of the string will be 5 chars unless the size is > 10
// Formats a byte count using SI steps of 1000. The result is the number,
// one space and a unit letter (no letter for plain bytes, so the string
// then ends in the space). Values below 100 in a unit other than bytes
// are shown with one decimal. The sign of Size is ignored.
std::string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   int const Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   char S[300];
   for (int I = 0; ; ++I)
   {
      if (ASize < 100 && I != 0)
      {
         snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
         break;
      }

      // The largest unit always formats, so S is never left unset for
      // values beyond the last unit
      if (ASize < 10000 || I == Last)
      {
         snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);
         break;
      }
      ASize /= 1000.0;
   }

   return S;
}
401 // TimeToStr - Convert the time into a string /*{{{*/
402 // ---------------------------------------------------------------------
403 /* Converts a number of seconds to a hms format */
// Formats a duration given in seconds using one of four translated patterns:
// days/hours/minutes/seconds, hours/minutes/seconds, minutes/seconds, or
// seconds only.
// NOTE(review): the conditions that select a pattern are not present in this
// view. The patterns use %li while Sec is unsigned long; confirm this is
// acceptable for the value range callers pass.
404 string
TimeToStr(unsigned long Sec
)
412 //d means days, h means hours, min means minutes, s means seconds
413 sprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
419 //h means hours, min means minutes, s means seconds
420 sprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
426 //min means minutes, s means seconds
427 sprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
432 sprintf(S
,_("%lis"),Sec
);
439 // SubstVar - Substitute a string for another string /*{{{*/
440 // ---------------------------------------------------------------------
441 /* This replaces all occurrences of Subst with Contents in Str. */
// Returns Str with every non-overlapping occurrence of Subst replaced by
// Contents, scanning left to right. An empty Subst leaves Str unchanged.
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty() == true)
      return Str;

   std::string::size_type Pos = 0;
   std::string::size_type OldPos = 0;
   std::string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != std::string::npos)
   {
      // Text between the previous match and this one, then the replacement
      Temp.append(Str, OldPos, Pos - OldPos);
      if (Contents.empty() == false)
         Temp.append(Contents);
      OldPos = Pos + Subst.length();
   }

   // No match at all: hand back the input untouched
   if (OldPos == 0)
      return Str;

   // The last match ended exactly at the end of Str: no tail to add
   if (OldPos >= Str.length())
      return Temp;
   return Temp + std::string(Str,OldPos);
}
468 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
470 for (; Vars
->Subst
!= 0; Vars
++)
471 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
475 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
476 // ---------------------------------------------------------------------
477 /* Returns a string with the supplied separator depth + 1 times in it */
// Returns Separator repeated Depth + 1 times.
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string output;
   for (unsigned long d = Depth + 1; d > 0; d--)
      output.append(Separator);
   return output;
}
486 // URItoFileName - Convert the uri into a unique file name /*{{{*/
487 // ---------------------------------------------------------------------
488 /* This converts a URI into a safe filename. It quotes all unsafe characters
489 and converts / to _ and removes the scheme identifier. The resulting
490 file name should be unique and never occur again for a different file */
// Builds a file name from a URI: the text is passed through QuoteString with
// a list of characters to escape, and every '/' is then replaced by '_'.
// NOTE(review): the statements that build U from URI are not present in this
// view.
491 string
URItoFileName(const string
&URI
)
493 // Nuke 'sensitive' items
499 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
// The second argument lists the characters QuoteString must escape
500 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
// Slashes become underscores after quoting
501 replace(NewURI
.begin(),NewURI
.end(),'/','_');
505 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
506 // ---------------------------------------------------------------------
507 /* This routine performs a base64 transformation on a string. It was ripped
508 from wget and then patched and bug fixed.
510 This spec can be found in rfc2045 */
// Returns the base64 encoding (RFC 2045 alphabet, '=' padding) of the bytes
// in S.
std::string Base64Encode(const std::string &S)
{
   // Conversion table
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The bytes
      are handled as unsigned so that values >= 0x80 give a valid table
      index, and the input is walked by index so nothing ever points past
      the end of S. */
   std::string::size_type const Len = S.length();
   for (std::string::size_type I = 0; I < Len; I += 3)
   {
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = static_cast<unsigned char>(S[I]);
      if (I + 1 < Len)
         Bits[1] = static_cast<unsigned char>(S[I + 1]);
      if (I + 2 < Len)
         Bits[2] = static_cast<unsigned char>(S[I + 2]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      if (I + 1 >= Len)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      if (I + 2 >= Len)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (S.length() % 3 == 2)
      Final += '=';
   if (S.length() % 3 == 1)
      Final += "==";

   return Final;
}
562 // stringcmp - Arbitrary string compare /*{{{*/
563 // ---------------------------------------------------------------------
564 /* This safely compares two non-null terminated strings of arbitrary
// Compares the character ranges [A,AEnd) and [B,BEnd), neither of which
// needs to be null terminated. Returns 0 when both are equal. When one
// range is a proper prefix of the other, returns 1 if A is the one that
// ends first and -1 if B is. Otherwise the first differing characters
// decide: -1 if *A < *B, else 1.
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   for (; A != AEnd && B != BEnd; A++, B++)
      if (*A != *B)
         break;

   if (A == AEnd && B == BEnd)
      return 0;
   if (A == AEnd)
      return 1;
   if (B == BEnd)
      return -1;
   if (*A < *B)
      return -1;
   return 1;
}
// Compares the string range [A,AEnd) with the character range [B,BEnd).
// Returns 0 when both are equal. When one range is a proper prefix of the
// other, returns 1 if A is the one that ends first and -1 if B is.
// Otherwise the first differing characters decide: -1 if *A < *B, else 1.
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   for (; A != AEnd && B != BEnd; A++, B++)
      if (*A != *B)
         break;

   if (A == AEnd && B == BEnd)
      return 0;
   if (A == AEnd)
      return 1;
   if (B == BEnd)
      return -1;
   if (*A < *B)
      return -1;
   return 1;
}
// Compares the string ranges [A,AEnd) and [B,BEnd). Returns 0 when both are
// equal. When one range is a proper prefix of the other, returns 1 if A is
// the one that ends first and -1 if B is. Otherwise the first differing
// characters decide: -1 if *A < *B, else 1.
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   for (; A != AEnd && B != BEnd; A++, B++)
      if (*A != *B)
         break;

   if (A == AEnd && B == BEnd)
      return 0;
   if (A == AEnd)
      return 1;
   if (B == BEnd)
      return -1;
   if (*A < *B)
      return -1;
   return 1;
}
620 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
621 // ---------------------------------------------------------------------
623 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
625 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
626 if (tolower_ascii(*A
) != tolower_ascii(*B
))
629 if (A
== AEnd
&& B
== BEnd
)
635 if (tolower_ascii(*A
) < tolower_ascii(*B
))
640 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
641 const char *B
,const char *BEnd
)
643 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
644 if (tolower_ascii(*A
) != tolower_ascii(*B
))
647 if (A
== AEnd
&& B
== BEnd
)
653 if (tolower_ascii(*A
) < tolower_ascii(*B
))
657 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
658 string::const_iterator B
,string::const_iterator BEnd
)
660 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
661 if (tolower_ascii(*A
) != tolower_ascii(*B
))
664 if (A
== AEnd
&& B
== BEnd
)
670 if (tolower_ascii(*A
) < tolower_ascii(*B
))
676 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
677 // ---------------------------------------------------------------------
678 /* The format is like those used in package files and the method
679 communication system */
680 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
682 // Look for a matching tag.
683 int Length
= strlen(Tag
);
684 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); ++I
)
687 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
689 // Find the end of line and strip the leading/trailing spaces
690 string::const_iterator J
;
692 for (; isspace(*I
) != 0 && I
< Message
.end(); ++I
);
693 for (J
= I
; *J
!= '\n' && J
< Message
.end(); ++J
);
694 for (; J
> I
&& isspace(J
[-1]) != 0; --J
);
699 for (; *I
!= '\n' && I
< Message
.end(); ++I
);
702 // Failed to find a match
708 // StringToBool - Converts a string into a boolean /*{{{*/
709 // ---------------------------------------------------------------------
710 /* This inspects the string to see if it is true or if it is false and
711 then returns the result. Several variants on true/false are checked. */
// Interprets Text as a boolean. Returns 1 for a leading number 1 or one of
// yes/true/with/on/enable, 0 for a leading number 0 or one of
// no/false/without/off/disable (words compared case-insensitively), and
// Default for anything else.
int StringToBool(const std::string &Text,int Default)
{
   // Keep the full strtol result: storing it in an int would let a large
   // value such as 4294967296 wrap around to 0 and be taken for "false"
   char *End;
   long const Res = strtol(Text.c_str(),&End,0);
   if (End != Text.c_str() && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"off") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"on") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
738 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
739 // ---------------------------------------------------------------------
740 /* This converts a time_t into a string time representation that is
741 year 2000 compliant and timezone neutral */
// Formats Date (seconds since the epoch) as an RFC 1123 date in GMT, for
// example "Sun, 06 Nov 1994 08:49:37 GMT". Day and month names are fixed
// English abbreviations, independent of the locale. Returns an empty string
// if the time cannot be broken down.
std::string TimeRFC1123(time_t Date)
{
   struct tm Conv;
   if (gmtime_r(&Date, &Conv) == NULL)
      return "";

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf, sizeof(Buf), "%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
759 // ReadMessages - Read messages from the FD /*{{{*/
760 // ---------------------------------------------------------------------
761 /* This pulls full messages from the input FD into the message buffer.
762 It assumes that messages will not pause during transit so no
763 fancy buffering is used.
765 In particular: this reads blocks from the input until it believes
766 that it's run out of input text. Each block is terminated by a
767 double newline ('\n' followed by '\n'). As noted below, there is a
768 bug in this code: it assumes that all the blocks have been read if
769 it doesn't see additional text in the buffer after the last one is
770 parsed, which will cause it to lose blocks if the last block
771 coincides with the end of the buffer.
// Reads from the descriptor Fd and appends each complete message found to
// List.
// NOTE(review): several statements of this function are not present in this
// view (including the buffer declarations and all return statements); the
// comments below describe only the lines that are visible.
773 bool ReadMessages(int Fd
, vector
<string
> &List
)
777 // Represents any left-over from the previous iteration of the
778 // parse loop. (i.e., if a message is split across the end
779 // of the buffer, it goes here)
780 string PartialMessage
;
// Read into the free space that remains in Buffer after End
784 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
// The read was interrupted by a signal
785 if (Res
< 0 && errno
== EINTR
)
788 // Process is dead, this is kind of bad..
// No data is available right now
793 if (Res
< 0 && errno
== EAGAIN
)
800 // Look for the end of the message
801 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
804 (I
[0] != '\n' && strncmp(I
, "\r\n\r\n", 4) != 0))
807 // Pull the message out
808 string
Message(Buffer
,I
-Buffer
);
809 PartialMessage
+= Message
;
// Step over the run of newline and carriage return characters
812 for (; I
< End
&& (*I
== '\n' || *I
== '\r'); ++I
);
// Shift the remaining data to the front of Buffer
// NOTE(review): presumably End is adjusted on a line not visible here
814 memmove(Buffer
,I
,End
-Buffer
);
// The message is complete: store it and start a new one
817 List
.push_back(PartialMessage
);
818 PartialMessage
.clear();
822 // If there's text left in the buffer, store it
823 // in PartialMessage and throw the rest of the buffer
824 // away. This allows us to handle messages that
825 // are longer than the static buffer size.
826 PartialMessage
+= string(Buffer
, End
);
831 // BUG ALERT: if a message block happens to end at a
832 // multiple of 64000 characters, this will cause it to
833 // terminate early, leading to a badly formed block and
834 // probably crashing the method. However, this is the only
835 // way we have to find the end of the message block. I have
836 // an idea of how to fix this, but it will require changes
837 // to the protocol (essentially to mark the beginning and
838 // end of the block).
840 // -- dburrows 2008-04-02
844 if (WaitFd(Fd
) == false)
849 // MonthConv - Converts a month string into a number /*{{{*/
850 // ---------------------------------------------------------------------
851 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
852 Made it a bit more robust with a few tolower_ascii though. */
853 static int MonthConv(char *Month
)
855 switch (tolower_ascii(*Month
))
858 return tolower_ascii(Month
[1]) == 'p'?3:7;
864 if (tolower_ascii(Month
[1]) == 'a')
866 return tolower_ascii(Month
[2]) == 'n'?5:6;
868 return tolower_ascii(Month
[2]) == 'r'?2:4;
876 // Pretend it is January..
882 // timegm - Internal timegm if the gnu version is not available /*{{{*/
883 // ---------------------------------------------------------------------
884 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
885 than local timezone (mktime assumes the latter).
887 This function is a nonstandard GNU extension that is also present on
888 the BSDs and maybe other systems. For others we follow the advice of
889 the manpage of timegm and use his portable replacement. */
// Fallback implementation of timegm built on mktime.
// NOTE(review): the statements that change and restore TZ around the mktime
// call are not present in this view.
891 static time_t timegm(struct tm
*t
)
// Remember the current TZ setting; getenv returns NULL when TZ is unset
893 char *tz
= getenv("TZ");
896 time_t ret
= mktime(t
);
906 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
907 // ---------------------------------------------------------------------
908 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
909 with one exception: All timezones (%Z) are accepted but the protocol
910 says that it MUST be GMT, but this one is equal to UTC which we will
911 encounter from time to time (e.g. in Release files) so we accept all
912 here and just assume it is GMT (or UTC) later on */
// Parses str against three date layouts in turn (RFC 1123, RFC 850 and
// asctime) using strptime.
// NOTE(review): the declaration of Tm, the variable receiving the combined
// strptime result, and the final conversion into time are not present in
// this view.
913 bool RFC1123StrToTime(const char* const str
,time_t &time
)
// Switch to the "C" locale for the strptime calls below
// NOTE(review): setlocale changes the locale of the whole process; confirm
// no other thread depends on it while this runs
916 setlocale (LC_ALL
,"C");
918 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
919 (strptime(str
, "%a, %d %b %Y %H:%M:%S %Z", &Tm
) == NULL
&&
920 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
921 strptime(str
, "%A, %d-%b-%y %H:%M:%S %Z", &Tm
) == NULL
&&
922 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
923 strptime(str
, "%a %b %d %H:%M:%S %Y", &Tm
) == NULL
);
// NOTE(review): "" selects the locale named by the environment, which is not
// necessarily the locale that was active before the call above; confirm
924 setlocale (LC_ALL
,"");
932 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
933 // ---------------------------------------------------------------------
// Parses an FTP MDTM timestamp (YYYYMMDDHHMMSS, interpreted as UTC) from
// str and stores it in time. Returns false, leaving time untouched, when
// str does not match.
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime only fills in the fields it parses; start from a zeroed
   // structure so timegm never sees indeterminate members
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM contains no whitespace; blanks in the format are used because
   // strptime accepts them as matching zero or more whitespace characters
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
946 // StrToTime - Converts a string into a time_t /*{{{*/
947 // ---------------------------------------------------------------------
948 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
949 and the C library asctime format. It requires the GNU library function
950 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
951 reason the C library does not provide any such function :< This also
952 handles the weird, but unambiguous FTP time format*/
// Parses Val with sscanf against several date layouts and stores the
// converted time in Result.
// NOTE(review): several statements of this function are not present in this
// view (including the declarations of Tm and Month and all return
// statements); the comments below describe only the lines that are visible.
953 bool StrToTime(const string
&Val
,time_t &Result
)
958 // Skip the day of the week
// NOTE(review): strchr returns NULL when Val contains no space; confirm I is
// checked before it is handed to sscanf (no check is visible here)
959 const char *I
= strchr(Val
.c_str(), ' ');
961 // Handle RFC 1123 time
963 if (sscanf(I
," %2d %3s %4d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
964 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
966 // Handle RFC 1036 time
967 if (sscanf(I
," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm
.tm_mday
,Month
,
968 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
// Layout with the month name first and the year last (asctime style)
973 if (sscanf(I
," %3s %2d %2d:%2d:%2d %4d",Month
,&Tm
.tm_mday
,
974 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
// Purely numeric layout, parsed from the start of Val rather than from I
977 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
978 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
// The month name read by sscanf is converted to a month number
987 Tm
.tm_mon
= MonthConv(Month
);
989 Tm
.tm_mon
= 0; // we don't have a month, so pick something
992 // Convert to local time and then to GMT
993 Result
= timegm(&Tm
);
997 // StrToNum - Convert a fixed length string to a number /*{{{*/
998 // ---------------------------------------------------------------------
999 /* This is used in decoding the crazy fixed length string headers in
1000 tar and ar files. */
// Parses the first Len characters of Str (which need not be terminated) as
// an unsigned number in the given Base and stores it in Res. A field made
// up only of spaces counts as zero. Returns false if Len is 30 or more, or
// if no digits could be parsed.
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // Work on a terminated copy, the field itself has no terminator
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I;
   for (I = 0; S[I] == ' '; I++);
   if (S[I] == 0)
      return true;

   char *End;
   Res = strtoul(S,&End,Base);
   if (End == S)
      return false;

   return true;
}
1024 // StrToNum - Convert a fixed length string to a number /*{{{*/
1025 // ---------------------------------------------------------------------
1026 /* This is used in decoding the crazy fixed length string headers in
1027 tar and ar files. */
// Parses the first Len characters of Str (which need not be terminated) as
// an unsigned number in the given Base and stores it in Res. A field made
// up only of spaces counts as zero. Returns false if Len is 30 or more, or
// if no digits could be parsed.
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   // Work on a terminated copy, the field itself has no terminator
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I;
   for (I = 0; S[I] == ' '; I++);
   if (S[I] == 0)
      return true;

   char *End;
   Res = strtoull(S,&End,Base);
   if (End == S)
      return false;

   return true;
}
1052 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1053 // ---------------------------------------------------------------------
1054 /* This is used in decoding the 256bit encoded fixed length fields in
// Decodes a fixed length base-256 (binary) field of Len bytes into Res.
// The top bit of the first byte must be set, otherwise false is returned
// and Res is left untouched. The remaining 7 bits of the first byte and
// all following bytes form the value, most significant byte first.
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Each byte is added as an unsigned value; a plain (possibly signed)
   // char >= 0x80 would otherwise be sign-extended and corrupt the result
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res<<8) + static_cast<unsigned char>(Str[i]);
   return true;
}

// Base256ToNum - Convert a fixed length binary to a number
// ---------------------------------------------------------------------
// Same as above for an unsigned long result. Additionally returns false
// when the decoded value does not fit into an unsigned long.
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   // Initialised so that a failed decode never reads an unset value
   unsigned long long Num = 0;
   bool const rc = Base256ToNum(Str, Num, Len);

   Res = static_cast<unsigned long>(Num);
   if (Res != Num)
      return false;

   return rc;
}
1086 // HexDigit - Convert a hex character into an integer /*{{{*/
1087 // ---------------------------------------------------------------------
1088 /* Helper for Hex2Num */
// Returns the value (0-15) of the hexadecimal digit c, accepting both
// letter cases. Any other character yields 0.
static int HexDigit(int c)
{
   if (c >= '0' && c <= '9')
      return c - '0';
   if (c >= 'a' && c <= 'f')
      return c - 'a' + 10;
   if (c >= 'A' && c <= 'F')
      return c - 'A' + 10;
   return 0;
}
1100 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1101 // ---------------------------------------------------------------------
1102 /* The length of the buffer must be exactly 1/2 the length of the string. */
1103 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
1105 if (Str
.length() != Length
*2)
1108 // Convert each digit. We store it in the same order as the string
1110 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
1112 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
1115 Num
[J
] = HexDigit(I
[0]) << 4;
1116 Num
[J
] += HexDigit(I
[1]);
1122 // TokSplitString - Split a string up by a given token /*{{{*/
1123 // ---------------------------------------------------------------------
1124 /* This is intended to be a faster splitter, it does not use dynamic
1125 memories. Input is changed to insert nulls at each token location. */
// Splits Input at each occurrence of Tok, storing pointers to the pieces in
// List, which has room for ListMax entries.
// NOTE(review): several statements of this function are not present in this
// view (including the declarations of Pos and End and all return
// statements); the comments below describe only the lines that are visible.
1126 bool TokSplitString(char Tok
,char *Input
,char **List
,
1127 unsigned long ListMax
)
1129 // Strip any leading spaces
1130 char *Start
= Input
;
// Stop marks the terminator of the input as it was on entry
1131 char *Stop
= Start
+ strlen(Start
);
1132 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
1134 unsigned long Count
= 0;
1138 // Skip to the next Token
1139 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
1141 // Back remove spaces
1143 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
// Record the start of this piece
1146 List
[Count
++] = Start
;
// The list has no room left
1147 if (Count
>= ListMax
)
// Skip separators, whitespace and nul characters before the next piece
1154 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1162 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1163 // ---------------------------------------------------------------------
1164 /* This can be used to split a given string up into a vector, so the
1165 purpose is the same as in the method above and this one is a bit slower
1166 also, but the advantage is that we have an iterable vector */
// Splits haystack at every occurrence of the character split and returns
// the pieces in order. Empty pieces between separators and before the
// first separator are kept; an empty piece after a trailing separator is
// not produced. An empty haystack yields an empty vector.
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   std::string::const_iterator start = haystack.begin();
   std::string::const_iterator end = start;
   do {
      for (; end != haystack.end() && *end != split; ++end);
      exploded.push_back(std::string(start, end));
      // The last piece ran up to the end of the input: stop before an
      // iterator beyond haystack.end() would be formed
      if (end == haystack.end())
         break;
      // Step over the separator; the next piece starts right behind it
      start = ++end;
   } while (end != haystack.end());
   return exploded;
}
1182 // StringSplit - split a string into a string vector by token /*{{{*/
1183 // ---------------------------------------------------------------------
1184 /* See header for details.
// Splits s at every occurrence of the string sep. At most maxsplit items
// are returned (a maxsplit of 0 is treated as 1); once the limit is
// reached, the last item holds the whole unsplit remainder. An empty sep
// yields an empty vector.
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
                                     unsigned int maxsplit)
{
   std::vector<std::string> split;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return split;

   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const pos = s.find(sep, start);
      split.push_back(s.substr(start, pos - start));

      // if maxsplit is reached, the remaining string is the last item
      if (split.size() >= maxsplit)
      {
         split.back() = s.substr(start);
         break;
      }

      // No further separator: the item just added was the last one
      if (pos == std::string::npos)
         break;
      start = pos + sep.size();
   }
   return split;
}
1213 // RegexChoice - Simple regex list/list matcher /*{{{*/
1214 // ---------------------------------------------------------------------
// Matches each string in [ListBegin,ListEnd) against the entries of the Rxs
// table, which ends at the first entry whose Str member is null.
// NOTE(review): several statements of this function are not present in this
// view (including how Hits is updated and what is returned); the comments
// below describe only the lines that are visible.
1216 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1217 const char **ListEnd
)
// First pass over the whole table
1219 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1222 unsigned long Hits
= 0;
1223 for (; ListBegin
< ListEnd
; ++ListBegin
)
1225 // Check if the name is a regex
1228 for (I
= *ListBegin
; *I
!= 0; I
++)
// Any of these four characters marks the name as a pattern
1229 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1234 // Compile the regex pattern
1237 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
// Compare the current list entry against every table entry
1243 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
// Case-insensitive comparison with the table entry differs
1248 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
// The compiled pattern does not match the table entry
1252 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1257 if (R
->Hit
== false)
// Warn through the global _error object with a translated message
1267 _error
->Warning(_("Selection %s not found"),*ListBegin
);
1273 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1274 // ---------------------------------------------------------------------
1275 /* This is used to make the internationalization strings easier to translate
1276 and to allow reordering of parameters */
// Formats format/args with vsnprintf into a temporary buffer of size bytes.
// NOTE(review): the statements that write to out, adjust size, release the
// buffer and return are not present in this view.
1277 static bool iovprintf(ostream
&out
, const char *format
,
1278 va_list &args
, ssize_t
&size
) {
// NOTE(review): the malloc result is passed to vsnprintf on the very next
// line without a null check
1279 char *S
= (char*)malloc(size
);
1280 ssize_t
const n
= vsnprintf(S
, size
, format
, args
);
// n is non-negative and smaller than size: the whole output fit into S
1281 if (n
> -1 && n
< size
) {
// printf-style formatting onto the stream out, implemented with iovprintf.
// NOTE(review): the declarations of args and size and the surrounding
// control flow are not present in this view.
1294 void ioprintf(ostream
&out
,const char *format
,...)
1299 va_start(args
,format
);
// iovprintf returned true for this attempt
1300 if (iovprintf(out
, format
, args
, size
) == true)
// printf-style formatting into the string out. The text is formatted into a
// std::ostringstream through iovprintf.
// NOTE(review): the declarations of args and size, the surrounding control
// flow and the assignment to out are not present in this view.
1305 void strprintf(string
&out
,const char *format
,...)
1309 std::ostringstream outstr
;
1311 va_start(args
,format
);
// iovprintf returned true for this attempt
1312 if (iovprintf(outstr
, format
, args
, size
) == true)
1319 // safe_snprintf - Safer snprintf /*{{{*/
1320 // ---------------------------------------------------------------------
1321 /* This is a snprintf that will never (ever) go past 'End' and returns a
1322 pointer to the end of the new string. The returned string is always null
1323 terminated unless Buffer == end. This is a better alternative to using
1324 consecutive snprintfs. */
// Formats into [Buffer,End) and returns a pointer to the terminator of the
// text written, ready to be used as Buffer for the next call. Returns End
// when there is no room at all (End <= Buffer), when formatting fails, or
// when the output had to be truncated.
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths instead of forming Buffer + Did, which could point
   // far beyond the buffer when the output was truncated
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1341 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1342 // ---------------------------------------------------------------------
// Returns VerStr without its epoch, i.e. the text after the first ':'.
// A version string without ':' is returned unchanged.
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const i = VerStr.find(':');
   if (i == std::string::npos)
      return VerStr;
   return VerStr.substr(i + 1);
}
1351 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1352 // ---------------------------------------------------------------------
1353 /* This little function is the most called method we have and tries
1354 therefore to do the absolute minimum - and is notably faster than
1355 standard tolower/toupper and as a bonus avoids problems with different
1356 locales - we only operate on ascii chars anyway. */
// Returns the lower case form of c if it is an ASCII upper case letter
// ('A'..'Z') and c unchanged otherwise. The current locale plays no role.
int tolower_ascii(int const c)
{
   if (c >= 'A' && c <= 'Z')
      return c + 32;
   return c;
}
1365 // CheckDomainList - See if Host is in a comma-separated list /*{{{*/
1366 // ---------------------------------------------------------------------
1367 /* The domain list is a comma-separated list of domains that are suffix
1368 matched against the argument */
1369 bool CheckDomainList(const string
&Host
,const string
&List
)
1371 string::const_iterator Start
= List
.begin();
1372 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); ++Cur
)
1374 if (Cur
< List
.end() && *Cur
!= ',')
1377 // Match the end of the string..
1378 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1380 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1388 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1389 // ---------------------------------------------------------------------
// Returns the number of entries in str_array before its terminating NULL
// pointer.
size_t strv_length(const char **str_array)
{
   size_t i = 0;
   while (str_array[i] != NULL)
      ++i;
   return i;
}
1400 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1401 // ---------------------------------------------------------------------
// Returns input with backslash escapes resolved: "\\" becomes one
// backslash, "\0XX" the byte with octal value XX and "\xXX" the byte with
// hexadecimal value XX. Any other escape, a lone trailing backslash and the
// introducer of an escape that is cut short by the end of the input are
// dropped.
std::string DeEscapeString(const std::string &input)
{
   char tmp[3];
   std::string output;
   for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (input.end() - it < 2)
         continue;

      // deal with double escape
      if (it[1] == '\\')
      {
         // copy
         output += *it;
         // advance iterator one step further
         ++it;
         continue;
      }

      // read it
      ++it;
      switch (*it)
      {
         case '0':
            // both digits must lie inside the string; otherwise reading
            // them (and skipping them below) would run past the end
            if (input.end() - it > 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += (char)strtol(tmp, 0, 8);
               it += 2;
            }
            break;
         case 'x':
            if (input.end() - it > 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += (char)strtol(tmp, 0, 16);
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1462 // URI::CopyFrom - Copy from an object /*{{{*/
1463 // ---------------------------------------------------------------------
1464 /* This parses the URI into all of its components */
// Parses the URI text U and fills the members Access, Path, Host, User,
// Password and Port from it.
// NOTE(review): several statements of this function are not present in this
// view; the comments below describe only the lines that are visible.
1465 void URI::CopyFrom(const string
&U
)
1467 string::const_iterator I
= U
.begin();
1469 // Locate the first colon, this separates the scheme
1470 for (; I
< U
.end() && *I
!= ':' ; ++I
);
1471 string::const_iterator FirstColon
= I
;
1473 /* Determine if this is a host type URI with a leading double //
1474 and then search for the first single / */
1475 string::const_iterator SingleSlash
= I
;
// The colon is directly followed by "//" and more text comes after that
1476 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1479 /* Find the / indicating the end of the hostname, ignoring /'s in the
1481 bool InBracket
= false;
// A '/' inside square brackets does not end the scan
1482 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); ++SingleSlash
)
1484 if (*SingleSlash
== '[')
1486 if (InBracket
== true && *SingleSlash
== ']')
// Clamp SingleSlash to the end of the input
1490 if (SingleSlash
> U
.end())
1491 SingleSlash
= U
.end();
1493 // We can now write the access and path specifiers
1494 Access
.assign(U
.begin(),FirstColon
);
1495 if (SingleSlash
!= U
.end())
1496 Path
.assign(SingleSlash
,U
.end());
1497 if (Path
.empty() == true)
1500 // Now we attempt to locate a user:pass@host fragment
// NOTE(review): when FirstColon + 2 == U.end(), FirstColon[2] refers to the
// end position; the test above uses "I + 3 < U.end()" instead - confirm
1501 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1505 if (FirstColon
>= U
.end())
// Keep FirstColon from lying behind SingleSlash
1508 if (FirstColon
> SingleSlash
)
1509 FirstColon
= SingleSlash
;
1511 // Find the colon...
1513 if (I
> SingleSlash
)
1515 for (; I
< SingleSlash
&& *I
!= ':'; ++I
);
1516 string::const_iterator SecondColon
= I
;
1518 // Search for the @ after the colon
1519 for (; I
< SingleSlash
&& *I
!= '@'; ++I
);
1520 string::const_iterator At
= I
;
1522 // Now write the host and user/pass
// No '@' was found before SingleSlash
1523 if (At
== SingleSlash
)
1525 if (FirstColon
< SingleSlash
)
1526 Host
.assign(FirstColon
,SingleSlash
);
// The host is the text between the '@' and SingleSlash
1530 Host
.assign(At
+1,SingleSlash
);
1531 // username and password must be encoded (RFC 3986)
1532 User
.assign(DeQuoteString(FirstColon
,SecondColon
));
// A password is only present when a colon precedes the '@'
1533 if (SecondColon
< At
)
1534 Password
.assign(DeQuoteString(SecondColon
+1,At
));
1537 // Now we parse the RFC 2732 [] hostnames.
1538 unsigned long PortEnd
= 0;
// The loop header has no increment; I is advanced on lines not visible here
1540 for (unsigned I
= 0; I
!= Host
.length();)
1549 if (InBracket
== true && Host
[I
] == ']')
1560 if (InBracket
== true)
1566 // Now we parse off a port number from the hostname
// The last ':' in Host is the candidate for the port separator
1568 string::size_type Pos
= Host
.rfind(':');
1569 if (Pos
== string::npos
|| Pos
< PortEnd
)
// The text after the colon is converted with atoi; Host keeps what precedes
1572 Port
= atoi(string(Host
,Pos
+1).c_str());
1573 Host
.assign(Host
,0,Pos
);
1576 // URI::operator string - Convert the URI to a string /*{{{*/
1577 // ---------------------------------------------------------------------
// Builds the textual form of the URI from the members Access, Host, User,
// Password, Port and Path.
// NOTE(review): most of the statements that append to Res are not present in
// this view; the comments below describe only the lines that are visible.
1579 URI::operator string()
1583 if (Access
.empty() == false)
1586 if (Host
.empty() == false)
1588 if (Access
.empty() == false)
1591 if (User
.empty() == false)
1593 // FIXME: Technically userinfo is permitted even less
1594 // characters than these, but this is not conveniently
1595 // expressed with a blacklist.
// User and Password are escaped with the same list of reserved characters
1596 Res
+= QuoteString(User
, ":/?#[]@");
1597 if (Password
.empty() == false)
1598 Res
+= ":" + QuoteString(Password
, ":/?#[]@");
1602 // Add RFC 2732 escaping characters
// A host containing '/' or ':' is wrapped in square brackets
1603 if (Access
.empty() == false &&
1604 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
1605 Res
+= '[' + Host
+ ']';
// The port is formatted as ":" followed by the unsigned number
1612 sprintf(S
,":%u",Port
);
1617 if (Path
.empty() == false)
1628 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1629 // ---------------------------------------------------------------------
1631 string
URI::SiteOnly(const string
&URI
)
1640 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1641 // ---------------------------------------------------------------------
1643 string
URI::NoUserPassword(const string
&URI
)