1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20 #include <apt-pkg/error.h>
39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
40 // ---------------------------------------------------------------------
41 /* This is handy to use before displaying information to the end user */
42 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
47 size_t insize
, bufsize
;
50 cd
= iconv_open(codeset
, "UTF-8");
51 if (cd
== (iconv_t
)(-1)) {
52 // Something went wrong
54 _error
->Error("conversion from 'UTF-8' to '%s' not available",
62 insize
= bufsize
= orig
.size();
64 inptr
= (char *)inbuf
;
65 outbuf
= new char[bufsize
];
66 size_t lastError
= -1;
70 char *outptr
= outbuf
;
71 size_t outsize
= bufsize
;
72 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
73 dest
->append(outbuf
, outptr
- outbuf
);
74 if (err
== (size_t)(-1))
81 // replace a series of unknown multibytes with a single "?"
82 if (lastError
!= insize
) {
83 lastError
= insize
- 1;
95 outbuf
= new char[bufsize
];
109 // strstrip - Remove white space from the front and back of a string /*{{{*/
110 // ---------------------------------------------------------------------
111 /* This is handy to use when parsing a file. It also removes \n's left
112 over from fgets and company */
113 char *_strstrip(char *String
)
115 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
120 char *End
= String
+ strlen(String
) - 1;
121 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
122 *End
== '\r'); End
--);
128 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
129 // ---------------------------------------------------------------------
131 char *_strtabexpand(char *String
,size_t Len
)
133 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
137 if (I
+ 8 > String
+ Len
)
143 /* Assume the start of the string is 0 and find the next 8 char division */
149 Len
= 8 - ((String
- I
) % 8);
157 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
158 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
163 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
164 // ---------------------------------------------------------------------
165 /* This grabs a single word, converts any % escaped characters to their
166 proper values and advances the pointer. Double quotes are understood
167 and stripped out as well. This is for URI/URL parsing. It also can
168 understand [] brackets.*/
169 bool ParseQuoteWord(const char *&String
,string
&Res
)
171 // Skip leading whitespace
172 const char *C
= String
;
173 for (;*C
!= 0 && *C
== ' '; C
++);
177 // Jump to the next word
178 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
182 for (C
++; *C
!= 0 && *C
!= '"'; C
++);
188 for (C
++; *C
!= 0 && *C
!= ']'; C
++);
194 // Now de-quote characters
197 const char *Start
= String
;
199 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
201 if (*Start
== '%' && Start
+ 2 < C
)
206 *I
= (char)strtol(Tmp
,0,16);
219 // Skip ending white space
220 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
225 // ParseCWord - Parses a string like a C "" expression /*{{{*/
226 // ---------------------------------------------------------------------
227 /* This expects a series of space separated strings enclosed in ""'s.
228 It concatenates the ""'s into a single string. */
229 bool ParseCWord(const char *&String
,string
&Res
)
231 // Skip leading whitespace
232 const char *C
= String
;
233 for (;*C
!= 0 && *C
== ' '; C
++);
239 if (strlen(String
) >= sizeof(Buffer
))
246 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
255 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
257 if (isspace(*C
) == 0)
267 // QuoteString - Convert a string into quoted form /*{{{*/
268 // ---------------------------------------------------------------------
270 string
QuoteString(const string
&Str
, const char *Bad
)
273 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
275 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
276 *I
<= 0x20 || *I
>= 0x7F)
279 sprintf(Buf
,"%%%02x",(int)*I
);
288 // DeQuoteString - Convert a string from quoted form /*{{{*/
289 // ---------------------------------------------------------------------
290 /* This undoes QuoteString */
291 string
DeQuoteString(const string
&Str
)
294 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
296 if (*I
== '%' && I
+ 2 < Str
.end())
302 Res
+= (char)strtol(Tmp
,0,16);
313 // SizeToStr - Convert a long into a human readable size /*{{{*/
314 // ---------------------------------------------------------------------
315 /* A max of 4 digits are shown before conversion to the next highest unit.
316 The max length of the string will be 5 chars unless the size is > 10 YottaBytes */
318 string
SizeToStr(double Size
)
327 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
328 ExaBytes, ZettaBytes, YottaBytes */
329 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
333 if (ASize
< 100 && I
!= 0)
335 sprintf(S
,"%'.1f%c",ASize
,Ext
[I
]);
341 sprintf(S
,"%'.0f%c",ASize
,Ext
[I
]);
351 // TimeToStr - Convert the time into a string /*{{{*/
352 // ---------------------------------------------------------------------
353 /* Converts a number of seconds to a hms format */
354 string
TimeToStr(unsigned long Sec
)
362 //d means days, h means hours, min means minutes, s means seconds
363 sprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
369 //h means hours, min means minutes, s means seconds
370 sprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
376 //min means minutes, s means seconds
377 sprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
382 sprintf(S
,_("%lis"),Sec
);
389 // SubstVar - Substitute a string for another string /*{{{*/
390 // ---------------------------------------------------------------------
391 /* This replaces all occurrences of Subst with Contents in Str. */
392 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
394 string::size_type Pos
= 0;
395 string::size_type OldPos
= 0;
398 while (OldPos
< Str
.length() &&
399 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
401 Temp
+= string(Str
,OldPos
,Pos
) + Contents
;
402 OldPos
= Pos
+ Subst
.length();
408 return Temp
+ string(Str
,OldPos
);
411 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
413 for (; Vars
->Subst
!= 0; Vars
++)
414 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
418 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
419 // ---------------------------------------------------------------------
420 /* Returns a string with the supplied separator depth + 1 times in it */
421 std::string
OutputInDepth(const unsigned long Depth
, const char* Separator
)
423 std::string output
= "";
424 for(unsigned long d
=Depth
+1; d
> 0; d
--)
425 output
.append(Separator
);
429 // URItoFileName - Convert the uri into a unique file name /*{{{*/
430 // ---------------------------------------------------------------------
431 /* This converts a URI into a safe filename. It quotes all unsafe characters
432 and converts / to _ and removes the scheme identifier. The resulting
433 file name should be unique and never occur again for a different file */
434 string
URItoFileName(const string
&URI
)
436 // Nuke 'sensitive' items
442 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
443 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
444 replace(NewURI
.begin(),NewURI
.end(),'/','_');
448 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
449 // ---------------------------------------------------------------------
450 /* This routine performs a base64 transformation on a string. It was ripped
451 from wget and then patched and bug fixed.
453 This spec can be found in rfc2045 */
454 string
Base64Encode(const string
&S
)
457 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
458 'I','J','K','L','M','N','O','P',
459 'Q','R','S','T','U','V','W','X',
460 'Y','Z','a','b','c','d','e','f',
461 'g','h','i','j','k','l','m','n',
462 'o','p','q','r','s','t','u','v',
463 'w','x','y','z','0','1','2','3',
464 '4','5','6','7','8','9','+','/'};
466 // Pre-allocate some space
468 Final
.reserve((4*S
.length() + 2)/3 + 2);
470 /* Transform the 3x8 bits to 4x6 bits, as required by base64. */
472 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
474 char Bits
[3] = {0,0,0};
481 Final
+= tbl
[Bits
[0] >> 2];
482 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
484 if (I
+ 1 >= S
.end())
487 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
489 if (I
+ 2 >= S
.end())
492 Final
+= tbl
[Bits
[2] & 0x3f];
495 /* Apply the padding elements, this tells how many bytes the remote
496 end should discard */
497 if (S
.length() % 3 == 2)
499 if (S
.length() % 3 == 1)
505 // stringcmp - Arbitrary string compare /*{{{*/
506 // ---------------------------------------------------------------------
507 /* This safely compares two non-null terminated strings of arbitrary length */
509 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
511 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
515 if (A
== AEnd
&& B
== BEnd
)
527 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
528 const char *B
,const char *BEnd
)
530 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
534 if (A
== AEnd
&& B
== BEnd
)
544 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
545 string::const_iterator B
,string::const_iterator BEnd
)
547 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
551 if (A
== AEnd
&& B
== BEnd
)
563 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
564 // ---------------------------------------------------------------------
566 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
568 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
569 if (tolower_ascii(*A
) != tolower_ascii(*B
))
572 if (A
== AEnd
&& B
== BEnd
)
578 if (tolower_ascii(*A
) < tolower_ascii(*B
))
583 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
584 const char *B
,const char *BEnd
)
586 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
587 if (tolower_ascii(*A
) != tolower_ascii(*B
))
590 if (A
== AEnd
&& B
== BEnd
)
596 if (tolower_ascii(*A
) < tolower_ascii(*B
))
600 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
601 string::const_iterator B
,string::const_iterator BEnd
)
603 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
604 if (tolower_ascii(*A
) != tolower_ascii(*B
))
607 if (A
== AEnd
&& B
== BEnd
)
613 if (tolower_ascii(*A
) < tolower_ascii(*B
))
619 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
620 // ---------------------------------------------------------------------
621 /* The format is like those used in package files and the method
622 communication system */
623 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
625 // Look for a matching tag.
626 int Length
= strlen(Tag
);
627 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
630 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
632 // Find the end of line and strip the leading/trailing spaces
633 string::const_iterator J
;
635 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
636 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
637 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
642 for (; *I
!= '\n' && I
< Message
.end(); I
++);
645 // Failed to find a match
651 // StringToBool - Converts a string into a boolean /*{{{*/
652 // ---------------------------------------------------------------------
653 /* This inspects the string to see if it is true or if it is false and
654 then returns the result. Several variants on true/false are checked. */
655 int StringToBool(const string
&Text
,int Default
)
658 int Res
= strtol(Text
.c_str(),&End
,0);
659 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
662 // Check for negatives
663 if (strcasecmp(Text
.c_str(),"no") == 0 ||
664 strcasecmp(Text
.c_str(),"false") == 0 ||
665 strcasecmp(Text
.c_str(),"without") == 0 ||
666 strcasecmp(Text
.c_str(),"off") == 0 ||
667 strcasecmp(Text
.c_str(),"disable") == 0)
670 // Check for positives
671 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
672 strcasecmp(Text
.c_str(),"true") == 0 ||
673 strcasecmp(Text
.c_str(),"with") == 0 ||
674 strcasecmp(Text
.c_str(),"on") == 0 ||
675 strcasecmp(Text
.c_str(),"enable") == 0)
681 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
682 // ---------------------------------------------------------------------
683 /* This converts a time_t into a string time representation that is
684 year 2000 compliant and timezone neutral */
685 string
TimeRFC1123(time_t Date
)
687 struct tm Conv
= *gmtime(&Date
);
690 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
691 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
692 "Aug","Sep","Oct","Nov","Dec"};
694 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
695 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
696 Conv
.tm_min
,Conv
.tm_sec
);
700 // ReadMessages - Read messages from the FD /*{{{*/
701 // ---------------------------------------------------------------------
702 /* This pulls full messages from the input FD into the message buffer.
703 It assumes that messages will not pause during transit so no
704 fancy buffering is used.
706 In particular: this reads blocks from the input until it believes
707 that it's run out of input text. Each block is terminated by a
708 double newline ('\n' followed by '\n'). As noted below, there is a
709 bug in this code: it assumes that all the blocks have been read if
710 it doesn't see additional text in the buffer after the last one is
711 parsed, which will cause it to lose blocks if the last block
712 coincides with the end of the buffer. */
714 bool ReadMessages(int Fd
, vector
<string
> &List
)
718 // Represents any left-over from the previous iteration of the
719 // parse loop. (i.e., if a message is split across the end
720 // of the buffer, it goes here)
721 string PartialMessage
;
725 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
726 if (Res
< 0 && errno
== EINTR
)
729 // Process is dead, this is kind of bad..
734 if (Res
< 0 && errno
== EAGAIN
)
741 // Look for the end of the message
742 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
744 if (I
[0] != '\n' || I
[1] != '\n')
747 // Pull the message out
748 string
Message(Buffer
,I
-Buffer
);
749 PartialMessage
+= Message
;
752 for (; I
< End
&& *I
== '\n'; I
++);
754 memmove(Buffer
,I
,End
-Buffer
);
757 List
.push_back(PartialMessage
);
758 PartialMessage
.clear();
762 // If there's text left in the buffer, store it
763 // in PartialMessage and throw the rest of the buffer
764 // away. This allows us to handle messages that
765 // are longer than the static buffer size.
766 PartialMessage
+= string(Buffer
, End
);
771 // BUG ALERT: if a message block happens to end at a
772 // multiple of 64000 characters, this will cause it to
773 // terminate early, leading to a badly formed block and
774 // probably crashing the method. However, this is the only
775 // way we have to find the end of the message block. I have
776 // an idea of how to fix this, but it will require changes
777 // to the protocol (essentially to mark the beginning and
778 // end of the block).
780 // -- dburrows 2008-04-02
784 if (WaitFd(Fd
) == false)
789 // MonthConv - Converts a month string into a number /*{{{*/
790 // ---------------------------------------------------------------------
791 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
792 Made it a bit more robust with a few tolower_ascii though. */
793 static int MonthConv(char *Month
)
795 switch (tolower_ascii(*Month
))
798 return tolower_ascii(Month
[1]) == 'p'?3:7;
804 if (tolower_ascii(Month
[1]) == 'a')
806 return tolower_ascii(Month
[2]) == 'n'?5:6;
808 return tolower_ascii(Month
[2]) == 'r'?2:4;
816 // Pretend it is January..
822 // timegm - Internal timegm function if gnu is not available /*{{{*/
823 // ---------------------------------------------------------------------
824 /* Ripped this evil little function from wget - I prefer the use of
825 GNU timegm if possible as this technique will have interesting problems
826 with leap seconds, timezones and other.
828 Converts struct tm to time_t, assuming the data in tm is UTC rather
829 than local timezone (mktime assumes the latter).
831 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
832 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
834 /* Turned it into an autoconf check, because GNU is not the only thing which
835 can provide timegm. -- 2002-09-22, Joel Baker */
837 #ifndef HAVE_TIMEGM // Now with autoconf!
838 static time_t timegm(struct tm
*t
)
845 tb
= mktime (gmtime (&tl
));
846 return (tl
<= tb
? (tl
+ (tl
- tb
)) : (tl
- (tb
- tl
)));
850 // StrToTime - Converts a string into a time_t /*{{{*/
851 // ---------------------------------------------------------------------
852 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
853 and the C library asctime format. It requires the GNU library function
854 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
855 reason the C library does not provide any such function :< This also
856 handles the weird, but unambiguous FTP time format*/
857 bool StrToTime(const string
&Val
,time_t &Result
)
861 const char *I
= Val
.c_str();
863 // Skip the day of the week
864 for (;*I
!= 0 && *I
!= ' '; I
++);
866 // Handle RFC 1123 time
868 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
869 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
871 // Handle RFC 1036 time
872 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
873 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
878 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
879 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
882 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
883 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
892 Tm
.tm_mon
= MonthConv(Month
);
895 // Convert to local time and then to GMT
896 Result
= timegm(&Tm
);
900 // StrToNum - Convert a fixed length string to a number /*{{{*/
901 // ---------------------------------------------------------------------
902 /* This is used in decoding the crazy fixed length string headers in tar and ar files. */
904 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
907 if (Len
>= sizeof(S
))
912 // All spaces is a zero
915 for (I
= 0; S
[I
] == ' '; I
++);
920 Res
= strtoul(S
,&End
,Base
);
927 // HexDigit - Convert a hex character into an integer /*{{{*/
928 // ---------------------------------------------------------------------
929 /* Helper for Hex2Num */
930 static int HexDigit(int c
)
932 if (c
>= '0' && c
<= '9')
934 if (c
>= 'a' && c
<= 'f')
936 if (c
>= 'A' && c
<= 'F')
941 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
942 // ---------------------------------------------------------------------
943 /* The length of the buffer must be exactly 1/2 the length of the string. */
944 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
946 if (Str
.length() != Length
*2)
949 // Convert each digit. We store it in the same order as the string
951 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
953 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
956 Num
[J
] = HexDigit(I
[0]) << 4;
957 Num
[J
] += HexDigit(I
[1]);
963 // TokSplitString - Split a string up by a given token /*{{{*/
964 // ---------------------------------------------------------------------
965 /* This is intended to be a faster splitter, it does not use dynamic
966 memory. Input is changed to insert nulls at each token location. */
967 bool TokSplitString(char Tok
,char *Input
,char **List
,
968 unsigned long ListMax
)
970 // Strip any leading spaces
972 char *Stop
= Start
+ strlen(Start
);
973 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
975 unsigned long Count
= 0;
979 // Skip to the next Token
980 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
982 // Back remove spaces
984 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
987 List
[Count
++] = Start
;
988 if (Count
>= ListMax
)
995 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1003 // ExplodeString - Split a string up into a vector /*{{{*/
1004 // ---------------------------------------------------------------------
1005 /* This can be used to split a given string up into a vector, so the
1006 purpose is the same as that of the method above. This one is a bit slower,
1007 but the advantage is that we get an iterable vector */
1008 vector
<string
> ExplodeString(string
const &haystack
, char const &split
)
1010 string::const_iterator start
= haystack
.begin();
1011 string::const_iterator end
= start
;
1012 vector
<string
> exploded
;
1014 for (; end
!= haystack
.end() && *end
!= split
; ++end
);
1015 exploded
.push_back(string(start
, end
));
1017 } while (end
!= haystack
.end() && (++end
) != haystack
.end());
1021 // RegexChoice - Simple regex list/list matcher /*{{{*/
1022 // ---------------------------------------------------------------------
1024 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1025 const char **ListEnd
)
1027 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1030 unsigned long Hits
= 0;
1031 for (; ListBegin
!= ListEnd
; ListBegin
++)
1033 // Check if the name is a regex
1036 for (I
= *ListBegin
; *I
!= 0; I
++)
1037 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1042 // Compile the regex pattern
1045 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1051 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1056 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1060 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1065 if (R
->Hit
== false)
1075 _error
->Warning(_("Selection %s not found"),*ListBegin
);
1081 // ioprintf - C format string outputter to C++ iostreams /*{{{*/
1082 // ---------------------------------------------------------------------
1083 /* This is used to make the internationalization strings easier to translate
1084 and to allow reordering of parameters */
1085 void ioprintf(ostream
&out
,const char *format
,...)
1088 va_start(args
,format
);
1090 // sprintf the description
1092 vsnprintf(S
,sizeof(S
),format
,args
);
1096 // strprintf - C format string outputter to C++ strings /*{{{*/
1097 // ---------------------------------------------------------------------
1098 /* This is used to make the internationalization strings easier to translate
1099 and to allow reordering of parameters */
1100 void strprintf(string
&out
,const char *format
,...)
1103 va_start(args
,format
);
1105 // sprintf the description
1107 vsnprintf(S
,sizeof(S
),format
,args
);
1111 // safe_snprintf - Safer snprintf /*{{{*/
1112 // ---------------------------------------------------------------------
1113 /* This is a snprintf that will never (ever) go past 'End' and returns a
1114 pointer to the end of the new string. The returned string is always null
1115 terminated unless Buffer == end. This is a better alternative to using
1116 consecutive snprintfs. */
1117 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
1122 va_start(args
,Format
);
1127 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
1128 if (Did
< 0 || Buffer
+ Did
> End
)
1130 return Buffer
+ Did
;
1134 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1135 // ---------------------------------------------------------------------
1136 /* This little function is the most called method we have and tries
1137 therefore to do the absolute minimum - and is notably faster than
1138 standard tolower/toupper and as a bonus avoids problems with different
1139 locales - we only operate on ascii chars anyway. */
1140 int tolower_ascii(int const c
)
1142 if (c
>= 'A' && c
<= 'Z')
1148 // CheckDomainList - See if Host is in a comma-separated list /*{{{*/
1149 // ---------------------------------------------------------------------
1150 /* The domain list is a comma-separated list of domains that are suffix
1151 matched against the argument */
1152 bool CheckDomainList(const string
&Host
,const string
&List
)
1154 string::const_iterator Start
= List
.begin();
1155 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); Cur
++)
1157 if (Cur
< List
.end() && *Cur
!= ',')
1160 // Match the end of the string..
1161 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1163 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1172 // URI::CopyFrom - Copy from an object /*{{{*/
1173 // ---------------------------------------------------------------------
1174 /* This parses the URI into all of its components */
1175 void URI::CopyFrom(const string
&U
)
1177 string::const_iterator I
= U
.begin();
1179 // Locate the first colon, this separates the scheme
1180 for (; I
< U
.end() && *I
!= ':' ; I
++);
1181 string::const_iterator FirstColon
= I
;
1183 /* Determine if this is a host type URI with a leading double //
1184 and then search for the first single / */
1185 string::const_iterator SingleSlash
= I
;
1186 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1189 /* Find the / indicating the end of the hostname, ignoring /'s in the square brackets */
1191 bool InBracket
= false;
1192 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); SingleSlash
++)
1194 if (*SingleSlash
== '[')
1196 if (InBracket
== true && *SingleSlash
== ']')
1200 if (SingleSlash
> U
.end())
1201 SingleSlash
= U
.end();
1203 // We can now write the access and path specifiers
1204 Access
.assign(U
.begin(),FirstColon
);
1205 if (SingleSlash
!= U
.end())
1206 Path
.assign(SingleSlash
,U
.end());
1207 if (Path
.empty() == true)
1210 // Now we attempt to locate a user:pass@host fragment
1211 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1215 if (FirstColon
>= U
.end())
1218 if (FirstColon
> SingleSlash
)
1219 FirstColon
= SingleSlash
;
1221 // Find the colon...
1223 if (I
> SingleSlash
)
1225 for (; I
< SingleSlash
&& *I
!= ':'; I
++);
1226 string::const_iterator SecondColon
= I
;
1228 // Search for the @ after the colon
1229 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
1230 string::const_iterator At
= I
;
1232 // Now write the host and user/pass
1233 if (At
== SingleSlash
)
1235 if (FirstColon
< SingleSlash
)
1236 Host
.assign(FirstColon
,SingleSlash
);
1240 Host
.assign(At
+1,SingleSlash
);
1241 User
.assign(FirstColon
,SecondColon
);
1242 if (SecondColon
< At
)
1243 Password
.assign(SecondColon
+1,At
);
1246 // Now we parse the RFC 2732 [] hostnames.
1247 unsigned long PortEnd
= 0;
1249 for (unsigned I
= 0; I
!= Host
.length();)
1258 if (InBracket
== true && Host
[I
] == ']')
1269 if (InBracket
== true)
1275 // Now we parse off a port number from the hostname
1277 string::size_type Pos
= Host
.rfind(':');
1278 if (Pos
== string::npos
|| Pos
< PortEnd
)
1281 Port
= atoi(string(Host
,Pos
+1).c_str());
1282 Host
.assign(Host
,0,Pos
);
1285 // URI::operator string - Convert the URI to a string /*{{{*/
1286 // ---------------------------------------------------------------------
1288 URI::operator string()
1292 if (Access
.empty() == false)
1295 if (Host
.empty() == false)
1297 if (Access
.empty() == false)
1300 if (User
.empty() == false)
1303 if (Password
.empty() == false)
1304 Res
+= ":" + Password
;
1308 // Add RFC 2732 escaping characters
1309 if (Access
.empty() == false &&
1310 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
1311 Res
+= '[' + Host
+ ']';
1318 sprintf(S
,":%u",Port
);
1323 if (Path
.empty() == false)
1334 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1335 // ---------------------------------------------------------------------
1337 string
URI::SiteOnly(const string
&URI
)
1347 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1348 // ---------------------------------------------------------------------
1350 string
URI::NoUserPassword(const string
&URI
)