1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20 #include <apt-pkg/error.h>
39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
40 // ---------------------------------------------------------------------
41 /* This is handy to use before displaying some information to the end user */
42 bool UTF8ToCodeset(const char *codeset
, const string
&orig
, string
*dest
)
47 size_t insize
, bufsize
;
50 cd
= iconv_open(codeset
, "UTF-8");
51 if (cd
== (iconv_t
)(-1)) {
52 // Something went wrong
54 _error
->Error("conversion from 'UTF-8' to '%s' not available",
62 insize
= bufsize
= orig
.size();
64 inptr
= (char *)inbuf
;
65 outbuf
= new char[bufsize
];
66 size_t lastError
= -1;
70 char *outptr
= outbuf
;
71 size_t outsize
= bufsize
;
72 size_t const err
= iconv(cd
, &inptr
, &insize
, &outptr
, &outsize
);
73 dest
->append(outbuf
, outptr
- outbuf
);
74 if (err
== (size_t)(-1))
81 // replace a series of unknown multibytes with a single "?"
82 if (lastError
!= insize
) {
83 lastError
= insize
- 1;
95 outbuf
= new char[bufsize
];
109 // strstrip - Remove white space from the front and back of a string /*{{{*/
110 // ---------------------------------------------------------------------
111 /* This is handy to use when parsing a file. It also removes \n's left
112 over from fgets and company */
113 char *_strstrip(char *String
)
115 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
120 char *End
= String
+ strlen(String
) - 1;
121 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
122 *End
== '\r'); End
--);
128 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
129 // ---------------------------------------------------------------------
131 char *_strtabexpand(char *String
,size_t Len
)
133 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
137 if (I
+ 8 > String
+ Len
)
143 /* Assume the start of the string is 0 and find the next 8 char
149 Len
= 8 - ((String
- I
) % 8);
157 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
158 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
163 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
164 // ---------------------------------------------------------------------
165 /* This grabs a single word, converts any % escaped characters to their
166 proper values and advances the pointer. Double quotes are understood
167 and stripped out as well. This is for URI/URL parsing. It also can
168 understand [] brackets.*/
169 bool ParseQuoteWord(const char *&String
,string
&Res
)
171 // Skip leading whitespace
172 const char *C
= String
;
173 for (;*C
!= 0 && *C
== ' '; C
++);
177 // Jump to the next word
178 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
182 for (C
++; *C
!= 0 && *C
!= '"'; C
++);
188 for (C
++; *C
!= 0 && *C
!= ']'; C
++);
194 // Now de-quote characters
197 const char *Start
= String
;
199 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
201 if (*Start
== '%' && Start
+ 2 < C
)
206 *I
= (char)strtol(Tmp
,0,16);
219 // Skip ending white space
220 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
225 // ParseCWord - Parses a string like a C "" expression /*{{{*/
226 // ---------------------------------------------------------------------
227 /* This expects a series of space separated strings enclosed in ""'s.
228 It concatenates the ""'s into a single string. */
229 bool ParseCWord(const char *&String
,string
&Res
)
231 // Skip leading whitespace
232 const char *C
= String
;
233 for (;*C
!= 0 && *C
== ' '; C
++);
239 if (strlen(String
) >= sizeof(Buffer
))
246 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
255 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
257 if (isspace(*C
) == 0)
267 // QuoteString - Convert a string into quoted form /*{{{*/
268 // ---------------------------------------------------------------------
270 string
QuoteString(const string
&Str
, const char *Bad
)
273 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
275 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
276 *I
<= 0x20 || *I
>= 0x7F)
279 sprintf(Buf
,"%%%02x",(int)*I
);
288 // DeQuoteString - Convert a string from quoted form /*{{{*/
289 // ---------------------------------------------------------------------
290 /* This undoes QuoteString */
291 string
DeQuoteString(const string
&Str
)
294 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
296 if (*I
== '%' && I
+ 2 < Str
.end())
302 Res
+= (char)strtol(Tmp
,0,16);
313 // SizeToStr - Convert a long into a human readable size /*{{{*/
314 // ---------------------------------------------------------------------
315 /* A max of 4 digits are shown before conversion to the next highest unit.
316 The max length of the string will be 5 chars unless the size is > 10
318 string
SizeToStr(double Size
)
327 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
328 ExaBytes, ZettaBytes, YottaBytes */
329 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
333 if (ASize
< 100 && I
!= 0)
335 sprintf(S
,"%'.1f%c",ASize
,Ext
[I
]);
341 sprintf(S
,"%'.0f%c",ASize
,Ext
[I
]);
351 // TimeToStr - Convert the time into a string /*{{{*/
352 // ---------------------------------------------------------------------
353 /* Converts a number of seconds to a hms format */
354 string
TimeToStr(unsigned long Sec
)
362 //d means days, h means hours, min means minutes, s means seconds
363 sprintf(S
,_("%lid %lih %limin %lis"),Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
369 //h means hours, min means minutes, s means seconds
370 sprintf(S
,_("%lih %limin %lis"),Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
376 //min means minutes, s means seconds
377 sprintf(S
,_("%limin %lis"),Sec
/60,Sec
% 60);
382 sprintf(S
,_("%lis"),Sec
);
389 // SubstVar - Substitute a string for another string /*{{{*/
390 // ---------------------------------------------------------------------
391 /* This replaces all occurrences of Subst with Contents in Str. */
392 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
394 string::size_type Pos
= 0;
395 string::size_type OldPos
= 0;
398 while (OldPos
< Str
.length() &&
399 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
401 Temp
+= string(Str
,OldPos
,Pos
) + Contents
;
402 OldPos
= Pos
+ Subst
.length();
408 return Temp
+ string(Str
,OldPos
);
411 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
413 for (; Vars
->Subst
!= 0; Vars
++)
414 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
418 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
419 // ---------------------------------------------------------------------
420 /* Returns a string with the supplied separator depth + 1 times in it */
421 std::string
OutputInDepth(const unsigned long Depth
, const char* Separator
)
423 std::string output
= "";
424 for(unsigned long d
=Depth
+1; d
> 0; d
--)
425 output
.append(Separator
);
429 // URItoFileName - Convert the uri into a unique file name /*{{{*/
430 // ---------------------------------------------------------------------
431 /* This converts a URI into a safe filename. It quotes all unsafe characters
432 and converts / to _ and removes the scheme identifier. The resulting
433 file name should be unique and never occur again for a different file */
434 string
URItoFileName(const string
&URI
)
436 // Nuke 'sensitive' items
442 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
443 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
444 replace(NewURI
.begin(),NewURI
.end(),'/','_');
448 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
449 // ---------------------------------------------------------------------
450 /* This routine performs a base64 transformation on a string. It was ripped
451 from wget and then patched and bug fixed.
453 This spec can be found in rfc2045 */
454 string
Base64Encode(const string
&S
)
457 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
458 'I','J','K','L','M','N','O','P',
459 'Q','R','S','T','U','V','W','X',
460 'Y','Z','a','b','c','d','e','f',
461 'g','h','i','j','k','l','m','n',
462 'o','p','q','r','s','t','u','v',
463 'w','x','y','z','0','1','2','3',
464 '4','5','6','7','8','9','+','/'};
466 // Pre-allocate some space
468 Final
.reserve((4*S
.length() + 2)/3 + 2);
470 /* Transform the 3x8 bits to 4x6 bits, as required by
472 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
474 char Bits
[3] = {0,0,0};
481 Final
+= tbl
[Bits
[0] >> 2];
482 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
484 if (I
+ 1 >= S
.end())
487 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
489 if (I
+ 2 >= S
.end())
492 Final
+= tbl
[Bits
[2] & 0x3f];
495 /* Apply the padding elements, this tells how many bytes the remote
496 end should discard */
497 if (S
.length() % 3 == 2)
499 if (S
.length() % 3 == 1)
505 // stringcmp - Arbitrary string compare /*{{{*/
506 // ---------------------------------------------------------------------
507 /* This safely compares two non-null terminated strings of arbitrary
509 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
511 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
515 if (A
== AEnd
&& B
== BEnd
)
527 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
528 const char *B
,const char *BEnd
)
530 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
534 if (A
== AEnd
&& B
== BEnd
)
544 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
545 string::const_iterator B
,string::const_iterator BEnd
)
547 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
551 if (A
== AEnd
&& B
== BEnd
)
563 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
564 // ---------------------------------------------------------------------
566 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
568 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
569 if (toupper(*A
) != toupper(*B
))
572 if (A
== AEnd
&& B
== BEnd
)
578 if (toupper(*A
) < toupper(*B
))
583 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
584 const char *B
,const char *BEnd
)
586 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
587 if (toupper(*A
) != toupper(*B
))
590 if (A
== AEnd
&& B
== BEnd
)
596 if (toupper(*A
) < toupper(*B
))
600 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
601 string::const_iterator B
,string::const_iterator BEnd
)
603 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
604 if (toupper(*A
) != toupper(*B
))
607 if (A
== AEnd
&& B
== BEnd
)
613 if (toupper(*A
) < toupper(*B
))
619 // LookupTag - Look up the value of a tag in a tagged string /*{{{*/
620 // ---------------------------------------------------------------------
621 /* The format is like those used in package files and the method
622 communication system */
623 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
625 // Look for a matching tag.
626 int Length
= strlen(Tag
);
627 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
630 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
632 // Find the end of line and strip the leading/trailing spaces
633 string::const_iterator J
;
635 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
636 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
637 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
642 for (; *I
!= '\n' && I
< Message
.end(); I
++);
645 // Failed to find a match
651 // StringToBool - Converts a string into a boolean /*{{{*/
652 // ---------------------------------------------------------------------
653 /* This inspects the string to see if it is true or if it is false and
654 then returns the result. Several variants of true/false are checked. */
655 int StringToBool(const string
&Text
,int Default
)
658 int Res
= strtol(Text
.c_str(),&End
,0);
659 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
662 // Check for negatives
663 if (strcasecmp(Text
.c_str(),"no") == 0 ||
664 strcasecmp(Text
.c_str(),"false") == 0 ||
665 strcasecmp(Text
.c_str(),"without") == 0 ||
666 strcasecmp(Text
.c_str(),"off") == 0 ||
667 strcasecmp(Text
.c_str(),"disable") == 0)
670 // Check for positives
671 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
672 strcasecmp(Text
.c_str(),"true") == 0 ||
673 strcasecmp(Text
.c_str(),"with") == 0 ||
674 strcasecmp(Text
.c_str(),"on") == 0 ||
675 strcasecmp(Text
.c_str(),"enable") == 0)
681 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
682 // ---------------------------------------------------------------------
683 /* This converts a time_t into a string time representation that is
684 year 2000 compliant and timezone neutral */
685 string
TimeRFC1123(time_t Date
)
687 struct tm Conv
= *gmtime(&Date
);
690 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
691 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
692 "Aug","Sep","Oct","Nov","Dec"};
694 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
695 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
696 Conv
.tm_min
,Conv
.tm_sec
);
700 // ReadMessages - Read messages from the FD /*{{{*/
701 // ---------------------------------------------------------------------
702 /* This pulls full messages from the input FD into the message buffer.
703 It assumes that messages will not pause during transit so no
704 fancy buffering is used.
706 In particular: this reads blocks from the input until it believes
707 that it's run out of input text. Each block is terminated by a
708 double newline ('\n' followed by '\n'). As noted below, there is a
709 bug in this code: it assumes that all the blocks have been read if
710 it doesn't see additional text in the buffer after the last one is
711 parsed, which will cause it to lose blocks if the last block
712 coincides with the end of the buffer.
714 bool ReadMessages(int Fd
, vector
<string
> &List
)
718 // Represents any left-over from the previous iteration of the
719 // parse loop. (i.e., if a message is split across the end
720 // of the buffer, it goes here)
721 string PartialMessage
;
725 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
726 if (Res
< 0 && errno
== EINTR
)
729 // Process is dead, this is kind of bad..
734 if (Res
< 0 && errno
== EAGAIN
)
741 // Look for the end of the message
742 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
744 if (I
[0] != '\n' || I
[1] != '\n')
747 // Pull the message out
748 string
Message(Buffer
,I
-Buffer
);
749 PartialMessage
+= Message
;
752 for (; I
< End
&& *I
== '\n'; I
++);
754 memmove(Buffer
,I
,End
-Buffer
);
757 List
.push_back(PartialMessage
);
758 PartialMessage
.clear();
762 // If there's text left in the buffer, store it
763 // in PartialMessage and throw the rest of the buffer
764 // away. This allows us to handle messages that
765 // are longer than the static buffer size.
766 PartialMessage
+= string(Buffer
, End
);
771 // BUG ALERT: if a message block happens to end at a
772 // multiple of 64000 characters, this will cause it to
773 // terminate early, leading to a badly formed block and
774 // probably crashing the method. However, this is the only
775 // way we have to find the end of the message block. I have
776 // an idea of how to fix this, but it will require changes
777 // to the protocol (essentially to mark the beginning and
778 // end of the block).
780 // -- dburrows 2008-04-02
784 if (WaitFd(Fd
) == false)
789 // MonthConv - Converts a month string into a number /*{{{*/
790 // ---------------------------------------------------------------------
791 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
792 Made it a bit more robust with a few touppers though. */
793 static int MonthConv(char *Month
)
795 switch (toupper(*Month
))
798 return toupper(Month
[1]) == 'P'?3:7;
804 if (toupper(Month
[1]) == 'A')
806 return toupper(Month
[2]) == 'N'?5:6;
808 return toupper(Month
[2]) == 'R'?2:4;
816 // Pretend it is January..
822 // timegm - Internal timegm function if gnu is not available /*{{{*/
823 // ---------------------------------------------------------------------
824 /* Ripped this evil little function from wget - I prefer the use of
825 GNU timegm if possible as this technique will have interesting problems
826 with leap seconds, timezones and other.
828 Converts struct tm to time_t, assuming the data in tm is UTC rather
829 than local timezone (mktime assumes the latter).
831 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
832 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
834 /* Turned it into an autoconf check, because GNU is not the only thing which
835 can provide timegm. -- 2002-09-22, Joel Baker */
837 #ifndef HAVE_TIMEGM // Now with autoconf!
838 static time_t timegm(struct tm
*t
)
845 tb
= mktime (gmtime (&tl
));
846 return (tl
<= tb
? (tl
+ (tl
- tb
)) : (tl
- (tb
- tl
)));
850 // StrToTime - Converts a string into a time_t /*{{{*/
851 // ---------------------------------------------------------------------
852 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
853 and the C library asctime format. It requires the GNU library function
854 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
855 reason the C library does not provide any such function :< This also
856 handles the weird, but unambiguous FTP time format*/
857 bool StrToTime(const string
&Val
,time_t &Result
)
861 const char *I
= Val
.c_str();
863 // Skip the day of the week
864 for (;*I
!= 0 && *I
!= ' '; I
++);
866 // Handle RFC 1123 time
868 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
869 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
871 // Handle RFC 1036 time
872 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
873 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
878 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
879 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
882 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
883 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
892 Tm
.tm_mon
= MonthConv(Month
);
895 // Convert to local time and then to GMT
896 Result
= timegm(&Tm
);
900 // StrToNum - Convert a fixed length string to a number /*{{{*/
901 // ---------------------------------------------------------------------
902 /* This is used in decoding the crazy fixed length string headers in
904 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
907 if (Len
>= sizeof(S
))
912 // All spaces is a zero
915 for (I
= 0; S
[I
] == ' '; I
++);
920 Res
= strtoul(S
,&End
,Base
);
927 // HexDigit - Convert a hex character into an integer /*{{{*/
928 // ---------------------------------------------------------------------
929 /* Helper for Hex2Num */
930 static int HexDigit(int c
)
932 if (c
>= '0' && c
<= '9')
934 if (c
>= 'a' && c
<= 'f')
936 if (c
>= 'A' && c
<= 'F')
941 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
942 // ---------------------------------------------------------------------
943 /* The length of the buffer must be exactly 1/2 the length of the string. */
944 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
946 if (Str
.length() != Length
*2)
949 // Convert each digit. We store it in the same order as the string
951 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
953 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
956 Num
[J
] = HexDigit(I
[0]) << 4;
957 Num
[J
] += HexDigit(I
[1]);
963 // TokSplitString - Split a string up by a given token /*{{{*/
964 // ---------------------------------------------------------------------
965 /* This is intended to be a faster splitter; it does not use dynamic
966 memory. Input is changed to insert nulls at each token location. */
967 bool TokSplitString(char Tok
,char *Input
,char **List
,
968 unsigned long ListMax
)
970 // Strip any leading spaces
972 char *Stop
= Start
+ strlen(Start
);
973 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
975 unsigned long Count
= 0;
979 // Skip to the next Token
980 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
982 // Back remove spaces
984 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
987 List
[Count
++] = Start
;
988 if (Count
>= ListMax
)
995 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
1003 // ExplodeString - Split a string up into a vector /*{{{*/
1004 // ---------------------------------------------------------------------
1005 /* This can be used to split a given string up into a vector, so the
1006 purpose is the same as in the method above and this one is a bit slower
1007 also, but the advantage is that we get an iterable vector */
1008 vector
<string
> ExplodeString(string
const &haystack
, char const &split
)
1010 string::const_iterator start
= haystack
.begin();
1011 string::const_iterator end
= start
;
1012 vector
<string
> exploded
;
1014 for (; end
!= haystack
.end() && *end
!= split
; ++end
);
1015 exploded
.push_back(string(start
, end
));
1017 } while (end
!= haystack
.end() && (++end
) != haystack
.end());
1021 // RegexChoice - Simple regex list/list matcher /*{{{*/
1022 // ---------------------------------------------------------------------
1024 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
1025 const char **ListEnd
)
1027 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1030 unsigned long Hits
= 0;
1031 for (; ListBegin
!= ListEnd
; ListBegin
++)
1033 // Check if the name is a regex
1036 for (I
= *ListBegin
; *I
!= 0; I
++)
1037 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
1042 // Compile the regex pattern
1045 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
1051 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
1056 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
1060 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
1065 if (R
->Hit
== false)
1075 _error
->Warning(_("Selection %s not found"),*ListBegin
);
1081 // ioprintf - C format string outputter to C++ iostreams /*{{{*/
1082 // ---------------------------------------------------------------------
1083 /* This is used to make the internationalization strings easier to translate
1084 and to allow reordering of parameters */
1085 void ioprintf(ostream
&out
,const char *format
,...)
1088 va_start(args
,format
);
1090 // sprintf the description
1092 vsnprintf(S
,sizeof(S
),format
,args
);
1096 // strprintf - C format string outputter to C++ strings /*{{{*/
1097 // ---------------------------------------------------------------------
1098 /* This is used to make the internationalization strings easier to translate
1099 and to allow reordering of parameters */
1100 void strprintf(string
&out
,const char *format
,...)
1103 va_start(args
,format
);
1105 // sprintf the description
1107 vsnprintf(S
,sizeof(S
),format
,args
);
1111 // safe_snprintf - Safer snprintf /*{{{*/
1112 // ---------------------------------------------------------------------
1113 /* This is a snprintf that will never (ever) go past 'End' and returns a
1114 pointer to the end of the new string. The returned string is always null
1115 terminated unless Buffer == end. This is a better alternative to using
1116 consecutive snprintfs. */
1117 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
1122 va_start(args
,Format
);
1127 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
1128 if (Did
< 0 || Buffer
+ Did
> End
)
1130 return Buffer
+ Did
;
1134 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1135 // ---------------------------------------------------------------------
1137 int tolower_ascii(int c
)
1139 if (c
>= 'A' and c
<= 'Z')
1145 // CheckDomainList - See if Host is in a comma-separated list /*{{{*/
1146 // ---------------------------------------------------------------------
1147 /* The domain list is a comma-separated list of domains that are suffix
1148 matched against the argument */
1149 bool CheckDomainList(const string
&Host
,const string
&List
)
1151 string::const_iterator Start
= List
.begin();
1152 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); Cur
++)
1154 if (Cur
< List
.end() && *Cur
!= ',')
1157 // Match the end of the string..
1158 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
1160 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1169 // URI::CopyFrom - Copy from an object /*{{{*/
1170 // ---------------------------------------------------------------------
1171 /* This parses the URI into all of its components */
1172 void URI::CopyFrom(const string
&U
)
1174 string::const_iterator I
= U
.begin();
1176 // Locate the first colon, this separates the scheme
1177 for (; I
< U
.end() && *I
!= ':' ; I
++);
1178 string::const_iterator FirstColon
= I
;
1180 /* Determine if this is a host type URI with a leading double //
1181 and then search for the first single / */
1182 string::const_iterator SingleSlash
= I
;
1183 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1186 /* Find the / indicating the end of the hostname, ignoring /'s in the
1188 bool InBracket
= false;
1189 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); SingleSlash
++)
1191 if (*SingleSlash
== '[')
1193 if (InBracket
== true && *SingleSlash
== ']')
1197 if (SingleSlash
> U
.end())
1198 SingleSlash
= U
.end();
1200 // We can now write the access and path specifiers
1201 Access
.assign(U
.begin(),FirstColon
);
1202 if (SingleSlash
!= U
.end())
1203 Path
.assign(SingleSlash
,U
.end());
1204 if (Path
.empty() == true)
1207 // Now we attempt to locate a user:pass@host fragment
1208 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1212 if (FirstColon
>= U
.end())
1215 if (FirstColon
> SingleSlash
)
1216 FirstColon
= SingleSlash
;
1218 // Find the colon...
1220 if (I
> SingleSlash
)
1222 for (; I
< SingleSlash
&& *I
!= ':'; I
++);
1223 string::const_iterator SecondColon
= I
;
1225 // Search for the @ after the colon
1226 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
1227 string::const_iterator At
= I
;
1229 // Now write the host and user/pass
1230 if (At
== SingleSlash
)
1232 if (FirstColon
< SingleSlash
)
1233 Host
.assign(FirstColon
,SingleSlash
);
1237 Host
.assign(At
+1,SingleSlash
);
1238 User
.assign(FirstColon
,SecondColon
);
1239 if (SecondColon
< At
)
1240 Password
.assign(SecondColon
+1,At
);
1243 // Now we parse the RFC 2732 [] hostnames.
1244 unsigned long PortEnd
= 0;
1246 for (unsigned I
= 0; I
!= Host
.length();)
1255 if (InBracket
== true && Host
[I
] == ']')
1266 if (InBracket
== true)
1272 // Now we parse off a port number from the hostname
1274 string::size_type Pos
= Host
.rfind(':');
1275 if (Pos
== string::npos
|| Pos
< PortEnd
)
1278 Port
= atoi(string(Host
,Pos
+1).c_str());
1279 Host
.assign(Host
,0,Pos
);
1282 // URI::operator string - Convert the URI to a string /*{{{*/
1283 // ---------------------------------------------------------------------
1285 URI::operator string()
1289 if (Access
.empty() == false)
1292 if (Host
.empty() == false)
1294 if (Access
.empty() == false)
1297 if (User
.empty() == false)
1300 if (Password
.empty() == false)
1301 Res
+= ":" + Password
;
1305 // Add RFC 2732 escaping characters
1306 if (Access
.empty() == false &&
1307 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
1308 Res
+= '[' + Host
+ ']';
1315 sprintf(S
,":%u",Port
);
1320 if (Path
.empty() == false)
1331 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1332 // ---------------------------------------------------------------------
1334 string
URI::SiteOnly(const string
&URI
)
1344 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1345 // ---------------------------------------------------------------------
1347 string
URI::NoUserPassword(const string
&URI
)