1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
24 #include <apt-pkg/error.h>
42 // strstrip - Remove white space from the front and back of a string /*{{{*/
43 // ---------------------------------------------------------------------
44 /* This is handy to use when parsing a file. It also removes \n's left
45 over from fgets and company */
46 char *_strstrip(char *String
)
48 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
53 char *End
= String
+ strlen(String
) - 1;
54 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
55 *End
== '\r'); End
--);
61 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
62 // ---------------------------------------------------------------------
// Fragment of _strtabexpand(String, Len). Only part of the body is visible in
// this listing; the notes below are limited to what those fragments show.
// NOTE(review): the outer loop bound compares I with I + Len, i.e. with an
// offset of itself, so that test can only fail when Len is 0 and the scan is
// effectively stopped by the terminating NUL alone. String + Len looks like
// the intended limit (it is what the "I + 8 >" check uses) - confirm.
// NOTE(review): I starts at String and is only ever incremented, so
// (String - I) is never positive; I - String may have been intended when
// computing the distance to the next 8-column stop - confirm.
64 char *_strtabexpand(char *String
,size_t Len
)
66 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
70 if (I
+ 8 > String
+ Len
)
76 /* Assume the start of the string is 0 and find the next 8 char
82 Len
= 8 - ((String
- I
) % 8);
90 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
91 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
96 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
97 // ---------------------------------------------------------------------
98 /* This grabs a single word, converts any % escaped characters to their
99 proper values and advances the pointer. Double quotes are understood
100 and stripped out as well. This is for URI/URL parsing. It also can
101 understand [] brackets.*/
102 bool ParseQuoteWord(const char *&String
,string
&Res
)
104 // Skip leading whitespace
105 const char *C
= String
;
106 for (;*C
!= 0 && *C
== ' '; C
++);
110 // Jump to the next word
111 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
115 for (C
++; *C
!= 0 && *C
!= '"'; C
++);
121 for (C
++; *C
!= 0 && *C
!= ']'; C
++);
127 // Now de-quote characters
130 const char *Start
= String
;
132 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
134 if (*Start
== '%' && Start
+ 2 < C
)
139 *I
= (char)strtol(Tmp
,0,16);
152 // Skip ending white space
153 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
158 // ParseCWord - Parses a string like a C "" expression /*{{{*/
159 // ---------------------------------------------------------------------
160 /* This expects a series of space separated strings enclosed in ""'s.
161 It concatenates the ""'s into a single string. */
162 bool ParseCWord(const char *&String
,string
&Res
)
164 // Skip leading whitespace
165 const char *C
= String
;
166 for (;*C
!= 0 && *C
== ' '; C
++);
172 if (strlen(String
) >= sizeof(Buffer
))
179 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
188 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
190 if (isspace(*C
) == 0)
200 // QuoteString - Convert a string into quoted form /*{{{*/
201 // ---------------------------------------------------------------------
// Fragment of QuoteString(Str, Bad): walks Str and, for a character that is
// found in Bad, is not printable, or is <= 0x20 or >= 0x7F, formats a
// '%' + two-hex-digit escape into Buf. How Buf is used afterwards is not
// visible in this listing.
// NOTE(review): *I is a plain char that is handed to isprint() and cast
// straight to int for "%02x". Where char is signed, bytes >= 0x80 are
// negative: that is outside the domain isprint() is defined for, and the
// value would print sign-extended (more than two hex digits). Converting
// through unsigned char first would avoid both - confirm against the full
// source.
203 string
QuoteString(const string
&Str
, const char *Bad
)
206 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
208 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
209 *I
<= 0x20 || *I
>= 0x7F)
212 sprintf(Buf
,"%%%02x",(int)*I
);
221 // DeQuoteString - Convert a string from quoted form /*{{{*/
222 // ---------------------------------------------------------------------
223 /* This undoes QuoteString */
224 string
DeQuoteString(const string
&Str
)
227 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
229 if (*I
== '%' && I
+ 2 < Str
.end())
235 Res
+= (char)strtol(Tmp
,0,16);
246 // SizeToStr - Convert a long into a human readable size /*{{{*/
247 // ---------------------------------------------------------------------
248 /* A max of 4 digits are shown before conversion to the next highest unit.
249 The max length of the string will be 5 chars unless the size is > 10
251 string
SizeToStr(double Size
)
260 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
261 ExaBytes, ZettaBytes, YottaBytes */
262 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
266 if (ASize
< 100 && I
!= 0)
268 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
274 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
284 // TimeToStr - Convert the time into a string /*{{{*/
285 // ---------------------------------------------------------------------
286 /* Converts a number of seconds to a hms format */
// Fragment of TimeToStr(Sec): four sprintf variants that render Sec as
// days/hours/minutes/seconds, hours/minutes/seconds, minutes/seconds, or
// seconds only. The conditions that select between them are not visible in
// this listing.
// NOTE(review): Sec is unsigned long but every conversion below is %li
// (signed long); %lu is the specifier that matches the argument type.
287 string
TimeToStr(unsigned long Sec
)
295 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
301 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
307 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
311 sprintf(S
,"%lis",Sec
);
318 // SubstVar - Substitute a string for another string /*{{{*/
319 // ---------------------------------------------------------------------
320 /* This replaces all occurrences of Subst with Contents in Str. */
321 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
323 string::size_type Pos
= 0;
324 string::size_type OldPos
= 0;
327 while (OldPos
< Str
.length() &&
328 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
330 Temp
+= string(Str
,OldPos
,Pos
) + Contents
;
331 OldPos
= Pos
+ Subst
.length();
337 return Temp
+ string(Str
,OldPos
);
340 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
342 for (; Vars
->Subst
!= 0; Vars
++)
343 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
347 // URItoFileName - Convert the uri into a unique file name /*{{{*/
348 // ---------------------------------------------------------------------
349 /* This converts a URI into a safe filename. It quotes all unsafe characters
350 and converts / to _ and removes the scheme identifier. The resulting
351 file name should be unique and never occur again for a different file */
352 string
URItoFileName(const string
&URI
)
354 // Nuke 'sensitive' items
360 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
361 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
362 replace(NewURI
.begin(),NewURI
.end(),'/','_');
366 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
367 // ---------------------------------------------------------------------
368 /* This routine performs a base64 transformation on a string. It was ripped
369 from wget and then patched and bug fixed.
371 This spec can be found in rfc2045 */
// Fragment of Base64Encode(S): steps through S three bytes at a time and
// appends characters from the 64-entry table tbl to Final, then inspects
// S.length() % 3 for padding. The statements that fill Bits and append the
// padding are not visible in this listing.
// NOTE(review): Bits is declared as plain char and is used to build indices
// into tbl (Bits[0] >> 2, Bits[1] >> 4, Bits[2] >> 6). Where char is signed,
// an input byte >= 0x80 stored in Bits would be negative and those shifts
// could produce a negative index. Declaring Bits as unsigned char looks
// safer - confirm against the full source.
372 string
Base64Encode(const string
&S
)
375 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
376 'I','J','K','L','M','N','O','P',
377 'Q','R','S','T','U','V','W','X',
378 'Y','Z','a','b','c','d','e','f',
379 'g','h','i','j','k','l','m','n',
380 'o','p','q','r','s','t','u','v',
381 'w','x','y','z','0','1','2','3',
382 '4','5','6','7','8','9','+','/'};
384 // Pre-allocate some space
386 Final
.reserve((4*S
.length() + 2)/3 + 2);
388 /* Transform the 3x8 bits to 4x6 bits, as required by
390 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
392 char Bits
[3] = {0,0,0};
399 Final
+= tbl
[Bits
[0] >> 2];
400 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
402 if (I
+ 1 >= S
.end())
405 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
407 if (I
+ 2 >= S
.end())
410 Final
+= tbl
[Bits
[2] & 0x3f];
413 /* Apply the padding elements, this tells how many bytes the remote
414 end should discard */
415 if (S
.length() % 3 == 2)
417 if (S
.length() % 3 == 1)
423 // stringcmp - Arbitrary string compare /*{{{*/
424 // ---------------------------------------------------------------------
425 /* This safely compares two non-null terminated strings of arbitrary
427 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
429 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
433 if (A
== AEnd
&& B
== BEnd
)
445 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
446 const char *B
,const char *BEnd
)
448 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
452 if (A
== AEnd
&& B
== BEnd
)
462 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
463 string::const_iterator B
,string::const_iterator BEnd
)
465 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
469 if (A
== AEnd
&& B
== BEnd
)
481 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
482 // ---------------------------------------------------------------------
484 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
486 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
487 if (toupper(*A
) != toupper(*B
))
490 if (A
== AEnd
&& B
== BEnd
)
496 if (toupper(*A
) < toupper(*B
))
501 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
502 const char *B
,const char *BEnd
)
504 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
505 if (toupper(*A
) != toupper(*B
))
508 if (A
== AEnd
&& B
== BEnd
)
514 if (toupper(*A
) < toupper(*B
))
518 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
519 string::const_iterator B
,string::const_iterator BEnd
)
521 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
522 if (toupper(*A
) != toupper(*B
))
525 if (A
== AEnd
&& B
== BEnd
)
531 if (toupper(*A
) < toupper(*B
))
537 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
538 // ---------------------------------------------------------------------
539 /* The format is like those used in package files and the method
540 communication system */
// Fragment of LookupTag(Message, Tag, Default): scans Message for a position
// where Tag (compared case-insensitively) is immediately followed by ':',
// then trims whitespace around the rest of that line. What is returned, and
// how Default is used, is not visible in this listing.
541 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
543 // Look for a matching tag.
544 int Length
= strlen(Tag
);
545 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
548 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
550 // Find the end of line and strip the leading/trailing spaces
551 string::const_iterator J
;
// NOTE(review): in the next two loops the iterator is dereferenced (*I, *J)
// before it is compared with Message.end(), so end() itself can be
// dereferenced. Testing the bound first would avoid that - confirm.
553 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
554 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
// Walk J back over trailing whitespace, never moving before I.
555 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
// NOTE(review): same ordering issue here - *I is read before the end() test.
560 for (; *I
!= '\n' && I
< Message
.end(); I
++);
563 // Failed to find a match
569 // StringToBool - Converts a string into a boolean /*{{{*/
570 // ---------------------------------------------------------------------
571 /* This inspects the string to see if it is true or if it is false and
572 then returns the result. Several variants on true/false are checked. */
// Fragment of StringToBool(Text, Default): first tries to read Text as a
// number with strtol and tests for a parsed value of 0 or 1, then compares
// Text case-insensitively against two fixed word lists. The return
// statements are not visible in this listing.
573 int StringToBool(const string
&Text
,int Default
)
576 int Res
= strtol(Text
.c_str(),&End
,0);
577 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
580 // Check for negatives (no / false / without / off / disable)
581 if (strcasecmp(Text
.c_str(),"no") == 0 ||
582 strcasecmp(Text
.c_str(),"false") == 0 ||
583 strcasecmp(Text
.c_str(),"without") == 0 ||
584 strcasecmp(Text
.c_str(),"off") == 0 ||
585 strcasecmp(Text
.c_str(),"disable") == 0)
588 // Check for positives (yes / true / with / on / enable)
589 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
590 strcasecmp(Text
.c_str(),"true") == 0 ||
591 strcasecmp(Text
.c_str(),"with") == 0 ||
592 strcasecmp(Text
.c_str(),"on") == 0 ||
593 strcasecmp(Text
.c_str(),"enable") == 0)
599 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
600 // ---------------------------------------------------------------------
601 /* This converts a time_t into a string time representation that is
602 year 2000 compliant and timezone neutral */
603 string
TimeRFC1123(time_t Date
)
605 struct tm Conv
= *gmtime(&Date
);
608 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
609 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
610 "Aug","Sep","Oct","Nov","Dec"};
612 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
613 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
614 Conv
.tm_min
,Conv
.tm_sec
);
618 // ReadMessages - Read messages from the FD /*{{{*/
619 // ---------------------------------------------------------------------
620 /* This pulls full messages from the input FD into the message buffer.
621 It assumes that messages will not pause during transit so no
622 fancy buffering is used. */
623 bool ReadMessages(int Fd
, vector
<string
> &List
)
630 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
631 if (Res
< 0 && errno
== EINTR
)
634 // Process is dead, this is kind of bad..
639 if (Res
< 0 && errno
== EAGAIN
)
646 // Look for the end of the message
647 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
649 if (I
[0] != '\n' || I
[1] != '\n')
652 // Pull the message out
653 string
Message(Buffer
,I
-Buffer
);
656 for (; I
< End
&& *I
== '\n'; I
++);
658 memmove(Buffer
,I
,End
-Buffer
);
661 List
.push_back(Message
);
666 if (WaitFd(Fd
) == false)
671 // MonthConv - Converts a month string into a number /*{{{*/
672 // ---------------------------------------------------------------------
673 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
674 Made it a bit more robust with a few touppers though. */
675 static int MonthConv(char *Month
)
677 switch (toupper(*Month
))
680 return toupper(Month
[1]) == 'P'?3:7;
686 if (toupper(Month
[1]) == 'A')
688 return toupper(Month
[2]) == 'N'?5:6;
690 return toupper(Month
[2]) == 'R'?2:4;
698 // Pretend it is January..
704 // timegm - Internal timegm function if gnu is not available /*{{{*/
705 // ---------------------------------------------------------------------
706 /* Ripped this evil little function from wget - I prefer the use of
707 GNU timegm if possible as this technique will have interesting problems
708 with leap seconds, timezones and other.
710 Converts struct tm to time_t, assuming the data in tm is UTC rather
711 than local timezone (mktime assumes the latter).
713 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
714 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
716 /* Turned it into an autoconf check, because GNU is not the only thing which
717 can provide timegm. -- 2002-09-22, Joel Baker */
719 #ifndef HAVE_TIMEGM // Now with autoconf!
720 static time_t timegm(struct tm
*t
)
727 tb
= mktime (gmtime (&tl
));
728 return (tl
<= tb
? (tl
+ (tl
- tb
)) : (tl
- (tb
- tl
)));
732 // StrToTime - Converts a string into a time_t /*{{{*/
733 // ---------------------------------------------------------------------
734 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
735 and the C library asctime format. It requires the GNU library function
736 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
737 reason the C library does not provide any such function :< This also
738 handles the weird, but unambiguous FTP time format*/
739 bool StrToTime(const string
&Val
,time_t &Result
)
743 const char *I
= Val
.c_str();
745 // Skip the day of the week
746 for (;*I
!= 0 && *I
!= ' '; I
++);
748 // Handle RFC 1123 time
750 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
751 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
753 // Handle RFC 1036 time
754 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
755 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
760 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
761 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
764 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
765 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
774 Tm
.tm_mon
= MonthConv(Month
);
777 // Convert to local time and then to GMT
778 Result
= timegm(&Tm
);
782 // StrToNum - Convert a fixed length string to a number /*{{{*/
783 // ---------------------------------------------------------------------
784 /* This is used in decoding the crazy fixed length string headers in
786 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
789 if (Len
>= sizeof(S
))
794 // All spaces is a zero
797 for (I
= 0; S
[I
] == ' '; I
++);
802 Res
= strtoul(S
,&End
,Base
);
809 // HexDigit - Convert a hex character into an integer /*{{{*/
810 // ---------------------------------------------------------------------
811 /* Helper for Hex2Num */
812 static int HexDigit(int c
)
814 if (c
>= '0' && c
<= '9')
816 if (c
>= 'a' && c
<= 'f')
818 if (c
>= 'A' && c
<= 'F')
823 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
824 // ---------------------------------------------------------------------
825 /* The length of the buffer must be exactly 1/2 the length of the string. */
826 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
828 if (Str
.length() != Length
*2)
831 // Convert each digit. We store it in the same order as the string
833 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
835 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
838 Num
[J
] = HexDigit(I
[0]) << 4;
839 Num
[J
] += HexDigit(I
[1]);
845 // TokSplitString - Split a string up by a given token /*{{{*/
846 // ---------------------------------------------------------------------
847 /* This is intended to be a faster splitter, it does not use dynamic
848 memory. Input is changed to insert nulls at each token location. */
849 bool TokSplitString(char Tok
,char *Input
,char **List
,
850 unsigned long ListMax
)
852 // Strip any leading spaces
854 char *Stop
= Start
+ strlen(Start
);
855 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
857 unsigned long Count
= 0;
861 // Skip to the next Token
862 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
864 // Back remove spaces
866 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
869 List
[Count
++] = Start
;
870 if (Count
>= ListMax
)
877 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
885 // RegexChoice - Simple regex list/list matcher /*{{{*/
886 // ---------------------------------------------------------------------
888 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
889 const char **ListEnd
)
891 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
894 unsigned long Hits
= 0;
895 for (; ListBegin
!= ListEnd
; ListBegin
++)
897 // Check if the name is a regex
900 for (I
= *ListBegin
; *I
!= 0; I
++)
901 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
906 // Compile the regex pattern
909 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
915 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
920 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
924 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
939 _error
->Warning(_("Selection %s not found"),*ListBegin
);
945 // ioprintf - C format string outputter to C++ iostreams /*{{{*/
946 // ---------------------------------------------------------------------
947 /* This is used to make the internationalization strings easier to translate
948 and to allow reordering of parameters */
949 void ioprintf(ostream
&out
,const char *format
,...)
952 va_start(args
,format
);
954 // sprintf the description
956 vsnprintf(S
,sizeof(S
),format
,args
);
960 // safe_snprintf - Safer snprintf /*{{{*/
961 // ---------------------------------------------------------------------
962 /* This is a snprintf that will never (ever) go past 'End' and returns a
963 pointer to the end of the new string. The returned string is always null
964 terminated unless Buffer == end. This is a better alternative to using
965 consecutive snprintfs. */
966 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
971 va_start(args
,Format
);
976 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
977 if (Did
< 0 || Buffer
+ Did
> End
)
983 // CheckDomainList - See if Host is in a comma-separated list /*{{{*/
984 // ---------------------------------------------------------------------
985 /* The domain list is a comma-separated list of domains that are suffix
986 matched against the argument */
987 bool CheckDomainList(const string
&Host
,const string
&List
)
989 string::const_iterator Start
= List
.begin();
990 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); Cur
++)
992 if (Cur
< List
.end() && *Cur
!= ',')
995 // Match the end of the string..
996 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
998 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1007 // URI::CopyFrom - Copy from an object /*{{{*/
1008 // ---------------------------------------------------------------------
1009 /* This parses the URI into all of its components */
1010 void URI::CopyFrom(const string
&U
)
1012 string::const_iterator I
= U
.begin();
1014 // Locate the first colon, this separates the scheme
1015 for (; I
< U
.end() && *I
!= ':' ; I
++);
1016 string::const_iterator FirstColon
= I
;
1018 /* Determine if this is a host type URI with a leading double //
1019 and then search for the first single / */
1020 string::const_iterator SingleSlash
= I
;
1021 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1024 /* Find the / indicating the end of the hostname, ignoring /'s in the
1026 bool InBracket
= false;
1027 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); SingleSlash
++)
1029 if (*SingleSlash
== '[')
1031 if (InBracket
== true && *SingleSlash
== ']')
1035 if (SingleSlash
> U
.end())
1036 SingleSlash
= U
.end();
1038 // We can now write the access and path specifiers
1039 Access
.assign(U
.begin(),FirstColon
);
1040 if (SingleSlash
!= U
.end())
1041 Path
.assign(SingleSlash
,U
.end());
1042 if (Path
.empty() == true)
1045 // Now we attempt to locate a user:pass@host fragment
1046 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1050 if (FirstColon
>= U
.end())
1053 if (FirstColon
> SingleSlash
)
1054 FirstColon
= SingleSlash
;
1056 // Find the colon...
1058 if (I
> SingleSlash
)
1060 for (; I
< SingleSlash
&& *I
!= ':'; I
++);
1061 string::const_iterator SecondColon
= I
;
1063 // Search for the @ after the colon
1064 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
1065 string::const_iterator At
= I
;
1067 // Now write the host and user/pass
1068 if (At
== SingleSlash
)
1070 if (FirstColon
< SingleSlash
)
1071 Host
.assign(FirstColon
,SingleSlash
);
1075 Host
.assign(At
+1,SingleSlash
);
1076 User
.assign(FirstColon
,SecondColon
);
1077 if (SecondColon
< At
)
1078 Password
.assign(SecondColon
+1,At
);
1081 // Now we parse the RFC 2732 [] hostnames.
1082 unsigned long PortEnd
= 0;
1084 for (unsigned I
= 0; I
!= Host
.length();)
1093 if (InBracket
== true && Host
[I
] == ']')
1104 if (InBracket
== true)
1110 // Now we parse off a port number from the hostname
1112 string::size_type Pos
= Host
.rfind(':');
1113 if (Pos
== string::npos
|| Pos
< PortEnd
)
1116 Port
= atoi(string(Host
,Pos
+1).c_str());
1117 Host
.assign(Host
,0,Pos
);
1120 // URI::operator string - Convert the URI to a string /*{{{*/
1121 // ---------------------------------------------------------------------
1123 URI::operator string()
1127 if (Access
.empty() == false)
1130 if (Host
.empty() == false)
1132 if (Access
.empty() == false)
1135 if (User
.empty() == false)
1138 if (Password
.empty() == false)
1139 Res
+= ":" + Password
;
1143 // Add RFC 2732 escaping characters
1144 if (Access
.empty() == false &&
1145 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
1146 Res
+= '[' + Host
+ ']';
1153 sprintf(S
,":%u",Port
);
1158 if (Path
.empty() == false)
1169 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1170 // ---------------------------------------------------------------------
1172 string
URI::SiteOnly(const string
&URI
)