1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "apt-pkg/strutl.h"
22 #include <apt-pkg/strutl.h>
23 #include <apt-pkg/fileutl.h>
24 #include <apt-pkg/error.h>
41 // strstrip - Remove white space from the front and back of a string /*{{{*/
42 // ---------------------------------------------------------------------
43 /* This is handy to use when parsing a file. It also removes \n's left
44 over from fgets and company */
45 char *_strstrip(char *String
)
47 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
52 char *End
= String
+ strlen(String
) - 1;
53 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
54 *End
== '\r'); End
--);
60 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
61 // ---------------------------------------------------------------------
63 char *_strtabexpand(char *String
,size_t Len
)
65 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
69 if (I
+ 8 > String
+ Len
)
75 /* Assume the start of the string is 0 and find the next 8 char
81 Len
= 8 - ((String
- I
) % 8);
89 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
90 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
95 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
96 // ---------------------------------------------------------------------
97 /* This grabs a single word, converts any % escaped characters to their
98 proper values and advances the pointer. Double quotes are understood
99 and stripped out as well. This is for URI/URL parsing. It also can
100 understand [] brackets.*/
101 bool ParseQuoteWord(const char *&String
,string
&Res
)
103 // Skip leading whitespace
104 const char *C
= String
;
105 for (;*C
!= 0 && *C
== ' '; C
++);
109 // Jump to the next word
110 for (;*C
!= 0 && isspace(*C
) == 0; C
++)
114 for (C
++; *C
!= 0 && *C
!= '"'; C
++);
120 for (C
++; *C
!= 0 && *C
!= ']'; C
++);
126 // Now de-quote characters
129 const char *Start
= String
;
131 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
133 if (*Start
== '%' && Start
+ 2 < C
)
138 *I
= (char)strtol(Tmp
,0,16);
151 // Skip ending white space
152 for (;*C
!= 0 && isspace(*C
) != 0; C
++);
157 // ParseCWord - Parses a string like a C "" expression /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This expects a series of space separated strings enclosed in ""'s.
160 It concatenates the ""'s into a single string. */
161 bool ParseCWord(const char *&String
,string
&Res
)
163 // Skip leading whitespace
164 const char *C
= String
;
165 for (;*C
!= 0 && *C
== ' '; C
++);
171 if (strlen(String
) >= sizeof(Buffer
))
178 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
187 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
189 if (isspace(*C
) == 0)
199 // QuoteString - Convert a string into quoted form /*{{{*/
200 // ---------------------------------------------------------------------
202 string
QuoteString(const string
&Str
, const char *Bad
)
205 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
207 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
208 *I
<= 0x20 || *I
>= 0x7F)
211 sprintf(Buf
,"%%%02x",(int)*I
);
220 // DeQuoteString - Convert a string from quoted form /*{{{*/
221 // ---------------------------------------------------------------------
222 /* This undoes QuoteString */
223 string
DeQuoteString(const string
&Str
)
226 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
228 if (*I
== '%' && I
+ 2 < Str
.end())
234 Res
+= (char)strtol(Tmp
,0,16);
245 // SizeToStr - Convert a long into a human readable size /*{{{*/
246 // ---------------------------------------------------------------------
247 /* A max of 4 digits are shown before conversion to the next highest unit.
248 The max length of the string will be 5 chars unless the size is > 10
250 string
SizeToStr(double Size
)
259 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
260 ExaBytes, ZettaBytes, YottaBytes */
261 char Ext
[] = {'\0','k','M','G','T','P','E','Z','Y'};
265 if (ASize
< 100 && I
!= 0)
267 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
273 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
283 // TimeToStr - Convert the time into a string /*{{{*/
284 // ---------------------------------------------------------------------
285 /* Converts a number of seconds to a hms format */
286 string
TimeToStr(unsigned long Sec
)
294 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
300 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
306 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
310 sprintf(S
,"%lis",Sec
);
317 // SubstVar - Substitute a string for another string /*{{{*/
318 // ---------------------------------------------------------------------
319 /* This replaces all occurrences of Subst with Contents in Str. */
320 string
SubstVar(const string
&Str
,const string
&Subst
,const string
&Contents
)
322 string::size_type Pos
= 0;
323 string::size_type OldPos
= 0;
326 while (OldPos
< Str
.length() &&
327 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
329 Temp
+= string(Str
,OldPos
,Pos
) + Contents
;
330 OldPos
= Pos
+ Subst
.length();
336 return Temp
+ string(Str
,OldPos
);
339 string
SubstVar(string Str
,const struct SubstVar
*Vars
)
341 for (; Vars
->Subst
!= 0; Vars
++)
342 Str
= SubstVar(Str
,Vars
->Subst
,*Vars
->Contents
);
346 // URItoFileName - Convert the uri into a unique file name /*{{{*/
347 // ---------------------------------------------------------------------
348 /* This converts a URI into a safe filename. It quotes all unsafe characters
349 and converts / to _ and removes the scheme identifier. The resulting
350 file name should be unique and never occur again for a different file */
351 string
URItoFileName(const string
&URI
)
353 // Nuke 'sensitive' items
359 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
360 string NewURI
= QuoteString(U
,"\\|{}[]<>\"^~_=!@#$%^&*");
361 replace(NewURI
.begin(),NewURI
.end(),'/','_');
365 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
366 // ---------------------------------------------------------------------
367 /* This routine performs a base64 transformation on a string. It was ripped
368 from wget and then patched and bug fixed.
370 This spec can be found in rfc2045 */
371 string
Base64Encode(const string
&S
)
374 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
375 'I','J','K','L','M','N','O','P',
376 'Q','R','S','T','U','V','W','X',
377 'Y','Z','a','b','c','d','e','f',
378 'g','h','i','j','k','l','m','n',
379 'o','p','q','r','s','t','u','v',
380 'w','x','y','z','0','1','2','3',
381 '4','5','6','7','8','9','+','/'};
383 // Pre-allocate some space
385 Final
.reserve((4*S
.length() + 2)/3 + 2);
387 /* Transform the 3x8 bits to 4x6 bits, as required by
389 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
391 char Bits
[3] = {0,0,0};
398 Final
+= tbl
[Bits
[0] >> 2];
399 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
401 if (I
+ 1 >= S
.end())
404 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
406 if (I
+ 2 >= S
.end())
409 Final
+= tbl
[Bits
[2] & 0x3f];
412 /* Apply the padding elements, this tells how many bytes the remote
413 end should discard */
414 if (S
.length() % 3 == 2)
416 if (S
.length() % 3 == 1)
422 // stringcmp - Arbitrary string compare /*{{{*/
423 // ---------------------------------------------------------------------
424 /* This safely compares two non-null terminated strings of arbitrary
426 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
428 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
432 if (A
== AEnd
&& B
== BEnd
)
444 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
445 const char *B
,const char *BEnd
)
447 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
451 if (A
== AEnd
&& B
== BEnd
)
461 int stringcmp(string::const_iterator A
,string::const_iterator AEnd
,
462 string::const_iterator B
,string::const_iterator BEnd
)
464 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
468 if (A
== AEnd
&& B
== BEnd
)
480 // stringcasecmp - Arbitrary case-insensitive string compare /*{{{*/
481 // ---------------------------------------------------------------------
483 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
485 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
486 if (toupper(*A
) != toupper(*B
))
489 if (A
== AEnd
&& B
== BEnd
)
495 if (toupper(*A
) < toupper(*B
))
500 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
501 const char *B
,const char *BEnd
)
503 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
504 if (toupper(*A
) != toupper(*B
))
507 if (A
== AEnd
&& B
== BEnd
)
513 if (toupper(*A
) < toupper(*B
))
517 int stringcasecmp(string::const_iterator A
,string::const_iterator AEnd
,
518 string::const_iterator B
,string::const_iterator BEnd
)
520 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
521 if (toupper(*A
) != toupper(*B
))
524 if (A
== AEnd
&& B
== BEnd
)
530 if (toupper(*A
) < toupper(*B
))
536 // LookupTag - Look up the value of a tag in a tagged string /*{{{*/
537 // ---------------------------------------------------------------------
538 /* The format is like those used in package files and the method
539 communication system */
540 string
LookupTag(const string
&Message
,const char *Tag
,const char *Default
)
542 // Look for a matching tag.
543 int Length
= strlen(Tag
);
544 for (string::const_iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
547 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
549 // Find the end of line and strip the leading/trailing spaces
550 string::const_iterator J
;
552 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
553 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
554 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
559 for (; *I
!= '\n' && I
< Message
.end(); I
++);
562 // Failed to find a match
568 // StringToBool - Converts a string into a boolean /*{{{*/
569 // ---------------------------------------------------------------------
570 /* This inspects the string to see if it is true or if it is false and
571 then returns the result. Several variants of true/false are checked. */
572 int StringToBool(const string
&Text
,int Default
)
575 int Res
= strtol(Text
.c_str(),&End
,0);
576 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
579 // Check for negatives
580 if (strcasecmp(Text
.c_str(),"no") == 0 ||
581 strcasecmp(Text
.c_str(),"false") == 0 ||
582 strcasecmp(Text
.c_str(),"without") == 0 ||
583 strcasecmp(Text
.c_str(),"off") == 0 ||
584 strcasecmp(Text
.c_str(),"disable") == 0)
587 // Check for positives
588 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
589 strcasecmp(Text
.c_str(),"true") == 0 ||
590 strcasecmp(Text
.c_str(),"with") == 0 ||
591 strcasecmp(Text
.c_str(),"on") == 0 ||
592 strcasecmp(Text
.c_str(),"enable") == 0)
598 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
599 // ---------------------------------------------------------------------
600 /* This converts a time_t into a string time representation that is
601 year 2000 compliant and timezone neutral */
602 string
TimeRFC1123(time_t Date
)
604 struct tm Conv
= *gmtime(&Date
);
607 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
608 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
609 "Aug","Sep","Oct","Nov","Dec"};
611 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
612 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
613 Conv
.tm_min
,Conv
.tm_sec
);
617 // ReadMessages - Read messages from the FD /*{{{*/
618 // ---------------------------------------------------------------------
619 /* This pulls full messages from the input FD into the message buffer.
620 It assumes that messages will not pause during transit so no
621 fancy buffering is used. */
622 bool ReadMessages(int Fd
, vector
<string
> &List
)
629 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
630 if (Res
< 0 && errno
== EINTR
)
633 // Process is dead, this is kind of bad..
638 if (Res
< 0 && errno
== EAGAIN
)
645 // Look for the end of the message
646 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
648 if (I
[0] != '\n' || I
[1] != '\n')
651 // Pull the message out
652 string
Message(Buffer
,I
-Buffer
);
655 for (; I
< End
&& *I
== '\n'; I
++);
657 memmove(Buffer
,I
,End
-Buffer
);
660 List
.push_back(Message
);
665 if (WaitFd(Fd
) == false)
670 // MonthConv - Converts a month string into a number /*{{{*/
671 // ---------------------------------------------------------------------
672 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
673 Made it a bit more robust with a few touppers though. */
674 static int MonthConv(char *Month
)
676 switch (toupper(*Month
))
679 return toupper(Month
[1]) == 'P'?3:7;
685 if (toupper(Month
[1]) == 'A')
687 return toupper(Month
[2]) == 'N'?5:6;
689 return toupper(Month
[2]) == 'R'?2:4;
697 // Pretend it is January..
703 // timegm - Internal timegm function if gnu is not available /*{{{*/
704 // ---------------------------------------------------------------------
705 /* Ripped this evil little function from wget - I prefer the use of
706 GNU timegm if possible as this technique will have interesting problems
707 with leap seconds, timezones and other.
709 Converts struct tm to time_t, assuming the data in tm is UTC rather
710 than local timezone (mktime assumes the latter).
712 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
713 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
715 /* Turned it into an autoconf check, because GNU is not the only thing which
716 can provide timegm. -- 2002-09-22, Joel Baker */
718 #ifndef HAVE_TIMEGM // Now with autoconf!
719 static time_t timegm(struct tm
*t
)
726 tb
= mktime (gmtime (&tl
));
727 return (tl
<= tb
? (tl
+ (tl
- tb
)) : (tl
- (tb
- tl
)));
731 // StrToTime - Converts a string into a time_t /*{{{*/
732 // ---------------------------------------------------------------------
733 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
734 and the C library asctime format. It requires the GNU library function
735 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
736 reason the C library does not provide any such function :< This also
737 handles the weird, but unambiguous FTP time format*/
738 bool StrToTime(const string
&Val
,time_t &Result
)
742 const char *I
= Val
.c_str();
744 // Skip the day of the week
745 for (;*I
!= 0 && *I
!= ' '; I
++);
747 // Handle RFC 1123 time
749 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
750 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
752 // Handle RFC 1036 time
753 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
754 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
759 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
760 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
763 if (sscanf(Val
.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm
.tm_year
,&Tm
.tm_mon
,
764 &Tm
.tm_mday
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
773 Tm
.tm_mon
= MonthConv(Month
);
776 // Convert to local time and then to GMT
777 Result
= timegm(&Tm
);
781 // StrToNum - Convert a fixed length string to a number /*{{{*/
782 // ---------------------------------------------------------------------
783 /* This is used in decoding the crazy fixed length string headers in
785 bool StrToNum(const char *Str
,unsigned long &Res
,unsigned Len
,unsigned Base
)
788 if (Len
>= sizeof(S
))
793 // All spaces is a zero
796 for (I
= 0; S
[I
] == ' '; I
++);
801 Res
= strtoul(S
,&End
,Base
);
808 // HexDigit - Convert a hex character into an integer /*{{{*/
809 // ---------------------------------------------------------------------
810 /* Helper for Hex2Num */
811 static int HexDigit(int c
)
813 if (c
>= '0' && c
<= '9')
815 if (c
>= 'a' && c
<= 'f')
817 if (c
>= 'A' && c
<= 'F')
822 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
823 // ---------------------------------------------------------------------
824 /* The length of the buffer must be exactly 1/2 the length of the string. */
825 bool Hex2Num(const string
&Str
,unsigned char *Num
,unsigned int Length
)
827 if (Str
.length() != Length
*2)
830 // Convert each digit. We store it in the same order as the string
832 for (string::const_iterator I
= Str
.begin(); I
!= Str
.end();J
++, I
+= 2)
834 if (isxdigit(*I
) == 0 || isxdigit(I
[1]) == 0)
837 Num
[J
] = HexDigit(I
[0]) << 4;
838 Num
[J
] += HexDigit(I
[1]);
844 // TokSplitString - Split a string up by a given token /*{{{*/
845 // ---------------------------------------------------------------------
846 /* This is intended to be a faster splitter; it does not use dynamic
847 memory. Input is changed to insert nulls at each token location. */
848 bool TokSplitString(char Tok
,char *Input
,char **List
,
849 unsigned long ListMax
)
851 // Strip any leading spaces
853 char *Stop
= Start
+ strlen(Start
);
854 for (; *Start
!= 0 && isspace(*Start
) != 0; Start
++);
856 unsigned long Count
= 0;
860 // Skip to the next Token
861 for (; Pos
!= Stop
&& *Pos
!= Tok
; Pos
++);
863 // Back remove spaces
865 for (; End
> Start
&& (End
[-1] == Tok
|| isspace(End
[-1]) != 0); End
--);
868 List
[Count
++] = Start
;
869 if (Count
>= ListMax
)
876 for (; Pos
!= Stop
&& (*Pos
== Tok
|| isspace(*Pos
) != 0 || *Pos
== 0); Pos
++);
884 // RegexChoice - Simple regex list/list matcher /*{{{*/
885 // ---------------------------------------------------------------------
887 unsigned long RegexChoice(RxChoiceList
*Rxs
,const char **ListBegin
,
888 const char **ListEnd
)
890 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
893 unsigned long Hits
= 0;
894 for (; ListBegin
!= ListEnd
; ListBegin
++)
896 // Check if the name is a regex
899 for (I
= *ListBegin
; *I
!= 0; I
++)
900 if (*I
== '.' || *I
== '?' || *I
== '*' || *I
== '|')
905 // Compile the regex pattern
908 if (regcomp(&Pattern
,*ListBegin
,REG_EXTENDED
| REG_ICASE
|
914 for (RxChoiceList
*R
= Rxs
; R
->Str
!= 0; R
++)
919 if (strcasecmp(R
->Str
,*ListBegin
) != 0)
923 if (regexec(&Pattern
,R
->Str
,0,0,0) != 0)
938 _error
->Warning(_("Selection %s not found"),*ListBegin
);
944 // ioprintf - C format string outputter to C++ iostreams /*{{{*/
945 // ---------------------------------------------------------------------
946 /* This is used to make the internationalization strings easier to translate
947 and to allow reordering of parameters */
948 void ioprintf(ostream
&out
,const char *format
,...)
951 va_start(args
,format
);
953 // sprintf the description
955 vsnprintf(S
,sizeof(S
),format
,args
);
959 // safe_snprintf - Safer snprintf /*{{{*/
960 // ---------------------------------------------------------------------
961 /* This is a snprintf that will never (ever) go past 'End' and returns a
962 pointer to the end of the new string. The returned string is always null
963 terminated unless Buffer == End. This is a better alternative to using
964 consecutive snprintfs. */
965 char *safe_snprintf(char *Buffer
,char *End
,const char *Format
,...)
970 va_start(args
,Format
);
975 Did
= vsnprintf(Buffer
,End
- Buffer
,Format
,args
);
976 if (Did
< 0 || Buffer
+ Did
> End
)
982 // CheckDomainList - See if Host is in a comma-separated list /*{{{*/
983 // ---------------------------------------------------------------------
984 /* The domain list is a comma-separated list of domains that are suffix
985 matched against the argument */
986 bool CheckDomainList(const string
&Host
,const string
&List
)
988 string::const_iterator Start
= List
.begin();
989 for (string::const_iterator Cur
= List
.begin(); Cur
<= List
.end(); Cur
++)
991 if (Cur
< List
.end() && *Cur
!= ',')
994 // Match the end of the string..
995 if ((Host
.size() >= (unsigned)(Cur
- Start
)) &&
997 stringcasecmp(Host
.end() - (Cur
- Start
),Host
.end(),Start
,Cur
) == 0)
1006 // URI::CopyFrom - Copy from an object /*{{{*/
1007 // ---------------------------------------------------------------------
1008 /* This parses the URI into all of its components */
1009 void URI::CopyFrom(const string
&U
)
1011 string::const_iterator I
= U
.begin();
1013 // Locate the first colon, this separates the scheme
1014 for (; I
< U
.end() && *I
!= ':' ; I
++);
1015 string::const_iterator FirstColon
= I
;
1017 /* Determine if this is a host type URI with a leading double //
1018 and then search for the first single / */
1019 string::const_iterator SingleSlash
= I
;
1020 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
1023 /* Find the / indicating the end of the hostname, ignoring /'s in the
1025 bool InBracket
= false;
1026 for (; SingleSlash
< U
.end() && (*SingleSlash
!= '/' || InBracket
== true); SingleSlash
++)
1028 if (*SingleSlash
== '[')
1030 if (InBracket
== true && *SingleSlash
== ']')
1034 if (SingleSlash
> U
.end())
1035 SingleSlash
= U
.end();
1037 // We can now write the access and path specifiers
1038 Access
.assign(U
.begin(),FirstColon
);
1039 if (SingleSlash
!= U
.end())
1040 Path
.assign(SingleSlash
,U
.end());
1041 if (Path
.empty() == true)
1044 // Now we attempt to locate a user:pass@host fragment
1045 if (FirstColon
+ 2 <= U
.end() && FirstColon
[1] == '/' && FirstColon
[2] == '/')
1049 if (FirstColon
>= U
.end())
1052 if (FirstColon
> SingleSlash
)
1053 FirstColon
= SingleSlash
;
1055 // Find the colon...
1057 if (I
> SingleSlash
)
1059 for (; I
< SingleSlash
&& *I
!= ':'; I
++);
1060 string::const_iterator SecondColon
= I
;
1062 // Search for the @ after the colon
1063 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
1064 string::const_iterator At
= I
;
1066 // Now write the host and user/pass
1067 if (At
== SingleSlash
)
1069 if (FirstColon
< SingleSlash
)
1070 Host
.assign(FirstColon
,SingleSlash
);
1074 Host
.assign(At
+1,SingleSlash
);
1075 User
.assign(FirstColon
,SecondColon
);
1076 if (SecondColon
< At
)
1077 Password
.assign(SecondColon
+1,At
);
1080 // Now we parse the RFC 2732 [] hostnames.
1081 unsigned long PortEnd
= 0;
1083 for (unsigned I
= 0; I
!= Host
.length();)
1092 if (InBracket
== true && Host
[I
] == ']')
1103 if (InBracket
== true)
1109 // Now we parse off a port number from the hostname
1111 string::size_type Pos
= Host
.rfind(':');
1112 if (Pos
== string::npos
|| Pos
< PortEnd
)
1115 Port
= atoi(string(Host
,Pos
+1).c_str());
1116 Host
.assign(Host
,0,Pos
);
1119 // URI::operator string - Convert the URI to a string /*{{{*/
1120 // ---------------------------------------------------------------------
1122 URI::operator string()
1126 if (Access
.empty() == false)
1129 if (Host
.empty() == false)
1131 if (Access
.empty() == false)
1134 if (User
.empty() == false)
1137 if (Password
.empty() == false)
1138 Res
+= ":" + Password
;
1142 // Add RFC 2732 escaping characters
1143 if (Access
.empty() == false &&
1144 (Host
.find('/') != string::npos
|| Host
.find(':') != string::npos
))
1145 Res
+= '[' + Host
+ ']';
1152 sprintf(S
,":%u",Port
);
1157 if (Path
.empty() == false)
1168 // URI::SiteOnly - Return the scheme and site for the URI /*{{{*/
1169 // ---------------------------------------------------------------------
1171 string
URI::SiteOnly(const string
&URI
)