]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
more refactor
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <ctype.h>
25 #include <string.h>
26 #include <sstream>
27 #include <stdio.h>
28 #include <algorithm>
29 #include <unistd.h>
30 #include <regex.h>
31 #include <errno.h>
32 #include <stdarg.h>
33 #include <iconv.h>
34
35 #include <apti18n.h>
36
37 using namespace std;
38 /*}}}*/
39 // Strip - Remove white space from the front and back of a string /*{{{*/
40 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Strip returns a copy of s with leading and trailing blanks, tabs and
   newlines removed. A string consisting only of those characters (or an
   empty one) yields an empty string. */
std::string Strip(const std::string &s)
{
   static char const Blanks[] = " \t\n";

   std::string::size_type const first = s.find_first_not_of(Blanks);
   if (first == std::string::npos)
      return std::string();

   // A non-blank character exists, so this search cannot fail.
   std::string::size_type const last = s.find_last_not_of(Blanks);
   return std::string(s, first, last - first + 1);
}

/* Endswith reports whether the final characters of s are exactly end.
   An empty end matches every string. */
bool Endswith(const std::string &s, const std::string &end)
{
   std::string::size_type const tail = end.size();
   if (s.size() < tail)
      return false;
   return s.compare(s.size() - tail, tail, end) == 0;
}

   }
}
62 /*}}}*/
63 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
64 // ---------------------------------------------------------------------
/* This is handy to use before displaying some information to the end user */
66 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
67 {
68 iconv_t cd;
69 const char *inbuf;
70 char *inptr, *outbuf;
71 size_t insize, bufsize;
72 dest->clear();
73
74 cd = iconv_open(codeset, "UTF-8");
75 if (cd == (iconv_t)(-1)) {
76 // Something went wrong
77 if (errno == EINVAL)
78 _error->Error("conversion from 'UTF-8' to '%s' not available",
79 codeset);
80 else
81 perror("iconv_open");
82
83 return false;
84 }
85
86 insize = bufsize = orig.size();
87 inbuf = orig.data();
88 inptr = (char *)inbuf;
89 outbuf = new char[bufsize];
90 size_t lastError = -1;
91
92 while (insize != 0)
93 {
94 char *outptr = outbuf;
95 size_t outsize = bufsize;
96 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
97 dest->append(outbuf, outptr - outbuf);
98 if (err == (size_t)(-1))
99 {
100 switch (errno)
101 {
102 case EILSEQ:
103 insize--;
104 inptr++;
105 // replace a series of unknown multibytes with a single "?"
106 if (lastError != insize) {
107 lastError = insize - 1;
108 dest->append("?");
109 }
110 break;
111 case EINVAL:
112 insize = 0;
113 break;
114 case E2BIG:
115 if (outptr == outbuf)
116 {
117 bufsize *= 2;
118 delete[] outbuf;
119 outbuf = new char[bufsize];
120 }
121 break;
122 }
123 }
124 }
125
126 delete[] outbuf;
127
128 iconv_close(cd);
129
130 return true;
131 }
132 /*}}}*/
133 // strstrip - Remove white space from the front and back of a string /*{{{*/
134 // ---------------------------------------------------------------------
135 /* This is handy to use when parsing a file. It also removes \n's left
136 over from fgets and company */
137 char *_strstrip(char *String)
138 {
139 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
140
141 if (*String == 0)
142 return String;
143 return _strrstrip(String);
144 }
145 /*}}}*/
146 // strrstrip - Remove white space from the back of a string /*{{{*/
147 // ---------------------------------------------------------------------
/* Cuts trailing blanks, tabs, newlines and carriage returns off String by
   writing a new terminator in place. Returns String itself. */
char *_strrstrip(char *String)
{
   // Work with a length instead of a pointer so that an empty or all-blank
   // string never requires forming a pointer in front of the buffer
   size_t Len = strlen(String);
   while (Len != 0)
   {
      char const Last = String[Len - 1];
      if (Last != ' ' && Last != '\t' && Last != '\n' && Last != '\r')
	 break;
      --Len;
   }
   String[Len] = 0;
   return String;
}
157 /*}}}*/
158 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
159 // ---------------------------------------------------------------------
160 /* */
/* Replaces every tab in String by the spaces needed to reach the next
   multiple-of-8 column (column 0 being the start of String).

   Len is the size of the buffer String lives in, terminator included. If
   expanding a tab would not fit into Len bytes, the string is cut off at
   that tab instead. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   // Bytes currently occupied, including the terminating NUL
   size_t Used = strlen(String) + 1;

   for (size_t Pos = 0; Pos < Len && String[Pos] != 0; ++Pos)
   {
      if (String[Pos] != '\t')
	 continue;

      // Number of spaces that take us to the next tab stop (1..8)
      size_t const Width = 8 - (Pos % 8);

      // The tab itself provides one byte, the rest must still fit
      if (Used + (Width - 1) > Len)
      {
	 String[Pos] = 0;
	 return String;
      }

      // Shift the remainder (with its terminator) and fill the gap
      memmove(String + Pos + Width,String + Pos + 1,Used - (Pos + 1));
      memset(String + Pos,' ',Width);
      Used += Width - 1;
      Pos += Width - 1;
   }
   return String;
}
192 /*}}}*/
193 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
194 // ---------------------------------------------------------------------
195 /* This grabs a single word, converts any % escaped characters to their
196 proper values and advances the pointer. Double quotes are understood
197 and striped out as well. This is for URI/URL parsing. It also can
198 understand [] brackets.*/
/* Extracts the next word from String into Res and advances String past
   the word and the whitespace following it.

   A word ends at the first whitespace that is not inside "..." or [...].
   %XX escapes are decoded and double quotes are dropped; brackets are
   kept. Returns false (leaving String and Res untouched) if only spaces
   are left or a quote/bracket is not closed. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the end of the word, stepping over quoted/bracketed parts
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; ++C)
   {
      if (*C == '"')
      {
	 C = strchr(C + 1, '"');
	 if (C == NULL)
	    return false;
      }
      if (*C == '[')
      {
	 C = strchr(C + 1, ']');
	 if (C == NULL)
	    return false;
      }
   }

   // Now de-quote characters. The word is collected in a string so that
   // arbitrarily long words neither overflow a buffer nor get truncated.
   // As before the copy starts at String, i.e. skipped leading spaces are
   // part of the result.
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && C - Start > 2 &&
	  isxdigit(static_cast<unsigned char>(Start[1])) &&
	  isxdigit(static_cast<unsigned char>(Start[2])))
      {
	 char const Tmp[3] = {Start[1], Start[2], 0};
	 Word += static_cast<char>(strtol(Tmp,0,16));
	 Start += 3;
	 continue;
      }
      if (*Start != '"')
	 Word += *Start;
      ++Start;
   }
   // Keep the historic behaviour that a decoded %00 ends the result
   Res = Word.c_str();

   // Skip ending white space
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) != 0; ++C);
   String = C;
   return true;
}
255 /*}}}*/
256 // ParseCWord - Parses a string like a C "" expression /*{{{*/
257 // ---------------------------------------------------------------------
258 /* This expects a series of space separated strings enclosed in ""'s.
259 It concatenates the ""'s into a single string. */
bool ParseCWord(const char *&String,string &Res)
{
   // Nothing but spaces (or nothing at all) is not a word
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return false;

   // Inputs of 1024 bytes or more are rejected
   if (strlen(String) >= 1024)
      return false;

   // Collected locally so that Res stays untouched on failure
   string Joined;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
	 // Copy everything up to the closing quote
	 ++Cur;
	 while (*Cur != 0 && *Cur != '"')
	    Joined += *Cur++;
	 if (*Cur == 0)
	    return false;
      }
      else
      {
	 // Outside of quotes only whitespace is acceptable
	 if (isspace(*Cur) == 0)
	    return false;
	 // A run of whitespace contributes a single blank
	 bool const Repeated = (Cur != String && isspace(Cur[-1]) != 0);
	 if (Repeated == false)
	    Joined += ' ';
      }
      ++Cur;
   }

   Res = Joined;
   String = Cur;
   return true;
}
297 /*}}}*/
// QuoteString - Convert a string into quoted form /*{{{*/
299 // ---------------------------------------------------------------------
300 /* */
/* Returns Str with every unsafe byte replaced by a %XX escape (two
   lowercase hex digits). Unsafe are all bytes listed in Bad, the percent
   sign itself, the space, control characters and everything from 0x7F
   upwards. */
string QuoteString(const string &Str, const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // Work on the unsigned value: a plain (signed) char would turn bytes
      // >= 0x80 negative, which is invalid for isprint() and would be
      // formatted as e.g. "%ffffffe9" instead of "%e9"
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (strchr(Bad,Ch) != 0 || isprint(Ch) == 0 ||
	  Ch == 0x25 || // percent '%' char
	  Ch <= 0x20 || Ch >= 0x7F) // control chars
      {
	 char Buf[4];
	 snprintf(Buf,sizeof(Buf),"%%%02x",Ch);
	 Res += Buf;
      }
      else
	 Res += *I;
   }
   return Res;
}
319 /*}}}*/
// DeQuoteString - Convert a string from quoted form /*{{{*/
321 // ---------------------------------------------------------------------
322 /* This undoes QuoteString */
323 string DeQuoteString(const string &Str)
324 {
325 return DeQuoteString(Str.begin(),Str.end());
326 }
/* Decodes the %XX escapes found in [begin,end) and returns the result.
   A '%' that is not followed by two hex digits is copied unchanged. */
string DeQuoteString(string::const_iterator const &begin,
		     string::const_iterator const &end)
{
   string Res;
   for (string::const_iterator I = begin; I != end; ++I)
   {
      // Compare distances rather than computing I + 2, which could lie
      // beyond end for a '%' close to the end of the range
      if (*I == '%' && end - I > 2 &&
	  isxdigit(static_cast<unsigned char>(I[1])) &&
	  isxdigit(static_cast<unsigned char>(I[2])))
      {
	 char const Tmp[3] = {I[1], I[2], 0};
	 Res += static_cast<char>(strtol(Tmp,0,16));
	 // the loop increment consumes the third character
	 I += 2;
	 continue;
      }
      Res += *I;
   }
   return Res;
}
349
350 /*}}}*/
351 // SizeToStr - Convert a long into a human readable size /*{{{*/
352 // ---------------------------------------------------------------------
353 /* A max of 4 digits are shown before conversion to the next highest unit.
354 The max length of the string will be 5 chars unless the size is > 10
355 YottaBytes (E24) */
/* Formats the magnitude of Size (the sign is dropped) with a decimal
   unit letter: "<number> <unit>", where the unit is empty for plain bytes.
   Values below 100 of a non-byte unit get one decimal, everything else
   none. Sizes too large even for 'Y' are printed in 'Y' with as many
   digits as needed. */
string SizeToStr(double Size)
{
   char S[300];
   double ASize;
   if (Size >= 0)
      ASize = Size;
   else
      ASize = -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static char const Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   int const LastUnit = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // Pick the first unit in which the value is short enough; the last
   // unit is taken unconditionally so that S is always written
   int I = 0;
   for (; I < LastUnit; ++I)
   {
      if ((ASize < 100 && I != 0) || ASize < 10000)
	 break;
      ASize /= 1000.0;
   }

   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);

   return S;
}
388 /*}}}*/
389 // TimeToStr - Convert the time into a string /*{{{*/
390 // ---------------------------------------------------------------------
391 /* Converts a number of seconds to a hms format */
392 string TimeToStr(unsigned long Sec)
393 {
394 char S[300];
395
396 while (1)
397 {
398 if (Sec > 60*60*24)
399 {
400 //d means days, h means hours, min means minutes, s means seconds
401 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
402 break;
403 }
404
405 if (Sec > 60*60)
406 {
407 //h means hours, min means minutes, s means seconds
408 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
409 break;
410 }
411
412 if (Sec > 60)
413 {
414 //min means minutes, s means seconds
415 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
416 break;
417 }
418
419 //s means seconds
420 sprintf(S,_("%lis"),Sec);
421 break;
422 }
423
424 return S;
425 }
426 /*}}}*/
427 // SubstVar - Substitute a string for another string /*{{{*/
428 // ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str. */
/* Returns a copy of Str in which every non-overlapping occurrence of
   Subst (searched left to right) is replaced by Contents. Str is
   returned unchanged if Subst is empty or does not occur. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   // An empty pattern matches everywhere and would never advance
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
	  (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one; the third
      // argument is a length, not an end position
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp.append(Contents);
      OldPos = Pos + Subst.length();
   }

   // No match at all
   if (OldPos == 0)
      return Str;

   // Text after the last match
   if (OldPos < Str.length())
      Temp.append(Str,OldPos,string::npos);
   return Temp;
}
448
449 string SubstVar(string Str,const struct SubstVar *Vars)
450 {
451 for (; Vars->Subst != 0; Vars++)
452 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
453 return Str;
454 }
455 /*}}}*/
456 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
457 // ---------------------------------------------------------------------
458 /* Returns a string with the supplied separator depth + 1 times in it */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Indent;
   // Depth + 1 copies; the sum wraps to zero for the largest Depth
   unsigned long Copies = Depth + 1;
   while (Copies-- != 0)
      Indent += Separator;
   return Indent;
}
466 /*}}}*/
467 // URItoFileName - Convert the uri into a unique file name /*{{{*/
468 // ---------------------------------------------------------------------
469 /* This converts a URI into a safe filename. It quotes all unsafe characters
470 and converts / to _ and removes the scheme identifier. The resulting
471 file name should be unique and never occur again for a different file */
472 string URItoFileName(const string &URI)
473 {
474 // Nuke 'sensitive' items
475 ::URI U(URI);
476 U.User.clear();
477 U.Password.clear();
478 U.Access.clear();
479
480 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
481 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
482 replace(NewURI.begin(),NewURI.end(),'/','_');
483 return NewURI;
484 }
485 /*}}}*/
486 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
487 // ---------------------------------------------------------------------
488 /* This routine performs a base64 transformation on a string. It was ripped
489 from wget and then patched and bug fixed.
490
491 This spec can be found in rfc2045 */
/* Returns the base64 (RFC 2045 alphabet) encoding of S, padded with '='
   to a multiple of four characters. No line breaks are inserted. */
string Base64Encode(const string &S)
{
   // Conversion table.
   static char const tbl[64] = {'A','B','C','D','E','F','G','H',
				'I','J','K','L','M','N','O','P',
				'Q','R','S','T','U','V','W','X',
				'Y','Z','a','b','c','d','e','f',
				'g','h','i','j','k','l','m','n',
				'o','p','q','r','s','t','u','v',
				'w','x','y','z','0','1','2','3',
				'4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   string::size_type const Len = S.length();
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      string::size_type const Avail = Len - Pos;

      // The bytes must be unsigned: shifting a negative char would yield a
      // negative index into the table for every byte >= 0x80
      unsigned char const B0 = S[Pos];
      unsigned char const B1 = (Avail > 1) ? S[Pos + 1] : 0;
      unsigned char const B2 = (Avail > 2) ? S[Pos + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      if (Avail == 1)
	 break;

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
	 break;

      Final += tbl[B2 & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
542 /*}}}*/
543 // stringcmp - Arbitrary string compare /*{{{*/
544 // ---------------------------------------------------------------------
545 /* This safely compares two non-null terminated strings of arbitrary
546 length */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const DoneA = (A == AEnd);
   bool const DoneB = (B == BEnd);

   // Note the convention: if only A is exhausted the result is 1, if
   // only B is exhausted it is -1
   if (DoneA == true)
      return DoneB ? 0 : 1;
   if (DoneB == true)
      return -1;

   // First differing character decides
   return (*A < *B) ? -1 : 1;
}
563
564 #if __GNUC__ >= 3
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
	      const char *B,const char *BEnd)
{
   // Skip the part both ranges have in common
   for (; A != AEnd && B != BEnd && *A == *B; ++A, ++B);

   // Both ranges still have data: the first difference decides
   if (A != AEnd && B != BEnd)
      return (*A < *B) ? -1 : 1;

   // Equal length and equal content
   if (A == AEnd && B == BEnd)
      return 0;

   // Exactly one range is exhausted: 1 if it is A, -1 if it is B
   return (A == AEnd) ? 1 : -1;
}
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
	      string::const_iterator B,string::const_iterator BEnd)
{
   // Advance in lock-step until a difference or the end of a range
   while (A != AEnd && B != BEnd)
   {
      if (*A != *B)
	 return (*A < *B) ? -1 : 1;
      ++A;
      ++B;
   }

   // At least one range is exhausted here
   if (A != AEnd)
      return -1;	// only B ran out
   if (B != BEnd)
      return 1;		// only A ran out
   return 0;
}
599 #endif
600 /*}}}*/
601 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
602 // ---------------------------------------------------------------------
603 /* */
604 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
605 {
606 for (; A != AEnd && B != BEnd; A++, B++)
607 if (tolower_ascii(*A) != tolower_ascii(*B))
608 break;
609
610 if (A == AEnd && B == BEnd)
611 return 0;
612 if (A == AEnd)
613 return 1;
614 if (B == BEnd)
615 return -1;
616 if (tolower_ascii(*A) < tolower_ascii(*B))
617 return -1;
618 return 1;
619 }
620 #if __GNUC__ >= 3
621 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
622 const char *B,const char *BEnd)
623 {
624 for (; A != AEnd && B != BEnd; A++, B++)
625 if (tolower_ascii(*A) != tolower_ascii(*B))
626 break;
627
628 if (A == AEnd && B == BEnd)
629 return 0;
630 if (A == AEnd)
631 return 1;
632 if (B == BEnd)
633 return -1;
634 if (tolower_ascii(*A) < tolower_ascii(*B))
635 return -1;
636 return 1;
637 }
638 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
639 string::const_iterator B,string::const_iterator BEnd)
640 {
641 for (; A != AEnd && B != BEnd; A++, B++)
642 if (tolower_ascii(*A) != tolower_ascii(*B))
643 break;
644
645 if (A == AEnd && B == BEnd)
646 return 0;
647 if (A == AEnd)
648 return 1;
649 if (B == BEnd)
650 return -1;
651 if (tolower_ascii(*A) < tolower_ascii(*B))
652 return -1;
653 return 1;
654 }
655 #endif
656 /*}}}*/
657 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
658 // ---------------------------------------------------------------------
659 /* The format is like those used in package files and the method
660 communication system */
661 string LookupTag(const string &Message,const char *Tag,const char *Default)
662 {
663 // Look for a matching tag.
664 int Length = strlen(Tag);
665 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
666 {
667 // Found the tag
668 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
669 {
670 // Find the end of line and strip the leading/trailing spaces
671 string::const_iterator J;
672 I += Length + 1;
673 for (; isspace(*I) != 0 && I < Message.end(); ++I);
674 for (J = I; *J != '\n' && J < Message.end(); ++J);
675 for (; J > I && isspace(J[-1]) != 0; --J);
676
677 return string(I,J);
678 }
679
680 for (; *I != '\n' && I < Message.end(); ++I);
681 }
682
683 // Failed to find a match
684 if (Default == 0)
685 return string();
686 return Default;
687 }
688 /*}}}*/
689 // StringToBool - Converts a string into a boolean /*{{{*/
690 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
int StringToBool(const string &Text,int Default)
{
   const char * const Raw = Text.c_str();

   // A leading number decides, but only if it is exactly 0 or 1
   char *NumEnd;
   int const Num = strtol(Raw,&NumEnd,0);
   if (NumEnd != Raw && Num >= 0 && Num <= 1)
      return Num;

   // Words meaning "false" are checked first, then the ones meaning
   // "true"; the comparison ignores case
   static const char * const Falsy[] = {"no","false","without","off","disable"};
   static const char * const Truthy[] = {"yes","true","with","on","enable"};

   for (unsigned int W = 0; W != sizeof(Falsy)/sizeof(Falsy[0]); ++W)
      if (strcasecmp(Raw,Falsy[W]) == 0)
	 return 0;

   for (unsigned int W = 0; W != sizeof(Truthy)/sizeof(Truthy[0]); ++W)
      if (strcasecmp(Raw,Truthy[W]) == 0)
	 return 1;

   // Neither a number nor a known word
   return Default;
}
718 /*}}}*/
719 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
720 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
string TimeRFC1123(time_t Date)
{
   // Fixed English names: the output must not depend on the locale
   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
					     "Aug","Sep","Oct","Nov","Dec"};

   // Break the timestamp down in UTC; an unrepresentable date yields ""
   struct tm Utc;
   if (gmtime_r(&Date, &Utc) == NULL)
      return string();

   char Stamp[300];
   snprintf(Stamp, sizeof(Stamp), "%s, %02i %s %i %02i:%02i:%02i GMT",
	    DayNames[Utc.tm_wday], Utc.tm_mday, MonthNames[Utc.tm_mon],
	    Utc.tm_year+1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
   return string(Stamp);
}
739 /*}}}*/
740 // ReadMessages - Read messages from the FD /*{{{*/
741 // ---------------------------------------------------------------------
742 /* This pulls full messages from the input FD into the message buffer.
743 It assumes that messages will not pause during transit so no
744 fancy buffering is used.
745
746 In particular: this reads blocks from the input until it believes
747 that it's run out of input text. Each block is terminated by a
748 double newline ('\n' followed by '\n'). As noted below, there is a
749 bug in this code: it assumes that all the blocks have been read if
750 it doesn't see additional text in the buffer after the last one is
751 parsed, which will cause it to lose blocks if the last block
752 coincides with the end of the buffer.
753 */
/* Reads from Fd and appends every complete message (a block terminated by
   an empty line) to List, without the terminating newlines.

   Returns true once read() reports EAGAIN or the buffer has been consumed
   completely after a read. Returns false on end of file, on any other read
   error, or if WaitFd() fails while waiting for the rest of a partial
   message. */
bool ReadMessages(int Fd, vector<string> &List)
{
   char Buffer[64000];
   // End points behind the last valid byte in Buffer
   char *End = Buffer;
   // Represents any left-over from the previous iteration of the
   // parse loop. (i.e., if a message is split across the end
   // of the buffer, it goes here)
   string PartialMessage;

   while (1)
   {
      // Fill the unused rest of the buffer; interrupted reads are retried
      int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
      if (Res < 0 && errno == EINTR)
	 continue;

      // Process is dead, this is kind of bad..
      if (Res == 0)
	 return false;

      // No data
      if (Res < 0 && errno == EAGAIN)
	 return true;
      if (Res < 0)
	 return false;

      End += Res;

      // Look for the end of the message
      for (char *I = Buffer; I + 1 < End; I++)
      {
	 // A message ends at "\n\n" or at "\r\n\r\n".
	 // NOTE(review): only I[0] and I[1] are known to lie in front of
	 // End here, while strncmp may look at up to four bytes - confirm
	 // this cannot read stale or out-of-buffer data.
	 if (I[1] != '\n' ||
	     (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
	    continue;

	 // Pull the message out
	 string Message(Buffer,I-Buffer);
	 PartialMessage += Message;

	 // Fix up the buffer: skip the line terminators, move the
	 // remaining bytes to the front and restart the scan there
	 for (; I < End && (*I == '\n' || *I == '\r'); ++I);
	 End -= I-Buffer;
	 memmove(Buffer,I,End-Buffer);
	 I = Buffer;

	 List.push_back(PartialMessage);
	 PartialMessage.clear();
      }
      if (End != Buffer)
      {
	 // If there's text left in the buffer, store it
	 // in PartialMessage and throw the rest of the buffer
	 // away. This allows us to handle messages that
	 // are longer than the static buffer size.
	 PartialMessage += string(Buffer, End);
	 End = Buffer;
      }
      else
      {
	 // BUG ALERT: if a message block happens to end at a
	 // multiple of 64000 characters, this will cause it to
	 // terminate early, leading to a badly formed block and
	 // probably crashing the method. However, this is the only
	 // way we have to find the end of the message block. I have
	 // an idea of how to fix this, but it will require changes
	 // to the protocol (essentially to mark the beginning and
	 // end of the block).
	 //
	 // -- dburrows 2008-04-02
	 return true;
      }

      // An unfinished message is pending: wait for more input on Fd
      if (WaitFd(Fd) == false)
	 return false;
   }
}
829 /*}}}*/
830 // MonthConv - Converts a month string into a number /*{{{*/
831 // ---------------------------------------------------------------------
832 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
833 Made it a bit more robust with a few tolower_ascii though. */
834 static int MonthConv(char *Month)
835 {
836 switch (tolower_ascii(*Month))
837 {
838 case 'a':
839 return tolower_ascii(Month[1]) == 'p'?3:7;
840 case 'd':
841 return 11;
842 case 'f':
843 return 1;
844 case 'j':
845 if (tolower_ascii(Month[1]) == 'a')
846 return 0;
847 return tolower_ascii(Month[2]) == 'n'?5:6;
848 case 'm':
849 return tolower_ascii(Month[2]) == 'r'?2:4;
850 case 'n':
851 return 10;
852 case 'o':
853 return 9;
854 case 's':
855 return 8;
856
857 // Pretend it is January..
858 default:
859 return 0;
860 }
861 }
862 /*}}}*/
863 // timegm - Internal timegm if the gnu version is not available /*{{{*/
864 // ---------------------------------------------------------------------
865 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
866 than local timezone (mktime assumes the latter).
867
868 This function is a nonstandard GNU extension that is also present on
869 the BSDs and maybe other systems. For others we follow the advice of
870 the manpage of timegm and use his portable replacement. */
871 #ifndef HAVE_TIMEGM
872 static time_t timegm(struct tm *t)
873 {
874 char *tz = getenv("TZ");
875 setenv("TZ", "", 1);
876 tzset();
877 time_t ret = mktime(t);
878 if (tz)
879 setenv("TZ", tz, 1);
880 else
881 unsetenv("TZ");
882 tzset();
883 return ret;
884 }
885 #endif
886 /*}}}*/
887 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
888 // ---------------------------------------------------------------------
889 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
890 with one exception: All timezones (%Z) are accepted but the protocol
891 says that it MUST be GMT, but this one is equal to UTC which we will
892 encounter from time to time (e.g. in Release files) so we accept all
893 here and just assume it is GMT (or UTC) later on */
/* Parses str in one of the three date formats of RFC 2616 section 3.3.1
   and stores the result, taken as UTC, in time. Returns false (leaving
   time untouched) if str matches none of the formats.

   The names are parsed in the "C" locale; the locale that was active on
   entry is restored afterwards. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   // Fields strptime does not set must not be left indeterminate
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // Remember the active locale. The returned buffer may be reused by
   // later setlocale calls, hence the copy. An empty string (environment
   // default) is the fallback if the locale cannot be queried.
   char const * const active = setlocale(LC_ALL, NULL);
   std::string const savedLocale = (active != NULL) ? active : "";

   setlocale (LC_ALL,"C");
   bool const invalid =
   // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
   // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
   // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   setlocale (LC_ALL,savedLocale.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
912 /*}}}*/
913 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
914 // ---------------------------------------------------------------------
915 /* */
/* Parses an FTP MDTM timestamp (YYYYMMDDHHMMSS) and stores it, taken as
   UTC, in time. Returns false (leaving time untouched) if str cannot be
   parsed. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // Fields strptime does not set (e.g. tm_isdst) must not be left
   // indeterminate before the struct is handed to timegm
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM contains no whitespace; the blanks in the format only separate
   // the conversions, as whitespace in a strptime format also matches
   // zero whitespace characters in the input
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
926 /*}}}*/
927 // StrToTime - Converts a string into a time_t /*{{{*/
928 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format */
934 bool StrToTime(const string &Val,time_t &Result)
935 {
936 struct tm Tm;
937 char Month[10];
938
939 // Skip the day of the week
940 const char *I = strchr(Val.c_str(), ' ');
941
942 // Handle RFC 1123 time
943 Month[0] = 0;
944 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
945 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
946 {
947 // Handle RFC 1036 time
948 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
949 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
950 Tm.tm_year += 1900;
951 else
952 {
953 // asctime format
954 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
955 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
956 {
957 // 'ftp' time
958 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
959 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
960 return false;
961 Tm.tm_mon--;
962 }
963 }
964 }
965
966 Tm.tm_isdst = 0;
967 if (Month[0] != 0)
968 Tm.tm_mon = MonthConv(Month);
969 else
970 Tm.tm_mon = 0; // we don't have a month, so pick something
971 Tm.tm_year -= 1900;
972
973 // Convert to local time and then to GMT
974 Result = timegm(&Tm);
975 return true;
976 }
977 /*}}}*/
978 // StrToNum - Convert a fixed length string to a number /*{{{*/
979 // ---------------------------------------------------------------------
980 /* This is used in decoding the crazy fixed length string headers in
981 tar and ar files. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // Work on a terminated copy of the field; fields that do not fit are
   // rejected without touching Res
   char Field[30];
   if (Len > sizeof(Field) - 1)
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   // Successful if strtoul consumed at least one character
   char *Stop;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
1004 /*}}}*/
1005 // StrToNum - Convert a fixed length string to a number /*{{{*/
1006 // ---------------------------------------------------------------------
1007 /* This is used in decoding the crazy fixed length string headers in
1008 tar and ar files. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   // The field is copied so that it can be terminated; Res is left alone
   // if the field is too long for the scratch buffer
   char Scratch[30];
   if (Len >= sizeof(Scratch))
      return false;
   memcpy(Scratch,Str,Len);
   Scratch[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *Cursor = Scratch;
   while (*Cursor == ' ')
      ++Cursor;
   if (*Cursor == 0)
      return true;

   // The conversion starts at the beginning of the field and fails if
   // strtoull could not consume anything
   char *Stop;
   Res = strtoull(Scratch,&Stop,Base);
   if (Stop == Scratch)
      return false;
   return true;
}
1031 /*}}}*/
1032
1033 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1034 // ---------------------------------------------------------------------
1035 /* This is used in decoding the 256bit encoded fixed length fields in
1036 tar files */
/* Decodes a base-256 field of Len bytes, marked by the high bit of the
   first byte being set, into Res. The remaining seven bits of the first
   byte and all following bytes form a big-endian number. Returns false
   (leaving Res untouched) if the marker bit is not set. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
   {
      // Go through unsigned char: a plain char >= 0x80 would be
      // sign-extended and corrupt the digits accumulated so far
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   }
   return true;
}
1049 /*}}}*/
1050 // HexDigit - Convert a hex character into an integer /*{{{*/
1051 // ---------------------------------------------------------------------
1052 /* Helper for Hex2Num */
static int HexDigit(int c)
{
   bool const Decimal = ('0' <= c && c <= '9');
   bool const Lower = ('a' <= c && c <= 'f');
   bool const Upper = ('A' <= c && c <= 'F');

   // Anything that is not a hex digit counts as zero
   return Decimal ? c - '0' :
	  Lower ? c - 'a' + 10 :
	  Upper ? c - 'A' + 10 : 0;
}
1063 /*}}}*/
1064 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1065 // ---------------------------------------------------------------------
1066 /* The length of the buffer must be exactly 1/2 the length of the string. */
1067 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1068 {
1069 if (Str.length() != Length*2)
1070 return false;
1071
1072 // Convert each digit. We store it in the same order as the string
1073 int J = 0;
1074 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1075 {
1076 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1077 return false;
1078
1079 Num[J] = HexDigit(I[0]) << 4;
1080 Num[J] += HexDigit(I[1]);
1081 }
1082
1083 return true;
1084 }
1085 /*}}}*/
1086 // TokSplitString - Split a string up by a given token /*{{{*/
1087 // ---------------------------------------------------------------------
1088 /* This is intended to be a faster splitter, it does not use dynamic
1089 memories. Input is changed to insert nulls at each token location. */
bool TokSplitString(char Tok,char *Input,char **List,
		    unsigned long ListMax)
{
   char * const Last = Input + strlen(Input);

   // Strip any leading spaces
   char *Field = Input;
   while (*Field != 0 && isspace(*Field) != 0)
      ++Field;

   unsigned long Used = 0;
   char *Scan = Field;
   while (Scan != Last)
   {
      // Skip to the next Token
      while (Scan != Last && *Scan != Tok)
	 ++Scan;

      // Terminate the field in front of trailing spaces/tokens
      char *Tail = Scan;
      while (Tail > Field && (Tail[-1] == Tok || isspace(Tail[-1]) != 0))
	 --Tail;
      *Tail = 0;

      // Store the field; the list needs room for the final null entry,
      // if that is gone the last field is dropped in its favour
      List[Used] = Field;
      ++Used;
      if (Used >= ListMax)
      {
	 List[Used - 1] = 0;
	 return false;
      }

      // Advance to the start of the next field, stepping over tokens,
      // spaces and the terminators written above
      while (Scan != Last &&
	     (*Scan == Tok || isspace(*Scan) != 0 || *Scan == 0))
	 ++Scan;
      Field = Scan;
   }

   List[Used] = 0;
   return true;
}
1125 /*}}}*/
1126 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1127 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.
   Empty elements between two separators are kept, but a separator at the
   very end of the string does not produce a trailing empty element. An
   empty haystack yields a vector with one empty string. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   std::string::const_iterator const last = haystack.end();
   std::string::const_iterator start = haystack.begin();
   for (;;)
   {
      // find the end of the current element
      std::string::const_iterator stop = start;
      while (stop != last && *stop != split)
         ++stop;
      exploded.push_back(std::string(start, stop));

      // Only step over the separator if there is one: forming an iterator
      // beyond end() is not allowed
      if (stop == last)
         break;
      start = stop + 1;

      // the separator was the last character, no empty element for it
      if (start == last)
         break;
   }
   return exploded;
}
1143 /*}}}*/
1144 // StringSplit - split a string into a string vector by token /*{{{*/
1145 // ---------------------------------------------------------------------
/* Cuts s at every occurrence of sep and returns the pieces in order. An
   empty sep is bogus and gives an empty vector. Once the result would
   reach maxsplit elements the whole rest of the string, separators
   included, becomes the last element. See header for details. */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
                                     unsigned int maxsplit)
{
   std::vector<std::string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);

      // either the element limit is reached or nothing is left to cut:
      // whatever remains is the final item
      if (pieces.size() + 1 >= maxsplit || hit == std::string::npos)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1174 /*}}}*/
1175 // RegexChoice - Simple regex list/list matcher /*{{{*/
1176 // ---------------------------------------------------------------------
1177 /* */
1178 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1179 const char **ListEnd)
1180 {
1181 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1182 R->Hit = false;
1183
1184 unsigned long Hits = 0;
1185 for (; ListBegin != ListEnd; ListBegin++)
1186 {
1187 // Check if the name is a regex
1188 const char *I;
1189 bool Regex = true;
1190 for (I = *ListBegin; *I != 0; I++)
1191 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1192 break;
1193 if (*I == 0)
1194 Regex = false;
1195
1196 // Compile the regex pattern
1197 regex_t Pattern;
1198 if (Regex == true)
1199 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1200 REG_NOSUB) != 0)
1201 Regex = false;
1202
1203 // Search the list
1204 bool Done = false;
1205 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1206 {
1207 if (R->Str[0] == 0)
1208 continue;
1209
1210 if (strcasecmp(R->Str,*ListBegin) != 0)
1211 {
1212 if (Regex == false)
1213 continue;
1214 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1215 continue;
1216 }
1217 Done = true;
1218
1219 if (R->Hit == false)
1220 Hits++;
1221
1222 R->Hit = true;
1223 }
1224
1225 if (Regex == true)
1226 regfree(&Pattern);
1227
1228 if (Done == false)
1229 _error->Warning(_("Selection %s not found"),*ListBegin);
1230 }
1231
1232 return Hits;
1233 }
1234 /*}}}*/
1235 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1236 // ---------------------------------------------------------------------
1237 /* This is used to make the internationalization strings easier to translate
1238 and to allow reordering of parameters */
/* One formatting attempt for ioprintf/strprintf: formats into a temporary
   buffer of size bytes. If the result fits it is written to out and true
   is returned. Otherwise nothing is written, size is updated to the buffer
   size the next attempt should use and false is returned. args is consumed
   by the attempt, so the caller has to restart it (va_end/va_start) before
   trying again. */
static bool iovprintf(std::ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // A vector instead of malloc(): allocation failure is reported by an
   // exception rather than by handing a null buffer to vsnprintf, and the
   // memory is released on every path. Never ask for a non-positive size.
   size_t const capacity = (size > 0) ? static_cast<size_t>(size) : 1;
   std::vector<char> buffer(capacity);

   ssize_t const n = vsnprintf(&buffer[0], capacity, format, args);
   if (n > -1 && static_cast<size_t>(n) < capacity) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      // C99 behaviour: n is the length needed, add room for the terminator
      size = n + 1;
   else
      // no length reported, just try with twice the room
      size = static_cast<ssize_t>(capacity) * 2;
   return false;
}
1256 void ioprintf(ostream &out,const char *format,...)
1257 {
1258 va_list args;
1259 ssize_t size = 400;
1260 while (true) {
1261 va_start(args,format);
1262 if (iovprintf(out, format, args, size) == true)
1263 return;
1264 va_end(args);
1265 }
1266 }
1267 void strprintf(string &out,const char *format,...)
1268 {
1269 va_list args;
1270 ssize_t size = 400;
1271 std::ostringstream outstr;
1272 while (true) {
1273 va_start(args,format);
1274 if (iovprintf(outstr, format, args, size) == true)
1275 break;
1276 va_end(args);
1277 }
1278 out = outstr.str();
1279 }
1280 /*}}}*/
1281 // safe_snprintf - Safer snprintf /*{{{*/
1282 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string, or End itself if the output did
   not fit or formatting failed. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs: the result can be passed on as the Buffer of the
   next call. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // no room at all, do not even look at the format
   if (Buffer >= End)
      return End;

   va_list ap;
   va_start(ap,Format);
   int const Written = vsnprintf(Buffer,End - Buffer,Format,ap);
   va_end(ap);

   // vsnprintf reports the length it wanted to write, clamp that to End
   bool const Fits = (Written >= 0 && Buffer + Written <= End);
   return (Fits == true) ? Buffer + Written : End;
}
1302 /*}}}*/
1303 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1304 // ---------------------------------------------------------------------
/* Returns everything after the first ':' of VerStr, or VerStr unchanged
   if it contains no colon at all. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   if (Colon != std::string::npos)
      return VerStr.substr(Colon + 1);
   return VerStr;
}
1312 /*}}}*/
1313 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1314 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is noticeably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ASCII chars anyway. Only 'A'-'Z' are
   mapped, every other value is returned unchanged. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return (IsUpper == true) ? c + ('a' - 'A') : c;
}
1325 /*}}}*/
1326
// CheckDomainList - See if Host is in a comma-separated list		/*{{{*/
1328 // ---------------------------------------------------------------------
1329 /* The domain list is a comma seperate list of domains that are suffix
1330 matched against the argument */
1331 bool CheckDomainList(const string &Host,const string &List)
1332 {
1333 string::const_iterator Start = List.begin();
1334 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1335 {
1336 if (Cur < List.end() && *Cur != ',')
1337 continue;
1338
1339 // Match the end of the string..
1340 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1341 Cur - Start != 0 &&
1342 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1343 return true;
1344
1345 Start = Cur + 1;
1346 }
1347 return false;
1348 }
1349 /*}}}*/
1350 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1351 // ---------------------------------------------------------------------
/* Counts the entries of str_array in front of its terminating NULL
   pointer. The array has to be NULL terminated. */
size_t strv_length(const char **str_array)
{
   const char **Cur = str_array;
   while (*Cur != NULL)
      ++Cur;
   return Cur - str_array;
}
1361
1362 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1363 // ---------------------------------------------------------------------
/* Returns a copy of input in which the escape sequences \0XX (two octal
   digits) and \xXX (two hex digits) are replaced by the character they
   encode and \\ by a single backslash. A backslash followed by any other
   character is dropped together with that character, a backslash at the
   very end of the input is dropped as well. If fewer than two characters
   follow a \0 or \x the two escape characters are dropped and whatever
   follows is copied as ordinary text. */
std::string DeEscapeString(const std::string &input)
{
   char tmp[3];
   std::string output;
   std::string::const_iterator const last = input.end();
   for (std::string::const_iterator it = input.begin(); it != last; ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read after the backslash
      if (last - it < 2)
         continue;

      // read it
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
         output += '\\';
         continue;
      }

      int base = 0;
      switch (*it)
      {
         case '0':
            base = 8;
            break;
         case 'x':
            base = 16;
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
      if (base == 0)
         continue;

      /* Both digits have to be inside the string: it[2] needs at least
         three characters left including *it. Checking "it + 2 <= end"
         would read end() itself and leave the iterator on end() before
         the loop increments it once more. */
      if (last - it > 2)
      {
         tmp[0] = it[1];
         tmp[1] = it[2];
         tmp[2] = 0;
         output += static_cast<char>(strtol(tmp, 0, base));
         it += 2;
      }
   }
   return output;
}
1423 /*}}}*/
1424 // URI::CopyFrom - Copy from an object /*{{{*/
1425 // ---------------------------------------------------------------------
1426 /* This parses the URI into all of its components */
1427 void URI::CopyFrom(const string &U)
1428 {
1429 string::const_iterator I = U.begin();
1430
1431 // Locate the first colon, this separates the scheme
1432 for (; I < U.end() && *I != ':' ; ++I);
1433 string::const_iterator FirstColon = I;
1434
1435 /* Determine if this is a host type URI with a leading double //
1436 and then search for the first single / */
1437 string::const_iterator SingleSlash = I;
1438 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1439 SingleSlash += 3;
1440
1441 /* Find the / indicating the end of the hostname, ignoring /'s in the
1442 square brackets */
1443 bool InBracket = false;
1444 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1445 {
1446 if (*SingleSlash == '[')
1447 InBracket = true;
1448 if (InBracket == true && *SingleSlash == ']')
1449 InBracket = false;
1450 }
1451
1452 if (SingleSlash > U.end())
1453 SingleSlash = U.end();
1454
1455 // We can now write the access and path specifiers
1456 Access.assign(U.begin(),FirstColon);
1457 if (SingleSlash != U.end())
1458 Path.assign(SingleSlash,U.end());
1459 if (Path.empty() == true)
1460 Path = "/";
1461
1462 // Now we attempt to locate a user:pass@host fragment
1463 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1464 FirstColon += 3;
1465 else
1466 FirstColon += 1;
1467 if (FirstColon >= U.end())
1468 return;
1469
1470 if (FirstColon > SingleSlash)
1471 FirstColon = SingleSlash;
1472
1473 // Find the colon...
1474 I = FirstColon + 1;
1475 if (I > SingleSlash)
1476 I = SingleSlash;
1477 for (; I < SingleSlash && *I != ':'; ++I);
1478 string::const_iterator SecondColon = I;
1479
1480 // Search for the @ after the colon
1481 for (; I < SingleSlash && *I != '@'; ++I);
1482 string::const_iterator At = I;
1483
1484 // Now write the host and user/pass
1485 if (At == SingleSlash)
1486 {
1487 if (FirstColon < SingleSlash)
1488 Host.assign(FirstColon,SingleSlash);
1489 }
1490 else
1491 {
1492 Host.assign(At+1,SingleSlash);
1493 // username and password must be encoded (RFC 3986)
1494 User.assign(DeQuoteString(FirstColon,SecondColon));
1495 if (SecondColon < At)
1496 Password.assign(DeQuoteString(SecondColon+1,At));
1497 }
1498
1499 // Now we parse the RFC 2732 [] hostnames.
1500 unsigned long PortEnd = 0;
1501 InBracket = false;
1502 for (unsigned I = 0; I != Host.length();)
1503 {
1504 if (Host[I] == '[')
1505 {
1506 InBracket = true;
1507 Host.erase(I,1);
1508 continue;
1509 }
1510
1511 if (InBracket == true && Host[I] == ']')
1512 {
1513 InBracket = false;
1514 Host.erase(I,1);
1515 PortEnd = I;
1516 continue;
1517 }
1518 I++;
1519 }
1520
1521 // Tsk, weird.
1522 if (InBracket == true)
1523 {
1524 Host.clear();
1525 return;
1526 }
1527
1528 // Now we parse off a port number from the hostname
1529 Port = 0;
1530 string::size_type Pos = Host.rfind(':');
1531 if (Pos == string::npos || Pos < PortEnd)
1532 return;
1533
1534 Port = atoi(string(Host,Pos+1).c_str());
1535 Host.assign(Host,0,Pos);
1536 }
1537 /*}}}*/
1538 // URI::operator string - Convert the URI to a string /*{{{*/
1539 // ---------------------------------------------------------------------
1540 /* */
1541 URI::operator string()
1542 {
1543 string Res;
1544
1545 if (Access.empty() == false)
1546 Res = Access + ':';
1547
1548 if (Host.empty() == false)
1549 {
1550 if (Access.empty() == false)
1551 Res += "//";
1552
1553 if (User.empty() == false)
1554 {
1555 // FIXME: Technically userinfo is permitted even less
1556 // characters than these, but this is not conveniently
1557 // expressed with a blacklist.
1558 Res += QuoteString(User, ":/?#[]@");
1559 if (Password.empty() == false)
1560 Res += ":" + QuoteString(Password, ":/?#[]@");
1561 Res += "@";
1562 }
1563
1564 // Add RFC 2732 escaping characters
1565 if (Access.empty() == false &&
1566 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1567 Res += '[' + Host + ']';
1568 else
1569 Res += Host;
1570
1571 if (Port != 0)
1572 {
1573 char S[30];
1574 sprintf(S,":%u",Port);
1575 Res += S;
1576 }
1577 }
1578
1579 if (Path.empty() == false)
1580 {
1581 if (Path[0] != '/')
1582 Res += "/" + Path;
1583 else
1584 Res += Path;
1585 }
1586
1587 return Res;
1588 }
1589 /*}}}*/
1590 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1591 // ---------------------------------------------------------------------
1592 /* */
1593 string URI::SiteOnly(const string &URI)
1594 {
1595 ::URI U(URI);
1596 U.User.clear();
1597 U.Password.clear();
1598 U.Path.clear();
1599 return U;
1600 }
1601 /*}}}*/
1602 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1603 // ---------------------------------------------------------------------
1604 /* */
1605 string URI::NoUserPassword(const string &URI)
1606 {
1607 ::URI U(URI);
1608 U.User.clear();
1609 U.Password.clear();
1610 return U;
1611 }
1612 /*}}}*/