]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Merge remote-tracking branch 'mvo/feature/srcrec-enum2' into debian/sid
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
46 namespace APT {
47 namespace String {
/* Returns a copy of s with leading and trailing blanks, tabs and newlines
   removed. An empty or all-whitespace input yields an empty string. */
std::string Strip(const std::string &s)
{
   static const char * const Blank = " \t\n";

   std::string::size_type const first = s.find_first_not_of(Blank);
   if (first == std::string::npos)
      return std::string();

   // a non-blank character exists, so this search cannot fail
   std::string::size_type const last = s.find_last_not_of(Blank);
   return std::string(s, first, last - first + 1);
}
57
/* Returns true if the last end.size() characters of s are equal to end.
   An empty end always matches. */
bool Endswith(const std::string &s, const std::string &end)
{
   std::string::size_type const tail = end.size();
   if (s.size() < tail)
      return false;
   return s.compare(s.size() - tail, tail, end) == 0;
}
64
65 }
66 }
67 /*}}}*/
68 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
69 // ---------------------------------------------------------------------
70 /* This is handy to use before displaying some information to the end user */
71 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
72 {
73 iconv_t cd;
74 const char *inbuf;
75 char *inptr, *outbuf;
76 size_t insize, bufsize;
77 dest->clear();
78
79 cd = iconv_open(codeset, "UTF-8");
80 if (cd == (iconv_t)(-1)) {
81 // Something went wrong
82 if (errno == EINVAL)
83 _error->Error("conversion from 'UTF-8' to '%s' not available",
84 codeset);
85 else
86 perror("iconv_open");
87
88 return false;
89 }
90
91 insize = bufsize = orig.size();
92 inbuf = orig.data();
93 inptr = (char *)inbuf;
94 outbuf = new char[bufsize];
95 size_t lastError = -1;
96
97 while (insize != 0)
98 {
99 char *outptr = outbuf;
100 size_t outsize = bufsize;
101 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
102 dest->append(outbuf, outptr - outbuf);
103 if (err == (size_t)(-1))
104 {
105 switch (errno)
106 {
107 case EILSEQ:
108 insize--;
109 inptr++;
110 // replace a series of unknown multibytes with a single "?"
111 if (lastError != insize) {
112 lastError = insize - 1;
113 dest->append("?");
114 }
115 break;
116 case EINVAL:
117 insize = 0;
118 break;
119 case E2BIG:
120 if (outptr == outbuf)
121 {
122 bufsize *= 2;
123 delete[] outbuf;
124 outbuf = new char[bufsize];
125 }
126 break;
127 }
128 }
129 }
130
131 delete[] outbuf;
132
133 iconv_close(cd);
134
135 return true;
136 }
137 /*}}}*/
138 // strstrip - Remove white space from the front and back of a string /*{{{*/
139 // ---------------------------------------------------------------------
140 /* This is handy to use when parsing a file. It also removes \n's left
141 over from fgets and company */
142 char *_strstrip(char *String)
143 {
144 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
145
146 if (*String == 0)
147 return String;
148 return _strrstrip(String);
149 }
150 /*}}}*/
151 // strrstrip - Remove white space from the back of a string /*{{{*/
152 // ---------------------------------------------------------------------
/* Cuts trailing blanks, tabs, newlines and carriage returns off String by
   writing a terminator in place. Returns String. */
char *_strrstrip(char *String)
{
   // End points one past the last character that is kept; stepping back
   // from the terminator never forms a pointer before the buffer
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' || End[-1] == '\n' ||
           End[-1] == '\r'))
      --End;
   *End = 0;
   return String;
}
162 /*}}}*/
163 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
164 // ---------------------------------------------------------------------
165 /* */
/* Replaces tabs in String by spaces in place. Len is the size of the buffer
   String lives in. If fewer than 8 bytes remain between a tab and the end of
   the buffer the string is cut at that tab. Returns String.
   NOTE(review): the fill width below is computed from (String - I) % 8, which
   is negative; it is kept as is because the produced output depends on it --
   confirm the intended tab stops before changing it. */
char *_strtabexpand(char *String,size_t Len)
{
   char * const BufEnd = String + Len;
   for (char *I = String; I < BufEnd && *I != 0; I++)
   {
      if (*I != '\t')
         continue;
      if (BufEnd - I < 8)
      {
         *I = 0;
         return String;
      }

      /* Assume the start of the string is 0 and find the next 8 char
         division */
      int Fill;
      if (String == I)
         Fill = 1;
      else
         Fill = 8 - ((String - I) % 8);
      Fill -= 2;
      if (Fill <= 0)
      {
         *I = ' ';
         continue;
      }

      // shift the text after the tab (including its terminator) to the right;
      // strlen(I) counts the tab itself, so this is exactly tail + terminator
      memmove(I + Fill,I + 1,strlen(I));
      // overwrite the tab and the gap with spaces; I ends on the last one
      for (char *J = I; J + Fill != I; *I = ' ', I++);
   }
   return String;
}
197 /*}}}*/
198 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
199 // ---------------------------------------------------------------------
200 /* This grabs a single word, converts any % escaped characters to their
201 proper values and advances the pointer. Double quotes are understood
202 and stripped out as well. This is for URI/URL parsing. It also can
203 understand [] brackets.*/
/* Extracts the next word from String into Res and advances String past the
   word and the whitespace following it. Returns false if only blanks are
   left or a '"' or '[' has no closing partner.
   - "..." and [...] sections may contain whitespace; the quotes are dropped,
     the brackets are kept.
   - %XX escapes with two hex digits are decoded.
   - Res ends at the first decoded NUL byte, if any.
   NOTE(review): blanks in front of the word are copied into Res as well,
   because decoding starts at String and not at the first non-blank. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (; *C != 0 && isspace((unsigned char)*C) == 0; ++C)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters. A std::string is used so that words of any
   // length are handled without a fixed size buffer.
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && C - Start > 2 &&
          isxdigit((unsigned char)Start[1]) && isxdigit((unsigned char)Start[2]))
      {
         char const Tmp[3] = {Start[1], Start[2], 0};
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }
   // assign as C string: the result stops at a decoded NUL byte
   Res = Word.c_str();

   // Skip ending white space
   while (*C != 0 && isspace((unsigned char)*C) != 0)
      ++C;
   String = C;
   return true;
}
260 /*}}}*/
261 // ParseCWord - Parses a string like a C "" expression /*{{{*/
262 // ---------------------------------------------------------------------
263 /* This expects a series of space separated strings enclosed in ""'s.
264 It concatenates the ""'s into a single string. */
/* Concatenates a sequence of "..." sections separated by whitespace into
   Res. Each run of whitespace between sections contributes one blank.
   Returns false (leaving Res and String untouched) if nothing but blanks is
   left, the input is 1024 characters or longer, a quote is not closed, or a
   non-whitespace character appears outside of quotes. On success String is
   moved to the end of the input. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return false;

   // input that would not fit a 1024 byte buffer is refused
   if (strlen(String) >= 1024)
      return false;

   std::string Joined;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
         // copy everything up to the closing quote
         const char * const Close = strchr(Cur + 1, '"');
         if (Close == NULL)
            return false;
         Joined.append(Cur + 1, Close - (Cur + 1));
         Cur = Close + 1;
         continue;
      }

      // outside of quotes only whitespace is acceptable
      if (isspace(*Cur) == 0)
         return false;
      // only the first character of a whitespace run produces a blank
      if (Cur == String || isspace(Cur[-1]) == 0)
         Joined += ' ';
      ++Cur;
   }

   Res = Joined;
   String = Cur;
   return true;
}
302 /*}}}*/
303 // QuoteString - Convert a string into quoted form /*{{{*/
304 // ---------------------------------------------------------------------
305 /* */
/* Returns Str with every byte that is listed in Bad, is '%', is not
   printable, or lies outside 0x21..0x7E replaced by %xx (two lower case hex
   digits). Bytes are treated as unsigned so that values >= 0x80 are encoded
   as two digits as well. */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   std::string Res;
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (strchr(Bad,Ch) != 0 || isprint(Ch) == 0 ||
          Ch == 0x25 ||               // percent '%' char
          Ch <= 0x20 || Ch >= 0x7F)   // control chars, space and 8 bit bytes
      {
         char Buf[4];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
324 /*}}}*/
325 // DeQuoteString - Convert a string from quoted form /*{{{*/
326 // ---------------------------------------------------------------------
327 /* This undoes QuoteString */
/* Decodes %XX escapes in the range [begin,end). A '%' that is not followed
   by two hex digits inside the range is copied unchanged. */
std::string DeQuoteString(std::string::const_iterator const &begin,
                          std::string::const_iterator const &end)
{
   std::string Res;
   for (std::string::const_iterator I = begin; I != end; ++I)
   {
      // compare distances instead of forming an iterator beyond end
      if (*I == '%' && end - I > 2 &&
          isxdigit((unsigned char)I[1]) && isxdigit((unsigned char)I[2]))
      {
         char const Tmp[3] = {I[1], I[2], 0};
         Res += (char)strtol(Tmp,0,16);
         // the loop increment steps over the second digit
         I += 2;
      }
      else
         Res += *I;
   }
   return Res;
}

/* Decodes %XX escapes in the whole string. */
std::string DeQuoteString(const std::string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
354
355 /*}}}*/
356 // SizeToStr - Convert a long into a human readable size /*{{{*/
357 // ---------------------------------------------------------------------
358 /* A max of 4 digits are shown before conversion to the next highest unit.
359 The max length of the string will be 5 chars unless the size is > 10
360 YottaBytes (E24) */
/* Formats Size with a decimal (factor 1000) unit suffix: the number, a blank
   and one of k, M, G, T, P, E, Z, Y -- plain bytes get no suffix character,
   so the result then ends in the blank. The sign of Size is dropped. Values
   too large for the biggest unit are printed in that unit instead of being
   left unformatted. */
std::string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   int const Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // scale down until at most four digits remain or the units run out
   int I = 0;
   while (I < Last && !(ASize < 10000))
   {
      ASize /= 1000.0;
      I++;
   }

   char S[300];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);

   return S;
}
393 /*}}}*/
394 // TimeToStr - Convert the time into a string /*{{{*/
395 // ---------------------------------------------------------------------
396 /* Converts a number of seconds to a hms format */
397 string TimeToStr(unsigned long Sec)
398 {
399 char S[300];
400
401 while (1)
402 {
403 if (Sec > 60*60*24)
404 {
405 //d means days, h means hours, min means minutes, s means seconds
406 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
407 break;
408 }
409
410 if (Sec > 60*60)
411 {
412 //h means hours, min means minutes, s means seconds
413 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
414 break;
415 }
416
417 if (Sec > 60)
418 {
419 //min means minutes, s means seconds
420 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
421 break;
422 }
423
424 //s means seconds
425 sprintf(S,_("%lis"),Sec);
426 break;
427 }
428
429 return S;
430 }
431 /*}}}*/
432 // SubstVar - Substitute a string for another string /*{{{*/
433 // ---------------------------------------------------------------------
434 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with every non-overlapping occurrence of Subst replaced by
   Contents, scanning from left to right. Text inserted by a replacement is
   not scanned again. An empty Subst leaves Str unchanged. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   // an empty pattern matches everywhere and would never advance
   if (Subst.empty() == true)
      return Str;

   std::string::size_type Pos = Str.find(Subst);
   if (Pos == std::string::npos)
      return Str;

   std::string Temp;
   std::string::size_type OldPos = 0;
   do
   {
      // copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
      Pos = Str.find(Subst,OldPos);
   } while (Pos != std::string::npos);

   // whatever follows the last match
   Temp.append(Str,OldPos,std::string::npos);
   return Temp;
}
453
454 string SubstVar(string Str,const struct SubstVar *Vars)
455 {
456 for (; Vars->Subst != 0; Vars++)
457 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
458 return Str;
459 }
460 /*}}}*/
461 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
462 // ---------------------------------------------------------------------
463 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string consisting of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Prefix;
   // Depth + 1 is computed in unsigned arithmetic
   unsigned long Remaining = Depth + 1;
   while (Remaining != 0)
   {
      Prefix += Separator;
      --Remaining;
   }
   return Prefix;
}
471 /*}}}*/
472 // URItoFileName - Convert the uri into a unique file name /*{{{*/
473 // ---------------------------------------------------------------------
474 /* This converts a URI into a safe filename. It quotes all unsafe characters
475 and converts / to _ and removes the scheme identifier. The resulting
476 file name should be unique and never occur again for a different file */
477 string URItoFileName(const string &URI)
478 {
479 // Nuke 'sensitive' items
480 ::URI U(URI);
481 U.User.clear();
482 U.Password.clear();
483 U.Access.clear();
484
485 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
486 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
487 replace(NewURI.begin(),NewURI.end(),'/','_');
488 return NewURI;
489 }
490 /*}}}*/
491 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
492 // ---------------------------------------------------------------------
493 /* This routine performs a base64 transformation on a string. It was ripped
494 from wget and then patched and bug fixed.
495
496 This spec can be found in rfc2045 */
/* Returns the base64 encoding of S using the alphabet A-Z a-z 0-9 + / and
   '=' padding. Input bytes are handled as unsigned values so that bytes
   >= 0x80 index the table correctly. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

   std::string::size_type const Len = S.length();

   // Pre-allocate: four output characters per (started) group of three bytes
   std::string Final;
   Final.reserve(4 * ((Len + 2) / 3));

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (std::string::size_type I = 0; I < Len; I += 3)
   {
      std::string::size_type const Avail = Len - I;
      unsigned char const B0 = S[I];
      unsigned char const B1 = (Avail > 1) ? S[I + 1] : 0;
      unsigned char const B2 = (Avail > 2) ? S[I + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 0x3) << 4) | (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
547 /*}}}*/
548 // stringcmp - Arbitrary string compare /*{{{*/
549 // ---------------------------------------------------------------------
550 /* This safely compares two non-null terminated strings of arbitrary
551 length */
/* Compares the ranges [A,AEnd) and [B,BEnd). Returns 0 if they are equal.
   If one range is a proper prefix of the other, the shorter one yields 1
   when passed as A and -1 when passed as B. Otherwise the first differing
   characters decide: -1 if *A < *B, else 1. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone && BDone)
      return 0;
   if (ADone)
      return 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
568
569 #if __GNUC__ >= 3
570 int stringcmp(string::const_iterator A,string::const_iterator AEnd,
571 const char *B,const char *BEnd)
572 {
573 for (; A != AEnd && B != BEnd; A++, B++)
574 if (*A != *B)
575 break;
576
577 if (A == AEnd && B == BEnd)
578 return 0;
579 if (A == AEnd)
580 return 1;
581 if (B == BEnd)
582 return -1;
583 if (*A < *B)
584 return -1;
585 return 1;
586 }
/* Same comparison as the pointer variant, with both ranges given by string
   iterators: 0 for equal ranges, 1 if A is a proper prefix of B, -1 if B is
   a proper prefix of A, otherwise decided by the first differing
   characters. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone && BDone)
      return 0;
   if (ADone)
      return 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
604 #endif
605 /*}}}*/
606 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
607 // ---------------------------------------------------------------------
608 /* */
609 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
610 {
611 for (; A != AEnd && B != BEnd; A++, B++)
612 if (tolower_ascii(*A) != tolower_ascii(*B))
613 break;
614
615 if (A == AEnd && B == BEnd)
616 return 0;
617 if (A == AEnd)
618 return 1;
619 if (B == BEnd)
620 return -1;
621 if (tolower_ascii(*A) < tolower_ascii(*B))
622 return -1;
623 return 1;
624 }
625 #if __GNUC__ >= 3
626 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
627 const char *B,const char *BEnd)
628 {
629 for (; A != AEnd && B != BEnd; A++, B++)
630 if (tolower_ascii(*A) != tolower_ascii(*B))
631 break;
632
633 if (A == AEnd && B == BEnd)
634 return 0;
635 if (A == AEnd)
636 return 1;
637 if (B == BEnd)
638 return -1;
639 if (tolower_ascii(*A) < tolower_ascii(*B))
640 return -1;
641 return 1;
642 }
643 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
644 string::const_iterator B,string::const_iterator BEnd)
645 {
646 for (; A != AEnd && B != BEnd; A++, B++)
647 if (tolower_ascii(*A) != tolower_ascii(*B))
648 break;
649
650 if (A == AEnd && B == BEnd)
651 return 0;
652 if (A == AEnd)
653 return 1;
654 if (B == BEnd)
655 return -1;
656 if (tolower_ascii(*A) < tolower_ascii(*B))
657 return -1;
658 return 1;
659 }
660 #endif
661 /*}}}*/
662 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
663 // ---------------------------------------------------------------------
664 /* The format is like those used in package files and the method
665 communication system */
666 string LookupTag(const string &Message,const char *Tag,const char *Default)
667 {
668 // Look for a matching tag.
669 int Length = strlen(Tag);
670 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
671 {
672 // Found the tag
673 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
674 {
675 // Find the end of line and strip the leading/trailing spaces
676 string::const_iterator J;
677 I += Length + 1;
678 for (; isspace(*I) != 0 && I < Message.end(); ++I);
679 for (J = I; *J != '\n' && J < Message.end(); ++J);
680 for (; J > I && isspace(J[-1]) != 0; --J);
681
682 return string(I,J);
683 }
684
685 for (; *I != '\n' && I < Message.end(); ++I);
686 }
687
688 // Failed to find a match
689 if (Default == 0)
690 return string();
691 return Default;
692 }
693 /*}}}*/
694 // StringToBool - Converts a string into a boolean /*{{{*/
695 // ---------------------------------------------------------------------
696 /* This inspects the string to see if it is true or if it is false and
697 then returns the result. Several variants on true/false are checked. */
/* Interprets Text as a boolean. A leading number (as parsed by strtol with
   base 0) that evaluates to 0 or 1 is returned directly. Otherwise Text is
   compared, ignoring case, against the known words for false (returns 0)
   and for true (returns 1). Anything else yields Default. */
int StringToBool(const std::string &Text,int Default)
{
   const char * const Raw = Text.c_str();

   char *End;
   int const Num = strtol(Raw,&End,0);
   if (End != Raw && Num >= 0 && Num <= 1)
      return Num;

   // words meaning "false"
   static const char * const Negative[] = {"no","false","without","off","disable"};
   for (unsigned int N = 0; N != sizeof(Negative)/sizeof(Negative[0]); ++N)
      if (strcasecmp(Raw,Negative[N]) == 0)
         return 0;

   // words meaning "true"
   static const char * const Positive[] = {"yes","true","with","on","enable"};
   for (unsigned int P = 0; P != sizeof(Positive)/sizeof(Positive[0]); ++P)
      if (strcasecmp(Raw,Positive[P]) == 0)
         return 1;

   return Default;
}
723 /*}}}*/
724 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
725 // ---------------------------------------------------------------------
726 /* This converts a time_t into a string time representation that is
727 year 2000 compliant and timezone neutral */
/* Formats Date as "Www, DD Mon YYYY HH:MM:SS GMT" using the broken-down
   UTC time from gmtime_r. Returns an empty string if gmtime_r fails. */
std::string TimeRFC1123(time_t Date)
{
   struct tm Utc;
   if (gmtime_r(&Date, &Utc) == NULL)
      return "";

   // fixed English names, independent of the locale
   static const char * const DayName[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthName[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                            "Aug","Sep","Oct","Nov","Dec"};

   char Out[300];
   snprintf(Out, sizeof(Out), "%s, %02i %s %i %02i:%02i:%02i GMT",
            DayName[Utc.tm_wday], Utc.tm_mday, MonthName[Utc.tm_mon],
            Utc.tm_year + 1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
   return Out;
}
744 /*}}}*/
745 // ReadMessages - Read messages from the FD /*{{{*/
746 // ---------------------------------------------------------------------
747 /* This pulls full messages from the input FD into the message buffer.
748 It assumes that messages will not pause during transit so no
749 fancy buffering is used.
750
751 In particular: this reads blocks from the input until it believes
752 that it's run out of input text. Each block is terminated by a
753 double newline ('\n' followed by '\n'). As noted below, there is a
754 bug in this code: it assumes that all the blocks have been read if
755 it doesn't see additional text in the buffer after the last one is
756 parsed, which will cause it to lose blocks if the last block
757 coincides with the end of the buffer.
758 */
759 bool ReadMessages(int Fd, vector<string> &List)
760 {
761 char Buffer[64000];
762 char *End = Buffer;
763 // Represents any left-over from the previous iteration of the
764 // parse loop. (i.e., if a message is split across the end
765 // of the buffer, it goes here)
766 string PartialMessage;
767
768 while (1)
769 {
770 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
771 if (Res < 0 && errno == EINTR)
772 continue;
773
774 // Process is dead, this is kind of bad..
775 if (Res == 0)
776 return false;
777
778 // No data
779 if (Res < 0 && errno == EAGAIN)
780 return true;
781 if (Res < 0)
782 return false;
783
784 End += Res;
785
786 // Look for the end of the message
787 for (char *I = Buffer; I + 1 < End; I++)
788 {
789 if (I[1] != '\n' ||
790 (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
791 continue;
792
793 // Pull the message out
794 string Message(Buffer,I-Buffer);
795 PartialMessage += Message;
796
797 // Fix up the buffer
798 for (; I < End && (*I == '\n' || *I == '\r'); ++I);
799 End -= I-Buffer;
800 memmove(Buffer,I,End-Buffer);
801 I = Buffer;
802
803 List.push_back(PartialMessage);
804 PartialMessage.clear();
805 }
806 if (End != Buffer)
807 {
808 // If there's text left in the buffer, store it
809 // in PartialMessage and throw the rest of the buffer
810 // away. This allows us to handle messages that
811 // are longer than the static buffer size.
812 PartialMessage += string(Buffer, End);
813 End = Buffer;
814 }
815 else
816 {
817 // BUG ALERT: if a message block happens to end at a
818 // multiple of 64000 characters, this will cause it to
819 // terminate early, leading to a badly formed block and
820 // probably crashing the method. However, this is the only
821 // way we have to find the end of the message block. I have
822 // an idea of how to fix this, but it will require changes
823 // to the protocol (essentially to mark the beginning and
824 // end of the block).
825 //
826 // -- dburrows 2008-04-02
827 return true;
828 }
829
830 if (WaitFd(Fd) == false)
831 return false;
832 }
833 }
834 /*}}}*/
835 // MonthConv - Converts a month string into a number /*{{{*/
836 // ---------------------------------------------------------------------
837 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
838 Made it a bit more robust with a few tolower_ascii though. */
839 static int MonthConv(char *Month)
840 {
841 switch (tolower_ascii(*Month))
842 {
843 case 'a':
844 return tolower_ascii(Month[1]) == 'p'?3:7;
845 case 'd':
846 return 11;
847 case 'f':
848 return 1;
849 case 'j':
850 if (tolower_ascii(Month[1]) == 'a')
851 return 0;
852 return tolower_ascii(Month[2]) == 'n'?5:6;
853 case 'm':
854 return tolower_ascii(Month[2]) == 'r'?2:4;
855 case 'n':
856 return 10;
857 case 'o':
858 return 9;
859 case 's':
860 return 8;
861
862 // Pretend it is January..
863 default:
864 return 0;
865 }
866 }
867 /*}}}*/
868 // timegm - Internal timegm if the gnu version is not available /*{{{*/
869 // ---------------------------------------------------------------------
870 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
871 than local timezone (mktime assumes the latter).
872
873 This function is a nonstandard GNU extension that is also present on
874 the BSDs and maybe other systems. For others we follow the advice of
875 the manpage of timegm and use his portable replacement. */
876 #ifndef HAVE_TIMEGM
877 static time_t timegm(struct tm *t)
878 {
879 char *tz = getenv("TZ");
880 setenv("TZ", "", 1);
881 tzset();
882 time_t ret = mktime(t);
883 if (tz)
884 setenv("TZ", tz, 1);
885 else
886 unsetenv("TZ");
887 tzset();
888 return ret;
889 }
890 #endif
891 /*}}}*/
892 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
893 // ---------------------------------------------------------------------
894 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
895 with one exception: All timezones (%Z) are accepted but the protocol
896 says that it MUST be GMT, but this one is equal to UTC which we will
897 encounter from time to time (e.g. in Release files) so we accept all
898 here and just assume it is GMT (or UTC) later on */
/* Parses str in one of the three formats tried below and stores the result,
   interpreted as UTC via timegm, in time. Returns false if none of the
   formats matches.
   Parsing is done in the "C" locale so that the English day and month names
   are recognised; the locale that was active before is restored afterwards. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   // strptime only fills in the fields it parses, so start from a clean state
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // the string returned by setlocale may be overwritten by the next call
   char const * const current = setlocale(LC_ALL, NULL);
   std::string const previous = (current != NULL) ? current : "";

   setlocale (LC_ALL,"C");
   bool const invalid =
      // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
       // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
       // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   // an empty name selects the locale from the environment
   setlocale (LC_ALL,previous.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
917 /*}}}*/
918 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
919 // ---------------------------------------------------------------------
920 /* */
/* Parses an FTP MDTM timestamp (YYYYMMDDHHMMSS) and stores it, interpreted
   as UTC via timegm, in time. Returns false if str does not match. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime only fills in the fields it parses, so start from a clean state
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM contains no whitespace; the blanks in the format separate the
   // fields and match zero or more whitespace characters in the input
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
931 /*}}}*/
932 // StrToTime - Converts a string into a time_t /*{{{*/
933 // ---------------------------------------------------------------------
934 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
935 and the C library asctime format. It requires the GNU library function
936 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
937 reason the C library does not provide any such function :< This also
938 handles the weird, but unambiguous FTP time format*/
939 bool StrToTime(const string &Val,time_t &Result)
940 {
941 struct tm Tm;
942 char Month[10];
943
944 // Skip the day of the week
945 const char *I = strchr(Val.c_str(), ' ');
946
947 // Handle RFC 1123 time
948 Month[0] = 0;
949 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
950 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
951 {
952 // Handle RFC 1036 time
953 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
954 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
955 Tm.tm_year += 1900;
956 else
957 {
958 // asctime format
959 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
960 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
961 {
962 // 'ftp' time
963 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
964 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
965 return false;
966 Tm.tm_mon--;
967 }
968 }
969 }
970
971 Tm.tm_isdst = 0;
972 if (Month[0] != 0)
973 Tm.tm_mon = MonthConv(Month);
974 else
975 Tm.tm_mon = 0; // we don't have a month, so pick something
976 Tm.tm_year -= 1900;
977
978 // Convert to local time and then to GMT
979 Result = timegm(&Tm);
980 return true;
981 }
982 /*}}}*/
983 // StrToNum - Convert a fixed length string to a number /*{{{*/
984 // ---------------------------------------------------------------------
985 /* This is used in decoding the crazy fixed length string headers in
986 tar and ar files. */
/* Converts the first Len characters of Str (which need not be terminated)
   to a number in the given Base. Fields of 30 or more characters are
   rejected. A field that is empty or consists of blanks only yields 0.
   Returns false if strtoul cannot convert anything. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   if (Len >= 30)
      return false;

   // terminated private copy of the field
   std::string const Field(Str,Len);
   const char * const Text = Field.c_str();

   // All spaces is a zero
   Res = 0;
   const char *Cur = Text;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return true;

   char *End;
   Res = strtoul(Text,&End,Base);
   return End != Text;
}
1009 /*}}}*/
1010 // StrToNum - Convert a fixed length string to a number /*{{{*/
1011 // ---------------------------------------------------------------------
1012 /* This is used in decoding the crazy fixed length string headers in
1013 tar and ar files. */
/* Converts the first Len characters of Str (which need not be terminated)
   to a number in the given Base. Fields of 30 or more characters are
   rejected. A field that is empty or consists of blanks only yields 0.
   Returns false if strtoull cannot convert anything. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   if (Len >= 30)
      return false;

   // terminated private copy of the field
   std::string const Field(Str,Len);
   const char * const Text = Field.c_str();

   // All spaces is a zero
   Res = 0;
   const char *Cur = Text;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return true;

   char *End;
   Res = strtoull(Text,&End,Base);
   return End != Text;
}
1036 /*}}}*/
1037
1038 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1039 // ---------------------------------------------------------------------
1040 /* This is used in decoding the base-256 encoded fixed length fields in
1041 tar files */
/* Decodes a big-endian binary number of Len bytes. The top bit of the first
   byte must be set (it marks the field as binary) and is not part of the
   value. Returns false if the marker bit is missing or Len is 0; Res is
   left untouched in that case. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // bytes must be added as unsigned values: a plain char may be signed and
   // would then be sign extended for values >= 0x80
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) | static_cast<unsigned char>(Str[i]);
   return true;
}
1054 /*}}}*/
1055 // HexDigit - Convert a hex character into an integer /*{{{*/
1056 // ---------------------------------------------------------------------
1057 /* Helper for Hex2Num */
/* Returns the value (0..15) of the hex digit c, accepting both cases.
   Any other character yields 0. */
static int HexDigit(int c)
{
   if ('A' <= c && c <= 'F')
      return 10 + (c - 'A');
   if ('a' <= c && c <= 'f')
      return 10 + (c - 'a');
   if ('0' <= c && c <= '9')
      return c - '0';
   return 0;
}
1068 /*}}}*/
1069 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1070 // ---------------------------------------------------------------------
1071 /* The length of the buffer must be exactly 1/2 the length of the string. */
1072 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1073 {
1074 if (Str.length() != Length*2)
1075 return false;
1076
1077 // Convert each digit. We store it in the same order as the string
1078 int J = 0;
1079 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1080 {
1081 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1082 return false;
1083
1084 Num[J] = HexDigit(I[0]) << 4;
1085 Num[J] += HexDigit(I[1]);
1086 }
1087
1088 return true;
1089 }
1090 /*}}}*/
1091 // TokSplitString - Split a string up by a given token /*{{{*/
1092 // ---------------------------------------------------------------------
1093 /* This is intended to be a faster splitter, it does not use dynamic
1094 memories. Input is changed to insert nulls at each token location. */
/* Splits Input in place at every Tok. Pointers to the pieces are stored in
   List, followed by a null pointer; whitespace around the pieces is removed
   by overwriting it with terminators or skipping it. Returns false, with
   the last stored slot replaced by the null terminator, as soon as ListMax
   entries have been stored. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   char * const Stop = Input + strlen(Input);

   // Strip any leading spaces
   char *Start = Input;
   while (*Start != 0 && isspace(*Start) != 0)
      ++Start;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Back remove spaces and terminate the piece
      char *End = Pos;
      while (End > Start && (End[-1] == Tok || isspace(End[-1]) != 0))
         --End;
      *End = 0;

      List[Count] = Start;
      ++Count;
      if (Count >= ListMax)
      {
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over the separator (possibly already overwritten with a
      // terminator) and the whitespace following it
      while (Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0))
         ++Pos;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1130 /*}}}*/
// VectorizeString - Split a string up into a vector of strings         /*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.
   Empty items between two separators are kept, a single trailing
   separator does not produce a trailing empty item and an empty
   haystack yields an empty vector. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   std::string::const_iterator start = haystack.begin();
   while (true)
   {
      std::string::const_iterator const end = std::find(start, haystack.end(), split);
      exploded.push_back(std::string(start, end));

      /* Stop at the end of the input or if the separator was the very
         last character. Checking before stepping over the separator
         avoids forming an iterator beyond haystack.end(). */
      if (end == haystack.end() || end + 1 == haystack.end())
         break;
      start = end + 1;
   }
   return exploded;
}
1150 /*}}}*/
// StringSplit - split a string into a string vector by token           /*{{{*/
// ---------------------------------------------------------------------
/* Cuts s at every occurrence of sep. An empty separator is rejected with
   an empty result. Once the result holds maxsplit items the last item is
   replaced by the whole unsplit remainder of s. See header for details.
 */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
                                     unsigned int maxsplit)
{
   std::vector<std::string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   std::string::size_type from = 0;
   for (;;)
   {
      std::string::size_type const hit = s.find(sep, from);
      pieces.push_back(s.substr(from, hit - from));

      // if maxsplit is reached, the remaining string is the last item
      if (pieces.size() >= maxsplit)
      {
         pieces.back() = s.substr(from);
         break;
      }

      if (hit == std::string::npos)
         break;
      from = hit + sep.size();
   }
   return pieces;
}
1181 /*}}}*/
1182 // RegexChoice - Simple regex list/list matcher /*{{{*/
1183 // ---------------------------------------------------------------------
1184 /* */
1185 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1186 const char **ListEnd)
1187 {
1188 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1189 R->Hit = false;
1190
1191 unsigned long Hits = 0;
1192 for (; ListBegin < ListEnd; ++ListBegin)
1193 {
1194 // Check if the name is a regex
1195 const char *I;
1196 bool Regex = true;
1197 for (I = *ListBegin; *I != 0; I++)
1198 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1199 break;
1200 if (*I == 0)
1201 Regex = false;
1202
1203 // Compile the regex pattern
1204 regex_t Pattern;
1205 if (Regex == true)
1206 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1207 REG_NOSUB) != 0)
1208 Regex = false;
1209
1210 // Search the list
1211 bool Done = false;
1212 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1213 {
1214 if (R->Str[0] == 0)
1215 continue;
1216
1217 if (strcasecmp(R->Str,*ListBegin) != 0)
1218 {
1219 if (Regex == false)
1220 continue;
1221 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1222 continue;
1223 }
1224 Done = true;
1225
1226 if (R->Hit == false)
1227 Hits++;
1228
1229 R->Hit = true;
1230 }
1231
1232 if (Regex == true)
1233 regfree(&Pattern);
1234
1235 if (Done == false)
1236 _error->Warning(_("Selection %s not found"),*ListBegin);
1237 }
1238
1239 return Hits;
1240 }
1241 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams  /*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* One formatting attempt with a scratch buffer of 'size' bytes. On success
   the text is written to out and true is returned. Otherwise nothing is
   written, size is raised (to the exact need if vsnprintf reported it,
   else doubled) and false is returned so the caller can retry with a
   freshly started va_list. */
static bool iovprintf(std::ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // the vector releases the scratch memory on every path and throws
   // instead of handing a null buffer to vsnprintf if allocation fails
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}

/* Formats like printf and writes the result to out. */
void ioprintf(std::ostream &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   while (true) {
      va_start(args,format);
      bool const done = iovprintf(out, format, args, size);
      // every va_start needs its va_end, also on the successful attempt
      va_end(args);
      if (done == true)
         return;
   }
}

/* Formats like printf and stores the result in out, replacing its
   previous content. */
void strprintf(std::string &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   std::ostringstream outstr;
   while (true) {
      va_start(args,format);
      bool const done = iovprintf(outstr, format, args, size);
      // every va_start needs its va_end, also on the successful attempt
      va_end(args);
      if (done == true)
         break;
   }
   out = outstr.str();
}
1287 /*}}}*/
// safe_snprintf - Safer snprintf                                       /*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. If the output had to be truncated or formatting
   failed, End is returned. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* vsnprintf reports the length it wanted to write, which can be far
      larger than the buffer. Compare lengths instead of computing
      Buffer + Did, which would point outside of the buffer. */
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1309 /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string        /*{{{*/
// ---------------------------------------------------------------------
/* Returns everything behind the first ':' of VerStr, or VerStr itself
   if it contains no colon. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   return (Colon == std::string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1319 /*}}}*/
// tolower_ascii - tolower() function that ignores the locale           /*{{{*/
// ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. Only 'A'-'Z' are
   changed, every other value is returned as is. */
int tolower_ascii(int const c)
{
   bool const IsUpper = (c >= 'A' && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1332 /*}}}*/
1333
1334 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1335 // ---------------------------------------------------------------------
1336 /* The domain list is a comma separate list of domains that are suffix
1337 matched against the argument */
1338 bool CheckDomainList(const string &Host,const string &List)
1339 {
1340 string::const_iterator Start = List.begin();
1341 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1342 {
1343 if (Cur < List.end() && *Cur != ',')
1344 continue;
1345
1346 // Match the end of the string..
1347 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1348 Cur - Start != 0 &&
1349 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1350 return true;
1351
1352 Start = Cur + 1;
1353 }
1354 return false;
1355 }
1356 /*}}}*/
// strv_length - Return the length of a NULL-terminated string array    /*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries in front of the terminating NULL pointer. */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return cursor - str_array;
}
1368
// DeEscapeString - unescape (\0XX and \xXX) from a string              /*{{{*/
// ---------------------------------------------------------------------
/* Replaces "\\" by a single backslash, \0XX by the character with the
   octal value XX and \xXX by the character with the hex value XX.
   Any other escape, an escape without two following characters and a
   lone backslash at the very end are dropped from the output. */
std::string DeEscapeString(const std::string &input)
{
   char tmp[3];
   std::string output;
   for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read after the backslash
      if (it + 1 == input.end())
         break;

      // read it
      ++it;
      int base = 0;
      switch (*it)
      {
         case '\\':
         // deal with double escape
         output += '\\';
         break;
         case '0':
         base = 8;
         break;
         case 'x':
         base = 16;
         break;
         default:
         // FIXME: raise exception here?
         break;
      }

      /* Both value characters must lie inside the string. Requiring two
         characters strictly before end() keeps it[2] in bounds and
         guarantees that the loop increment cannot step over end(). */
      if (base != 0 && input.end() - it > 2)
      {
         tmp[0] = it[1];
         tmp[1] = it[2];
         tmp[2] = 0;
         output += (char)strtol(tmp, 0, base);
         it += 2;
      }
   }
   return output;
}
1430 /*}}}*/
1431 // URI::CopyFrom - Copy from an object /*{{{*/
1432 // ---------------------------------------------------------------------
1433 /* This parses the URI into all of its components */
1434 void URI::CopyFrom(const string &U)
1435 {
1436 string::const_iterator I = U.begin();
1437
1438 // Locate the first colon, this separates the scheme
1439 for (; I < U.end() && *I != ':' ; ++I);
1440 string::const_iterator FirstColon = I;
1441
1442 /* Determine if this is a host type URI with a leading double //
1443 and then search for the first single / */
1444 string::const_iterator SingleSlash = I;
1445 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1446 SingleSlash += 3;
1447
1448 /* Find the / indicating the end of the hostname, ignoring /'s in the
1449 square brackets */
1450 bool InBracket = false;
1451 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1452 {
1453 if (*SingleSlash == '[')
1454 InBracket = true;
1455 if (InBracket == true && *SingleSlash == ']')
1456 InBracket = false;
1457 }
1458
1459 if (SingleSlash > U.end())
1460 SingleSlash = U.end();
1461
1462 // We can now write the access and path specifiers
1463 Access.assign(U.begin(),FirstColon);
1464 if (SingleSlash != U.end())
1465 Path.assign(SingleSlash,U.end());
1466 if (Path.empty() == true)
1467 Path = "/";
1468
1469 // Now we attempt to locate a user:pass@host fragment
1470 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1471 FirstColon += 3;
1472 else
1473 FirstColon += 1;
1474 if (FirstColon >= U.end())
1475 return;
1476
1477 if (FirstColon > SingleSlash)
1478 FirstColon = SingleSlash;
1479
1480 // Find the colon...
1481 I = FirstColon + 1;
1482 if (I > SingleSlash)
1483 I = SingleSlash;
1484 for (; I < SingleSlash && *I != ':'; ++I);
1485 string::const_iterator SecondColon = I;
1486
1487 // Search for the @ after the colon
1488 for (; I < SingleSlash && *I != '@'; ++I);
1489 string::const_iterator At = I;
1490
1491 // Now write the host and user/pass
1492 if (At == SingleSlash)
1493 {
1494 if (FirstColon < SingleSlash)
1495 Host.assign(FirstColon,SingleSlash);
1496 }
1497 else
1498 {
1499 Host.assign(At+1,SingleSlash);
1500 // username and password must be encoded (RFC 3986)
1501 User.assign(DeQuoteString(FirstColon,SecondColon));
1502 if (SecondColon < At)
1503 Password.assign(DeQuoteString(SecondColon+1,At));
1504 }
1505
1506 // Now we parse the RFC 2732 [] hostnames.
1507 unsigned long PortEnd = 0;
1508 InBracket = false;
1509 for (unsigned I = 0; I != Host.length();)
1510 {
1511 if (Host[I] == '[')
1512 {
1513 InBracket = true;
1514 Host.erase(I,1);
1515 continue;
1516 }
1517
1518 if (InBracket == true && Host[I] == ']')
1519 {
1520 InBracket = false;
1521 Host.erase(I,1);
1522 PortEnd = I;
1523 continue;
1524 }
1525 I++;
1526 }
1527
1528 // Tsk, weird.
1529 if (InBracket == true)
1530 {
1531 Host.clear();
1532 return;
1533 }
1534
1535 // Now we parse off a port number from the hostname
1536 Port = 0;
1537 string::size_type Pos = Host.rfind(':');
1538 if (Pos == string::npos || Pos < PortEnd)
1539 return;
1540
1541 Port = atoi(string(Host,Pos+1).c_str());
1542 Host.assign(Host,0,Pos);
1543 }
1544 /*}}}*/
1545 // URI::operator string - Convert the URI to a string /*{{{*/
1546 // ---------------------------------------------------------------------
1547 /* */
1548 URI::operator string()
1549 {
1550 string Res;
1551
1552 if (Access.empty() == false)
1553 Res = Access + ':';
1554
1555 if (Host.empty() == false)
1556 {
1557 if (Access.empty() == false)
1558 Res += "//";
1559
1560 if (User.empty() == false)
1561 {
1562 // FIXME: Technically userinfo is permitted even less
1563 // characters than these, but this is not conveniently
1564 // expressed with a blacklist.
1565 Res += QuoteString(User, ":/?#[]@");
1566 if (Password.empty() == false)
1567 Res += ":" + QuoteString(Password, ":/?#[]@");
1568 Res += "@";
1569 }
1570
1571 // Add RFC 2732 escaping characters
1572 if (Access.empty() == false &&
1573 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1574 Res += '[' + Host + ']';
1575 else
1576 Res += Host;
1577
1578 if (Port != 0)
1579 {
1580 char S[30];
1581 sprintf(S,":%u",Port);
1582 Res += S;
1583 }
1584 }
1585
1586 if (Path.empty() == false)
1587 {
1588 if (Path[0] != '/')
1589 Res += "/" + Path;
1590 else
1591 Res += Path;
1592 }
1593
1594 return Res;
1595 }
1596 /*}}}*/
1597 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1598 // ---------------------------------------------------------------------
1599 /* */
1600 string URI::SiteOnly(const string &URI)
1601 {
1602 ::URI U(URI);
1603 U.User.clear();
1604 U.Password.clear();
1605 U.Path.clear();
1606 return U;
1607 }
1608 /*}}}*/
1609 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1610 // ---------------------------------------------------------------------
1611 /* */
1612 string URI::NoUserPassword(const string &URI)
1613 {
1614 ::URI U(URI);
1615 U.User.clear();
1616 U.Password.clear();
1617 return U;
1618 }
1619 /*}}}*/