]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
implement MarkAndSweep in cc instead of header
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Strip - return a copy of str without leading and trailing whitespace
   (as classified by isspace()). An empty or all-whitespace input yields
   an empty string. */
std::string Strip(const std::string &str)
{
   std::string::size_type const len = str.length();

   // isspace() is only defined for values representable as unsigned char
   // (or EOF), while plain char may be negative for non-ASCII bytes, so
   // every byte is converted before it is classified.
   std::string::size_type start = 0;
   while (start < len && isspace(static_cast<unsigned char>(str[start])) != 0)
      ++start;

   // string is empty or contains only whitespace
   if (start == len)
      return "";

   // str[start] is known to be a non-space, so this stops there at the latest
   std::string::size_type end = len;
   while (isspace(static_cast<unsigned char>(str[end - 1])) != 0)
      --end;

   return str.substr(start, end - start);
}

/* Endswith - true if s finishes with the characters of end; an empty
   end always matches. */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   // compare in place instead of building a temporary substring
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}

/* Startswith - true if s begins with the characters of start; an empty
   start always matches. */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   return s.compare(0, start.size(), start) == 0;
}

   }
}
86 /*}}}*/
87 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This is handy to use before displaying some information to the end user */
90 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
91 {
92 iconv_t cd;
93 const char *inbuf;
94 char *inptr, *outbuf;
95 size_t insize, bufsize;
96 dest->clear();
97
98 cd = iconv_open(codeset, "UTF-8");
99 if (cd == (iconv_t)(-1)) {
100 // Something went wrong
101 if (errno == EINVAL)
102 _error->Error("conversion from 'UTF-8' to '%s' not available",
103 codeset);
104 else
105 perror("iconv_open");
106
107 return false;
108 }
109
110 insize = bufsize = orig.size();
111 inbuf = orig.data();
112 inptr = (char *)inbuf;
113 outbuf = new char[bufsize];
114 size_t lastError = -1;
115
116 while (insize != 0)
117 {
118 char *outptr = outbuf;
119 size_t outsize = bufsize;
120 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
121 dest->append(outbuf, outptr - outbuf);
122 if (err == (size_t)(-1))
123 {
124 switch (errno)
125 {
126 case EILSEQ:
127 insize--;
128 inptr++;
129 // replace a series of unknown multibytes with a single "?"
130 if (lastError != insize) {
131 lastError = insize - 1;
132 dest->append("?");
133 }
134 break;
135 case EINVAL:
136 insize = 0;
137 break;
138 case E2BIG:
139 if (outptr == outbuf)
140 {
141 bufsize *= 2;
142 delete[] outbuf;
143 outbuf = new char[bufsize];
144 }
145 break;
146 }
147 }
148 }
149
150 delete[] outbuf;
151
152 iconv_close(cd);
153
154 return true;
155 }
156 /*}}}*/
157 // strstrip - Remove white space from the front and back of a string /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This is handy to use when parsing a file. It also removes \n's left
160 over from fgets and company */
161 char *_strstrip(char *String)
162 {
163 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
164
165 if (*String == 0)
166 return String;
167 return _strrstrip(String);
168 }
169 /*}}}*/
170 // strrstrip - Remove white space from the back of a string /*{{{*/
171 // ---------------------------------------------------------------------
/* Cuts trailing blanks, tabs, newlines and carriage returns off String
   by writing a new terminator in place. Returns String. */
char *_strrstrip(char *String)
{
   // End is kept one past the last retained character so that no pointer
   // in front of the buffer is ever formed, not even for an empty string
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' || End[-1] == '\n' ||
           End[-1] == '\r'))
      --End;
   *End = 0;
   return String;
}
181 /*}}}*/
182 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
183 // ---------------------------------------------------------------------
184 /* */
/* Expands every tab in String, in place, to the spaces needed to reach
   the next multiple-of-8 column (column 0 is the start of String).

   Len is the total capacity of the buffer String points to, including
   the room for the terminating zero. If expanding a tab would not fit
   into that capacity the string is cut off at this tab instead.
   Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   char * const BufEnd = String + Len;
   for (char *I = String; I != BufEnd && *I != 0; ++I)
   {
      if (*I != '\t')
         continue;

      // spaces required to reach the next tab stop: 1 to 8
      size_t const Fill = 8 - ((I - String) % 8);
      // everything behind the tab, terminator included
      size_t const Tail = strlen(I + 1) + 1;

      // the expanded text has to fit into what is left of the buffer
      if (Fill + Tail > static_cast<size_t>(BufEnd - I))
      {
         *I = 0;
         return String;
      }

      memmove(I + Fill,I + 1,Tail);
      memset(I,' ',Fill);
      // continue behind the inserted spaces (the loop adds the final step)
      I += Fill - 1;
   }
   return String;
}
216 /*}}}*/
217 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This grabs a single word, converts any % escaped characters to their
220 proper values and advances the pointer. Double quotes are understood
221 and stripped out as well. This is for URI/URL parsing. It also can
222 understand [] brackets.*/
/* Extracts the next word from String into Res and moves String behind
   the word and the whitespace following it.

   A word ends at the first whitespace character that is not enclosed in
   "..." or [...]. In the result %XX escapes are replaced by the byte
   they encode and double quotes are removed; brackets are kept.
   Decoding starts at String itself, so blanks in front of the word end
   up in Res.

   Returns false, leaving String and Res untouched, if only blanks are
   left or a quote/bracket is never closed. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading blanks just to see whether there is a word at all
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Find the end of the word, jumping over quoted and bracketed parts
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; ++C)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // De-quote into a string that grows as needed; a fixed size buffer
   // would overflow by its terminator for very long words
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C &&
          isxdigit(static_cast<unsigned char>(Start[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(Start[2])) != 0)
      {
         char const Hex[3] = {Start[1], Start[2], 0};
         Word += static_cast<char>(strtol(Hex,0,16));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }
   Res = Word;

   // Skip ending white space
   while (*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0)
      ++C;
   String = C;
   return true;
}
279 /*}}}*/
280 // ParseCWord - Parses a string like a C "" expression /*{{{*/
281 // ---------------------------------------------------------------------
282 /* This expects a series of space separated strings enclosed in ""'s.
283 It concatenates the ""'s into a single string. */
/* Parses a sequence of "..." strings separated by whitespace and stores
   their contents, joined by a single space per whitespace run, in Res.
   On success String is moved to the end of the input.

   Returns false, leaving String and Res untouched, if the input holds
   only blanks, is 1024 characters or longer, contains anything but
   whitespace outside of the quotes or has an unterminated quote. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // historical input size limit callers may rely on
   if (strlen(String) >= 1024)
      return false;

   std::string Joined;
   for (; *C != 0; ++C)
   {
      if (*C == '"')
      {
         // copy everything up to the closing quote
         const char * const Close = strchr(C + 1, '"');
         if (Close == NULL)
            return false;
         Joined.append(C + 1, Close - (C + 1));
         C = Close;
         continue;
      }

      // only whitespace may appear between the quoted parts; ctype
      // functions need unsigned char values
      if (isspace(static_cast<unsigned char>(*C)) == 0)
         return false;
      // a run of whitespace contributes just one space
      if (C != String && isspace(static_cast<unsigned char>(C[-1])) != 0)
         continue;
      Joined += ' ';
   }

   Res = Joined;
   String = C;
   return true;
}
321 /*}}}*/
322 // QuoteString - Convert a string into quoted form /*{{{*/
323 // ---------------------------------------------------------------------
324 /* */
/* Returns Str with every character that is listed in Bad, is not
   printable, is '%', or lies outside of 0x21-0x7E replaced by a %XX
   escape (two lower case hex digits). */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   std::string Res;
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // work on the byte value: plain char may be negative, which would
      // be invalid for isprint() and would print as %ffffffXX
      unsigned char const Byte = static_cast<unsigned char>(*I);
      bool const Escape = strchr(Bad,*I) != 0 || isprint(Byte) == 0 ||
                          Byte == 0x25 ||                 // percent '%' char
                          Byte <= 0x20 || Byte >= 0x7F;   // control chars
      if (Escape == false)
      {
         Res += *I;
         continue;
      }

      char Buf[10];
      snprintf(Buf,sizeof(Buf),"%%%02x",static_cast<unsigned int>(Byte));
      Res += Buf;
   }
   return Res;
}
343 /*}}}*/
344 // DeQuoteString - Convert a string from quoted form /*{{{*/
345 // ---------------------------------------------------------------------
346 /* This undoes QuoteString */
347 string DeQuoteString(const string &Str)
348 {
349 return DeQuoteString(Str.begin(),Str.end());
350 }
351 string DeQuoteString(string::const_iterator const &begin,
352 string::const_iterator const &end)
353 {
354 string Res;
355 for (string::const_iterator I = begin; I != end; ++I)
356 {
357 if (*I == '%' && I + 2 < end &&
358 isxdigit(I[1]) && isxdigit(I[2]))
359 {
360 char Tmp[3];
361 Tmp[0] = I[1];
362 Tmp[1] = I[2];
363 Tmp[2] = 0;
364 Res += (char)strtol(Tmp,0,16);
365 I += 2;
366 continue;
367 }
368 else
369 Res += *I;
370 }
371 return Res;
372 }
373
374 /*}}}*/
375 // SizeToStr - Convert a long into a human readable size /*{{{*/
376 // ---------------------------------------------------------------------
377 /* A max of 4 digits are shown before conversion to the next highest unit.
378 The max length of the string will be 5 chars unless the size is > 10
379 YottaBytes (E24) */
/* Formats the magnitude of Size (the sign is dropped) as a number
   followed by a blank and an SI unit letter, e.g. "15.0 k" or "1235 M".
   Plain byte counts have no unit letter, so the result ends in the blank.

   The value is divided by 1000 until it is below 10000 or the largest
   unit (Y) is reached; values below 100 that carry a unit are printed
   with one decimal. */
std::string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   size_t const Largest = sizeof(Ext) - 1;

   // stopping at the largest unit keeps huge values printable instead of
   // running off the table and returning an unset buffer
   size_t I = 0;
   while (ASize >= 10000 && I < Largest)
   {
      ASize /= 1000.0;
      ++I;
   }

   char S[300];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);

   return S;
}
412 /*}}}*/
413 // TimeToStr - Convert the time into a string /*{{{*/
414 // ---------------------------------------------------------------------
415 /* Converts a number of seconds to a hms format */
416 string TimeToStr(unsigned long Sec)
417 {
418 char S[300];
419
420 while (1)
421 {
422 if (Sec > 60*60*24)
423 {
424 //d means days, h means hours, min means minutes, s means seconds
425 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
426 break;
427 }
428
429 if (Sec > 60*60)
430 {
431 //h means hours, min means minutes, s means seconds
432 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
433 break;
434 }
435
436 if (Sec > 60)
437 {
438 //min means minutes, s means seconds
439 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
440 break;
441 }
442
443 //s means seconds
444 sprintf(S,_("%lis"),Sec);
445 break;
446 }
447
448 return S;
449 }
450 /*}}}*/
451 // SubstVar - Substitute a string for another string /*{{{*/
452 // ---------------------------------------------------------------------
453 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns a copy of Str in which every occurrence of Subst is replaced
   by Contents. Occurrences are searched from left to right and never
   inside text that was just inserted. An empty Subst, or one that does
   not occur, yields Str unchanged. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty())
      return Str;

   std::string Result;
   std::string::size_type Cursor = 0;
   while (Cursor < Str.length())
   {
      std::string::size_type const Hit = Str.find(Subst,Cursor);
      if (Hit == std::string::npos)
         break;

      // the text in front of the match, then the replacement
      Result.append(Str,Cursor,Hit - Cursor);
      Result.append(Contents);
      Cursor = Hit + Subst.length();
   }

   // the cursor only moves on a match
   if (Cursor == 0)
      return Str;

   // whatever follows the last match
   if (Cursor < Str.length())
      Result.append(Str,Cursor,std::string::npos);
   return Result;
}
480 string SubstVar(string Str,const struct SubstVar *Vars)
481 {
482 for (; Vars->Subst != 0; Vars++)
483 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
484 return Str;
485 }
486 /*}}}*/
487 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
488 // ---------------------------------------------------------------------
489 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string made of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   unsigned long const Copies = Depth + 1;

   std::string Joined;
   for (unsigned long Done = 0; Done != Copies; ++Done)
      Joined += Separator;
   return Joined;
}
497 /*}}}*/
498 // URItoFileName - Convert the uri into a unique file name /*{{{*/
499 // ---------------------------------------------------------------------
500 /* This converts a URI into a safe filename. It quotes all unsafe characters
501 and converts / to _ and removes the scheme identifier. The resulting
502 file name should be unique and never occur again for a different file */
503 string URItoFileName(const string &URI)
504 {
505 // Nuke 'sensitive' items
506 ::URI U(URI);
507 U.User.clear();
508 U.Password.clear();
509 U.Access.clear();
510
511 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
512 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
513 replace(NewURI.begin(),NewURI.end(),'/','_');
514 return NewURI;
515 }
516 /*}}}*/
517 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
518 // ---------------------------------------------------------------------
519 /* This routine performs a base64 transformation on a string. It was ripped
520 from wget and then patched and bug fixed.
521
522 This spec can be found in rfc2045 */
/* Returns the base64 (RFC 2045 alphabet) encoding of S, padded with '='
   to a multiple of four characters. No line breaks are inserted. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   std::string::size_type const Len = S.length();

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*Len + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The
      bytes are handled as unsigned values: a negative char would yield
      a negative table index. */
   for (std::string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      std::string::size_type const Avail = Len - Pos;
      unsigned char const B0 = static_cast<unsigned char>(S[Pos]);
      unsigned char const B1 = (Avail > 1) ? static_cast<unsigned char>(S[Pos + 1]) : 0;
      unsigned char const B2 = (Avail > 2) ? static_cast<unsigned char>(S[Pos + 2]) : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* The padding tells the remote end how many bytes of the last
         group it should discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
573 /*}}}*/
574 // stringcmp - Arbitrary string compare /*{{{*/
575 // ---------------------------------------------------------------------
576 /* This safely compares two non-null terminated strings of arbitrary
577 length */
/* Compares the character ranges [A, AEnd) and [B, BEnd).

   Returns 0 if both hold the same characters. If one range is a proper
   prefix of the other, 1 is returned when A is the shorter one and -1
   when B is. Otherwise the first differing characters decide: -1 if the
   one from A is smaller, 1 if not. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk along the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const DoneA = (A == AEnd);
   bool const DoneB = (B == BEnd);
   if (DoneA && DoneB)
      return 0;
   if (DoneA)
      return 1;
   if (DoneB)
      return -1;
   return (*A < *B) ? -1 : 1;
}
594
595 #if __GNUC__ >= 3
/* Same comparison as the pointer based stringcmp, with the first range
   given as string iterators: 0 for equal ranges, 1 / -1 if A / B is a
   proper prefix of the other, else -1 or 1 depending on whether the
   first differing character of A is the smaller one. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   for (;;)
   {
      bool const DoneA = (A == AEnd);
      bool const DoneB = (B == BEnd);
      if (DoneA || DoneB)
      {
         if (DoneA && DoneB)
            return 0;
         return DoneA ? 1 : -1;
      }
      if (*A != *B)
         return (*A < *B) ? -1 : 1;
      ++A;
      ++B;
   }
}
/* Same comparison as the pointer based stringcmp, with both ranges given
   as string iterators: 0 for equal ranges, 1 / -1 if A / B is a proper
   prefix of the other, else -1 or 1 depending on whether the first
   differing character of A is the smaller one. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   // skip what both ranges have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
630 #endif
631 /*}}}*/
632 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
633 // ---------------------------------------------------------------------
634 /* */
635 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
636 {
637 for (; A != AEnd && B != BEnd; A++, B++)
638 if (tolower_ascii(*A) != tolower_ascii(*B))
639 break;
640
641 if (A == AEnd && B == BEnd)
642 return 0;
643 if (A == AEnd)
644 return 1;
645 if (B == BEnd)
646 return -1;
647 if (tolower_ascii(*A) < tolower_ascii(*B))
648 return -1;
649 return 1;
650 }
651 #if __GNUC__ >= 3
652 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
653 const char *B,const char *BEnd)
654 {
655 for (; A != AEnd && B != BEnd; A++, B++)
656 if (tolower_ascii(*A) != tolower_ascii(*B))
657 break;
658
659 if (A == AEnd && B == BEnd)
660 return 0;
661 if (A == AEnd)
662 return 1;
663 if (B == BEnd)
664 return -1;
665 if (tolower_ascii(*A) < tolower_ascii(*B))
666 return -1;
667 return 1;
668 }
669 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
670 string::const_iterator B,string::const_iterator BEnd)
671 {
672 for (; A != AEnd && B != BEnd; A++, B++)
673 if (tolower_ascii(*A) != tolower_ascii(*B))
674 break;
675
676 if (A == AEnd && B == BEnd)
677 return 0;
678 if (A == AEnd)
679 return 1;
680 if (B == BEnd)
681 return -1;
682 if (tolower_ascii(*A) < tolower_ascii(*B))
683 return -1;
684 return 1;
685 }
686 #endif
687 /*}}}*/
688 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
689 // ---------------------------------------------------------------------
690 /* The format is like those used in package files and the method
691 communication system */
692 string LookupTag(const string &Message,const char *Tag,const char *Default)
693 {
694 // Look for a matching tag.
695 int Length = strlen(Tag);
696 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
697 {
698 // Found the tag
699 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
700 {
701 // Find the end of line and strip the leading/trailing spaces
702 string::const_iterator J;
703 I += Length + 1;
704 for (; isspace(*I) != 0 && I < Message.end(); ++I);
705 for (J = I; *J != '\n' && J < Message.end(); ++J);
706 for (; J > I && isspace(J[-1]) != 0; --J);
707
708 return string(I,J);
709 }
710
711 for (; *I != '\n' && I < Message.end(); ++I);
712 }
713
714 // Failed to find a match
715 if (Default == 0)
716 return string();
717 return Default;
718 }
719 /*}}}*/
720 // StringToBool - Converts a string into a boolean /*{{{*/
721 // ---------------------------------------------------------------------
722 /* This inspects the string to see if it is true or if it is false and
723 then returns the result. Several variants on true/false are checked. */
/* Interprets Text as a boolean. Returns 0 or 1 if the complete text is
   that number (as parsed by strtol with base 0), 0 for one of the
   negative words, 1 for one of the positive words (both compared
   ignoring case) and Default for everything else. */
int StringToBool(const std::string &Text,int Default)
{
   const char * const Str = Text.c_str();

   // Accept a number only if strtol consumed the entire string, so that
   // e.g. "0ad" is not taken for "0"
   char *ParseEnd;
   int const Number = strtol(Str,&ParseEnd,0);
   if (ParseEnd == Str + Text.size() && Number >= 0 && Number <= 1)
      return Number;

   static const char * const Negative[] = {"no","false","without","off","disable"};
   static const char * const Positive[] = {"yes","true","with","on","enable"};

   // Check for negatives
   for (size_t N = 0; N != sizeof(Negative)/sizeof(Negative[0]); ++N)
      if (strcasecmp(Str,Negative[N]) == 0)
         return 0;

   // Check for positives
   for (size_t P = 0; P != sizeof(Positive)/sizeof(Positive[0]); ++P)
      if (strcasecmp(Str,Positive[P]) == 0)
         return 1;

   return Default;
}
752 /*}}}*/
753 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
754 // ---------------------------------------------------------------------
755 /* This converts a time_t into a string time representation that is
756 year 2000 compliant and timezone neutral */
/* Formats Date as "Sun, 06 Nov 1994 08:49:37 GMT". Day and month names
   come from fixed English tables and the time is broken down with
   gmtime_r(); an empty string is returned if that fails. */
std::string TimeRFC1123(time_t Date)
{
   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun",
                                             "Jul","Aug","Sep","Oct","Nov","Dec"};

   struct tm Broken;
   if (gmtime_r(&Date, &Broken) == NULL)
      return "";

   char Out[300];
   snprintf(Out, sizeof(Out), "%s, %02i %s %i %02i:%02i:%02i GMT",
            DayNames[Broken.tm_wday], Broken.tm_mday,
            MonthNames[Broken.tm_mon], Broken.tm_year + 1900,
            Broken.tm_hour, Broken.tm_min, Broken.tm_sec);
   return Out;
}
773 /*}}}*/
774 // ReadMessages - Read messages from the FD /*{{{*/
775 // ---------------------------------------------------------------------
776 /* This pulls full messages from the input FD into the message buffer.
777 It assumes that messages will not pause during transit so no
778 fancy buffering is used.
779
780 In particular: this reads blocks from the input until it believes
781 that it's run out of input text. Each block is terminated by a
782 double newline ('\n' followed by '\n'). As noted below, there is a
783 bug in this code: it assumes that all the blocks have been read if
784 it doesn't see additional text in the buffer after the last one is
785 parsed, which will cause it to lose blocks if the last block
786 coincides with the end of the buffer.
787 */
788 bool ReadMessages(int Fd, vector<string> &List)
789 {
790 char Buffer[64000];
791 char *End = Buffer;
792 // Represents any left-over from the previous iteration of the
793 // parse loop. (i.e., if a message is split across the end
794 // of the buffer, it goes here)
795 string PartialMessage;
796
797 while (1)
798 {
799 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
800 if (Res < 0 && errno == EINTR)
801 continue;
802
803 // Process is dead, this is kind of bad..
804 if (Res == 0)
805 return false;
806
807 // No data
808 if (Res < 0 && errno == EAGAIN)
809 return true;
810 if (Res < 0)
811 return false;
812
813 End += Res;
814
815 // Look for the end of the message
816 for (char *I = Buffer; I + 1 < End; I++)
817 {
818 if (I[1] != '\n' ||
819 (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
820 continue;
821
822 // Pull the message out
823 string Message(Buffer,I-Buffer);
824 PartialMessage += Message;
825
826 // Fix up the buffer
827 for (; I < End && (*I == '\n' || *I == '\r'); ++I);
828 End -= I-Buffer;
829 memmove(Buffer,I,End-Buffer);
830 I = Buffer;
831
832 List.push_back(PartialMessage);
833 PartialMessage.clear();
834 }
835 if (End != Buffer)
836 {
837 // If there's text left in the buffer, store it
838 // in PartialMessage and throw the rest of the buffer
839 // away. This allows us to handle messages that
840 // are longer than the static buffer size.
841 PartialMessage += string(Buffer, End);
842 End = Buffer;
843 }
844 else
845 {
846 // BUG ALERT: if a message block happens to end at a
847 // multiple of 64000 characters, this will cause it to
848 // terminate early, leading to a badly formed block and
849 // probably crashing the method. However, this is the only
850 // way we have to find the end of the message block. I have
851 // an idea of how to fix this, but it will require changes
852 // to the protocol (essentially to mark the beginning and
853 // end of the block).
854 //
855 // -- dburrows 2008-04-02
856 return true;
857 }
858
859 if (WaitFd(Fd) == false)
860 return false;
861 }
862 }
863 /*}}}*/
864 // MonthConv - Converts a month string into a number /*{{{*/
865 // ---------------------------------------------------------------------
866 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
867 Made it a bit more robust with a few tolower_ascii though. */
868 static int MonthConv(char *Month)
869 {
870 switch (tolower_ascii(*Month))
871 {
872 case 'a':
873 return tolower_ascii(Month[1]) == 'p'?3:7;
874 case 'd':
875 return 11;
876 case 'f':
877 return 1;
878 case 'j':
879 if (tolower_ascii(Month[1]) == 'a')
880 return 0;
881 return tolower_ascii(Month[2]) == 'n'?5:6;
882 case 'm':
883 return tolower_ascii(Month[2]) == 'r'?2:4;
884 case 'n':
885 return 10;
886 case 'o':
887 return 9;
888 case 's':
889 return 8;
890
891 // Pretend it is January..
892 default:
893 return 0;
894 }
895 }
896 /*}}}*/
897 // timegm - Internal timegm if the gnu version is not available /*{{{*/
898 // ---------------------------------------------------------------------
899 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
900 than local timezone (mktime assumes the latter).
901
902 This function is a nonstandard GNU extension that is also present on
903 the BSDs and maybe other systems. For others we follow the advice of
904 the manpage of timegm and use his portable replacement. */
905 #ifndef HAVE_TIMEGM
906 static time_t timegm(struct tm *t)
907 {
908 char *tz = getenv("TZ");
909 setenv("TZ", "", 1);
910 tzset();
911 time_t ret = mktime(t);
912 if (tz)
913 setenv("TZ", tz, 1);
914 else
915 unsetenv("TZ");
916 tzset();
917 return ret;
918 }
919 #endif
920 /*}}}*/
921 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
922 // ---------------------------------------------------------------------
923 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
924 with one exception: All timezones (%Z) are accepted but the protocol
925 says that it MUST be GMT, but this one is equal to UTC which we will
926 encounter from time to time (e.g. in Release files) so we accept all
927 here and just assume it is GMT (or UTC) later on */
/* Parses str in one of the three date formats HTTP/1.1 allows (RFC 1123,
   RFC 850 and asctime()) and stores the result, taken as UTC, in time.
   Returns false, leaving time untouched, if none of the formats match.

   The names are parsed in the "C" locale; the locale that was active
   before is put back afterwards. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   // strptime() only sets the fields its format mentions
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // Remember the active locale so exactly that one can be restored; the
   // returned string may be overwritten by the next setlocale() call,
   // hence the copy. Should the query fail we fall back to "".
   const char * const Active = setlocale(LC_ALL, NULL);
   std::string const Saved = (Active != NULL) ? Active : "";

   setlocale (LC_ALL,"C");
   bool const invalid =
   // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
   // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
   // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   setlocale (LC_ALL,Saved.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
946 /*}}}*/
947 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
948 // ---------------------------------------------------------------------
949 /* */
/* Parses the YYYYMMDDHHMMSS timestamp of an FTP MDTM reply and stores
   it, taken as UTC, in time. Returns false, leaving time untouched, if
   str does not have that format. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime() only sets the fields its format mentions, the others
   // must not reach timegm() uninitialised
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM includes no whitespaces but recommend and ignored by strptime
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
960 /*}}}*/
961 // StrToTime - Converts a string into a time_t /*{{{*/
962 // ---------------------------------------------------------------------
963 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
964 and the C library asctime format. It requires the GNU library function
965 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
966 reason the C library does not provide any such function :< This also
967 handles the weird, but unambiguous FTP time format*/
968 bool StrToTime(const string &Val,time_t &Result)
969 {
970 struct tm Tm;
971 char Month[10];
972
973 // Skip the day of the week
974 const char *I = strchr(Val.c_str(), ' ');
975
976 // Handle RFC 1123 time
977 Month[0] = 0;
978 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
979 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
980 {
981 // Handle RFC 1036 time
982 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
983 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
984 Tm.tm_year += 1900;
985 else
986 {
987 // asctime format
988 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
989 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
990 {
991 // 'ftp' time
992 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
993 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
994 return false;
995 Tm.tm_mon--;
996 }
997 }
998 }
999
1000 Tm.tm_isdst = 0;
1001 if (Month[0] != 0)
1002 Tm.tm_mon = MonthConv(Month);
1003 else
1004 Tm.tm_mon = 0; // we don't have a month, so pick something
1005 Tm.tm_year -= 1900;
1006
1007 // Convert to local time and then to GMT
1008 Result = timegm(&Tm);
1009 return true;
1010 }
1011 /*}}}*/
1012 // StrToNum - Convert a fixed length string to a number /*{{{*/
1013 // ---------------------------------------------------------------------
1014 /* This is used in decoding the crazy fixed length string headers in
1015 tar and ar files. */
/* Converts the first Len characters of Str, a field that need not be
   zero terminated, to a number in the given Base and stores it in Res.

   A field made up of blanks only counts as zero. Returns false if Len
   is 30 or more or if strtoul() could not convert anything; Res is 0
   in the latter case. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // terminated private copy for strtoul
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *ParseEnd;
   Res = strtoul(Field,&ParseEnd,Base);
   return ParseEnd != Field;
}
1038 /*}}}*/
1039 // StrToNum - Convert a fixed length string to a number /*{{{*/
1040 // ---------------------------------------------------------------------
1041 /* This is used in decoding the crazy fixed length string headers in
1042 tar and ar files. */
/* Converts the first Len characters of Str, a field that need not be
   zero terminated, to a number in the given Base and stores it in Res.

   A field made up of blanks only counts as zero. Returns false if Len
   is 30 or more or if strtoull() could not convert anything; Res is 0
   in the latter case. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // terminated private copy for strtoull
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *FirstDigit = Field;
   while (*FirstDigit == ' ')
      ++FirstDigit;
   if (*FirstDigit == 0)
      return true;

   char *ParseEnd;
   Res = strtoull(Field,&ParseEnd,Base);
   if (ParseEnd == Field)
      return false;
   return true;
}
1065 /*}}}*/
1066
1067 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1068 // ---------------------------------------------------------------------
1069 /* This is used in decoding the 256bit encoded fixed length fields in
1070 tar files */
/* Decodes a field of Len bytes holding a big-endian binary number.
   Returns false if Len is 0 or the top bit of the first byte, which
   marks this encoding, is not set. Otherwise the remaining seven bits
   of the first byte and all following bytes are combined into Res. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   // without a first byte there is no marker bit to look at
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // every byte is added as unsigned value; a negative char would be
   // sign extended and corrupt the digits accumulated so far
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1083 /*}}}*/
1084 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1085 // ---------------------------------------------------------------------
1086 /* This is used in decoding the 256bit encoded fixed length fields in
1087 tar files */
1088 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1089 {
1090 unsigned long long Num;
1091 bool rc;
1092
1093 rc = Base256ToNum(Str, Num, Len);
1094 Res = Num;
1095 if (Res != Num)
1096 return false;
1097
1098 return rc;
1099 }
1100 /*}}}*/
1101 // HexDigit - Convert a hex character into an integer /*{{{*/
1102 // ---------------------------------------------------------------------
1103 /* Helper for Hex2Num */
/* Returns the value (0-15) of the hex digit c, accepting both upper and
   lower case letters; every other character yields 0. */
static int HexDigit(int c)
{
   static const char Digits[] = "0123456789abcdef";

   // fold the upper case hex letters onto their lower case form
   int const Folded = (c >= 'A' && c <= 'F') ? c - 'A' + 'a' : c;

   for (int Value = 0; Value != 16; ++Value)
      if (Digits[Value] == Folded)
         return Value;
   return 0;
}
1114 /*}}}*/
1115 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1116 // ---------------------------------------------------------------------
1117 /* The length of the buffer must be exactly 1/2 the length of the string. */
1118 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1119 {
1120 if (Str.length() != Length*2)
1121 return false;
1122
1123 // Convert each digit. We store it in the same order as the string
1124 int J = 0;
1125 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1126 {
1127 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1128 return false;
1129
1130 Num[J] = HexDigit(I[0]) << 4;
1131 Num[J] += HexDigit(I[1]);
1132 }
1133
1134 return true;
1135 }
1136 /*}}}*/
1137 // TokSplitString - Split a string up by a given token /*{{{*/
1138 // ---------------------------------------------------------------------
1139 /* This is intended to be a faster splitter, it does not use dynamic
1140 memories. Input is changed to insert nulls at each token location. */
/* Splits Input in place at every Tok. Each field has surrounding white space
   removed and is null terminated inside Input; pointers to the fields are
   stored in List, followed by a terminating null pointer. ListMax is the
   number of slots in List including the one for the terminator. Returns
   false if the fields do not fit; List is then still null terminated
   (unless ListMax is 0, in which case List is not touched at all). */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Not even the terminating null pointer fits
   if (ListMax == 0)
      return false;

   char * const Stop = Input + strlen(Input);

   // Strip any leading spaces. The casts to unsigned char are required
   // because isspace() is undefined for negative values of plain char
   char *Start = Input;
   while (Start != Stop && isspace(static_cast<unsigned char>(*Start)) != 0)
      ++Start;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Back remove spaces (and tokens) and terminate the field there
      char *End = Pos;
      while (End > Start &&
             (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0))
         --End;
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // the last slot is needed for the terminator, so the field is dropped
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over separators, white space and the null we may just
      // have written at the token position
      while (Pos != Stop &&
             (*Pos == Tok || isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0))
         ++Pos;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1176 /*}}}*/
1177 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1178 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector */
/* Returns the fields of haystack separated by split. Empty fields between
   two separators and before a leading separator are kept, but a single
   trailing separator does not produce a trailing empty field. An empty
   haystack gives an empty vector. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   if (haystack.empty() == true)
      return exploded;

   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (true)
   {
      // find the end of the current field
      string::const_iterator end = start;
      while (end != last && *end != split)
         ++end;
      exploded.push_back(string(start, end));

      // only step over the separator if there is one: an iterator must
      // never be moved beyond haystack.end()
      if (end == last)
         break;
      start = end + 1;
      if (start == last)
         break;
   }
   return exploded;
}
1196 /*}}}*/
1197 // StringSplit - split a string into a string vector by token /*{{{*/
1198 // ---------------------------------------------------------------------
1199 /* See header for details.
1200 */
/* Splits s at every occurrence of sep into at most maxsplit pieces; once the
   limit is reached the unsplit remainder becomes the last piece. An empty
   separator yields an empty vector. */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> pieces;

   // splitting on nothing is bogus
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);

      // either no separator is left or the next piece has to be the last
      // one allowed: in both cases it is simply the rest of the string
      if (hit == string::npos || pieces.size() + 1 >= maxsplit)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1227 /*}}}*/
1228 // RegexChoice - Simple regex list/list matcher /*{{{*/
1229 // ---------------------------------------------------------------------
1230 /* */
1231 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1232 const char **ListEnd)
1233 {
1234 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1235 R->Hit = false;
1236
1237 unsigned long Hits = 0;
1238 for (; ListBegin < ListEnd; ++ListBegin)
1239 {
1240 // Check if the name is a regex
1241 const char *I;
1242 bool Regex = true;
1243 for (I = *ListBegin; *I != 0; I++)
1244 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1245 break;
1246 if (*I == 0)
1247 Regex = false;
1248
1249 // Compile the regex pattern
1250 regex_t Pattern;
1251 if (Regex == true)
1252 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1253 REG_NOSUB) != 0)
1254 Regex = false;
1255
1256 // Search the list
1257 bool Done = false;
1258 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1259 {
1260 if (R->Str[0] == 0)
1261 continue;
1262
1263 if (strcasecmp(R->Str,*ListBegin) != 0)
1264 {
1265 if (Regex == false)
1266 continue;
1267 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1268 continue;
1269 }
1270 Done = true;
1271
1272 if (R->Hit == false)
1273 Hits++;
1274
1275 R->Hit = true;
1276 }
1277
1278 if (Regex == true)
1279 regfree(&Pattern);
1280
1281 if (Done == false)
1282 _error->Warning(_("Selection %s not found"),*ListBegin);
1283 }
1284
1285 return Hits;
1286 }
1287 /*}}}*/
1288 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1289 // ---------------------------------------------------------------------
1290 /* This is used to make the internationalization strings easier to translate
1291 and to allow reordering of parameters */
/* One formatting attempt with a buffer of size bytes. On success the text
   is written to out and true is returned. Otherwise nothing is written,
   size is raised (to the exact need if vsnprintf reported it, doubled if
   not) and false is returned so the caller can retry with a fresh
   va_list. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   char * const Buffer = (char*)malloc(size);
   ssize_t const Needed = vsnprintf(Buffer, size, format, args);
   bool const Fits = (Needed > -1 && Needed < size);

   if (Fits == true)
      out << Buffer;
   else if (Needed > -1)
      size = Needed + 1;
   else
      size *= 2;

   free(Buffer);
   return Fits;
}
1309 void ioprintf(ostream &out,const char *format,...)
1310 {
1311 va_list args;
1312 ssize_t size = 400;
1313 while (true) {
1314 va_start(args,format);
1315 if (iovprintf(out, format, args, size) == true)
1316 return;
1317 va_end(args);
1318 }
1319 }
1320 void strprintf(string &out,const char *format,...)
1321 {
1322 va_list args;
1323 ssize_t size = 400;
1324 std::ostringstream outstr;
1325 while (true) {
1326 va_start(args,format);
1327 if (iovprintf(outstr, format, args, size) == true)
1328 break;
1329 va_end(args);
1330 }
1331 out = outstr.str();
1332 }
1333 /*}}}*/
1334 // safe_snprintf - Safer snprintf /*{{{*/
1335 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
/* Formats into [Buffer,End). Returns the position just behind the written
   text so that calls can be chained, or End if there was no room, the
   output was truncated or formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // no space at all, not even for the terminator
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Written = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // vsnprintf reports the length the complete text would have had
   bool const Fits = (Written >= 0 && Written <= End - Buffer);
   return Fits ? Buffer + Written : End;
}
1355 /*}}}*/
1356 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1357 // ---------------------------------------------------------------------
/* Returns VerStr without everything up to and including its first colon;
   a version without a colon is returned unchanged. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1365 /*}}}*/
1366 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1367 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - and is notably faster than
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
/* Lower-cases c if it is one of the ASCII letters A-Z; every other value
   is handed back unchanged. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1378 /*}}}*/
1379
1380 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1381 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1384 bool CheckDomainList(const string &Host,const string &List)
1385 {
1386 string::const_iterator Start = List.begin();
1387 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1388 {
1389 if (Cur < List.end() && *Cur != ',')
1390 continue;
1391
1392 // Match the end of the string..
1393 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1394 Cur - Start != 0 &&
1395 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1396 return true;
1397
1398 Start = Cur + 1;
1399 }
1400 return false;
1401 }
1402 /*}}}*/
1403 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1404 // ---------------------------------------------------------------------
1405 /* */
/* Counts the entries of str_array in front of its terminating NULL. */
size_t strv_length(const char **str_array)
{
   const char **Cursor = str_array;
   while (*Cursor != NULL)
      ++Cursor;
   return static_cast<size_t>(Cursor - str_array);
}
1414
1415 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1416 // ---------------------------------------------------------------------
1417 /* */
/* Returns input with its backslash escapes resolved: a doubled backslash
   becomes a single one, a backslash followed by 0 and two octal digits or
   by x and two hex digits becomes the character with that code. Unknown
   escapes, a trailing lone backslash and escapes which are cut off by the
   end of the string lose the backslash and the selector character; what
   follows is copied as ordinary text. */
string DeEscapeString(const string &input)
{
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // a lone backslash at the very end has nothing to escape
      if (input.end() - it < 2)
         continue;

      // step onto the character selecting the kind of escape
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
         output += '\\';
         continue;
      }

      int const Base = (*it == '0') ? 8 : (*it == 'x') ? 16 : 0;

      // FIXME: raise exception here?
      if (Base == 0)
         continue;

      // Both digits have to be inside the string. Testing the remaining
      // distance avoids reading the end position and keeps the iterator
      // from being moved past it by the jump below
      if (input.end() - it <= 2)
         continue;

      char const Digits[3] = { it[1], it[2], '\0' };
      output += static_cast<char>(strtol(Digits, 0, Base));
      it += 2;
   }
   return output;
}
1476 /*}}}*/
1477 // URI::CopyFrom - Copy from an object /*{{{*/
1478 // ---------------------------------------------------------------------
1479 /* This parses the URI into all of its components */
1480 void URI::CopyFrom(const string &U)
1481 {
1482 string::const_iterator I = U.begin();
1483
1484 // Locate the first colon, this separates the scheme
1485 for (; I < U.end() && *I != ':' ; ++I);
1486 string::const_iterator FirstColon = I;
1487
1488 /* Determine if this is a host type URI with a leading double //
1489 and then search for the first single / */
1490 string::const_iterator SingleSlash = I;
1491 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1492 SingleSlash += 3;
1493
1494 /* Find the / indicating the end of the hostname, ignoring /'s in the
1495 square brackets */
1496 bool InBracket = false;
1497 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1498 {
1499 if (*SingleSlash == '[')
1500 InBracket = true;
1501 if (InBracket == true && *SingleSlash == ']')
1502 InBracket = false;
1503 }
1504
1505 if (SingleSlash > U.end())
1506 SingleSlash = U.end();
1507
1508 // We can now write the access and path specifiers
1509 Access.assign(U.begin(),FirstColon);
1510 if (SingleSlash != U.end())
1511 Path.assign(SingleSlash,U.end());
1512 if (Path.empty() == true)
1513 Path = "/";
1514
1515 // Now we attempt to locate a user:pass@host fragment
1516 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1517 FirstColon += 3;
1518 else
1519 FirstColon += 1;
1520 if (FirstColon >= U.end())
1521 return;
1522
1523 if (FirstColon > SingleSlash)
1524 FirstColon = SingleSlash;
1525
1526 // Find the colon...
1527 I = FirstColon + 1;
1528 if (I > SingleSlash)
1529 I = SingleSlash;
1530 for (; I < SingleSlash && *I != ':'; ++I);
1531 string::const_iterator SecondColon = I;
1532
1533 // Search for the @ after the colon
1534 for (; I < SingleSlash && *I != '@'; ++I);
1535 string::const_iterator At = I;
1536
1537 // Now write the host and user/pass
1538 if (At == SingleSlash)
1539 {
1540 if (FirstColon < SingleSlash)
1541 Host.assign(FirstColon,SingleSlash);
1542 }
1543 else
1544 {
1545 Host.assign(At+1,SingleSlash);
1546 // username and password must be encoded (RFC 3986)
1547 User.assign(DeQuoteString(FirstColon,SecondColon));
1548 if (SecondColon < At)
1549 Password.assign(DeQuoteString(SecondColon+1,At));
1550 }
1551
1552 // Now we parse the RFC 2732 [] hostnames.
1553 unsigned long PortEnd = 0;
1554 InBracket = false;
1555 for (unsigned I = 0; I != Host.length();)
1556 {
1557 if (Host[I] == '[')
1558 {
1559 InBracket = true;
1560 Host.erase(I,1);
1561 continue;
1562 }
1563
1564 if (InBracket == true && Host[I] == ']')
1565 {
1566 InBracket = false;
1567 Host.erase(I,1);
1568 PortEnd = I;
1569 continue;
1570 }
1571 I++;
1572 }
1573
1574 // Tsk, weird.
1575 if (InBracket == true)
1576 {
1577 Host.clear();
1578 return;
1579 }
1580
1581 // Now we parse off a port number from the hostname
1582 Port = 0;
1583 string::size_type Pos = Host.rfind(':');
1584 if (Pos == string::npos || Pos < PortEnd)
1585 return;
1586
1587 Port = atoi(string(Host,Pos+1).c_str());
1588 Host.assign(Host,0,Pos);
1589 }
1590 /*}}}*/
1591 // URI::operator string - Convert the URI to a string /*{{{*/
1592 // ---------------------------------------------------------------------
1593 /* */
1594 URI::operator string()
1595 {
1596 string Res;
1597
1598 if (Access.empty() == false)
1599 Res = Access + ':';
1600
1601 if (Host.empty() == false)
1602 {
1603 if (Access.empty() == false)
1604 Res += "//";
1605
1606 if (User.empty() == false)
1607 {
1608 // FIXME: Technically userinfo is permitted even less
1609 // characters than these, but this is not conveniently
1610 // expressed with a blacklist.
1611 Res += QuoteString(User, ":/?#[]@");
1612 if (Password.empty() == false)
1613 Res += ":" + QuoteString(Password, ":/?#[]@");
1614 Res += "@";
1615 }
1616
1617 // Add RFC 2732 escaping characters
1618 if (Access.empty() == false &&
1619 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1620 Res += '[' + Host + ']';
1621 else
1622 Res += Host;
1623
1624 if (Port != 0)
1625 {
1626 char S[30];
1627 sprintf(S,":%u",Port);
1628 Res += S;
1629 }
1630 }
1631
1632 if (Path.empty() == false)
1633 {
1634 if (Path[0] != '/')
1635 Res += "/" + Path;
1636 else
1637 Res += Path;
1638 }
1639
1640 return Res;
1641 }
1642 /*}}}*/
1643 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1644 // ---------------------------------------------------------------------
1645 /* */
1646 string URI::SiteOnly(const string &URI)
1647 {
1648 ::URI U(URI);
1649 U.User.clear();
1650 U.Password.clear();
1651 U.Path.clear();
1652 return U;
1653 }
1654 /*}}}*/
1655 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1656 // ---------------------------------------------------------------------
1657 /* */
1658 string URI::NoUserPassword(const string &URI)
1659 {
1660 ::URI U(URI);
1661 U.User.clear();
1662 U.Password.clear();
1663 return U;
1664 }
1665 /*}}}*/