]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
test/integration/test-ubuntu-bug-346386-apt-get-update-paywall: use downloadfile()
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
// Strip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Returns a copy of s without leading and trailing blanks, tabs and
   newlines. A string made up only of those characters yields "". */
std::string Strip(const std::string &s)
{
   static const char * const Blanks = " \t\n";
   std::string::size_type const First = s.find_first_not_of(Blanks);
   // only whitespace
   if (First == std::string::npos)
      return std::string();
   std::string::size_type const Last = s.find_last_not_of(Blanks);
   return std::string(s, First, Last - First + 1);
}

/* True if the last characters of s are exactly end; an empty end
   matches every string. */
bool Endswith(const std::string &s, const std::string &end)
{
   if (s.size() < end.size())
      return false;
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}

   }
}
67 /*}}}*/
68 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
69 // ---------------------------------------------------------------------
70 /* This is handy to use before display some information for enduser */
71 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
72 {
73 iconv_t cd;
74 const char *inbuf;
75 char *inptr, *outbuf;
76 size_t insize, bufsize;
77 dest->clear();
78
79 cd = iconv_open(codeset, "UTF-8");
80 if (cd == (iconv_t)(-1)) {
81 // Something went wrong
82 if (errno == EINVAL)
83 _error->Error("conversion from 'UTF-8' to '%s' not available",
84 codeset);
85 else
86 perror("iconv_open");
87
88 return false;
89 }
90
91 insize = bufsize = orig.size();
92 inbuf = orig.data();
93 inptr = (char *)inbuf;
94 outbuf = new char[bufsize];
95 size_t lastError = -1;
96
97 while (insize != 0)
98 {
99 char *outptr = outbuf;
100 size_t outsize = bufsize;
101 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
102 dest->append(outbuf, outptr - outbuf);
103 if (err == (size_t)(-1))
104 {
105 switch (errno)
106 {
107 case EILSEQ:
108 insize--;
109 inptr++;
110 // replace a series of unknown multibytes with a single "?"
111 if (lastError != insize) {
112 lastError = insize - 1;
113 dest->append("?");
114 }
115 break;
116 case EINVAL:
117 insize = 0;
118 break;
119 case E2BIG:
120 if (outptr == outbuf)
121 {
122 bufsize *= 2;
123 delete[] outbuf;
124 outbuf = new char[bufsize];
125 }
126 break;
127 }
128 }
129 }
130
131 delete[] outbuf;
132
133 iconv_close(cd);
134
135 return true;
136 }
137 /*}}}*/
138 // strstrip - Remove white space from the front and back of a string /*{{{*/
139 // ---------------------------------------------------------------------
140 /* This is handy to use when parsing a file. It also removes \n's left
141 over from fgets and company */
142 char *_strstrip(char *String)
143 {
144 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
145
146 if (*String == 0)
147 return String;
148 return _strrstrip(String);
149 }
150 /*}}}*/
// strrstrip - Remove white space from the back of a string		/*{{{*/
// ---------------------------------------------------------------------
/* Cuts trailing blanks, tabs, newlines and carriage returns by writing a
   new terminator into String and returns String. Works on a length
   instead of a pointer so that no pointer before the start of the buffer
   is formed for empty or all-blank strings. */
char *_strrstrip(char *String)
{
   size_t Len = strlen(String);
   while (Len != 0 && (String[Len-1] == ' ' || String[Len-1] == '\t' ||
                       String[Len-1] == '\n' || String[Len-1] == '\r'))
      --Len;
   String[Len] = 0;
   return String;
}
162 /*}}}*/
// strtabexpand - Expands tabs to the next multiple of 8 columns		/*{{{*/
// ---------------------------------------------------------------------
/* Replaces every tab in String in place with the blanks needed to reach
   the next tab stop (columns 8, 16, ...), counting the start of the
   string as column 0. Len is the size of the buffer String lives in,
   including the room for the terminator. If an expansion would not fit,
   the string is cut at that tab. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   // current length without the terminator
   size_t Used = strlen(String);

   for (char *I = String; *I != 0; ++I)
   {
      if (*I != '\t')
         continue;

      // earlier tabs are expanded already, so the offset is the column
      size_t const Column = I - String;
      size_t const Width = 8 - (Column % 8);

      // the tab itself is reused, so the string grows by Width - 1
      if (Used + (Width - 1) + 1 > Len)
      {
         *I = 0;
         return String;
      }

      // move everything behind the tab, including the terminator
      memmove(I + Width,I + 1,Used - Column);
      memset(I,' ',Width);
      Used += Width - 1;

      // continue with the first character behind the blanks
      I += Width - 1;
   }
   return String;
}
197 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances String behind the word and the white space
   following it. Double quotes are understood and stripped out as well,
   [] brackets are understood and kept. White space inside quotes or
   brackets does not end the word. This is for URI/URL parsing.
   Returns false, leaving String and Res untouched, if there is no word
   left or a quote or bracket is not closed. Words of any length are
   handled. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (;*C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; C++)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters; like before this starts at String, so
   // skipped leading blanks end up in the word
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C &&
          isxdigit(static_cast<unsigned char>(Start[1])) &&
          isxdigit(static_cast<unsigned char>(Start[2])))
      {
         char const Hex[3] = {Start[1], Start[2], 0};
         Word += static_cast<char>(strtol(Hex,0,16));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }
   // as before, a decoded %00 ends the result
   Res.assign(Word.c_str());

   // Skip ending white space
   while (*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0)
      ++C;
   String = C;
   return true;
}
260 /*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of white space separated strings enclosed in
   ""'s and joins their contents into Res, with each run of white space
   between them becoming a single blank. On success String is moved to
   the end of the input. Returns false if the input is empty, is 1024
   characters or longer, has an unclosed quote or has text outside of
   quotes. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return false;

   // the result is never longer than the input, so this keeps Scratch safe
   char Scratch[1024];
   if (strlen(String) >= sizeof(Scratch))
      return false;

   char *Out = Scratch;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
         // copy the quoted part up to the closing quote
         ++Cur;
         while (*Cur != 0 && *Cur != '"')
            *Out++ = *Cur++;
         if (*Cur == 0)
            return false;
      }
      else if (isspace(*Cur) == 0)
         return false;
      else if (Cur == String || isspace(Cur[-1]) == 0)
         *Out++ = ' ';
      ++Cur;
   }
   *Out = 0;
   Res = Scratch;
   String = Cur;
   return true;
}
302 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns Str with every character listed in Bad, the percent sign, the
   blank and every character outside of printable ASCII replaced by a
   '%' followed by the two digit lower case hex value of the byte. */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   std::string Res;
   Res.reserve(Str.size());
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // work on the unsigned value, so that bytes >= 0x80 are neither
      // sign extended in the output nor handed to isprint as negatives
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch == 0x25 || // percent '%' char
          Ch <= 0x20 || Ch >= 0x7F) // control chars
      {
         char Buf[4];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
324 /*}}}*/
// DeQuoteString - Convert a string from quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* This undoes QuoteString: every '%' which is followed by two hex digits
   and at least one more character is replaced by the byte with that
   value, everything else is copied as it is. */
std::string DeQuoteString(std::string::const_iterator const &begin,
                          std::string::const_iterator const &end)
{
   std::string Plain;
   std::string::const_iterator Cur = begin;
   while (Cur != end)
   {
      bool const Escape = *Cur == '%' && end - Cur > 2 &&
                          isxdigit(Cur[1]) && isxdigit(Cur[2]);
      if (Escape == false)
      {
         Plain += *Cur;
         ++Cur;
         continue;
      }

      char const Hex[3] = {Cur[1], Cur[2], 0};
      Plain += (char)strtol(Hex,0,16);
      Cur += 3;
   }
   return Plain;
}
/* Convenience variant working on a complete string */
std::string DeQuoteString(const std::string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
354
355 /*}}}*/
// SizeToStr - Convert a long into a human readable size		/*{{{*/
// ---------------------------------------------------------------------
/* A max of 4 digits are shown before conversion to the next highest unit.
   The max length of the string will be 5 chars unless the size is > 10
   YottaBytes (E24), in which case the value is printed in YottaBytes
   with as many digits as it needs. The sign of Size is dropped. The
   result is the number, a blank and the unit letter (nothing for plain
   bytes). */
std::string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   unsigned int const Largest = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // scale down until 4 digits are enough, but stay within the units
   unsigned int I = 0;
   for (; I < Largest && ASize >= 10000; ++I)
      ASize /= 1000.0;

   char S[300];
   // one decimal for small values, but never for plain bytes
   if (I != 0 && ASize < 100)
      snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);

   return S;
}
393 /*}}}*/
394 // TimeToStr - Convert the time into a string /*{{{*/
395 // ---------------------------------------------------------------------
396 /* Converts a number of seconds to a hms format */
397 string TimeToStr(unsigned long Sec)
398 {
399 char S[300];
400
401 while (1)
402 {
403 if (Sec > 60*60*24)
404 {
405 //d means days, h means hours, min means minutes, s means seconds
406 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
407 break;
408 }
409
410 if (Sec > 60*60)
411 {
412 //h means hours, min means minutes, s means seconds
413 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
414 break;
415 }
416
417 if (Sec > 60)
418 {
419 //min means minutes, s means seconds
420 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
421 break;
422 }
423
424 //s means seconds
425 sprintf(S,_("%lis"),Sec);
426 break;
427 }
428
429 return S;
430 }
431 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This returns a copy of Str in which all occurrences of Subst are
   replaced with Contents. The search continues behind each replacement,
   so inserted text is never scanned again. An empty Subst leaves Str
   unchanged. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty())
      return Str;

   std::string Out;
   // everything in front of this offset is handled already
   std::string::size_type Done = 0;
   while (Done < Str.length())
   {
      std::string::size_type const Hit = Str.find(Subst,Done);
      if (Hit == std::string::npos)
         break;
      Out.append(Str, Done, Hit - Done);
      Out.append(Contents);
      Done = Hit + Subst.length();
   }

   // nothing was found at all
   if (Done == 0)
      return Str;

   // keep the text behind the last occurrence
   if (Done < Str.length())
      Out.append(Str, Done, std::string::npos);
   return Out;
}
461 string SubstVar(string Str,const struct SubstVar *Vars)
462 {
463 for (; Vars->Subst != 0; Vars++)
464 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
465 return Str;
466 }
467 /*}}}*/
// OutputInDepth - return a string with separator multiplied with depth /*{{{*/
// ---------------------------------------------------------------------
/* Returns a string with the supplied separator depth + 1 times in it */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Line;
   unsigned long Left = Depth + 1;
   while (Left > 0)
   {
      Line.append(Separator);
      --Left;
   }
   return Line;
}
478 /*}}}*/
479 // URItoFileName - Convert the uri into a unique file name /*{{{*/
480 // ---------------------------------------------------------------------
481 /* This converts a URI into a safe filename. It quotes all unsafe characters
482 and converts / to _ and removes the scheme identifier. The resulting
483 file name should be unique and never occur again for a different file */
484 string URItoFileName(const string &URI)
485 {
486 // Nuke 'sensitive' items
487 ::URI U(URI);
488 U.User.clear();
489 U.Password.clear();
490 U.Access.clear();
491
492 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
493 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
494 replace(NewURI.begin(),NewURI.end(),'/','_');
495 return NewURI;
496 }
497 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string: each group
   of 3 input bytes becomes 4 characters of the base64 alphabet and an
   incomplete last group is padded with '='. Arbitrary binary data is
   fine, the bytes are treated as unsigned values.

   This spec can be found in rfc2045 */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   std::string::size_type const Size = S.length();

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*Size + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (std::string::size_type Pos = 0; Pos < Size; Pos += 3)
   {
      std::string::size_type const Avail = Size - Pos;
      // unsigned, or bytes >= 0x80 would give a negative table index
      unsigned char const B0 = S[Pos];
      unsigned char const B1 = (Avail > 1) ? S[Pos + 1] : 0;
      unsigned char const B2 = (Avail > 2) ? S[Pos + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
554 /*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* This safely compares two non-null terminated strings of arbitrary
   length. It returns 0 for equal ranges and otherwise -1 or 1 depending
   on the first differing character (compared as plain char). Mind that
   if one range is a true prefix of the other the result is 1 if A is
   the shorter one and -1 if B is. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const AtEndOfA = (A == AEnd);
   bool const AtEndOfB = (B == BEnd);
   if (AtEndOfA == true)
      return (AtEndOfB == true) ? 0 : 1;
   if (AtEndOfB == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
575
576 #if __GNUC__ >= 3
/* Same comparison with A given as string iterators: 0 for equal ranges,
   1 if A is a true prefix of B, -1 if B is a true prefix of A and
   otherwise -1 or 1 by the first differing character. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const AtEndOfA = (A == AEnd);
   bool const AtEndOfB = (B == BEnd);
   if (AtEndOfA == true)
      return (AtEndOfB == true) ? 0 : 1;
   if (AtEndOfB == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same comparison with both ranges given as string iterators: 0 for
   equal ranges, 1 if A is a true prefix of B, -1 if B is a true prefix
   of A and otherwise -1 or 1 by the first differing character. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const AtEndOfA = (A == AEnd);
   bool const AtEndOfB = (B == BEnd);
   if (AtEndOfA == true)
      return (AtEndOfB == true) ? 0 : 1;
   if (AtEndOfB == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
611 #endif
612 /*}}}*/
613 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
614 // ---------------------------------------------------------------------
615 /* */
616 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
617 {
618 for (; A != AEnd && B != BEnd; A++, B++)
619 if (tolower_ascii(*A) != tolower_ascii(*B))
620 break;
621
622 if (A == AEnd && B == BEnd)
623 return 0;
624 if (A == AEnd)
625 return 1;
626 if (B == BEnd)
627 return -1;
628 if (tolower_ascii(*A) < tolower_ascii(*B))
629 return -1;
630 return 1;
631 }
632 #if __GNUC__ >= 3
633 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
634 const char *B,const char *BEnd)
635 {
636 for (; A != AEnd && B != BEnd; A++, B++)
637 if (tolower_ascii(*A) != tolower_ascii(*B))
638 break;
639
640 if (A == AEnd && B == BEnd)
641 return 0;
642 if (A == AEnd)
643 return 1;
644 if (B == BEnd)
645 return -1;
646 if (tolower_ascii(*A) < tolower_ascii(*B))
647 return -1;
648 return 1;
649 }
650 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
651 string::const_iterator B,string::const_iterator BEnd)
652 {
653 for (; A != AEnd && B != BEnd; A++, B++)
654 if (tolower_ascii(*A) != tolower_ascii(*B))
655 break;
656
657 if (A == AEnd && B == BEnd)
658 return 0;
659 if (A == AEnd)
660 return 1;
661 if (B == BEnd)
662 return -1;
663 if (tolower_ascii(*A) < tolower_ascii(*B))
664 return -1;
665 return 1;
666 }
667 #endif
668 /*}}}*/
669 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
670 // ---------------------------------------------------------------------
671 /* The format is like those used in package files and the method
672 communication system */
673 string LookupTag(const string &Message,const char *Tag,const char *Default)
674 {
675 // Look for a matching tag.
676 int Length = strlen(Tag);
677 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
678 {
679 // Found the tag
680 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
681 {
682 // Find the end of line and strip the leading/trailing spaces
683 string::const_iterator J;
684 I += Length + 1;
685 for (; isspace(*I) != 0 && I < Message.end(); ++I);
686 for (J = I; *J != '\n' && J < Message.end(); ++J);
687 for (; J > I && isspace(J[-1]) != 0; --J);
688
689 return string(I,J);
690 }
691
692 for (; *I != '\n' && I < Message.end(); ++I);
693 }
694
695 // Failed to find a match
696 if (Default == 0)
697 return string();
698 return Default;
699 }
700 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns 1 or 0. The numbers 0 and 1 (in any notation strtol
   accepts) and several variants of true/false in any case are known.
   Everything else gives Default. Note that an empty string counts as
   the number 0. */
int StringToBool(const std::string &Text,int Default)
{
   const char * const Str = Text.c_str();

   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   char *ParseEnd;
   int const Number = strtol(Str,&ParseEnd,0);
   if (ParseEnd == Str + Text.size() && Number >= 0 && Number <= 1)
      return Number;

   static const char * const Negatives[] = {"no","false","without","off","disable"};
   static const char * const Positives[] = {"yes","true","with","on","enable"};

   // Check for negatives
   for (unsigned int N = 0; N != sizeof(Negatives)/sizeof(Negatives[0]); ++N)
      if (strcasecmp(Str,Negatives[N]) == 0)
         return 0;

   // Check for positives
   for (unsigned int P = 0; P != sizeof(Positives)/sizeof(Positives[0]); ++P)
      if (strcasecmp(Str,Positives[P]) == 0)
         return 1;

   return Default;
}
733 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, e.g.
   "Sun, 06 Nov 1994 08:49:37 GMT". The names are always the English
   ones. An empty string is returned if the time can not be converted. */
std::string TimeRFC1123(time_t Date)
{
   struct tm Broken;
   if (gmtime_r(&Date, &Broken) == NULL)
      return "";

   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                             "Aug","Sep","Oct","Nov","Dec"};

   char Out[300];
   snprintf(Out, sizeof(Out), "%s, %02i %s %i %02i:%02i:%02i GMT",
            DayNames[Broken.tm_wday], Broken.tm_mday, MonthNames[Broken.tm_mon],
            Broken.tm_year + 1900, Broken.tm_hour, Broken.tm_min, Broken.tm_sec);
   return Out;
}
754 /*}}}*/
755 // ReadMessages - Read messages from the FD /*{{{*/
756 // ---------------------------------------------------------------------
757 /* This pulls full messages from the input FD into the message buffer.
758 It assumes that messages will not pause during transit so no
759 fancy buffering is used.
760
761 In particular: this reads blocks from the input until it believes
762 that it's run out of input text. Each block is terminated by a
763 double newline ('\n' followed by '\n'). As noted below, there is a
764 bug in this code: it assumes that all the blocks have been read if
765 it doesn't see additional text in the buffer after the last one is
766 parsed, which will cause it to lose blocks if the last block
767 coincides with the end of the buffer.
768 */
769 bool ReadMessages(int Fd, vector<string> &List)
770 {
771 char Buffer[64000];
772 char *End = Buffer;
773 // Represents any left-over from the previous iteration of the
774 // parse loop. (i.e., if a message is split across the end
775 // of the buffer, it goes here)
776 string PartialMessage;
777
778 while (1)
779 {
780 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
781 if (Res < 0 && errno == EINTR)
782 continue;
783
784 // Process is dead, this is kind of bad..
785 if (Res == 0)
786 return false;
787
788 // No data
789 if (Res < 0 && errno == EAGAIN)
790 return true;
791 if (Res < 0)
792 return false;
793
794 End += Res;
795
796 // Look for the end of the message
797 for (char *I = Buffer; I + 1 < End; I++)
798 {
799 if (I[1] != '\n' ||
800 (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
801 continue;
802
803 // Pull the message out
804 string Message(Buffer,I-Buffer);
805 PartialMessage += Message;
806
807 // Fix up the buffer
808 for (; I < End && (*I == '\n' || *I == '\r'); ++I);
809 End -= I-Buffer;
810 memmove(Buffer,I,End-Buffer);
811 I = Buffer;
812
813 List.push_back(PartialMessage);
814 PartialMessage.clear();
815 }
816 if (End != Buffer)
817 {
818 // If there's text left in the buffer, store it
819 // in PartialMessage and throw the rest of the buffer
820 // away. This allows us to handle messages that
821 // are longer than the static buffer size.
822 PartialMessage += string(Buffer, End);
823 End = Buffer;
824 }
825 else
826 {
827 // BUG ALERT: if a message block happens to end at a
828 // multiple of 64000 characters, this will cause it to
829 // terminate early, leading to a badly formed block and
830 // probably crashing the method. However, this is the only
831 // way we have to find the end of the message block. I have
832 // an idea of how to fix this, but it will require changes
833 // to the protocol (essentially to mark the beginning and
834 // end of the block).
835 //
836 // -- dburrows 2008-04-02
837 return true;
838 }
839
840 if (WaitFd(Fd) == false)
841 return false;
842 }
843 }
844 /*}}}*/
845 // MonthConv - Converts a month string into a number /*{{{*/
846 // ---------------------------------------------------------------------
847 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
848 Made it a bit more robust with a few tolower_ascii though. */
849 static int MonthConv(char *Month)
850 {
851 switch (tolower_ascii(*Month))
852 {
853 case 'a':
854 return tolower_ascii(Month[1]) == 'p'?3:7;
855 case 'd':
856 return 11;
857 case 'f':
858 return 1;
859 case 'j':
860 if (tolower_ascii(Month[1]) == 'a')
861 return 0;
862 return tolower_ascii(Month[2]) == 'n'?5:6;
863 case 'm':
864 return tolower_ascii(Month[2]) == 'r'?2:4;
865 case 'n':
866 return 10;
867 case 'o':
868 return 9;
869 case 's':
870 return 8;
871
872 // Pretend it is January..
873 default:
874 return 0;
875 }
876 }
877 /*}}}*/
878 // timegm - Internal timegm if the gnu version is not available /*{{{*/
879 // ---------------------------------------------------------------------
880 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
881 than local timezone (mktime assumes the latter).
882
883 This function is a nonstandard GNU extension that is also present on
884 the BSDs and maybe other systems. For the others we follow the advice
885 of the timegm manpage and use its portable replacement. */
886 #ifndef HAVE_TIMEGM
887 static time_t timegm(struct tm *t)
888 {
889 char *tz = getenv("TZ");
890 setenv("TZ", "", 1);
891 tzset();
892 time_t ret = mktime(t);
893 if (tz)
894 setenv("TZ", tz, 1);
895 else
896 unsetenv("TZ");
897 tzset();
898 return ret;
899 }
900 #endif
901 /*}}}*/
// FullDateToTime - Converts a HTTP1.1 full date strings into a time_t	/*{{{*/
// ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC2616 Section 3.3.1
   with one exception: All timezones (%Z) are accepted but the protocol
   says that it MUST be GMT, but this one is equal to UTC which we will
   encounter from time to time (e.g. in Release files) so we accept all
   here and just assume it is GMT (or UTC) later on.
   The parsing is done in the "C" locale, the locale active before the
   call is put back afterwards. Returns false and leaves time untouched
   if str matches none of the three formats. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   // start with defined values, strptime sets only what it parses
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // remember the active locale: the returned string may be overwritten
   // by the next setlocale call, so it has to be copied
   char const * const Current = setlocale(LC_ALL, NULL);
   std::string const SavedLocale = (Current != NULL) ? Current : "";

   setlocale (LC_ALL,"C");
   bool const invalid =
   // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
   // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
   // Sun Nov  6 08:49:37 1994 ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   // if the old locale is unknown this is "", the environment's locale
   setlocale (LC_ALL,SavedLocale.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
927 /*}}}*/
// FTPMDTMStrToTime - Converts a ftp modification date into a time_t	/*{{{*/
// ---------------------------------------------------------------------
/* Parses the YYYYMMDDHHMMSS time given in a reply to the ftp MDTM
   command and takes it as UTC. Returns false and leaves time untouched
   if str has a different format. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // start with defined values, strptime sets only what it parses
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM includes no white space, but strptime recommends separating
   // the conversions and a blank in the format matches no input as well
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
941 /*}}}*/
942 // StrToTime - Converts a string into a time_t /*{{{*/
943 // ---------------------------------------------------------------------
944 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
945 and the C library asctime format. It requires the GNU library function
946 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
947 reason the C library does not provide any such function :< This also
948 handles the weird, but unambiguous FTP time format*/
949 bool StrToTime(const string &Val,time_t &Result)
950 {
951 struct tm Tm;
952 char Month[10];
953
954 // Skip the day of the week
955 const char *I = strchr(Val.c_str(), ' ');
956
957 // Handle RFC 1123 time
958 Month[0] = 0;
959 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
960 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
961 {
962 // Handle RFC 1036 time
963 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
964 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
965 Tm.tm_year += 1900;
966 else
967 {
968 // asctime format
969 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
970 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
971 {
972 // 'ftp' time
973 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
974 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
975 return false;
976 Tm.tm_mon--;
977 }
978 }
979 }
980
981 Tm.tm_isdst = 0;
982 if (Month[0] != 0)
983 Tm.tm_mon = MonthConv(Month);
984 else
985 Tm.tm_mon = 0; // we don't have a month, so pick something
986 Tm.tm_year -= 1900;
987
988 // Convert to local time and then to GMT
989 Result = timegm(&Tm);
990 return true;
991 }
992 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. The first Len characters of Str (which need no
   terminator) are parsed with strtoul in the given Base. Fields which
   are empty or made up of blanks only count as 0. Returns false for
   fields of 30 or more characters and if no number could be parsed;
   anything following the number is ignored. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *FirstUsed = Field;
   while (*FirstUsed == ' ')
      ++FirstUsed;
   if (*FirstUsed == 0)
      return true;

   char *ParseEnd;
   Res = strtoul(Field,&ParseEnd,Base);
   return ParseEnd != Field;
}
1019 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. The first Len characters of Str (which need no
   terminator) are parsed with strtoull in the given Base. Fields which
   are empty or made up of blanks only count as 0. Returns false for
   fields of 30 or more characters and if no number could be parsed;
   anything following the number is ignored. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *FirstUsed = Field;
   while (*FirstUsed == ' ')
      ++FirstUsed;
   if (*FirstUsed == 0)
      return true;

   char *ParseEnd;
   Res = strtoull(Field,&ParseEnd,Base);
   return ParseEnd != Field;
}
1046 /*}}}*/
1047
// Base256ToNum - Convert a fixed length binary to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base 256 encoded fixed length fields in
   tar files: the highest bit of the first byte marks the encoding and
   the remaining bits of the Len bytes are the number, most significant
   byte first. Returns false if that marker bit is not set. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // add the bytes as unsigned values; as plain (signed) char every
   // byte >= 0x80 would be sign extended and garble the result
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res<<8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1064 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: gives the value (0-15) of the hex digit c, which
   may be in lower or upper case. Everything else counts as 0. */
static int HexDigit(int c)
{
   int Value = 0;
   if (c >= 'A' && c <= 'F')
      Value = 10 + (c - 'A');
   else if (c >= 'a' && c <= 'f')
      Value = 10 + (c - 'a');
   else if (c >= '0' && c <= '9')
      Value = c - '0';
   return Value;
}
1078 /*}}}*/
1079 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1080 // ---------------------------------------------------------------------
1081 /* The length of the buffer must be exactly 1/2 the length of the string. */
1082 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1083 {
1084 if (Str.length() != Length*2)
1085 return false;
1086
1087 // Convert each digit. We store it in the same order as the string
1088 int J = 0;
1089 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1090 {
1091 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1092 return false;
1093
1094 Num[J] = HexDigit(I[0]) << 4;
1095 Num[J] += HexDigit(I[1]);
1096 }
1097
1098 return true;
1099 }
1100 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memories. Input is changed to insert nulls at each token location.
   List gets pointers to the parts (with the white space around them
   removed) followed by a 0. ListMax is the number of entries List has
   room for including that 0; false is returned if it is too small. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   char * const Stop = Input + strlen(Input);

   // Strip any leading spaces
   char *Start = Input;
   while (*Start != 0 && isspace(*Start) != 0)
      ++Start;

   unsigned long Count = 0;
   for (char *Pos = Start; Pos != Stop; Start = Pos)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Back remove spaces
      char *End = Pos;
      while (End > Start && (End[-1] == Tok || isspace(End[-1]) != 0))
         --End;
      *End = 0;

      List[Count] = Start;
      ++Count;
      // the last entry is needed for the terminating 0
      if (Count >= ListMax)
      {
         List[Count-1] = 0;
         return false;
      }

      // Advance pos, also over the nulls just written
      while (Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0))
         ++Pos;
   }

   List[Count] = 0;
   return true;
}
1140 /*}}}*/
1141 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1142 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get an iterable vector */
/* Split haystack at every occurrence of split and return the pieces.
   An empty haystack gives an empty vector. Empty fields between two
   separators (and before a leading one) are kept, but a single trailing
   separator does not produce a final empty field. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   // Work on offsets rather than iterators so that nothing is ever
   // advanced beyond the end of the string.
   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const end = haystack.find(split, start);
      if (end == std::string::npos)
      {
         // last field, runs up to the end of the string
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));
      start = end + 1;
      // the separator was the very last character: no trailing empty field
      if (start == haystack.size())
         break;
   }
   return exploded;
}
1160 /*}}}*/
1161 // StringSplit - split a string into a string vector by token /*{{{*/
1162 // ---------------------------------------------------------------------
1163 /* See header for details.
1164 */
/* Cut s at every occurrence of sep and return the pieces in order.
   At most maxsplit pieces are produced: once that limit is reached the
   rest of the string, separators included, becomes the last piece.
   An empty separator yields an empty vector. */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
                                     unsigned int maxsplit)
{
   std::vector<std::string> pieces;

   // a separator is required, without one there is nothing sensible to do
   if (sep.empty() == true)
      return pieces;

   std::string::size_type begin = 0;
   for (;;)
   {
      std::string::size_type const hit = s.find(sep, begin);
      bool const lastAllowed = (pieces.size() + 1 >= maxsplit);

      // either nothing more to cut or no more cuts permitted:
      // whatever is left forms the final piece
      if (hit == std::string::npos || lastAllowed == true)
      {
         pieces.push_back(s.substr(begin));
         break;
      }

      pieces.push_back(s.substr(begin, hit - begin));
      begin = hit + sep.size();
   }
   return pieces;
}
1191 /*}}}*/
1192 // RegexChoice - Simple regex list/list matcher /*{{{*/
1193 // ---------------------------------------------------------------------
1194 /* */
1195 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1196 const char **ListEnd)
1197 {
1198 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1199 R->Hit = false;
1200
1201 unsigned long Hits = 0;
1202 for (; ListBegin < ListEnd; ++ListBegin)
1203 {
1204 // Check if the name is a regex
1205 const char *I;
1206 bool Regex = true;
1207 for (I = *ListBegin; *I != 0; I++)
1208 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1209 break;
1210 if (*I == 0)
1211 Regex = false;
1212
1213 // Compile the regex pattern
1214 regex_t Pattern;
1215 if (Regex == true)
1216 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1217 REG_NOSUB) != 0)
1218 Regex = false;
1219
1220 // Search the list
1221 bool Done = false;
1222 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1223 {
1224 if (R->Str[0] == 0)
1225 continue;
1226
1227 if (strcasecmp(R->Str,*ListBegin) != 0)
1228 {
1229 if (Regex == false)
1230 continue;
1231 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1232 continue;
1233 }
1234 Done = true;
1235
1236 if (R->Hit == false)
1237 Hits++;
1238
1239 R->Hit = true;
1240 }
1241
1242 if (Regex == true)
1243 regfree(&Pattern);
1244
1245 if (Done == false)
1246 _error->Warning(_("Selection %s not found"),*ListBegin);
1247 }
1248
1249 return Hits;
1250 }
1251 /*}}}*/
1252 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1253 // ---------------------------------------------------------------------
1254 /* This is used to make the internationalization strings easier to translate
1255 and to allow reordering of parameters */
/* Make one attempt at formatting into a buffer of 'size' bytes.
   On success the text is written to out and true is returned. If the
   buffer was too small, size is updated to the size to try next (the exact
   size if vsnprintf reported it, otherwise twice the old one) and false
   is returned. args is consumed by the attempt, so the caller has to
   restart it with va_start before trying again. */
static bool iovprintf(std::ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // The vector releases the buffer on every path and reports an
   // allocation failure instead of handing a null pointer to vsnprintf.
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
/* printf-style formatting appended to the stream out. */
void ioprintf(std::ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      // every va_start needs its va_end, also after the successful attempt
      va_end(args);
   }
}
/* printf-style formatting; out is replaced by the formatted text. */
void strprintf(std::string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      // every va_start needs its va_end, also after the successful attempt
      va_end(args);
   }
   out = outstr.str();
}
1297 /*}}}*/
1298 // safe_snprintf - Safer snprintf /*{{{*/
1299 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
/* Format into the range [Buffer,End).
   Returns a pointer to the terminating null of the written text, or End
   if there is no room at all, formatting failed or the output had to be
   truncated. The result can be passed as Buffer to the next call. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Compare lengths rather than forming Buffer + Did: for truncated
   // output that pointer would lie outside of the buffer.
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1319 /*}}}*/
1320 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1321 // ---------------------------------------------------------------------
/* Return VerStr without everything up to and including its first colon;
   a string without a colon is returned unchanged. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   if (Colon != std::string::npos)
      return VerStr.substr(Colon + 1);
   return VerStr;
}
1329 /*}}}*/
1330 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1331 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
/* Lower-case c if it is one of 'A'..'Z'; every other value is returned
   as it is. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1342 /*}}}*/
1343
1344 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1345 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1348 bool CheckDomainList(const string &Host,const string &List)
1349 {
1350 string::const_iterator Start = List.begin();
1351 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1352 {
1353 if (Cur < List.end() && *Cur != ',')
1354 continue;
1355
1356 // Match the end of the string..
1357 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1358 Cur - Start != 0 &&
1359 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1360 return true;
1361
1362 Start = Cur + 1;
1363 }
1364 return false;
1365 }
1366 /*}}}*/
1367 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1368 // ---------------------------------------------------------------------
1369 /* */
/* Count the entries of str_array in front of its terminating NULL. */
size_t strv_length(const char **str_array)
{
   const char **Cursor = str_array;
   while (*Cursor != NULL)
      ++Cursor;
   return static_cast<size_t>(Cursor - str_array);
}
1378
1379 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1380 // ---------------------------------------------------------------------
1381 /* */
/* Undo backslash escaping in input and return the result:
     \\    becomes a single backslash
     \0XX  becomes the character with the octal value XX
     \xXX  becomes the character with the hexadecimal value XX
   A backslash followed by anything else is dropped together with that
   character, as is a backslash at the very end. If fewer than two
   characters follow a \0 or \x only the backslash and selector are
   dropped and the rest is copied as ordinary text. */
std::string DeEscapeString(const std::string &input)
{
   char tmp[3];
   std::string output;
   for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (it + 1 == input.end())
         continue;

      // read it
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
         output += '\\';
         continue;
      }

      int Base = 0;
      switch (*it)
      {
         case '0':
            Base = 8;
            break;
         case 'x':
            Base = 16;
            break;
         default:
            // FIXME: raise exception here?
            break;
      }

      /* Both digits have to be inside the string. The distance is compared
         so that no iterator beyond end() is formed, and it has to be
         strictly larger than 2: with exactly 2 left the second digit would
         be read from end() and the loop increment would step over end(). */
      if (Base != 0 && input.end() - it > 2)
      {
         tmp[0] = it[1];
         tmp[1] = it[2];
         tmp[2] = 0;
         output += (char)strtol(tmp, 0, Base);
         it += 2;
      }
   }
   return output;
}
1440 /*}}}*/
1441 // URI::CopyFrom - Copy from an object /*{{{*/
1442 // ---------------------------------------------------------------------
1443 /* This parses the URI into all of its components */
1444 void URI::CopyFrom(const string &U)
1445 {
1446 string::const_iterator I = U.begin();
1447
1448 // Locate the first colon, this separates the scheme
1449 for (; I < U.end() && *I != ':' ; ++I);
1450 string::const_iterator FirstColon = I;
1451
1452 /* Determine if this is a host type URI with a leading double //
1453 and then search for the first single / */
1454 string::const_iterator SingleSlash = I;
1455 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1456 SingleSlash += 3;
1457
1458 /* Find the / indicating the end of the hostname, ignoring /'s in the
1459 square brackets */
1460 bool InBracket = false;
1461 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1462 {
1463 if (*SingleSlash == '[')
1464 InBracket = true;
1465 if (InBracket == true && *SingleSlash == ']')
1466 InBracket = false;
1467 }
1468
1469 if (SingleSlash > U.end())
1470 SingleSlash = U.end();
1471
1472 // We can now write the access and path specifiers
1473 Access.assign(U.begin(),FirstColon);
1474 if (SingleSlash != U.end())
1475 Path.assign(SingleSlash,U.end());
1476 if (Path.empty() == true)
1477 Path = "/";
1478
1479 // Now we attempt to locate a user:pass@host fragment
1480 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1481 FirstColon += 3;
1482 else
1483 FirstColon += 1;
1484 if (FirstColon >= U.end())
1485 return;
1486
1487 if (FirstColon > SingleSlash)
1488 FirstColon = SingleSlash;
1489
1490 // Find the colon...
1491 I = FirstColon + 1;
1492 if (I > SingleSlash)
1493 I = SingleSlash;
1494 for (; I < SingleSlash && *I != ':'; ++I);
1495 string::const_iterator SecondColon = I;
1496
1497 // Search for the @ after the colon
1498 for (; I < SingleSlash && *I != '@'; ++I);
1499 string::const_iterator At = I;
1500
1501 // Now write the host and user/pass
1502 if (At == SingleSlash)
1503 {
1504 if (FirstColon < SingleSlash)
1505 Host.assign(FirstColon,SingleSlash);
1506 }
1507 else
1508 {
1509 Host.assign(At+1,SingleSlash);
1510 // username and password must be encoded (RFC 3986)
1511 User.assign(DeQuoteString(FirstColon,SecondColon));
1512 if (SecondColon < At)
1513 Password.assign(DeQuoteString(SecondColon+1,At));
1514 }
1515
1516 // Now we parse the RFC 2732 [] hostnames.
1517 unsigned long PortEnd = 0;
1518 InBracket = false;
1519 for (unsigned I = 0; I != Host.length();)
1520 {
1521 if (Host[I] == '[')
1522 {
1523 InBracket = true;
1524 Host.erase(I,1);
1525 continue;
1526 }
1527
1528 if (InBracket == true && Host[I] == ']')
1529 {
1530 InBracket = false;
1531 Host.erase(I,1);
1532 PortEnd = I;
1533 continue;
1534 }
1535 I++;
1536 }
1537
1538 // Tsk, weird.
1539 if (InBracket == true)
1540 {
1541 Host.clear();
1542 return;
1543 }
1544
1545 // Now we parse off a port number from the hostname
1546 Port = 0;
1547 string::size_type Pos = Host.rfind(':');
1548 if (Pos == string::npos || Pos < PortEnd)
1549 return;
1550
1551 Port = atoi(string(Host,Pos+1).c_str());
1552 Host.assign(Host,0,Pos);
1553 }
1554 /*}}}*/
1555 // URI::operator string - Convert the URI to a string /*{{{*/
1556 // ---------------------------------------------------------------------
1557 /* */
1558 URI::operator string()
1559 {
1560 string Res;
1561
1562 if (Access.empty() == false)
1563 Res = Access + ':';
1564
1565 if (Host.empty() == false)
1566 {
1567 if (Access.empty() == false)
1568 Res += "//";
1569
1570 if (User.empty() == false)
1571 {
1572 // FIXME: Technically userinfo is permitted even less
1573 // characters than these, but this is not conveniently
1574 // expressed with a blacklist.
1575 Res += QuoteString(User, ":/?#[]@");
1576 if (Password.empty() == false)
1577 Res += ":" + QuoteString(Password, ":/?#[]@");
1578 Res += "@";
1579 }
1580
1581 // Add RFC 2732 escaping characters
1582 if (Access.empty() == false &&
1583 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1584 Res += '[' + Host + ']';
1585 else
1586 Res += Host;
1587
1588 if (Port != 0)
1589 {
1590 char S[30];
1591 sprintf(S,":%u",Port);
1592 Res += S;
1593 }
1594 }
1595
1596 if (Path.empty() == false)
1597 {
1598 if (Path[0] != '/')
1599 Res += "/" + Path;
1600 else
1601 Res += Path;
1602 }
1603
1604 return Res;
1605 }
1606 /*}}}*/
1607 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1608 // ---------------------------------------------------------------------
1609 /* */
1610 string URI::SiteOnly(const string &URI)
1611 {
1612 ::URI U(URI);
1613 U.User.clear();
1614 U.Password.clear();
1615 U.Path.clear();
1616 return U;
1617 }
1618 /*}}}*/
1619 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1620 // ---------------------------------------------------------------------
1621 /* */
1622 string URI::NoUserPassword(const string &URI)
1623 {
1624 ::URI U(URI);
1625 U.User.clear();
1626 U.Password.clear();
1627 return U;
1628 }
1629 /*}}}*/