]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
make compressed-indexes test pass again
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
// Strip - copy of s without leading and trailing ' ', '\t' and '\n'
std::string Strip(const std::string &s)
{
   static const char * const Blank = " \t\n";
   std::string::size_type const first = s.find_first_not_of(Blank);
   // empty or made up of nothing but blanks
   if (first == std::string::npos)
      return std::string();
   std::string::size_type const last = s.find_last_not_of(Blank);
   return std::string(s, first, last - first + 1);
}

// Endswith - true if the last end.size() characters of s equal end
bool Endswith(const std::string &s, const std::string &end)
{
   return end.size() <= s.size() &&
      s.compare(s.size() - end.size(), end.size(), end) == 0;
}

// Startswith - true if the first start.size() characters of s equal start
bool Startswith(const std::string &s, const std::string &start)
{
   return start.size() <= s.size() &&
      s.compare(0, start.size(), start) == 0;
}

   }
}
74 /*}}}*/
75 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
76 // ---------------------------------------------------------------------
77 /* This is handy to use before displaying some information to the end user */
78 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
79 {
80 iconv_t cd;
81 const char *inbuf;
82 char *inptr, *outbuf;
83 size_t insize, bufsize;
84 dest->clear();
85
86 cd = iconv_open(codeset, "UTF-8");
87 if (cd == (iconv_t)(-1)) {
88 // Something went wrong
89 if (errno == EINVAL)
90 _error->Error("conversion from 'UTF-8' to '%s' not available",
91 codeset);
92 else
93 perror("iconv_open");
94
95 return false;
96 }
97
98 insize = bufsize = orig.size();
99 inbuf = orig.data();
100 inptr = (char *)inbuf;
101 outbuf = new char[bufsize];
102 size_t lastError = -1;
103
104 while (insize != 0)
105 {
106 char *outptr = outbuf;
107 size_t outsize = bufsize;
108 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
109 dest->append(outbuf, outptr - outbuf);
110 if (err == (size_t)(-1))
111 {
112 switch (errno)
113 {
114 case EILSEQ:
115 insize--;
116 inptr++;
117 // replace a series of unknown multibytes with a single "?"
118 if (lastError != insize) {
119 lastError = insize - 1;
120 dest->append("?");
121 }
122 break;
123 case EINVAL:
124 insize = 0;
125 break;
126 case E2BIG:
127 if (outptr == outbuf)
128 {
129 bufsize *= 2;
130 delete[] outbuf;
131 outbuf = new char[bufsize];
132 }
133 break;
134 }
135 }
136 }
137
138 delete[] outbuf;
139
140 iconv_close(cd);
141
142 return true;
143 }
144 /*}}}*/
145 // strstrip - Remove white space from the front and back of a string /*{{{*/
146 // ---------------------------------------------------------------------
147 /* This is handy to use when parsing a file. It also removes \n's left
148 over from fgets and company */
149 char *_strstrip(char *String)
150 {
151 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
152
153 if (*String == 0)
154 return String;
155 return _strrstrip(String);
156 }
157 /*}}}*/
158 // strrstrip - Remove white space from the back of a string /*{{{*/
159 // ---------------------------------------------------------------------
/* Cuts trailing spaces, tabs, newlines and carriage returns off String by
   writing a terminating NUL behind the last other character. Returns
   String itself. Works on the length rather than on an end pointer so no
   pointer before the start of the buffer is ever formed (the previous
   code computed String - 1 for an empty string). */
char *_strrstrip(char *String)
{
   size_t Len = strlen(String);
   while (Len != 0)
   {
      char const Last = String[Len - 1];
      if (Last != ' ' && Last != '\t' && Last != '\n' && Last != '\r')
         break;
      --Len;
   }
   String[Len] = 0;
   return String;
}
169 /*}}}*/
170 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
171 // ---------------------------------------------------------------------
172 /* */
/* Replaces tabs in the NUL terminated String by spaces, in place. Len is
   the size of the buffer String lives in. If a tab has less than 8 bytes
   of buffer left behind it, or the expanded text would not fit into the
   buffer, the string is cut at that tab. Returns String.

   Fixes over the previous version: the loop is bounded by the buffer end
   (it used to compare I with I + Len, which is never equal), the padding
   width no longer shadows the Len parameter, the tail is moved without
   reading one byte past its NUL and it is only moved if it fits.

   NOTE(review): the padding arithmetic is kept unchanged. It produces a
   single space for a tab at the very start and 6 + (offset % 8) spaces
   otherwise, which reaches an 8 column tab stop only for a tab at offset
   1, and the character right after an expanded tab is skipped by the loop
   increment. Confirm the intended behaviour before touching it. */
char *_strtabexpand(char *String,size_t Len)
{
   char * const BufEnd = String + Len;
   for (char *I = String; I != BufEnd && *I != 0; I++)
   {
      if (*I != '\t')
         continue;
      if (I + 8 > BufEnd)
      {
         *I = 0;
         return String;
      }

      /* Assume the start of the string is 0 and find the next 8 char
         division */
      int Pad;
      if (String == I)
         Pad = 1;
      else
         Pad = 8 - ((String - I) % 8);
      Pad -= 2;
      if (Pad <= 0)
      {
         *I = ' ';
         continue;
      }

      // strlen(I) counts the tab, i.e. it is the size of the tail
      // behind the tab including its terminating NUL
      size_t const Tail = strlen(I);
      if ((size_t)(BufEnd - I) < (size_t)Pad + Tail)
      {
         *I = 0;
         return String;
      }

      memmove(I + Pad,I + 1,Tail);
      for (char * const Stop = I + Pad; I != Stop; ++I)
         *I = ' ';
   }
   return String;
}
204 /*}}}*/
205 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
206 // ---------------------------------------------------------------------
207 /* This grabs a single word, converts any % escaped characters to their
208 proper values and advances the pointer. Double quotes are understood
209 and stripped out as well. This is for URI/URL parsing. It also can
210 understand [] brackets.*/
/* Extracts the next word from String into Res and moves String behind the
   word and the whitespace following it. A word ends at the first
   whitespace that is not inside "" or []. %XX escapes (two hex digits)
   are replaced by the byte they encode and double quotes are dropped.
   Returns false, leaving String and Res alone, if only blanks are left or
   a quote/bracket is not closed.

   The word is built directly in Res: the previous fixed 1024 byte buffer
   truncated longer words and wrote its terminator one byte past the
   buffer when it was full.

   NOTE(review): de-quoting starts at String rather than at the first
   non-blank, so skipped leading blanks end up in Res; kept as it was. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; C++);
   if (*C == 0)
      return false;

   // Jump to the next word
   for (; *C != 0 && isspace((unsigned char)*C) == 0; C++)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters
   Res.clear();
   Res.reserve(C - String);
   const char *Start = String;
   while (Start != C)
   {
      if (*Start == '%' && Start + 2 < C &&
          isxdigit((unsigned char)Start[1]) && isxdigit((unsigned char)Start[2]))
      {
         char const Tmp[3] = { Start[1], Start[2], 0 };
         Res.push_back((char)strtol(Tmp,0,16));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Res.push_back(*Start);
      ++Start;
   }

   // Skip ending white space
   for (; *C != 0 && isspace((unsigned char)*C) != 0; C++);
   String = C;
   return true;
}
267 /*}}}*/
268 // ParseCWord - Parses a string like a C "" expression /*{{{*/
269 // ---------------------------------------------------------------------
270 /* This expects a series of space separated strings enclosed in ""'s.
271 It concatenates the ""'s into a single string. */
/* Joins a sequence of "" enclosed strings into Res. The text between the
   quotes is copied verbatim, each run of whitespace between two strings
   becomes a single space and anything else outside of quotes makes the
   call fail. On success String points to the terminating NUL. Inputs of
   1024 bytes or more are rejected. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == '\0')
      return false;

   // size limit inherited from the fixed work buffer used historically
   if (strlen(String) >= 1024)
      return false;

   std::string Joined;
   for (; *Cur != '\0'; ++Cur)
   {
      if (*Cur == '"')
      {
         // copy up to, but not including, the closing quote
         for (++Cur; *Cur != '\0' && *Cur != '"'; ++Cur)
            Joined += *Cur;

         // unterminated string
         if (*Cur == '\0')
            return false;

         continue;
      }

      // second and later characters of a whitespace run add nothing
      bool const InsideRun = Cur != String && isspace(*Cur) != 0 &&
         isspace(Cur[-1]) != 0;
      if (InsideRun == true)
         continue;
      if (isspace(*Cur) == 0)
         return false;
      Joined += ' ';
   }

   Res = Joined;
   String = Cur;
   return true;
}
309 /*}}}*/
310 // QuoteString - Convert a string into quoted form /*{{{*/
311 // ---------------------------------------------------------------------
312 /* */
/* Returns Str with every byte that is listed in Bad, is '%', is not
   printable or lies outside of 0x21..0x7E replaced by %xx, xx being the
   two digit lower case hex value of the byte.

   Bytes are handled as unsigned char: with a signed char type bytes
   >= 0x80 used to be formatted sign extended ("%ffffffe9" instead of
   "%e9") and were handed to isprint() as negative values. */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   std::string Res;
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch == 0x25 ||                 // percent '%' char
          Ch <= 0x20 || Ch >= 0x7F)     // control chars, space, non-ASCII
      {
         char Buf[10];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
331 /*}}}*/
332 // DeQuoteString - Convert a string from quoted form /*{{{*/
333 // ---------------------------------------------------------------------
334 /* This undoes QuoteString */
335 string DeQuoteString(const string &Str)
336 {
337 return DeQuoteString(Str.begin(),Str.end());
338 }
339 string DeQuoteString(string::const_iterator const &begin,
340 string::const_iterator const &end)
341 {
342 string Res;
343 for (string::const_iterator I = begin; I != end; ++I)
344 {
345 if (*I == '%' && I + 2 < end &&
346 isxdigit(I[1]) && isxdigit(I[2]))
347 {
348 char Tmp[3];
349 Tmp[0] = I[1];
350 Tmp[1] = I[2];
351 Tmp[2] = 0;
352 Res += (char)strtol(Tmp,0,16);
353 I += 2;
354 continue;
355 }
356 else
357 Res += *I;
358 }
359 return Res;
360 }
361
362 /*}}}*/
363 // SizeToStr - Convert a long into a human readable size /*{{{*/
364 // ---------------------------------------------------------------------
365 /* A max of 4 digits are shown before conversion to the next highest unit.
366 The max length of the string will be 5 chars unless the size is > 10
367 YottaBytes (E24) */
/* Formats the absolute value of Size with a decimal (1000 based) unit
   suffix. The value is divided by 1000 until it is below 10000; values
   below 100 in a unit other than bytes get one decimal. Plain bytes have
   no suffix character, so their result ends with the separating space.

   Values too large even for the 'Y' unit are printed in 'Y' instead of
   returning the contents of an uninitialised buffer as before, and the
   formatting is bounded by the buffer size. */
std::string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   int const LastUnit = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // move to the first unit in which at most four digits are needed
   int I = 0;
   while (I < LastUnit && ASize >= 10000)
   {
      ASize /= 1000.0;
      I++;
   }

   char S[300];
   if (I != 0 && ASize < 100)
      snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);
   return S;
}
400 /*}}}*/
401 // TimeToStr - Convert the time into a string /*{{{*/
402 // ---------------------------------------------------------------------
403 /* Converts a number of seconds to a hms format */
404 string TimeToStr(unsigned long Sec)
405 {
406 char S[300];
407
408 while (1)
409 {
410 if (Sec > 60*60*24)
411 {
412 //d means days, h means hours, min means minutes, s means seconds
413 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
414 break;
415 }
416
417 if (Sec > 60*60)
418 {
419 //h means hours, min means minutes, s means seconds
420 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
421 break;
422 }
423
424 if (Sec > 60)
425 {
426 //min means minutes, s means seconds
427 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
428 break;
429 }
430
431 //s means seconds
432 sprintf(S,_("%lis"),Sec);
433 break;
434 }
435
436 return S;
437 }
438 /*}}}*/
439 // SubstVar - Substitute a string for another string /*{{{*/
440 // ---------------------------------------------------------------------
441 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with every non-overlapping occurrence of Subst, searched
   from left to right, replaced by Contents. Str is returned as is if
   Subst is empty or does not occur. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty())
      return Str;

   std::string Out;
   // offset of the first character not yet copied or replaced
   std::string::size_type Consumed = 0;
   while (Consumed < Str.length())
   {
      std::string::size_type const Hit = Str.find(Subst,Consumed);
      if (Hit == std::string::npos)
         break;
      Out.append(Str, Consumed, Hit - Consumed);
      Out.append(Contents);
      Consumed = Hit + Subst.length();
   }

   // not a single match
   if (Consumed == 0)
      return Str;

   // text behind the last match
   if (Consumed < Str.length())
      Out.append(Str, Consumed, std::string::npos);
   return Out;
}
468 string SubstVar(string Str,const struct SubstVar *Vars)
469 {
470 for (; Vars->Subst != 0; Vars++)
471 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
472 return Str;
473 }
474 /*}}}*/
475 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
476 // ---------------------------------------------------------------------
477 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string consisting of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Indent;
   unsigned long Remaining = Depth + 1;
   while (Remaining != 0)
   {
      Indent += Separator;
      --Remaining;
   }
   return Indent;
}
485 /*}}}*/
486 // URItoFileName - Convert the uri into a unique file name /*{{{*/
487 // ---------------------------------------------------------------------
488 /* This converts a URI into a safe filename. It quotes all unsafe characters
489 and converts / to _ and removes the scheme identifier. The resulting
490 file name should be unique and never occur again for a different file */
491 string URItoFileName(const string &URI)
492 {
493 // Nuke 'sensitive' items
494 ::URI U(URI);
495 U.User.clear();
496 U.Password.clear();
497 U.Access.clear();
498
499 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
500 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
501 replace(NewURI.begin(),NewURI.end(),'/','_');
502 return NewURI;
503 }
504 /*}}}*/
505 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
506 // ---------------------------------------------------------------------
507 /* This routine performs a base64 transformation on a string. It was ripped
508 from wget and then patched and bug fixed.
509
510 This spec can be found in rfc2045 */
/* Returns the base64 (rfc2045) encoding of S including '=' padding.

   Input bytes are treated as unsigned char; with a signed char type bytes
   >= 0x80 used to produce negative indexes into the conversion table. The
   input is walked by index so no iterator is moved past the end of S. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space: four output chars per started input triple
   std::string Final;
   Final.reserve((S.length() + 2) / 3 * 4);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   std::string::size_type const Len = S.length();
   for (std::string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      std::string::size_type const Avail = Len - Pos;
      unsigned char const B0 = S[Pos];
      unsigned char const B1 = (Avail > 1) ? S[Pos + 1] : 0;
      unsigned char const B2 = (Avail > 2) ? S[Pos + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
561 /*}}}*/
562 // stringcmp - Arbitrary string compare /*{{{*/
563 // ---------------------------------------------------------------------
564 /* This safely compares two non-null terminated strings of arbitrary
565 length */
/* Compares [A,AEnd) with [B,BEnd). Returns 0 if both ranges are equal.
   If one range is a proper prefix of the other, 1 is returned when it is
   the A range that ends first and -1 when it is the B range. Otherwise
   the first differing characters decide: -1 if *A < *B, else 1. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk both ranges in lock-step up to the first difference
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
582
583 #if __GNUC__ >= 3
/* Same contract as the pointer based stringcmp() with the A range given
   by string iterators: 0 if equal, 1 if A ends first, -1 if B ends first,
   otherwise -1 or 1 depending on the first differing characters. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // skip the common prefix
   for (; A != AEnd && B != BEnd; ++A, ++B)
   {
      if (*A != *B)
         break;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same contract as the pointer based stringcmp() with both ranges given
   by string iterators: 0 if equal, 1 if A ends first, -1 if B ends first,
   otherwise -1 or 1 depending on the first differing characters. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   // advance while both ranges have characters and these agree
   while (A != AEnd && B != BEnd)
   {
      if (*A != *B)
         break;
      ++A;
      ++B;
   }

   bool const AExhausted = (A == AEnd);
   bool const BExhausted = (B == BEnd);
   if (AExhausted && BExhausted)
      return 0;
   if (AExhausted)
      return 1;
   if (BExhausted)
      return -1;
   return (*A < *B) ? -1 : 1;
}
618 #endif
619 /*}}}*/
620 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
621 // ---------------------------------------------------------------------
622 /* */
623 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
624 {
625 for (; A != AEnd && B != BEnd; A++, B++)
626 if (tolower_ascii(*A) != tolower_ascii(*B))
627 break;
628
629 if (A == AEnd && B == BEnd)
630 return 0;
631 if (A == AEnd)
632 return 1;
633 if (B == BEnd)
634 return -1;
635 if (tolower_ascii(*A) < tolower_ascii(*B))
636 return -1;
637 return 1;
638 }
639 #if __GNUC__ >= 3
640 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
641 const char *B,const char *BEnd)
642 {
643 for (; A != AEnd && B != BEnd; A++, B++)
644 if (tolower_ascii(*A) != tolower_ascii(*B))
645 break;
646
647 if (A == AEnd && B == BEnd)
648 return 0;
649 if (A == AEnd)
650 return 1;
651 if (B == BEnd)
652 return -1;
653 if (tolower_ascii(*A) < tolower_ascii(*B))
654 return -1;
655 return 1;
656 }
657 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
658 string::const_iterator B,string::const_iterator BEnd)
659 {
660 for (; A != AEnd && B != BEnd; A++, B++)
661 if (tolower_ascii(*A) != tolower_ascii(*B))
662 break;
663
664 if (A == AEnd && B == BEnd)
665 return 0;
666 if (A == AEnd)
667 return 1;
668 if (B == BEnd)
669 return -1;
670 if (tolower_ascii(*A) < tolower_ascii(*B))
671 return -1;
672 return 1;
673 }
674 #endif
675 /*}}}*/
676 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
677 // ---------------------------------------------------------------------
678 /* The format is like those used in package files and the method
679 communication system */
680 string LookupTag(const string &Message,const char *Tag,const char *Default)
681 {
682 // Look for a matching tag.
683 int Length = strlen(Tag);
684 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
685 {
686 // Found the tag
687 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
688 {
689 // Find the end of line and strip the leading/trailing spaces
690 string::const_iterator J;
691 I += Length + 1;
692 for (; isspace(*I) != 0 && I < Message.end(); ++I);
693 for (J = I; *J != '\n' && J < Message.end(); ++J);
694 for (; J > I && isspace(J[-1]) != 0; --J);
695
696 return string(I,J);
697 }
698
699 for (; *I != '\n' && I < Message.end(); ++I);
700 }
701
702 // Failed to find a match
703 if (Default == 0)
704 return string();
705 return Default;
706 }
707 /*}}}*/
708 // StringToBool - Converts a string into a boolean /*{{{*/
709 // ---------------------------------------------------------------------
710 /* This inspects the string to see if it is true or if it is false and
711 then returns the result. Several variants of true/false are checked. */
/* Returns 0 or 1 if Text is the number 0 or 1 (any base strtol accepts
   with base 0) or one of the words below, compared without regard to
   case. Everything else yields Default.

   The number has to span the whole text: strings that merely start with
   a digit, like the package name "0ad", are no longer taken for a
   boolean. The value is kept as long so large numbers cannot wrap into
   the 0..1 range. */
int StringToBool(const std::string &Text,int Default)
{
   char const * const Start = Text.c_str();
   char *End;
   long const Res = strtol(Start,&End,0);
   if (End != Start && *End == '\0' && Res >= 0 && Res <= 1)
      return Res;

   // Check for negatives
   if (strcasecmp(Start,"no") == 0 ||
       strcasecmp(Start,"false") == 0 ||
       strcasecmp(Start,"without") == 0 ||
       strcasecmp(Start,"off") == 0 ||
       strcasecmp(Start,"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Start,"yes") == 0 ||
       strcasecmp(Start,"true") == 0 ||
       strcasecmp(Start,"with") == 0 ||
       strcasecmp(Start,"on") == 0 ||
       strcasecmp(Start,"enable") == 0)
      return 1;

   return Default;
}
737 /*}}}*/
738 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
739 // ---------------------------------------------------------------------
740 /* This converts a time_t into a string time representation that is
741 year 2000 compliant and timezone neutral */
/* Formats Date as "Day, DD Mon YYYY HH:MM:SS GMT" using the broken down
   UTC time from gmtime_r() and fixed English day and month names.
   Returns an empty string if gmtime_r() fails. */
std::string TimeRFC1123(time_t Date)
{
   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                             "Aug","Sep","Oct","Nov","Dec"};

   struct tm Utc;
   if (gmtime_r(&Date, &Utc) == NULL)
      return "";

   char Text[300];
   snprintf(Text, sizeof(Text), "%s, %02i %s %i %02i:%02i:%02i GMT",
            DayNames[Utc.tm_wday], Utc.tm_mday, MonthNames[Utc.tm_mon],
            Utc.tm_year+1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
   return Text;
}
758 /*}}}*/
759 // ReadMessages - Read messages from the FD /*{{{*/
760 // ---------------------------------------------------------------------
761 /* This pulls full messages from the input FD into the message buffer.
762 It assumes that messages will not pause during transit so no
763 fancy buffering is used.
764
765 In particular: this reads blocks from the input until it believes
766 that it's run out of input text. Each block is terminated by a
767 double newline ('\n' followed by '\n'). As noted below, there is a
768 bug in this code: it assumes that all the blocks have been read if
769 it doesn't see additional text in the buffer after the last one is
770 parsed, which will cause it to lose blocks if the last block
771 coincides with the end of the buffer.
772 */
773 bool ReadMessages(int Fd, vector<string> &List)
774 {
775 char Buffer[64000];
776 char *End = Buffer;
777 // Represents any left-over from the previous iteration of the
778 // parse loop. (i.e., if a message is split across the end
779 // of the buffer, it goes here)
780 string PartialMessage;
781
782 while (1)
783 {
784 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
785 if (Res < 0 && errno == EINTR)
786 continue;
787
788 // Process is dead, this is kind of bad..
789 if (Res == 0)
790 return false;
791
792 // No data
793 if (Res < 0 && errno == EAGAIN)
794 return true;
795 if (Res < 0)
796 return false;
797
798 End += Res;
799
800 // Look for the end of the message
801 for (char *I = Buffer; I + 1 < End; I++)
802 {
803 if (I[1] != '\n' ||
804 (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
805 continue;
806
807 // Pull the message out
808 string Message(Buffer,I-Buffer);
809 PartialMessage += Message;
810
811 // Fix up the buffer
812 for (; I < End && (*I == '\n' || *I == '\r'); ++I);
813 End -= I-Buffer;
814 memmove(Buffer,I,End-Buffer);
815 I = Buffer;
816
817 List.push_back(PartialMessage);
818 PartialMessage.clear();
819 }
820 if (End != Buffer)
821 {
822 // If there's text left in the buffer, store it
823 // in PartialMessage and throw the rest of the buffer
824 // away. This allows us to handle messages that
825 // are longer than the static buffer size.
826 PartialMessage += string(Buffer, End);
827 End = Buffer;
828 }
829 else
830 {
831 // BUG ALERT: if a message block happens to end at a
832 // multiple of 64000 characters, this will cause it to
833 // terminate early, leading to a badly formed block and
834 // probably crashing the method. However, this is the only
835 // way we have to find the end of the message block. I have
836 // an idea of how to fix this, but it will require changes
837 // to the protocol (essentially to mark the beginning and
838 // end of the block).
839 //
840 // -- dburrows 2008-04-02
841 return true;
842 }
843
844 if (WaitFd(Fd) == false)
845 return false;
846 }
847 }
848 /*}}}*/
849 // MonthConv - Converts a month string into a number /*{{{*/
850 // ---------------------------------------------------------------------
851 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
852 Made it a bit more robust with a few tolower_ascii though. */
853 static int MonthConv(char *Month)
854 {
855 switch (tolower_ascii(*Month))
856 {
857 case 'a':
858 return tolower_ascii(Month[1]) == 'p'?3:7;
859 case 'd':
860 return 11;
861 case 'f':
862 return 1;
863 case 'j':
864 if (tolower_ascii(Month[1]) == 'a')
865 return 0;
866 return tolower_ascii(Month[2]) == 'n'?5:6;
867 case 'm':
868 return tolower_ascii(Month[2]) == 'r'?2:4;
869 case 'n':
870 return 10;
871 case 'o':
872 return 9;
873 case 's':
874 return 8;
875
876 // Pretend it is January..
877 default:
878 return 0;
879 }
880 }
881 /*}}}*/
882 // timegm - Internal timegm if the gnu version is not available /*{{{*/
883 // ---------------------------------------------------------------------
884 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
885 than local timezone (mktime assumes the latter).
886
887 This function is a nonstandard GNU extension that is also present on
888 the BSDs and maybe other systems. For others we follow the advice of
889 the manpage of timegm and use his portable replacement. */
890 #ifndef HAVE_TIMEGM
891 static time_t timegm(struct tm *t)
892 {
893 char *tz = getenv("TZ");
894 setenv("TZ", "", 1);
895 tzset();
896 time_t ret = mktime(t);
897 if (tz)
898 setenv("TZ", tz, 1);
899 else
900 unsetenv("TZ");
901 tzset();
902 return ret;
903 }
904 #endif
905 /*}}}*/
906 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
907 // ---------------------------------------------------------------------
908 /* tries to parse a full date as specified in RFC2616 Section 3.3.1
909 with one exception: All timezones (%Z) are accepted but the protocol
910 says that it MUST be GMT, but this one is equal to UTC which we will
911 encounter from time to time (e.g. in Release files) so we accept all
912 here and just assume it is GMT (or UTC) later on */
/* Tries the three HTTP date layouts below on str and, if one matches,
   stores the result of timegm() in time and returns true. Returns false
   without touching time if none matches.

   Parsing is done in the "C" locale so the English day and month names
   are recognised. Afterwards the locale that was active on entry is put
   back; the previous code switched to the environment locale ("")
   instead, whatever had been configured before. The struct tm is zeroed
   first so no uninitialised member reaches timegm().

   NOTE(review): setlocale() changes the locale of the whole process, so
   this is not safe to call while other threads depend on the locale. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   char const * const current = setlocale(LC_ALL, NULL);
   // fall back to the environment locale if the query fails
   std::string const savedLocale = (current != NULL) ? current : "";

   setlocale (LC_ALL,"C");
   bool const invalid =
   // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
   // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
   // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   setlocale (LC_ALL,savedLocale.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
931 /*}}}*/
932 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
933 // ---------------------------------------------------------------------
934 /* */
/* Parses a YYYYMMDDHHMMSS timestamp and stores the result of timegm() in
   time. Returns false without touching time if str does not match. The
   struct tm is zeroed first so members strptime() does not set are not
   handed to timegm() uninitialised. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM values contain no whitespace; the blanks in the format only
   // separate the conversions and also match an empty input sequence
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
945 /*}}}*/
946 // StrToTime - Converts a string into a time_t /*{{{*/
947 // ---------------------------------------------------------------------
948 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
949 and the C library asctime format. It requires the GNU library function
950 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
951 reason the C library does not provide any such function :< This also
952 handles the weird, but unambiguous FTP time format*/
953 bool StrToTime(const string &Val,time_t &Result)
954 {
955 struct tm Tm;
956 char Month[10];
957
958 // Skip the day of the week
959 const char *I = strchr(Val.c_str(), ' ');
960
961 // Handle RFC 1123 time
962 Month[0] = 0;
963 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
964 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
965 {
966 // Handle RFC 1036 time
967 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
968 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
969 Tm.tm_year += 1900;
970 else
971 {
972 // asctime format
973 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
974 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
975 {
976 // 'ftp' time
977 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
978 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
979 return false;
980 Tm.tm_mon--;
981 }
982 }
983 }
984
985 Tm.tm_isdst = 0;
986 if (Month[0] != 0)
987 Tm.tm_mon = MonthConv(Month);
988 else
989 Tm.tm_mon = 0; // we don't have a month, so pick something
990 Tm.tm_year -= 1900;
991
992 // Convert to local time and then to GMT
993 Result = timegm(&Tm);
994 return true;
995 }
996 /*}}}*/
997 // StrToNum - Convert a fixed length string to a number /*{{{*/
998 // ---------------------------------------------------------------------
999 /* This is used in decoding the crazy fixed length string headers in
1000 tar and ar files. */
/* Converts the first Len characters of Str, which need not be NUL
   terminated, to a number of the given Base. A field consisting of
   spaces only (or of no characters at all) yields 0. Returns false if Len
   is 30 or more or if strtoul() could not convert anything. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // private, terminated copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   const char *Scan = Field;
   while (*Scan == ' ')
      ++Scan;
   if (*Scan == '\0')
      return true;

   char *ParseEnd;
   Res = strtoul(Field,&ParseEnd,Base);
   return ParseEnd != Field;
}
1023 /*}}}*/
1024 // StrToNum - Convert a fixed length string to a number /*{{{*/
1025 // ---------------------------------------------------------------------
1026 /* This is used in decoding the crazy fixed length string headers in
1027 tar and ar files. */
/* Converts the first Len characters of Str, which need not be NUL
   terminated, to a number of the given Base. A field consisting of
   spaces only (or of no characters at all) yields 0. Returns false if Len
   is 30 or more or if strtoull() could not convert anything. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   // private, terminated copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   const char *Scan = Field;
   while (*Scan == ' ')
      ++Scan;
   if (*Scan == '\0')
      return true;

   char *ParseEnd;
   Res = strtoull(Field,&ParseEnd,Base);
   return ParseEnd != Field;
}
1050 /*}}}*/
1051
1052 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1053 // ---------------------------------------------------------------------
1054 /* This is used in decoding the 256bit encoded fixed length fields in
1055 tar files */
/* Decodes a big endian base-256 field of Len bytes. The field has to be
   flagged by the top bit of its first byte, otherwise false is returned
   and Res is left alone. The remaining 7 bits of the first byte are the
   most significant part of the number.

   Bytes are added as unsigned char: with a signed char type every byte
   >= 0x80 used to be sign extended and corrupted the result. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1068 /*}}}*/
1069 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1070 // ---------------------------------------------------------------------
1071 /* This is used in decoding the 256bit encoded fixed length fields in
1072 tar files */
1073 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1074 {
1075 unsigned long long Num;
1076 bool rc;
1077
1078 rc = Base256ToNum(Str, Num, Len);
1079 Res = Num;
1080 if (Res != Num)
1081 return false;
1082
1083 return rc;
1084 }
1085 /*}}}*/
1086 // HexDigit - Convert a hex character into an integer /*{{{*/
1087 // ---------------------------------------------------------------------
1088 /* Helper for Hex2Num */
/* Value of the hex digit c (either case); 0 for anything that is not a
   hex digit. */
static int HexDigit(int c)
{
   bool const Decimal = ('0' <= c && c <= '9');
   if (Decimal == true)
      return c - '0';

   bool const Lower = ('a' <= c && c <= 'f');
   if (Lower == true)
      return 10 + (c - 'a');

   bool const Upper = ('A' <= c && c <= 'F');
   return (Upper == true) ? 10 + (c - 'A') : 0;
}
1099 /*}}}*/
1100 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1101 // ---------------------------------------------------------------------
1102 /* The length of the buffer must be exactly 1/2 the length of the string. */
1103 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1104 {
1105 if (Str.length() != Length*2)
1106 return false;
1107
1108 // Convert each digit. We store it in the same order as the string
1109 int J = 0;
1110 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1111 {
1112 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1113 return false;
1114
1115 Num[J] = HexDigit(I[0]) << 4;
1116 Num[J] += HexDigit(I[1]);
1117 }
1118
1119 return true;
1120 }
1121 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed in place: a NUL is written at the end of each
   item and List receives pointers into Input. Whitespace and repeated
   tokens around the items are skipped.

   List is terminated with a null pointer, so it must have room for one
   entry more than there are items; ListMax is its total capacity. If the
   items do not fit false is returned and List holds the (terminated)
   items that did fit. */
bool TokSplitString(char Tok,char *Input,char **List,
		    unsigned long ListMax)
{
   // Not even the terminating entry could be stored
   if (ListMax == 0)
      return false;

   /* Note: the <ctype.h> classifiers are only defined for values
      representable as unsigned char, hence the casts below */

   // Strip any leading spaces
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start &&
	     (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
	 // No room left for the terminator, sacrifice the last item for it
	 List[Count-1] = 0;
	 return false;
      }

      // Advance pos, also over the NUL we may just have written
      for (; Pos != Stop &&
	     (*Pos == Tok || isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1161 /*}}}*/
// VectorizeString - Split a string up into a vector of strings	/*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as for TokSplitString. The haystack is not
   modified and the result is an iterable vector.

   Empty items between two separators and before a leading separator are
   kept. An empty haystack gives an empty vector and a separator at the
   very end of the haystack does not add a final empty item. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   // Work with offsets so we never have to form an iterator beyond end()
   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const end = haystack.find(split, start);
      if (end == std::string::npos)
      {
	 exploded.push_back(haystack.substr(start));
	 break;
      }
      exploded.push_back(haystack.substr(start, end - start));
      start = end + 1;
      // the separator was the last character: no trailing empty item
      if (start == haystack.size())
	 break;
   }
   return exploded;
}
1181 /*}}}*/
// StringSplit - split a string into a string vector by token		/*{{{*/
// ---------------------------------------------------------------------
/* Cuts s at every occurrence of the separator string sep and returns the
   pieces in order, empty pieces included. At most maxsplit pieces are
   produced: once that many are reached the last piece is the unsplit
   remainder of s (a maxsplit of 0 behaves like 1). An empty separator is
   bogus and gives an empty vector. */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
				     unsigned int maxsplit)
{
   std::vector<std::string> Parts;

   // no separator given, nothing sensible can be done
   if (sep.empty() == true)
      return Parts;

   std::string::size_type Begin = 0;
   for (;;)
   {
      std::string::size_type const Hit = s.find(sep, Begin);

      // either nothing left to cut or the next piece is the last allowed
      // one: in both cases it is whatever remains of the string
      if (Hit == std::string::npos || Parts.size() + 1 >= maxsplit)
      {
	 Parts.push_back(s.substr(Begin));
	 break;
      }

      Parts.push_back(s.substr(Begin, Hit - Begin));
      Begin = Hit + sep.size();
   }
   return Parts;
}
1212 /*}}}*/
1213 // RegexChoice - Simple regex list/list matcher /*{{{*/
1214 // ---------------------------------------------------------------------
1215 /* */
1216 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1217 const char **ListEnd)
1218 {
1219 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1220 R->Hit = false;
1221
1222 unsigned long Hits = 0;
1223 for (; ListBegin < ListEnd; ++ListBegin)
1224 {
1225 // Check if the name is a regex
1226 const char *I;
1227 bool Regex = true;
1228 for (I = *ListBegin; *I != 0; I++)
1229 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1230 break;
1231 if (*I == 0)
1232 Regex = false;
1233
1234 // Compile the regex pattern
1235 regex_t Pattern;
1236 if (Regex == true)
1237 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1238 REG_NOSUB) != 0)
1239 Regex = false;
1240
1241 // Search the list
1242 bool Done = false;
1243 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1244 {
1245 if (R->Str[0] == 0)
1246 continue;
1247
1248 if (strcasecmp(R->Str,*ListBegin) != 0)
1249 {
1250 if (Regex == false)
1251 continue;
1252 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1253 continue;
1254 }
1255 Done = true;
1256
1257 if (R->Hit == false)
1258 Hits++;
1259
1260 R->Hit = true;
1261 }
1262
1263 if (Regex == true)
1264 regfree(&Pattern);
1265
1266 if (Done == false)
1267 _error->Warning(_("Selection %s not found"),*ListBegin);
1268 }
1269
1270 return Hits;
1271 }
1272 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* Single formatting attempt with a buffer of size bytes. On success the
   result is written to out and true is returned. Otherwise nothing is
   written, size is set to the size to try next (the exact need if
   vsnprintf reported it, else twice the old size) and false is returned.
   args is consumed either way, the caller has to restart it to retry. */
static bool iovprintf(std::ostream &out, const char *format,
		      va_list &args, ssize_t &size) {
   // the vector releases the buffer on every path and throws instead of
   // handing us a null pointer if the allocation fails
   std::vector<char> S(static_cast<size_t>(size));
   ssize_t const n = vsnprintf(&S[0], size, format, args);
   if (n > -1 && n < size) {
      out << &S[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}

/* printf-style formatting into the stream out */
void ioprintf(std::ostream &out,const char *format,...)
{
   ssize_t size = 400;
   while (true) {
      va_list args;
      va_start(args,format);
      bool const done = iovprintf(out, format, args, size);
      // every va_start needs its va_end, also for the final attempt
      va_end(args);
      if (done == true)
	 return;
   }
}

/* printf-style formatting, out is replaced by the formatted text */
void strprintf(std::string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   while (true) {
      va_list args;
      va_start(args,format);
      bool const done = iovprintf(outstr, format, args, size);
      // every va_start needs its va_end, also for the final attempt
      va_end(args);
      if (done == true)
	 break;
   }
   out = outstr.str();
}
1318 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string, or End if the output had to be
   truncated or formatting failed. The returned string is always null
   terminated unless Buffer == End. As the result can be fed back in as
   Buffer this is a better alternative to using consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room at all, not even for the terminator
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* Did is the length the complete output would have had. Compare lengths
      rather than pointers: Buffer + Did may lie far outside the buffer and
      such a pointer must not even be formed. */
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1340 /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Returns everything after the first colon of VerStr, or VerStr itself
   if it has no colon. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   return (Colon == std::string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1350 /*}}}*/
// tolower_ascii - tolower() function that ignores the locale		/*{{{*/
// ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway.

   'A'-'Z' are moved up by 32 onto 'a'-'z', everything else is returned
   as is. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + 32 : c;
}
1363 /*}}}*/
1364
1365 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1366 // ---------------------------------------------------------------------
1367 /* The domain list is a comma separate list of domains that are suffix
1368 matched against the argument */
1369 bool CheckDomainList(const string &Host,const string &List)
1370 {
1371 string::const_iterator Start = List.begin();
1372 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1373 {
1374 if (Cur < List.end() && *Cur != ',')
1375 continue;
1376
1377 // Match the end of the string..
1378 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1379 Cur - Start != 0 &&
1380 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1381 return true;
1382
1383 Start = Cur + 1;
1384 }
1385 return false;
1386 }
1387 /*}}}*/
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries in front of the terminating NULL pointer */
size_t strv_length(const char **str_array)
{
   const char **end = str_array;
   while (*end != NULL)
      ++end;
   return static_cast<size_t>(end - str_array);
}
									/*}}}*/
1399
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input with the backslash escapes resolved:
     \\    a single backslash
     \0XX  the character with the octal value XX
     \xXX  the character with the hexadecimal value XX
   Any other escaped character is dropped together with its backslash, as
   is a backslash at the very end of the input. If \0 or \x is not
   followed by two more characters the escape introducer is dropped and
   what follows is copied as ordinary text. */
std::string DeEscapeString(const std::string &input)
{
   std::string output;
   std::string::size_type const len = input.length();
   for (std::string::size_type i = 0; i < len; ++i)
   {
      // just copy non-escape chars
      if (input[i] != '\\')
      {
	 output += input[i];
	 continue;
      }

      // ensure we have a char to read
      if (i + 1 == len)
	 continue;

      // step onto the char that selects the kind of escape
      ++i;
      int base;
      switch (input[i])
      {
	 case '\\':
	    // deal with double escape
	    output += '\\';
	    continue;
	 case '0':
	    base = 8;
	    break;
	 case 'x':
	    base = 16;
	    break;
	 default:
	    // FIXME: raise exception here?
	    continue;
      }

      /* Both digits must really be there. Being content with a single one
         would read beyond the end of the string and move the position
         past it, so that the loop never sees the end. */
      if (len - i <= 2)
	 continue;

      char const tmp[3] = { input[i + 1], input[i + 2], 0 };
      output += (char)strtol(tmp, 0, base);
      i += 2;
   }
   return output;
}
1461 /*}}}*/
1462 // URI::CopyFrom - Copy from an object /*{{{*/
1463 // ---------------------------------------------------------------------
1464 /* This parses the URI into all of its components */
1465 void URI::CopyFrom(const string &U)
1466 {
1467 string::const_iterator I = U.begin();
1468
1469 // Locate the first colon, this separates the scheme
1470 for (; I < U.end() && *I != ':' ; ++I);
1471 string::const_iterator FirstColon = I;
1472
1473 /* Determine if this is a host type URI with a leading double //
1474 and then search for the first single / */
1475 string::const_iterator SingleSlash = I;
1476 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1477 SingleSlash += 3;
1478
1479 /* Find the / indicating the end of the hostname, ignoring /'s in the
1480 square brackets */
1481 bool InBracket = false;
1482 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1483 {
1484 if (*SingleSlash == '[')
1485 InBracket = true;
1486 if (InBracket == true && *SingleSlash == ']')
1487 InBracket = false;
1488 }
1489
1490 if (SingleSlash > U.end())
1491 SingleSlash = U.end();
1492
1493 // We can now write the access and path specifiers
1494 Access.assign(U.begin(),FirstColon);
1495 if (SingleSlash != U.end())
1496 Path.assign(SingleSlash,U.end());
1497 if (Path.empty() == true)
1498 Path = "/";
1499
1500 // Now we attempt to locate a user:pass@host fragment
1501 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1502 FirstColon += 3;
1503 else
1504 FirstColon += 1;
1505 if (FirstColon >= U.end())
1506 return;
1507
1508 if (FirstColon > SingleSlash)
1509 FirstColon = SingleSlash;
1510
1511 // Find the colon...
1512 I = FirstColon + 1;
1513 if (I > SingleSlash)
1514 I = SingleSlash;
1515 for (; I < SingleSlash && *I != ':'; ++I);
1516 string::const_iterator SecondColon = I;
1517
1518 // Search for the @ after the colon
1519 for (; I < SingleSlash && *I != '@'; ++I);
1520 string::const_iterator At = I;
1521
1522 // Now write the host and user/pass
1523 if (At == SingleSlash)
1524 {
1525 if (FirstColon < SingleSlash)
1526 Host.assign(FirstColon,SingleSlash);
1527 }
1528 else
1529 {
1530 Host.assign(At+1,SingleSlash);
1531 // username and password must be encoded (RFC 3986)
1532 User.assign(DeQuoteString(FirstColon,SecondColon));
1533 if (SecondColon < At)
1534 Password.assign(DeQuoteString(SecondColon+1,At));
1535 }
1536
1537 // Now we parse the RFC 2732 [] hostnames.
1538 unsigned long PortEnd = 0;
1539 InBracket = false;
1540 for (unsigned I = 0; I != Host.length();)
1541 {
1542 if (Host[I] == '[')
1543 {
1544 InBracket = true;
1545 Host.erase(I,1);
1546 continue;
1547 }
1548
1549 if (InBracket == true && Host[I] == ']')
1550 {
1551 InBracket = false;
1552 Host.erase(I,1);
1553 PortEnd = I;
1554 continue;
1555 }
1556 I++;
1557 }
1558
1559 // Tsk, weird.
1560 if (InBracket == true)
1561 {
1562 Host.clear();
1563 return;
1564 }
1565
1566 // Now we parse off a port number from the hostname
1567 Port = 0;
1568 string::size_type Pos = Host.rfind(':');
1569 if (Pos == string::npos || Pos < PortEnd)
1570 return;
1571
1572 Port = atoi(string(Host,Pos+1).c_str());
1573 Host.assign(Host,0,Pos);
1574 }
1575 /*}}}*/
1576 // URI::operator string - Convert the URI to a string /*{{{*/
1577 // ---------------------------------------------------------------------
1578 /* */
1579 URI::operator string()
1580 {
1581 string Res;
1582
1583 if (Access.empty() == false)
1584 Res = Access + ':';
1585
1586 if (Host.empty() == false)
1587 {
1588 if (Access.empty() == false)
1589 Res += "//";
1590
1591 if (User.empty() == false)
1592 {
1593 // FIXME: Technically userinfo is permitted even less
1594 // characters than these, but this is not conveniently
1595 // expressed with a blacklist.
1596 Res += QuoteString(User, ":/?#[]@");
1597 if (Password.empty() == false)
1598 Res += ":" + QuoteString(Password, ":/?#[]@");
1599 Res += "@";
1600 }
1601
1602 // Add RFC 2732 escaping characters
1603 if (Access.empty() == false &&
1604 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1605 Res += '[' + Host + ']';
1606 else
1607 Res += Host;
1608
1609 if (Port != 0)
1610 {
1611 char S[30];
1612 sprintf(S,":%u",Port);
1613 Res += S;
1614 }
1615 }
1616
1617 if (Path.empty() == false)
1618 {
1619 if (Path[0] != '/')
1620 Res += "/" + Path;
1621 else
1622 Res += Path;
1623 }
1624
1625 return Res;
1626 }
1627 /*}}}*/
1628 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1629 // ---------------------------------------------------------------------
1630 /* */
1631 string URI::SiteOnly(const string &URI)
1632 {
1633 ::URI U(URI);
1634 U.User.clear();
1635 U.Password.clear();
1636 U.Path.clear();
1637 return U;
1638 }
1639 /*}}}*/
1640 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1641 // ---------------------------------------------------------------------
1642 /* */
1643 string URI::NoUserPassword(const string &URI)
1644 {
1645 ::URI U(URI);
1646 U.User.clear();
1647 U.Password.clear();
1648 return U;
1649 }
1650 /*}}}*/