]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Merge remote-tracking branch 'upstream/debian/experimental' into feature/acq-trans
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
namespace APT {
   namespace String {

/* Return a copy of s without leading and trailing blanks, tabs and
   newlines. A string made up only of those characters yields "". */
std::string Strip(const std::string &s)
{
   static const char * const Blank = " \t\n";

   std::string::size_type const first = s.find_first_not_of(Blank);
   if (first == std::string::npos)
      return std::string();

   std::string::size_type const last = s.find_last_not_of(Blank);
   return std::string(s, first, last - first + 1);
}

/* True if the last end.size() characters of s are equal to end. */
bool Endswith(const std::string &s, const std::string &end)
{
   return end.size() <= s.size() &&
	  s.compare(s.size() - end.size(), end.size(), end) == 0;
}

/* True if the first start.size() characters of s are equal to start. */
bool Startswith(const std::string &s, const std::string &start)
{
   return start.size() <= s.size() &&
	  s.compare(0, start.size(), start) == 0;
}

   }
}
74 /*}}}*/
75 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
76 // ---------------------------------------------------------------------
77 /* This is handy to use before display some information for enduser */
78 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
79 {
80 iconv_t cd;
81 const char *inbuf;
82 char *inptr, *outbuf;
83 size_t insize, bufsize;
84 dest->clear();
85
86 cd = iconv_open(codeset, "UTF-8");
87 if (cd == (iconv_t)(-1)) {
88 // Something went wrong
89 if (errno == EINVAL)
90 _error->Error("conversion from 'UTF-8' to '%s' not available",
91 codeset);
92 else
93 perror("iconv_open");
94
95 return false;
96 }
97
98 insize = bufsize = orig.size();
99 inbuf = orig.data();
100 inptr = (char *)inbuf;
101 outbuf = new char[bufsize];
102 size_t lastError = -1;
103
104 while (insize != 0)
105 {
106 char *outptr = outbuf;
107 size_t outsize = bufsize;
108 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
109 dest->append(outbuf, outptr - outbuf);
110 if (err == (size_t)(-1))
111 {
112 switch (errno)
113 {
114 case EILSEQ:
115 insize--;
116 inptr++;
117 // replace a series of unknown multibytes with a single "?"
118 if (lastError != insize) {
119 lastError = insize - 1;
120 dest->append("?");
121 }
122 break;
123 case EINVAL:
124 insize = 0;
125 break;
126 case E2BIG:
127 if (outptr == outbuf)
128 {
129 bufsize *= 2;
130 delete[] outbuf;
131 outbuf = new char[bufsize];
132 }
133 break;
134 }
135 }
136 }
137
138 delete[] outbuf;
139
140 iconv_close(cd);
141
142 return true;
143 }
144 /*}}}*/
145 // strstrip - Remove white space from the front and back of a string /*{{{*/
146 // ---------------------------------------------------------------------
147 /* This is handy to use when parsing a file. It also removes \n's left
148 over from fgets and company */
149 char *_strstrip(char *String)
150 {
151 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
152
153 if (*String == 0)
154 return String;
155 return _strrstrip(String);
156 }
157 /*}}}*/
158 // strrstrip - Remove white space from the back of a string /*{{{*/
159 // ---------------------------------------------------------------------
/* Cuts trailing blanks, tabs, newlines and carriage returns by writing the
   terminator over the first of them. Works in place and returns String. */
char *_strrstrip(char *String)
{
   // index based so that no pointer in front of the buffer is ever formed
   size_t Len = strlen(String);
   while (Len != 0 && (String[Len-1] == ' ' || String[Len-1] == '\t' ||
		       String[Len-1] == '\n' || String[Len-1] == '\r'))
      --Len;
   String[Len] = 0;
   return String;
}
169 /*}}}*/
170 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
171 // ---------------------------------------------------------------------
172 /* */
/* Replaces every tab in String by blanks, in place. Len is the size of the
   buffer holding String. If a tab sits in the last 8 bytes of the buffer or
   the expanded text would not fit, the string is cut at that tab.

   NOTE: a tab at offset 0 becomes one blank, every other tab becomes
   6 + (offset % 8) blanks. That is what this function has always produced
   (it is not the distance to the next tab stop) and it is kept so that
   existing output does not change. */
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
	 continue;
      if (I + 8 > String + Len)
      {
	 *I = 0;
	 return String;
      }

      int Pad;
      if (String == I)
	 Pad = 1;
      else
	 Pad = 8 + ((I - String) % 8);
      Pad -= 2;
      if (Pad <= 0)
      {
	 *I = ' ';
	 continue;
      }

      // text behind the tab including its terminator
      size_t const Tail = strlen(I + 1) + 1;
      // the tab turns into Pad bytes, make sure the result still fits
      if (static_cast<size_t>(I - String) + Pad + Tail > Len)
      {
	 *I = 0;
	 return String;
      }

      memmove(I + Pad,I + 1,Tail);
      memset(I,' ',Pad);
      // stop on the last blank so the loop increment lands on the first
      // moved character, which may be another tab or the terminator
      I += Pad - 1;
   }
   return String;
}
204 /*}}}*/
205 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
206 // ---------------------------------------------------------------------
207 /* This grabs a single word, converts any % escaped characters to their
208 proper values and advances the pointer. Double quotes are understood
209 and stripped out as well. This is for URI/URL parsing. It also can
210 understand [] brackets.*/
/* Takes the next word from String and stores it in Res. %XX escapes are
   decoded and double quotes are removed. Parts in "" or [] may contain
   white space. String is advanced past the word and the white space after
   it. Returns false, leaving String and Res alone, if no word is left or a
   quote/bracket is not closed.
   Blanks in front of the word are copied into Res, and Res ends at a
   decoded %00, as it always did. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; C++);
   if (*C == 0)
      return false;

   // Jump to the next word
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; C++)
   {
      if (*C == '"')
      {
	 C = strchr(C + 1, '"');
	 if (C == NULL)
	    return false;
      }
      if (*C == '[')
      {
	 C = strchr(C + 1, ']');
	 if (C == NULL)
	    return false;
      }
   }

   // Now de-quote characters. A string grows as needed, so long words can
   // neither overflow a fixed buffer nor get cut
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C &&
	  isxdigit(static_cast<unsigned char>(Start[1])) &&
	  isxdigit(static_cast<unsigned char>(Start[2])))
      {
	 char const Tmp[3] = {Start[1], Start[2], 0};
	 Word += static_cast<char>(strtol(Tmp,0,16));
	 Start += 3;
	 continue;
      }
      if (*Start != '"')
	 Word += *Start;
      ++Start;
   }
   // going through c_str() ends the result at an embedded NUL
   Res = Word.c_str();

   // Skip ending white space
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) != 0; C++);
   String = C;
   return true;
}
267 /*}}}*/
268 // ParseCWord - Parses a string like a C "" expression /*{{{*/
269 // ---------------------------------------------------------------------
270 /* This expects a series of space separated strings enclosed in ""'s.
271 It concatenates the ""'s into a single string. */
/* Joins a series of "..." strings into Res. The contents of each pair of
   quotes are copied and every run of white space between them becomes one
   blank. Fails on an empty or blank-only input, on input of 1024 or more
   characters, on an unclosed quote and on any other character outside of
   quotes. On success String points at the end of the input. */
bool ParseCWord(const char *&String,string &Res)
{
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return false;

   // longer input has never been accepted
   if (strlen(String) >= 1024)
      return false;

   string Joined;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
	 const char * const Close = strchr(Cur + 1, '"');
	 if (Close == NULL)
	    return false;
	 Joined.append(Cur + 1, Close);
	 Cur = Close + 1;
	 continue;
      }

      bool const Space = (isspace(*Cur) != 0);
      // the first space of a run already produced the blank
      if (Cur != String && Space == true && isspace(Cur[-1]) != 0)
      {
	 ++Cur;
	 continue;
      }
      if (Space == false)
	 return false;
      Joined += ' ';
      ++Cur;
   }

   Res = Joined;
   String = Cur;
   return true;
}
309 /*}}}*/
310 // QuoteString - Convert a string into quoted form /*{{{*/
311 // ---------------------------------------------------------------------
312 /* */
/* Returns Str with every character that is listed in Bad, is '%', is not
   printable or lies outside of 0x21..0x7E replaced by %XX, XX being the
   two digit lower case hex value of the byte. */
string QuoteString(const string &Str, const char *Bad)
{
   static const char Hex[] = "0123456789abcdef";

   string Res;
   Res.reserve(Str.size());
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // look at the byte value: a plain char can be negative, which made
      // the escape come out as %ffffffXX and is not allowed for isprint()
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
	  Ch == 0x25 || // percent '%' char
	  Ch <= 0x20 || Ch >= 0x7F) // control chars
      {
	 Res += '%';
	 Res += Hex[Ch >> 4];
	 Res += Hex[Ch & 0x0F];
      }
      else
	 Res += *I;
   }
   return Res;
}
331 /*}}}*/
332 // DeQuoteString - Convert a string from quoted form /*{{{*/
333 // ---------------------------------------------------------------------
334 /* This undoes QuoteString */
335 string DeQuoteString(const string &Str)
336 {
337 return DeQuoteString(Str.begin(),Str.end());
338 }
339 string DeQuoteString(string::const_iterator const &begin,
340 string::const_iterator const &end)
341 {
342 string Res;
343 for (string::const_iterator I = begin; I != end; ++I)
344 {
345 if (*I == '%' && I + 2 < end &&
346 isxdigit(I[1]) && isxdigit(I[2]))
347 {
348 char Tmp[3];
349 Tmp[0] = I[1];
350 Tmp[1] = I[2];
351 Tmp[2] = 0;
352 Res += (char)strtol(Tmp,0,16);
353 I += 2;
354 continue;
355 }
356 else
357 Res += *I;
358 }
359 return Res;
360 }
361
362 /*}}}*/
363 // SizeToStr - Convert a long into a human readable size /*{{{*/
364 // ---------------------------------------------------------------------
365 /* A max of 4 digits are shown before conversion to the next highest unit.
366 The max length of the string will be 5 chars unless the size is > 10
367 YottaBytes (E24) */
/* Formats the absolute value of Size as number, blank and unit letter.
   The value is divided by 1000 until it is below 10000. Values below 100
   in a unit other than bytes get one decimal place. Plain bytes have no
   unit letter, so the result ends in the blank. Anything too big for the
   last unit is printed in that unit however long it gets. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   int const LastExt = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // find the unit, stopping at the last one so that a result is always
   // produced (previously huge values and NaN returned an unset buffer)
   int I = 0;
   for (; I < LastExt; ++I)
   {
      if ((ASize < 100 && I != 0) || ASize < 10000)
	 break;
      ASize /= 1000.0;
   }

   // room for the biggest double printed in the last unit
   char S[400];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);

   return S;
}
400 /*}}}*/
401 // TimeToStr - Convert the time into a string /*{{{*/
402 // ---------------------------------------------------------------------
403 /* Converts a number of seconds to a hms format */
404 string TimeToStr(unsigned long Sec)
405 {
406 char S[300];
407
408 while (1)
409 {
410 if (Sec > 60*60*24)
411 {
412 //d means days, h means hours, min means minutes, s means seconds
413 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
414 break;
415 }
416
417 if (Sec > 60*60)
418 {
419 //h means hours, min means minutes, s means seconds
420 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
421 break;
422 }
423
424 if (Sec > 60)
425 {
426 //min means minutes, s means seconds
427 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
428 break;
429 }
430
431 //s means seconds
432 sprintf(S,_("%lis"),Sec);
433 break;
434 }
435
436 return S;
437 }
438 /*}}}*/
439 // SubstVar - Substitute a string for another string /*{{{*/
440 // ---------------------------------------------------------------------
441 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with each occurrence of Subst replaced by Contents. The
   search goes from left to right and continues behind a replaced
   occurrence. An empty Subst or no occurrence gives Str back unchanged. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   if (Subst.empty() == true)
      return Str;

   string::size_type Hit = Str.find(Subst);
   if (Hit == string::npos)
      return Str;

   string Out;
   string::size_type Copied = 0;
   while (Hit != string::npos)
   {
      // text between the previous and this occurrence, then the new content
      Out.append(Str, Copied, Hit - Copied);
      Out.append(Contents);
      Copied = Hit + Subst.length();
      Hit = (Copied < Str.length()) ? Str.find(Subst, Copied) : string::npos;
   }

   // whatever follows the last occurrence
   if (Copied < Str.length())
      Out.append(Str, Copied, string::npos);
   return Out;
}
468 string SubstVar(string Str,const struct SubstVar *Vars)
469 {
470 for (; Vars->Subst != 0; Vars++)
471 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
472 return Str;
473 }
474 /*}}}*/
475 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
476 // ---------------------------------------------------------------------
477 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string made of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Indent;
   unsigned long const Copies = Depth + 1;
   for (unsigned long Done = 0; Done != Copies; ++Done)
      Indent += Separator;
   return Indent;
}
485 /*}}}*/
486 // URItoFileName - Convert the uri into a unique file name /*{{{*/
487 // ---------------------------------------------------------------------
488 /* This converts a URI into a safe filename. It quotes all unsafe characters
489 and converts / to _ and removes the scheme identifier. The resulting
490 file name should be unique and never occur again for a different file */
491 string URItoFileName(const string &URI)
492 {
493 // Nuke 'sensitive' items
494 ::URI U(URI);
495 U.User.clear();
496 U.Password.clear();
497 U.Access.clear();
498
499 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
500 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
501 replace(NewURI.begin(),NewURI.end(),'/','_');
502 return NewURI;
503 }
504 /*}}}*/
505 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
506 // ---------------------------------------------------------------------
507 /* This routine performs a base64 transformation on a string. It was ripped
508 from wget and then patched and bug fixed.
509
510 This spec can be found in rfc2045 */
/* Returns the base64 form of S: every group of up to three bytes becomes
   four output characters, a short last group is filled up with '='. */
string Base64Encode(const string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   string Final;
   string::size_type const Len = S.length();
   Final.reserve(4*((Len + 2)/3));

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      string::size_type const Avail = (Len - Pos < 3) ? Len - Pos : 3;

      // unsigned, so bytes of 0x80 and above can not become a negative
      // index into the table
      unsigned char Bits[3] = {0,0,0};
      for (string::size_type K = 0; K != Avail; ++K)
	 Bits[K] = static_cast<unsigned char>(S[Pos + K]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) | (Bits[1] >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Avail == 1)
      {
	 Final += "==";
	 break;
      }

      Final += tbl[((Bits[1] & 0xf) << 2) | (Bits[2] >> 6)];

      if (Avail == 2)
      {
	 Final += '=';
	 break;
      }

      Final += tbl[Bits[2] & 0x3f];
   }

   return Final;
}
561 /*}}}*/
562 // stringcmp - Arbitrary string compare /*{{{*/
563 // ---------------------------------------------------------------------
564 /* This safely compares two non-null terminated strings of arbitrary
565 length */
/* Compares [A,AEnd) with [B,BEnd). Returns 0 if both are equal. If one
   range is a prefix of the other the result is 1 when A is the shorter one
   and -1 when B is. Otherwise the first differing characters decide:
   -1 if the one from A is smaller, 1 if not. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone && BDone)
      return 0;
   if (ADone)
      return 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
582
583 #if __GNUC__ >= 3
/* Compares the string range [A,AEnd) with the character range [B,BEnd).
   Returns 0 if both are equal. If one range is a prefix of the other the
   result is 1 when A is the shorter one and -1 when B is. Otherwise the
   first differing characters decide: -1 if the one from A is smaller,
   1 if not. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
	      const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone && BDone)
      return 0;
   if (ADone)
      return 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Compares the string ranges [A,AEnd) and [B,BEnd). Returns 0 if both are
   equal. If one range is a prefix of the other the result is 1 when A is
   the shorter one and -1 when B is. Otherwise the first differing
   characters decide: -1 if the one from A is smaller, 1 if not. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
	      string::const_iterator B,string::const_iterator BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone && BDone)
      return 0;
   if (ADone)
      return 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
618 #endif
619 /*}}}*/
620 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
621 // ---------------------------------------------------------------------
622 /* */
623 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
624 {
625 for (; A != AEnd && B != BEnd; A++, B++)
626 if (tolower_ascii(*A) != tolower_ascii(*B))
627 break;
628
629 if (A == AEnd && B == BEnd)
630 return 0;
631 if (A == AEnd)
632 return 1;
633 if (B == BEnd)
634 return -1;
635 if (tolower_ascii(*A) < tolower_ascii(*B))
636 return -1;
637 return 1;
638 }
639 #if __GNUC__ >= 3
640 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
641 const char *B,const char *BEnd)
642 {
643 for (; A != AEnd && B != BEnd; A++, B++)
644 if (tolower_ascii(*A) != tolower_ascii(*B))
645 break;
646
647 if (A == AEnd && B == BEnd)
648 return 0;
649 if (A == AEnd)
650 return 1;
651 if (B == BEnd)
652 return -1;
653 if (tolower_ascii(*A) < tolower_ascii(*B))
654 return -1;
655 return 1;
656 }
657 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
658 string::const_iterator B,string::const_iterator BEnd)
659 {
660 for (; A != AEnd && B != BEnd; A++, B++)
661 if (tolower_ascii(*A) != tolower_ascii(*B))
662 break;
663
664 if (A == AEnd && B == BEnd)
665 return 0;
666 if (A == AEnd)
667 return 1;
668 if (B == BEnd)
669 return -1;
670 if (tolower_ascii(*A) < tolower_ascii(*B))
671 return -1;
672 return 1;
673 }
674 #endif
675 /*}}}*/
676 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
677 // ---------------------------------------------------------------------
678 /* The format is like those used in package files and the method
679 communication system */
680 string LookupTag(const string &Message,const char *Tag,const char *Default)
681 {
682 // Look for a matching tag.
683 int Length = strlen(Tag);
684 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
685 {
686 // Found the tag
687 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
688 {
689 // Find the end of line and strip the leading/trailing spaces
690 string::const_iterator J;
691 I += Length + 1;
692 for (; isspace(*I) != 0 && I < Message.end(); ++I);
693 for (J = I; *J != '\n' && J < Message.end(); ++J);
694 for (; J > I && isspace(J[-1]) != 0; --J);
695
696 return string(I,J);
697 }
698
699 for (; *I != '\n' && I < Message.end(); ++I);
700 }
701
702 // Failed to find a match
703 if (Default == 0)
704 return string();
705 return Default;
706 }
707 /*}}}*/
708 // StringToBool - Converts a string into a boolean /*{{{*/
709 // ---------------------------------------------------------------------
710 /* This inspects the string to see if it is true or if it is false and
711 then returns the result. Several variants on true/false are checked. */
/* Returns 0 or 1 if Text as a whole is that number (as read by strtol
   with base 0) or one of the known words for false or true, compared
   without regard to case. Everything else gives Default. */
int StringToBool(const string &Text,int Default)
{
   const char * const Raw = Text.c_str();

   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   char *NumEnd;
   int const Number = strtol(Raw,&NumEnd,0);
   if (NumEnd == Raw + Text.size() && Number >= 0 && Number <= 1)
      return Number;

   // Check for negatives
   static const char * const Negative[] = {"no","false","without","off","disable"};
   for (size_t N = 0; N != sizeof(Negative)/sizeof(Negative[0]); ++N)
      if (strcasecmp(Raw,Negative[N]) == 0)
	 return 0;

   // Check for positives
   static const char * const Positive[] = {"yes","true","with","on","enable"};
   for (size_t P = 0; P != sizeof(Positive)/sizeof(Positive[0]); ++P)
      if (strcasecmp(Raw,Positive[P]) == 0)
	 return 1;

   return Default;
}
740 /*}}}*/
741 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
742 // ---------------------------------------------------------------------
743 /* This converts a time_t into a string time representation that is
744 year 2000 compliant and timezone neutral */
/* Formats Date as "Day, DD Mon YYYY HH:MM:SS GMT" from the fields
   gmtime_r() computes, with fixed English day and month names. Returns
   an empty string if gmtime_r() fails. */
string TimeRFC1123(time_t Date)
{
   static const char * const DayName[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthName[] = {"Jan","Feb","Mar","Apr","May","Jun",
					    "Jul","Aug","Sep","Oct","Nov","Dec"};

   struct tm Utc;
   if (gmtime_r(&Date, &Utc) == NULL)
      return string();

   char Stamp[300];
   snprintf(Stamp, sizeof(Stamp), "%s, %02i %s %i %02i:%02i:%02i GMT",
	    DayName[Utc.tm_wday], Utc.tm_mday, MonthName[Utc.tm_mon],
	    Utc.tm_year+1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
   return string(Stamp);
}
761 /*}}}*/
762 // ReadMessages - Read messages from the FD /*{{{*/
763 // ---------------------------------------------------------------------
764 /* This pulls full messages from the input FD into the message buffer.
765 It assumes that messages will not pause during transit so no
766 fancy buffering is used.
767
768 In particular: this reads blocks from the input until it believes
769 that it's run out of input text. Each block is terminated by a
770 double newline ('\n' followed by '\n'). As noted below, there is a
771 bug in this code: it assumes that all the blocks have been read if
772 it doesn't see additional text in the buffer after the last one is
773 parsed, which will cause it to lose blocks if the last block
774 coincides with the end of the buffer.
775 */
776 bool ReadMessages(int Fd, vector<string> &List)
777 {
778 char Buffer[64000];
779 char *End = Buffer;
780 // Represents any left-over from the previous iteration of the
781 // parse loop. (i.e., if a message is split across the end
782 // of the buffer, it goes here)
783 string PartialMessage;
784
785 while (1)
786 {
787 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
788 if (Res < 0 && errno == EINTR)
789 continue;
790
791 // Process is dead, this is kind of bad..
792 if (Res == 0)
793 return false;
794
795 // No data
796 if (Res < 0 && errno == EAGAIN)
797 return true;
798 if (Res < 0)
799 return false;
800
801 End += Res;
802
803 // Look for the end of the message
804 for (char *I = Buffer; I + 1 < End; I++)
805 {
806 if (I[1] != '\n' ||
807 (I[0] != '\n' && strncmp(I, "\r\n\r\n", 4) != 0))
808 continue;
809
810 // Pull the message out
811 string Message(Buffer,I-Buffer);
812 PartialMessage += Message;
813
814 // Fix up the buffer
815 for (; I < End && (*I == '\n' || *I == '\r'); ++I);
816 End -= I-Buffer;
817 memmove(Buffer,I,End-Buffer);
818 I = Buffer;
819
820 List.push_back(PartialMessage);
821 PartialMessage.clear();
822 }
823 if (End != Buffer)
824 {
825 // If there's text left in the buffer, store it
826 // in PartialMessage and throw the rest of the buffer
827 // away. This allows us to handle messages that
828 // are longer than the static buffer size.
829 PartialMessage += string(Buffer, End);
830 End = Buffer;
831 }
832 else
833 {
834 // BUG ALERT: if a message block happens to end at a
835 // multiple of 64000 characters, this will cause it to
836 // terminate early, leading to a badly formed block and
837 // probably crashing the method. However, this is the only
838 // way we have to find the end of the message block. I have
839 // an idea of how to fix this, but it will require changes
840 // to the protocol (essentially to mark the beginning and
841 // end of the block).
842 //
843 // -- dburrows 2008-04-02
844 return true;
845 }
846
847 if (WaitFd(Fd) == false)
848 return false;
849 }
850 }
851 /*}}}*/
852 // MonthConv - Converts a month string into a number /*{{{*/
853 // ---------------------------------------------------------------------
854 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
855 Made it a bit more robust with a few tolower_ascii though. */
856 static int MonthConv(char *Month)
857 {
858 switch (tolower_ascii(*Month))
859 {
860 case 'a':
861 return tolower_ascii(Month[1]) == 'p'?3:7;
862 case 'd':
863 return 11;
864 case 'f':
865 return 1;
866 case 'j':
867 if (tolower_ascii(Month[1]) == 'a')
868 return 0;
869 return tolower_ascii(Month[2]) == 'n'?5:6;
870 case 'm':
871 return tolower_ascii(Month[2]) == 'r'?2:4;
872 case 'n':
873 return 10;
874 case 'o':
875 return 9;
876 case 's':
877 return 8;
878
879 // Pretend it is January..
880 default:
881 return 0;
882 }
883 }
884 /*}}}*/
885 // timegm - Internal timegm if the gnu version is not available /*{{{*/
886 // ---------------------------------------------------------------------
887 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
888 than local timezone (mktime assumes the latter).
889
890 This function is a nonstandard GNU extension that is also present on
891 the BSDs and maybe other systems. For others we follow the advice of
892 the manpage of timegm and use his portable replacement. */
893 #ifndef HAVE_TIMEGM
894 static time_t timegm(struct tm *t)
895 {
896 char *tz = getenv("TZ");
897 setenv("TZ", "", 1);
898 tzset();
899 time_t ret = mktime(t);
900 if (tz)
901 setenv("TZ", tz, 1);
902 else
903 unsetenv("TZ");
904 tzset();
905 return ret;
906 }
907 #endif
908 /*}}}*/
909 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
910 // ---------------------------------------------------------------------
911 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
912 with one exception: All timezones (%Z) are accepted but the protocol
913 says that it MUST be GMT, but this one is equal to UTC which we will
914 encounter from time to time (e.g. in Release files) so we accept all
915 here and just assume it is GMT (or UTC) later on */
/* Tries the three date formats of RFC2616 Section 3.3.1 on str, with the
   names read in the "C" locale, and stores the result of timegm() in time.
   Returns false, leaving time alone, if none of the formats fits. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   // strptime() fills in only what the format mentions
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // keep the locale in use so that it can be put back. The string from
   // setlocale() may be overwritten by later calls, hence the copy
   char const * const Current = setlocale(LC_ALL, NULL);
   std::string const OldLocale(Current != NULL ? Current : "");

   setlocale (LC_ALL,"C");
   bool const invalid =
   // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
   // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
   // Sun Nov  6 08:49:37 1994 ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   // an unknown previous locale falls back to the environment as before
   setlocale (LC_ALL,OldLocale.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
934 /*}}}*/
935 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
936 // ---------------------------------------------------------------------
937 /* */
/* Reads a date of the form YYYYMMDDHHMMSS from str and stores the result
   of timegm() in time. Returns false, leaving time alone, if str does not
   have that form. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime() fills in only what the format mentions
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM has no white space between the fields. The blanks in the format
   // are there to separate the conversions and also match no input at all
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
948 /*}}}*/
949 // StrToTime - Converts a string into a time_t /*{{{*/
950 // ---------------------------------------------------------------------
951 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
952 and the C library asctime format. It requires the GNU library function
953 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
954 reason the C library does not provide any such function :< This also
955 handles the weird, but unambiguous FTP time format*/
956 bool StrToTime(const string &Val,time_t &Result)
957 {
958 struct tm Tm;
959 char Month[10];
960
961 // Skip the day of the week
962 const char *I = strchr(Val.c_str(), ' ');
963
964 // Handle RFC 1123 time
965 Month[0] = 0;
966 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
967 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
968 {
969 // Handle RFC 1036 time
970 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
971 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
972 Tm.tm_year += 1900;
973 else
974 {
975 // asctime format
976 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
977 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
978 {
979 // 'ftp' time
980 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
981 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
982 return false;
983 Tm.tm_mon--;
984 }
985 }
986 }
987
988 Tm.tm_isdst = 0;
989 if (Month[0] != 0)
990 Tm.tm_mon = MonthConv(Month);
991 else
992 Tm.tm_mon = 0; // we don't have a month, so pick something
993 Tm.tm_year -= 1900;
994
995 // Convert to local time and then to GMT
996 Result = timegm(&Tm);
997 return true;
998 }
999 /*}}}*/
1000 // StrToNum - Convert a fixed length string to a number /*{{{*/
1001 // ---------------------------------------------------------------------
1002 /* This is used in decoding the crazy fixed length string headers in
1003 tar and ar files. */
/* Reads a number in the given Base from the first Len characters of Str,
   which need not be terminated. Fields of 30 and more characters are
   refused. A field that is empty or has nothing but blanks in front of a
   NUL counts as 0. Returns false if strtoul() could not use any
   character. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   if (Len >= 30)
      return false;

   // a terminated copy of the field
   std::string const Field(Str,Len);

   // All spaces is a zero
   Res = 0;
   std::string::size_type const Used = Field.find_first_not_of(' ');
   if (Used == std::string::npos || Field[Used] == 0)
      return true;

   const char * const Begin = Field.c_str();
   char *End;
   Res = strtoul(Begin,&End,Base);
   return End != Begin;
}
1026 /*}}}*/
1027 // StrToNum - Convert a fixed length string to a number /*{{{*/
1028 // ---------------------------------------------------------------------
1029 /* This is used in decoding the crazy fixed length string headers in
1030 tar and ar files. */
/* Reads a number in the given Base from the first Len characters of Str,
   which need not be terminated. Fields of 30 and more characters are
   refused. A field that is empty or has nothing but blanks in front of a
   NUL counts as 0. Returns false if strtoull() could not use any
   character. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   if (Len >= 30)
      return false;

   // a terminated copy of the field
   std::string const Field(Str,Len);

   // All spaces is a zero
   Res = 0;
   std::string::size_type const Used = Field.find_first_not_of(' ');
   if (Used == std::string::npos || Field[Used] == 0)
      return true;

   const char * const Begin = Field.c_str();
   char *End;
   Res = strtoull(Begin,&End,Base);
   return End != Begin;
}
1053 /*}}}*/
1054
1055 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1056 // ---------------------------------------------------------------------
1057 /* This is used in decoding the 256bit encoded fixed length fields in
1058 tar files */
/* Reads Len bytes of Str as one big-endian number. The top bit of the
   first byte has to be set and is not part of the value. Returns false
   without touching Res if that bit is clear. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // each byte goes in as 0..255: a plain char of 0x80 and above is
   // negative on many platforms and would damage the bits collected so far
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) | static_cast<unsigned char>(Str[i]);
   return true;
}
1071 /*}}}*/
1072 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1073 // ---------------------------------------------------------------------
1074 /* This is used in decoding the 256bit encoded fixed length fields in
1075 tar files */
1076 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1077 {
1078 unsigned long long Num;
1079 bool rc;
1080
1081 rc = Base256ToNum(Str, Num, Len);
1082 Res = Num;
1083 if (Res != Num)
1084 return false;
1085
1086 return rc;
1087 }
1088 /*}}}*/
1089 // HexDigit - Convert a hex character into an integer /*{{{*/
1090 // ---------------------------------------------------------------------
1091 /* Helper for Hex2Num */
/* Value of the hex digit c, with both cases accepted for a-f. Anything
   else gives 0. */
static int HexDigit(int c)
{
   bool const Decimal = ('0' <= c && c <= '9');
   bool const Lower = ('a' <= c && c <= 'f');
   bool const Upper = ('A' <= c && c <= 'F');

   return Decimal ? c - '0'
	: Lower   ? c - 'a' + 10
	: Upper   ? c - 'A' + 10
	: 0;
}
1102 /*}}}*/
1103 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1104 // ---------------------------------------------------------------------
1105 /* The length of the buffer must be exactly 1/2 the length of the string. */
1106 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1107 {
1108 if (Str.length() != Length*2)
1109 return false;
1110
1111 // Convert each digit. We store it in the same order as the string
1112 int J = 0;
1113 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1114 {
1115 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1116 return false;
1117
1118 Num[J] = HexDigit(I[0]) << 4;
1119 Num[J] += HexDigit(I[1]);
1120 }
1121
1122 return true;
1123 }
1124 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memories. Input is changed to insert nulls at each token location.

   Tok     - separator character
   Input   - null terminated string, modified in place
   List    - receives pointers into Input, one per item, followed by a
             null entry; whitespace around items is stripped
   ListMax - number of entries available in List (including the null)

   Returns false if List is too small to hold all items plus the
   terminating null entry. If there is room for at least one entry the
   list is still null terminated in that case. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without a single slot not even the terminator can be stored
   if (ListMax == 0)
      return false;

   /* isspace() is only defined for values representable as unsigned char,
      so every plain char is converted before being classified. */

   // Strip any leading spaces
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; ++Start);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; ++Pos);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
           isspace(static_cast<unsigned char>(End[-1])) != 0); --End);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator, drop the item again
         List[Count-1] = 0;
         return false;
      }

      // Advance pos, also stepping over the nulls written above
      for (; Pos != Stop && (*Pos == Tok ||
           isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); ++Pos);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1164 /*}}}*/
// VectorizeString - Split a string up into a vector of strings	/*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.

   Empty items between two separators are kept, but a separator at the
   very end of the string does not produce a trailing empty item. An empty
   haystack yields an empty vector. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   /* Work on offsets rather than iterators: the position following the
      final separator may equal size(), which is a valid offset, whereas
      stepping an iterator beyond end() is undefined. */
   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const end = haystack.find(split, start);
      if (end == std::string::npos)
      {
         // last item, runs up to the end of the string
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));

      start = end + 1;
      // separator was the last character: nothing follows it
      if (start == haystack.size())
         break;
   }
   return exploded;
}
1184 /*}}}*/
// StringSplit - split a string into a string vector by token		/*{{{*/
// ---------------------------------------------------------------------
/* See header for details.
   Cuts s at every occurrence of sep. Once the result would reach maxsplit
   items the unsplit remainder of s becomes the final item. An empty
   separator is rejected by returning an empty vector. */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
                                     unsigned int maxsplit)
{
   std::vector<std::string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   std::string::size_type from = 0;
   for (;;)
   {
      std::string::size_type const hit = s.find(sep, from);

      /* Either no separator is left or the item about to be added is the
         last one allowed: the rest of the string goes in as it is. */
      if (hit == std::string::npos || pieces.size() + 1 >= maxsplit)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1215 /*}}}*/
1216 // RegexChoice - Simple regex list/list matcher /*{{{*/
1217 // ---------------------------------------------------------------------
1218 /* */
1219 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1220 const char **ListEnd)
1221 {
1222 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1223 R->Hit = false;
1224
1225 unsigned long Hits = 0;
1226 for (; ListBegin < ListEnd; ++ListBegin)
1227 {
1228 // Check if the name is a regex
1229 const char *I;
1230 bool Regex = true;
1231 for (I = *ListBegin; *I != 0; I++)
1232 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1233 break;
1234 if (*I == 0)
1235 Regex = false;
1236
1237 // Compile the regex pattern
1238 regex_t Pattern;
1239 if (Regex == true)
1240 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1241 REG_NOSUB) != 0)
1242 Regex = false;
1243
1244 // Search the list
1245 bool Done = false;
1246 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1247 {
1248 if (R->Str[0] == 0)
1249 continue;
1250
1251 if (strcasecmp(R->Str,*ListBegin) != 0)
1252 {
1253 if (Regex == false)
1254 continue;
1255 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1256 continue;
1257 }
1258 Done = true;
1259
1260 if (R->Hit == false)
1261 Hits++;
1262
1263 R->Hit = true;
1264 }
1265
1266 if (Regex == true)
1267 regfree(&Pattern);
1268
1269 if (Done == false)
1270 _error->Warning(_("Selection %s not found"),*ListBegin);
1271 }
1272
1273 return Hits;
1274 }
1275 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* iovprintf - one formatting attempt into a temporary buffer of size bytes.
   On success the text is written to out and true is returned. Otherwise
   nothing is written, size is raised (to the exact need if vsnprintf
   reported it, doubled if not) and false is returned so that the caller
   can retry with a freshly started va_list. */
static bool iovprintf(std::ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   /* A vector instead of malloc(): the buffer is released on every path
      and an allocation failure raises std::bad_alloc rather than handing
      a null pointer to vsnprintf. */
   std::vector<char> buffer(static_cast<size_t>(size));
   ssize_t const n = vsnprintf(&buffer[0], buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}

/* ioprintf - printf style formatting appended to an output stream */
void ioprintf(std::ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      // args is consumed by each attempt, so restart it every round and
      // always pair the va_start with a va_end, also on success
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}

/* strprintf - printf style formatting, the result replaces out */
void strprintf(std::string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1321 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs.

   If the output had to be truncated or formatting failed, End is returned,
   so a following call chained on the result becomes a no-op. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // no room left at all
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* Did is the length the complete output would have had, which can be
      far larger than the buffer. Compare lengths instead of computing
      Buffer + Did, a pointer that may lie outside of the buffer. */
   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1343 /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Everything up to and including the first ':' is dropped. A version
   string without any ':' is returned unchanged. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   return (Colon == std::string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1353 /*}}}*/
// tolower_ascii - tolower() function that ignores the locale		/*{{{*/
// ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. Anything outside of
   'A'-'Z' is returned as it was passed in. */
int tolower_ascii(int const c)
{
   // distance between an upper case letter and its lower case form
   int const CaseOffset = 32;
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + CaseOffset : c;
}
1366 /*}}}*/
1367
1368 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1369 // ---------------------------------------------------------------------
1370 /* The domain list is a comma separate list of domains that are suffix
1371 matched against the argument */
1372 bool CheckDomainList(const string &Host,const string &List)
1373 {
1374 string::const_iterator Start = List.begin();
1375 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1376 {
1377 if (Cur < List.end() && *Cur != ',')
1378 continue;
1379
1380 // Match the end of the string..
1381 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1382 Cur - Start != 0 &&
1383 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1384 return true;
1385
1386 Start = Cur + 1;
1387 }
1388 return false;
1389 }
1390 /*}}}*/
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries in front of the first NULL pointer; the terminator
   itself is not counted. */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return static_cast<size_t>(cursor - str_array);
}
									/*}}}*/
1402
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which
     \\    is replaced by a single backslash,
     \0XX  is replaced by the char with the octal value XX and
     \xXX  is replaced by the char with the hexadecimal value XX.
   Any other escaped character is dropped together with its backslash, as
   is a lone backslash at the very end. If \0 or \x is followed by fewer
   than two characters the marker is dropped and the remaining characters
   are copied as ordinary text. */
std::string DeEscapeString(const std::string &input)
{
   std::string output;
   for (std::string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read after the backslash
      if (input.end() - it < 2)
         continue;

      // read it
      ++it;
      int Base = 0;
      switch (*it)
      {
         case '\\':
            // deal with double escape
            output += '\\';
            continue;
         case '0':
            Base = 8;
            break;
         case 'x':
            Base = 16;
            break;
         default:
            // FIXME: raise exception here?
            continue;
      }

      /* Both digits have to exist inside of the string. Accepting
         "it + 2 == end()" would read the element at end() and leave the
         iterator on end(), which the loop increment then steps past. */
      if (input.end() - it <= 2)
         continue;

      char const Digits[3] = { it[1], it[2], '\0' };
      output += static_cast<char>(strtol(Digits, 0, Base));
      it += 2;
   }
   return output;
}
1464 /*}}}*/
1465 // URI::CopyFrom - Copy from an object /*{{{*/
1466 // ---------------------------------------------------------------------
1467 /* This parses the URI into all of its components */
1468 void URI::CopyFrom(const string &U)
1469 {
1470 string::const_iterator I = U.begin();
1471
1472 // Locate the first colon, this separates the scheme
1473 for (; I < U.end() && *I != ':' ; ++I);
1474 string::const_iterator FirstColon = I;
1475
1476 /* Determine if this is a host type URI with a leading double //
1477 and then search for the first single / */
1478 string::const_iterator SingleSlash = I;
1479 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1480 SingleSlash += 3;
1481
1482 /* Find the / indicating the end of the hostname, ignoring /'s in the
1483 square brackets */
1484 bool InBracket = false;
1485 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1486 {
1487 if (*SingleSlash == '[')
1488 InBracket = true;
1489 if (InBracket == true && *SingleSlash == ']')
1490 InBracket = false;
1491 }
1492
1493 if (SingleSlash > U.end())
1494 SingleSlash = U.end();
1495
1496 // We can now write the access and path specifiers
1497 Access.assign(U.begin(),FirstColon);
1498 if (SingleSlash != U.end())
1499 Path.assign(SingleSlash,U.end());
1500 if (Path.empty() == true)
1501 Path = "/";
1502
1503 // Now we attempt to locate a user:pass@host fragment
1504 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1505 FirstColon += 3;
1506 else
1507 FirstColon += 1;
1508 if (FirstColon >= U.end())
1509 return;
1510
1511 if (FirstColon > SingleSlash)
1512 FirstColon = SingleSlash;
1513
1514 // Find the colon...
1515 I = FirstColon + 1;
1516 if (I > SingleSlash)
1517 I = SingleSlash;
1518 for (; I < SingleSlash && *I != ':'; ++I);
1519 string::const_iterator SecondColon = I;
1520
1521 // Search for the @ after the colon
1522 for (; I < SingleSlash && *I != '@'; ++I);
1523 string::const_iterator At = I;
1524
1525 // Now write the host and user/pass
1526 if (At == SingleSlash)
1527 {
1528 if (FirstColon < SingleSlash)
1529 Host.assign(FirstColon,SingleSlash);
1530 }
1531 else
1532 {
1533 Host.assign(At+1,SingleSlash);
1534 // username and password must be encoded (RFC 3986)
1535 User.assign(DeQuoteString(FirstColon,SecondColon));
1536 if (SecondColon < At)
1537 Password.assign(DeQuoteString(SecondColon+1,At));
1538 }
1539
1540 // Now we parse the RFC 2732 [] hostnames.
1541 unsigned long PortEnd = 0;
1542 InBracket = false;
1543 for (unsigned I = 0; I != Host.length();)
1544 {
1545 if (Host[I] == '[')
1546 {
1547 InBracket = true;
1548 Host.erase(I,1);
1549 continue;
1550 }
1551
1552 if (InBracket == true && Host[I] == ']')
1553 {
1554 InBracket = false;
1555 Host.erase(I,1);
1556 PortEnd = I;
1557 continue;
1558 }
1559 I++;
1560 }
1561
1562 // Tsk, weird.
1563 if (InBracket == true)
1564 {
1565 Host.clear();
1566 return;
1567 }
1568
1569 // Now we parse off a port number from the hostname
1570 Port = 0;
1571 string::size_type Pos = Host.rfind(':');
1572 if (Pos == string::npos || Pos < PortEnd)
1573 return;
1574
1575 Port = atoi(string(Host,Pos+1).c_str());
1576 Host.assign(Host,0,Pos);
1577 }
1578 /*}}}*/
1579 // URI::operator string - Convert the URI to a string /*{{{*/
1580 // ---------------------------------------------------------------------
1581 /* */
1582 URI::operator string()
1583 {
1584 string Res;
1585
1586 if (Access.empty() == false)
1587 Res = Access + ':';
1588
1589 if (Host.empty() == false)
1590 {
1591 if (Access.empty() == false)
1592 Res += "//";
1593
1594 if (User.empty() == false)
1595 {
1596 // FIXME: Technically userinfo is permitted even less
1597 // characters than these, but this is not conveniently
1598 // expressed with a blacklist.
1599 Res += QuoteString(User, ":/?#[]@");
1600 if (Password.empty() == false)
1601 Res += ":" + QuoteString(Password, ":/?#[]@");
1602 Res += "@";
1603 }
1604
1605 // Add RFC 2732 escaping characters
1606 if (Access.empty() == false &&
1607 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1608 Res += '[' + Host + ']';
1609 else
1610 Res += Host;
1611
1612 if (Port != 0)
1613 {
1614 char S[30];
1615 sprintf(S,":%u",Port);
1616 Res += S;
1617 }
1618 }
1619
1620 if (Path.empty() == false)
1621 {
1622 if (Path[0] != '/')
1623 Res += "/" + Path;
1624 else
1625 Res += Path;
1626 }
1627
1628 return Res;
1629 }
1630 /*}}}*/
1631 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1632 // ---------------------------------------------------------------------
1633 /* */
1634 string URI::SiteOnly(const string &URI)
1635 {
1636 ::URI U(URI);
1637 U.User.clear();
1638 U.Password.clear();
1639 U.Path.clear();
1640 return U;
1641 }
1642 /*}}}*/
1643 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1644 // ---------------------------------------------------------------------
1645 /* */
1646 string URI::NoUserPassword(const string &URI)
1647 {
1648 ::URI U(URI);
1649 U.User.clear();
1650 U.Password.clear();
1651 return U;
1652 }
1653 /*}}}*/