]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Merge remote-tracking branch 'upstream/debian/experimental' into feature/no-more...
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Strip - return a copy of str without leading and trailing whitespace.

   Whitespace is whatever isspace() reports for the current locale. A string
   that is empty or consists only of whitespace yields "". */
std::string Strip(const std::string &str)
{
   std::string::size_type const len = str.length();

   // bytes are classified as unsigned char: handing a negative char to
   // isspace() is undefined behaviour
   std::string::size_type start = 0;
   while (start < len && isspace(static_cast<unsigned char>(str[start])) != 0)
      ++start;

   // string is empty or contains only whitespaces
   if (start == len)
      return "";

   // str[start] is not a space, so this loop stops at start + 1 at the latest
   std::string::size_type end = len;
   while (isspace(static_cast<unsigned char>(str[end - 1])) != 0)
      --end;

   return str.substr(start, end - start);
}

/* Endswith - true if s finishes with the characters of end.
   An empty end matches every string. */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   // compare in place instead of building a temporary substring
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}

/* Startswith - true if s begins with the characters of start.
   An empty start matches every string. */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   return s.compare(0, start.size(), start) == 0;
}

   }
}
86 /*}}}*/
87 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This is handy to use before display some information for enduser */
90 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
91 {
92 iconv_t cd;
93 const char *inbuf;
94 char *inptr, *outbuf;
95 size_t insize, bufsize;
96 dest->clear();
97
98 cd = iconv_open(codeset, "UTF-8");
99 if (cd == (iconv_t)(-1)) {
100 // Something went wrong
101 if (errno == EINVAL)
102 _error->Error("conversion from 'UTF-8' to '%s' not available",
103 codeset);
104 else
105 perror("iconv_open");
106
107 return false;
108 }
109
110 insize = bufsize = orig.size();
111 inbuf = orig.data();
112 inptr = (char *)inbuf;
113 outbuf = new char[bufsize];
114 size_t lastError = -1;
115
116 while (insize != 0)
117 {
118 char *outptr = outbuf;
119 size_t outsize = bufsize;
120 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
121 dest->append(outbuf, outptr - outbuf);
122 if (err == (size_t)(-1))
123 {
124 switch (errno)
125 {
126 case EILSEQ:
127 insize--;
128 inptr++;
129 // replace a series of unknown multibytes with a single "?"
130 if (lastError != insize) {
131 lastError = insize - 1;
132 dest->append("?");
133 }
134 break;
135 case EINVAL:
136 insize = 0;
137 break;
138 case E2BIG:
139 if (outptr == outbuf)
140 {
141 bufsize *= 2;
142 delete[] outbuf;
143 outbuf = new char[bufsize];
144 }
145 break;
146 }
147 }
148 }
149
150 delete[] outbuf;
151
152 iconv_close(cd);
153
154 return true;
155 }
156 /*}}}*/
157 // strstrip - Remove white space from the front and back of a string /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This is handy to use when parsing a file. It also removes \n's left
160 over from fgets and company */
161 char *_strstrip(char *String)
162 {
163 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
164
165 if (*String == 0)
166 return String;
167 return _strrstrip(String);
168 }
169 /*}}}*/
170 // strrstrip - Remove white space from the back of a string /*{{{*/
171 // ---------------------------------------------------------------------
/* Cuts trailing spaces, tabs, newlines and carriage returns from String in
   place by moving the terminator forward. Returns String. */
char *_strrstrip(char *String)
{
   // work with a length instead of a pointer walking backwards, so no
   // pointer in front of the buffer is ever formed for empty/blank input
   size_t Len = strlen(String);
   while (Len != 0 && (String[Len - 1] == ' ' || String[Len - 1] == '\t' ||
                       String[Len - 1] == '\n' || String[Len - 1] == '\r'))
      --Len;
   String[Len] = 0;
   return String;
}
181 /*}}}*/
182 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
183 // ---------------------------------------------------------------------
184 /* */
/* Expands every tab in String, in place, to the blanks needed to reach the
   next multiple-of-8 column (column 0 is the start of String).

   Len is the capacity of the buffer in bytes including the terminator and
   String must be NUL terminated. If expanding a tab would not fit, the
   string is cut at that tab. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   size_t Length = strlen(String);
   for (size_t Pos = 0; Pos < Length; ++Pos)
   {
      if (String[Pos] != '\t')
         continue;

      // number of blanks up to the next tab stop, between 1 and 8
      size_t const Width = 8 - (Pos % 8);

      // the tab turns into Width characters and the terminator needs a
      // byte as well: Length - 1 + Width + 1 bytes in total
      if (Length + Width > Len)
      {
         String[Pos] = 0;
         return String;
      }

      // shift the tail (including its terminator) and fill the gap
      memmove(String + Pos + Width,String + Pos + 1,Length - Pos);
      memset(String + Pos,' ',Width);
      Length += Width - 1;
      // continue with the first character of the shifted tail
      Pos += Width - 1;
   }
   return String;
}
216 /*}}}*/
217 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This grabs a single word, converts any % escaped characters to their
220 proper values and advances the pointer. Double quotes are understood
221 and stripped out as well. This is for URI/URL parsing. It also can
222 understand [] brackets.*/
/* Extracts the next word from String into Res and moves String behind the
   word and the whitespace following it.

   A word ends at the first whitespace that is not inside "" or []. %XX
   escapes are decoded and double quotes are dropped; brackets are kept.
   Returns false (leaving String and Res alone) if only blanks are left or
   a quote/bracket is not closed. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the next word; ctype functions need unsigned char values
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; ++C)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters. As before the copy starts at String, so
   // blanks skipped above stay part of the word. A growing string replaces
   // the former 1024 byte buffer, so long words are neither cut nor able
   // to overrun anything.
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C &&
          isxdigit(static_cast<unsigned char>(Start[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(Start[2])) != 0)
      {
         char const Tmp[3] = { Start[1], Start[2], 0 };
         Word += static_cast<char>(strtol(Tmp,0,16));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }
   // a decoded %00 used to terminate the result; keep cutting there
   Res = Word.c_str();

   // Skip ending white space
   while (*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0)
      ++C;
   String = C;
   return true;
}
279 /*}}}*/
280 // ParseCWord - Parses a string like a C "" expression /*{{{*/
281 // ---------------------------------------------------------------------
282 /* This expects a series of space separated strings enclosed in ""'s.
283 It concatenates the ""'s into a single string. */
/* Parses a sequence of "" enclosed strings separated by whitespace up to
   the end of String and stores their contents in Res; each run of
   whitespace outside the quotes contributes a single blank.

   On success String is moved to the terminator. Returns false, without
   touching String or Res, if nothing but blanks is left, a quote is not
   closed or a non-whitespace character appears outside of quotes. */
bool ParseCWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // collected in a string, so the input length is not limited
   string Word;
   for (; *C != 0; ++C)
   {
      if (*C == '"')
      {
         const char * const Close = strchr(C + 1, '"');
         if (Close == NULL)
            return false;
         Word.append(C + 1, Close);
         // the loop increment steps over the closing quote
         C = Close;
         continue;
      }

      // only whitespace may appear between the quoted parts
      if (isspace(static_cast<unsigned char>(*C)) == 0)
         return false;
      // a run of whitespace is represented by its first character only
      if (C != String && isspace(static_cast<unsigned char>(C[-1])) != 0)
         continue;
      Word += ' ';
   }

   Res = Word;
   String = C;
   return true;
}
321 /*}}}*/
322 // QuoteString - Convert a string into quoted form /*{{{*/
323 // ---------------------------------------------------------------------
324 /* */
/* Returns Str with every byte that is listed in Bad, is '%', is not
   printable or lies outside of 0x21-0x7E replaced by %xx (two lowercase
   hex digits of the byte value). */
string QuoteString(const string &Str, const char *Bad)
{
   string Res;
   Res.reserve(Str.size());
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // look at the byte value: a negative char would be undefined for
      // isprint() and would be printed as %ffffffxx
      unsigned char const Ch = static_cast<unsigned char>(*I);
      bool const Escape = strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
                          Ch == 0x25 || // percent '%' char
                          Ch <= 0x20 || Ch >= 0x7F; // control chars
      if (Escape == false)
      {
         Res += *I;
         continue;
      }

      char Buf[4];
      snprintf(Buf,sizeof(Buf),"%%%02x",static_cast<unsigned int>(Ch));
      Res += Buf;
   }
   return Res;
}
343 /*}}}*/
344 // DeQuoteString - Convert a string from quoted form /*{{{*/
345 // ---------------------------------------------------------------------
346 /* This undoes QuoteString */
/* Returns the characters of [begin,end) with every %XX escape (two hex
   digits) replaced by the byte it stands for. A '%' that is not followed
   by two hex digits is copied unchanged. */
string DeQuoteString(string::const_iterator const &begin,
                        string::const_iterator const &end)
{
   string Res;
   Res.reserve(end - begin);
   for (string::const_iterator I = begin; I != end; ++I)
   {
      // end - I > 2 checks that two more characters exist without moving
      // an iterator beyond end
      if (*I == '%' && end - I > 2 &&
          isxdigit(static_cast<unsigned char>(I[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(I[2])) != 0)
      {
         char const Tmp[3] = { I[1], I[2], 0 };
         Res += static_cast<char>(strtol(Tmp,0,16));
         // skip the two digits, the loop increment skips the third char
         I += 2;
         continue;
      }
      Res += *I;
   }
   return Res;
}
/* Convenience overload decoding a whole string. */
string DeQuoteString(const string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
373
374 /*}}}*/
375 // SizeToStr - Convert a long into a human readable size /*{{{*/
376 // ---------------------------------------------------------------------
377 /* A max of 4 digits are shown before conversion to the next highest unit.
378 The max length of the string will be 5 chars unless the size is > 10
379 YottaBytes (E24) */
/* Formats the magnitude of Size (the sign is dropped) followed by a blank
   and a unit letter: none, k, M, G, T, P, E, Z or Y, each step being a
   factor of 1000.

   Values below 100 in a unit other than plain bytes get one decimal,
   everything else none. At most 4 digits are shown before the next unit is
   used; values too large even for Y are printed in Y with more digits. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static char const Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   unsigned int const Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // scale down until 4 digits suffice or the largest unit is reached, so
   // the buffer below is filled for every input
   unsigned int I = 0;
   for (; I < Last && ASize >= 10000; ++I)
      ASize /= 1000.0;

   char S[300];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f %c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f %c",ASize,Ext[I]);

   return S;
}
412 /*}}}*/
413 // TimeToStr - Convert the time into a string /*{{{*/
414 // ---------------------------------------------------------------------
415 /* Converts a number of seconds to a hms format */
416 string TimeToStr(unsigned long Sec)
417 {
418 char S[300];
419
420 while (1)
421 {
422 if (Sec > 60*60*24)
423 {
424 //d means days, h means hours, min means minutes, s means seconds
425 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
426 break;
427 }
428
429 if (Sec > 60*60)
430 {
431 //h means hours, min means minutes, s means seconds
432 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
433 break;
434 }
435
436 if (Sec > 60)
437 {
438 //min means minutes, s means seconds
439 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
440 break;
441 }
442
443 //s means seconds
444 sprintf(S,_("%lis"),Sec);
445 break;
446 }
447
448 return S;
449 }
450 /*}}}*/
451 // SubstVar - Substitute a string for another string /*{{{*/
452 // ---------------------------------------------------------------------
453 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns a copy of Str in which every non-overlapping occurrence of Subst,
   searched from left to right, is replaced by Contents. An empty Subst or
   a Str without any occurrence gives back Str unchanged. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   if (Subst.empty() == true)
      return Str;

   string::size_type Hit = Str.find(Subst);
   if (Hit == string::npos)
      return Str;

   string Result;
   string::size_type Cursor = 0;
   do
   {
      // text in front of the match, then the replacement
      Result.append(Str, Cursor, Hit - Cursor);
      Result.append(Contents);
      Cursor = Hit + Subst.length();
      Hit = Str.find(Subst, Cursor);
   } while (Hit != string::npos);

   // whatever follows the last match
   Result.append(Str, Cursor, string::npos);
   return Result;
}
480 string SubstVar(string Str,const struct SubstVar *Vars)
481 {
482 for (; Vars->Subst != 0; Vars++)
483 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
484 return Str;
485 }
486 /*}}}*/
487 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
488 // ---------------------------------------------------------------------
489 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string consisting of Separator repeated Depth + 1 times. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string output;
   unsigned long const Repeat = Depth + 1;
   for (unsigned long Done = 0; Done != Repeat; ++Done)
      output.append(Separator);
   return output;
}
497 /*}}}*/
498 // URItoFileName - Convert the uri into a unique file name /*{{{*/
499 // ---------------------------------------------------------------------
500 /* This converts a URI into a safe filename. It quotes all unsafe characters
501 and converts / to _ and removes the scheme identifier. The resulting
502 file name should be unique and never occur again for a different file */
503 string URItoFileName(const string &URI)
504 {
505 // Nuke 'sensitive' items
506 ::URI U(URI);
507 U.User.clear();
508 U.Password.clear();
509 U.Access.clear();
510
511 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
512 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
513 replace(NewURI.begin(),NewURI.end(),'/','_');
514 return NewURI;
515 }
516 /*}}}*/
517 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
518 // ---------------------------------------------------------------------
519 /* This routine performs a base64 transformation on a string. It was ripped
520 from wget and then patched and bug fixed.
521
522 This spec can be found in rfc2045 */
/* Returns the base64 (RFC 2045 alphabet) encoding of the bytes in S,
   padded with '=' to a multiple of four characters. No line breaks are
   inserted. */
string Base64Encode(const string &S)
{
   // Conversion table.
   static char const tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate the exact space: 4 characters per started group of 3
   string::size_type const Len = S.length();
   string Final;
   Final.reserve(4 * ((Len + 2) / 3));

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. The bytes are handled as unsigned values: with plain char a
      byte >= 0x80 would shift into a negative table index. */
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      string::size_type const Avail = Len - Pos;
      unsigned char const B0 = static_cast<unsigned char>(S[Pos]);
      unsigned char const B1 = (Avail > 1) ? static_cast<unsigned char>(S[Pos + 1]) : 0;
      unsigned char const B2 = (Avail > 2) ? static_cast<unsigned char>(S[Pos + 2]) : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 0x03) << 4) | (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0x0f) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
573 /*}}}*/
574 // stringcmp - Arbitrary string compare /*{{{*/
575 // ---------------------------------------------------------------------
576 /* This safely compares two non-null terminated strings of arbitrary
577 length */
/* Compares the byte ranges [A,AEnd) and [B,BEnd). Returns 0 if both are
   equal. If one range is a proper prefix of the other, the shorter one
   compares greater (1 if A is the prefix, -1 if B is). Otherwise the first
   differing pair of chars decides: -1 if *A < *B, else 1. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
594
595 #if __GNUC__ >= 3
/* Same ordering as the pointer variant, with the first range given by
   string iterators: 0 for equal ranges, 1 if A is a proper prefix of B,
   -1 if B is a proper prefix of A, else decided by the first differing
   chars. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   for (; A != AEnd && B != BEnd; ++A, ++B)
   {
      if (*A == *B)
         continue;
      // first difference decides
      return (*A < *B) ? -1 : 1;
   }

   // at least one range is exhausted here
   bool const DoneA = (A == AEnd);
   bool const DoneB = (B == BEnd);
   if (DoneA == true && DoneB == true)
      return 0;
   return (DoneA == true) ? 1 : -1;
}
/* Iterator/iterator variant of stringcmp: 0 for equal ranges, 1 if A is a
   proper prefix of B, -1 if B is a proper prefix of A, otherwise -1 or 1
   depending on whether the first differing char of A is smaller. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   // skip what both ranges have in common
   while (A != AEnd && B != BEnd)
   {
      if (*A != *B)
         break;
      ++A;
      ++B;
   }

   if (A == AEnd)
   {
      if (B == BEnd)
         return 0;
      return 1;
   }
   if (B == BEnd)
      return -1;
   if (*A < *B)
      return -1;
   return 1;
}
630 #endif
631 /*}}}*/
632 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
633 // ---------------------------------------------------------------------
634 /* */
635 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
636 {
637 for (; A != AEnd && B != BEnd; A++, B++)
638 if (tolower_ascii(*A) != tolower_ascii(*B))
639 break;
640
641 if (A == AEnd && B == BEnd)
642 return 0;
643 if (A == AEnd)
644 return 1;
645 if (B == BEnd)
646 return -1;
647 if (tolower_ascii(*A) < tolower_ascii(*B))
648 return -1;
649 return 1;
650 }
651 #if __GNUC__ >= 3
652 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
653 const char *B,const char *BEnd)
654 {
655 for (; A != AEnd && B != BEnd; A++, B++)
656 if (tolower_ascii(*A) != tolower_ascii(*B))
657 break;
658
659 if (A == AEnd && B == BEnd)
660 return 0;
661 if (A == AEnd)
662 return 1;
663 if (B == BEnd)
664 return -1;
665 if (tolower_ascii(*A) < tolower_ascii(*B))
666 return -1;
667 return 1;
668 }
669 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
670 string::const_iterator B,string::const_iterator BEnd)
671 {
672 for (; A != AEnd && B != BEnd; A++, B++)
673 if (tolower_ascii(*A) != tolower_ascii(*B))
674 break;
675
676 if (A == AEnd && B == BEnd)
677 return 0;
678 if (A == AEnd)
679 return 1;
680 if (B == BEnd)
681 return -1;
682 if (tolower_ascii(*A) < tolower_ascii(*B))
683 return -1;
684 return 1;
685 }
686 #endif
687 /*}}}*/
688 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
689 // ---------------------------------------------------------------------
690 /* The format is like those used in package files and the method
691 communication system */
692 string LookupTag(const string &Message,const char *Tag,const char *Default)
693 {
694 // Look for a matching tag.
695 int Length = strlen(Tag);
696 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
697 {
698 // Found the tag
699 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
700 {
701 // Find the end of line and strip the leading/trailing spaces
702 string::const_iterator J;
703 I += Length + 1;
704 for (; isspace(*I) != 0 && I < Message.end(); ++I);
705 for (J = I; *J != '\n' && J < Message.end(); ++J);
706 for (; J > I && isspace(J[-1]) != 0; --J);
707
708 return string(I,J);
709 }
710
711 for (; *I != '\n' && I < Message.end(); ++I);
712 }
713
714 // Failed to find a match
715 if (Default == 0)
716 return string();
717 return Default;
718 }
719 /*}}}*/
720 // StringToBool - Converts a string into a boolean /*{{{*/
721 // ---------------------------------------------------------------------
722 /* This inspects the string to see if it is true or if it is false and
723 then returns the result. Several variants on true/false are checked. */
/* Interprets Text as a boolean.

   Returns 0 or 1 if Text is, as a whole, a number with that value (as
   parsed by strtol with base 0, which also accepts an empty string as 0)
   or, ignoring case, one of no/false/without/off/disable respectively
   yes/true/with/on/enable. Anything else returns Default. */
int StringToBool(const string &Text,int Default)
{
   char const * const Str = Text.c_str();

   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted.
   // The value stays a long: narrowing it to int first would let large
   // numbers wrap around into 0 or 1.
   char *ParseEnd;
   long const Res = strtol(Str,&ParseEnd,0);
   if (ParseEnd == Str + Text.size() && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   static char const * const Negative[] = {"no","false","without","off","disable"};
   for (unsigned int I = 0; I != sizeof(Negative)/sizeof(Negative[0]); ++I)
      if (strcasecmp(Str,Negative[I]) == 0)
         return 0;

   // Check for positives
   static char const * const Positive[] = {"yes","true","with","on","enable"};
   for (unsigned int I = 0; I != sizeof(Positive)/sizeof(Positive[0]); ++I)
      if (strcasecmp(Str,Positive[I]) == 0)
         return 1;

   return Default;
}
752 /*}}}*/
753 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
754 // ---------------------------------------------------------------------
755 /* This converts a time_t into a string time representation that is
756 year 2000 compliant and timezone neutral */
/* Formats Date, broken down with gmtime_r(), as
   "Day, DD Mon YYYY HH:MM:SS GMT" with English day and month names.
   Returns an empty string if gmtime_r() fails. */
string TimeRFC1123(time_t Date)
{
   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                             "Aug","Sep","Oct","Nov","Dec"};

   struct tm Utc;
   if (gmtime_r(&Date, &Utc) == NULL)
      return "";

   // names come from the fixed tables above, never from the locale
   char Out[300];
   snprintf(Out, sizeof(Out), "%s, %02i %s %i %02i:%02i:%02i GMT",
            DayNames[Utc.tm_wday], Utc.tm_mday, MonthNames[Utc.tm_mon],
            Utc.tm_year+1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
   return Out;
}
773 /*}}}*/
774 // ReadMessages - Read messages from the FD /*{{{*/
775 // ---------------------------------------------------------------------
776 /* This pulls full messages from the input FD into the message buffer.
777 It assumes that messages will not pause during transit so no
778 fancy buffering is used.
779
780 In particular: this reads blocks from the input until it believes
781 that it's run out of input text. Each block is terminated by a
782 double newline ('\n' followed by '\n').
783 */
784 bool ReadMessages(int Fd, vector<string> &List)
785 {
786 char Buffer[64000];
787 // Represents any left-over from the previous iteration of the
788 // parse loop. (i.e., if a message is split across the end
789 // of the buffer, it goes here)
790 string PartialMessage;
791
792 do {
793 int const Res = read(Fd, Buffer, sizeof(Buffer));
794 if (Res < 0 && errno == EINTR)
795 continue;
796
797 // process we read from has died
798 if (Res == 0)
799 return false;
800
801 // No data
802 if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
803 return true;
804 if (Res < 0)
805 return false;
806
807 // extract the message(s) from the buffer
808 char const *Start = Buffer;
809 char const * const End = Buffer + Res;
810
811 char const * NL = (char const *) memchr(Start, '\n', End - Start);
812 if (NL == NULL)
813 {
814 // end of buffer: store what we have so far and read new data in
815 PartialMessage.append(Start, End - Start);
816 Start = End;
817 }
818 else
819 ++NL;
820
821 if (PartialMessage.empty() == false && Start < End)
822 {
823 // if we start with a new line, see if the partial message we have ended with one
824 // so that we properly detect records ending between two read() runs
825 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
826 // the case \r|\n\r\n is handled by the usual double-newline handling
827 if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
828 {
829 if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
830 {
831 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
832 List.push_back(PartialMessage);
833 PartialMessage.clear();
834 while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
835 Start = NL;
836 }
837 }
838 }
839
840 while (Start < End) {
841 char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
842 if (NL2 == NULL)
843 {
844 // end of buffer: store what we have so far and read new data in
845 PartialMessage.append(Start, End - Start);
846 break;
847 }
848 ++NL2;
849
850 // did we find a double newline?
851 if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
852 {
853 PartialMessage.append(Start, NL2 - Start);
854 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
855 List.push_back(PartialMessage);
856 PartialMessage.clear();
857 while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
858 Start = NL2;
859 }
860 NL = NL2;
861 }
862
863 // we have read at least one complete message and nothing left
864 if (PartialMessage.empty() == true)
865 return true;
866
867 if (WaitFd(Fd) == false)
868 return false;
869 } while (true);
870 }
871 /*}}}*/
872 // MonthConv - Converts a month string into a number /*{{{*/
873 // ---------------------------------------------------------------------
874 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
875 Made it a bit more robust with a few tolower_ascii though. */
876 static int MonthConv(char *Month)
877 {
878 switch (tolower_ascii(*Month))
879 {
880 case 'a':
881 return tolower_ascii(Month[1]) == 'p'?3:7;
882 case 'd':
883 return 11;
884 case 'f':
885 return 1;
886 case 'j':
887 if (tolower_ascii(Month[1]) == 'a')
888 return 0;
889 return tolower_ascii(Month[2]) == 'n'?5:6;
890 case 'm':
891 return tolower_ascii(Month[2]) == 'r'?2:4;
892 case 'n':
893 return 10;
894 case 'o':
895 return 9;
896 case 's':
897 return 8;
898
899 // Pretend it is January..
900 default:
901 return 0;
902 }
903 }
904 /*}}}*/
905 // timegm - Internal timegm if the gnu version is not available /*{{{*/
906 // ---------------------------------------------------------------------
907 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
908 than local timezone (mktime assumes the latter).
909
910 This function is a nonstandard GNU extension that is also present on
911 the BSDs and maybe other systems. For others we follow the advice of
912 the manpage of timegm and use his portable replacement. */
913 #ifndef HAVE_TIMEGM
914 static time_t timegm(struct tm *t)
915 {
916 char *tz = getenv("TZ");
917 setenv("TZ", "", 1);
918 tzset();
919 time_t ret = mktime(t);
920 if (tz)
921 setenv("TZ", tz, 1);
922 else
923 unsetenv("TZ");
924 tzset();
925 return ret;
926 }
927 #endif
928 /*}}}*/
929 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
930 // ---------------------------------------------------------------------
931 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
932 with one exception: All timezones (%Z) are accepted but the protocol
933 says that it MUST be GMT, but this one is equal to UTC which we will
934 encounter from time to time (e.g. in Release files) so we accept all
935 here and just assume it is GMT (or UTC) later on */
/* Parses str in one of the three HTTP date formats (RFC 1123, RFC 850,
   asctime) using strptime() under the "C" locale and stores the result,
   interpreted as UTC, in time. Returns false, leaving time untouched, if
   none of the formats matches. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   struct tm Tm;
   // strptime() only fills the fields named in the format
   memset(&Tm, 0, sizeof(Tm));

   // Remember the active locale so it can be put back afterwards. The
   // string returned by setlocale() may be overwritten by the next call,
   // hence the copy. Without a result fall back to "" as before.
   char const * const Active = setlocale(LC_ALL, NULL);
   std::string const Saved = (Active != NULL) ? Active : "";

   setlocale (LC_ALL,"C");
   bool const invalid =
   // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
   // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
       strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
   // Sun Nov  6 08:49:37 1994 ; ANSI C's asctime() format
       strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
   setlocale (LC_ALL,Saved.c_str());
   if (invalid == true)
      return false;

   time = timegm(&Tm);
   return true;
}
954 /*}}}*/
955 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
956 // ---------------------------------------------------------------------
957 /* */
/* Parses the timestamp in str (year, month, day, hour, minute, second)
   with strptime() and stores it, interpreted as UTC, in time. Returns
   false and leaves time untouched if str does not match. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   struct tm Tm;
   // MDTM includes no whitespaces but recommend and ignored by strptime
   bool const Parsed = (strptime(str, "%Y %m %d %H %M %S", &Tm) != NULL);
   if (Parsed == true)
      time = timegm(&Tm);
   return Parsed;
}
968 /*}}}*/
969 // StrToTime - Converts a string into a time_t /*{{{*/
970 // ---------------------------------------------------------------------
971 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
972 and the C library asctime format. It requires the GNU library function
973 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
974 reason the C library does not provide any such function :< This also
975 handles the weird, but unambiguous FTP time format*/
976 bool StrToTime(const string &Val,time_t &Result)
977 {
978 struct tm Tm;
979 char Month[10];
980
981 // Skip the day of the week
982 const char *I = strchr(Val.c_str(), ' ');
983
984 // Handle RFC 1123 time
985 Month[0] = 0;
986 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
987 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
988 {
989 // Handle RFC 1036 time
990 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
991 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
992 Tm.tm_year += 1900;
993 else
994 {
995 // asctime format
996 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
997 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
998 {
999 // 'ftp' time
1000 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
1001 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1002 return false;
1003 Tm.tm_mon--;
1004 }
1005 }
1006 }
1007
1008 Tm.tm_isdst = 0;
1009 if (Month[0] != 0)
1010 Tm.tm_mon = MonthConv(Month);
1011 else
1012 Tm.tm_mon = 0; // we don't have a month, so pick something
1013 Tm.tm_year -= 1900;
1014
1015 // Convert to local time and then to GMT
1016 Result = timegm(&Tm);
1017 return true;
1018 }
1019 /*}}}*/
1020 // StrToNum - Convert a fixed length string to a number /*{{{*/
1021 // ---------------------------------------------------------------------
1022 /* This is used in decoding the crazy fixed length string headers in
1023 tar and ar files. */
/* Converts the first Len bytes of Str (which need not be terminated) to a
   number in the given Base and stores it in Res.

   A field consisting only of blanks (or Len == 0) counts as 0. Returns
   false if Len is 30 or more or if strtoul() could not convert anything;
   characters following the number are ignored. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   char const *First = Field;
   while (*First == ' ')
      ++First;
   if (*First == 0)
      return true;

   char *Stop;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
1046 /*}}}*/
1047 // StrToNum - Convert a fixed length string to a number /*{{{*/
1048 // ---------------------------------------------------------------------
1049 /* This is used in decoding the crazy fixed length string headers in
1050 tar and ar files. */
/* unsigned long long variant: converts the first Len bytes of Str (which
   need not be terminated) with strtoull() in the given Base.

   Blank-only fields count as 0. Returns false if Len is 30 or more or if
   nothing could be converted; trailing characters are ignored. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Copy[30];
   if (Len >= sizeof(Copy))
      return false;
   memcpy(Copy,Str,Len);
   Copy[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned Blanks = 0;
   while (Copy[Blanks] == ' ')
      ++Blanks;
   if (Copy[Blanks] == 0)
      return true;

   char *ParseEnd;
   Res = strtoull(Copy,&ParseEnd,Base);
   if (ParseEnd == Copy)
      return false;
   return true;
}
1073 /*}}}*/
1074
1075 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1076 // ---------------------------------------------------------------------
1077 /* This is used in decoding the 256bit encoded fixed length fields in
1078 tar files */
/* Decodes a big-endian base-256 field of Len bytes whose first byte has
   the top bit set as marker; the remaining 7 bits of that byte are the
   most significant bits of the value. Returns false, leaving Res alone,
   if the marker bit is missing. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   // read the bytes as unsigned values: a plain char >= 0x80 would be
   // sign-extended and corrupt all higher bits of the sum
   unsigned char const * const Bytes = reinterpret_cast<unsigned char const *>(Str);
   if ((Bytes[0] & 0x80) == 0)
      return false;

   unsigned long long Num = Bytes[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
      Num = (Num << 8) | Bytes[i];
   Res = Num;
   return true;
}
1091 /*}}}*/
1092 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1093 // ---------------------------------------------------------------------
1094 /* This is used in decoding the 256bit encoded fixed length fields in
1095 tar files */
1096 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1097 {
1098 unsigned long long Num;
1099 bool rc;
1100
1101 rc = Base256ToNum(Str, Num, Len);
1102 Res = Num;
1103 if (Res != Num)
1104 return false;
1105
1106 return rc;
1107 }
1108 /*}}}*/
1109 // HexDigit - Convert a hex character into an integer /*{{{*/
1110 // ---------------------------------------------------------------------
1111 /* Helper for Hex2Num */
/* Returns the value (0-15) of the hex digit c, accepting both cases.
   Every other character yields 0. */
static int HexDigit(int c)
{
   if ('a' <= c && c <= 'f')
      return 10 + (c - 'a');
   if ('A' <= c && c <= 'F')
      return 10 + (c - 'A');
   if ('0' <= c && c <= '9')
      return c - '0';
   return 0;
}
1122 /*}}}*/
1123 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1124 // ---------------------------------------------------------------------
1125 /* The length of the buffer must be exactly 1/2 the length of the string. */
1126 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1127 {
1128 if (Str.length() != Length*2)
1129 return false;
1130
1131 // Convert each digit. We store it in the same order as the string
1132 int J = 0;
1133 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1134 {
1135 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1136 return false;
1137
1138 Num[J] = HexDigit(I[0]) << 4;
1139 Num[J] += HexDigit(I[1]);
1140 }
1141
1142 return true;
1143 }
1144 /*}}}*/
1145 // TokSplitString - Split a string up by a given token /*{{{*/
1146 // ---------------------------------------------------------------------
1147 /* This is intended to be a faster splitter, it does not use dynamic
1148 memories. Input is changed to insert nulls at each token location. */
/* Split Input in place at every Tok and store pointers to the fields in List.
   Whitespace around the fields is dropped, as are runs of separators. Input
   is modified: a NUL is written behind every field. List has room for
   ListMax entries including the terminating null pointer, so at most
   ListMax-1 fields fit. Returns true if all fields fitted; otherwise List
   is terminated after the fields that did fit and false is returned. With
   ListMax == 0 nothing is written at all and false is returned. */
bool TokSplitString(char Tok,char *Input,char **List,
		    unsigned long ListMax)
{
   // Without room for even the terminating entry nothing can be stored
   if (ListMax == 0)
      return false;

   char * const Stop = Input + strlen(Input);

   // Strip any leading spaces (isspace() needs an unsigned char value)
   char *Start = Input;
   while (*Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0)
      ++Start;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Walk back over trailing separators/spaces and terminate the field
      char *End = Pos;
      while (End > Start &&
             (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0))
         --End;
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The slot just used is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance past separators, spaces and the NULs planted above
      while (Pos != Stop &&
             (*Pos == Tok || *Pos == 0 || isspace(static_cast<unsigned char>(*Pos)) != 0))
         ++Pos;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1184 /*}}}*/
1185 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1186 // ---------------------------------------------------------------------
1187 /* This can be used to split a given string up into a vector, so the
1188 purpose is the same as in the method above. This one is a bit slower,
1189 but the advantage is that we get a vector we can iterate over */
/* Split haystack at every occurrence of split and return the pieces.
   An empty haystack gives an empty vector. Empty fields between two
   separators (and before a leading one) are kept, but a separator that is
   the very last character does not open another, empty, field. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   // Work on offsets so that no iterator beyond end() is ever formed
   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const end = haystack.find(split, start);
      if (end == std::string::npos)
      {
         // last field: everything up to the end of the string
         exploded.push_back(haystack.substr(start));
         break;
      }

      exploded.push_back(haystack.substr(start, end - start));
      start = end + 1;

      // a trailing separator does not start a new field
      if (start == haystack.size())
         break;
   }
   return exploded;
}
1204 /*}}}*/
1205 // StringSplit - split a string into a string vector by token /*{{{*/
1206 // ---------------------------------------------------------------------
1207 /* See header for details.
1208 */
/* Split s at every occurrence of the separator string sep.
   At most maxsplit items are produced; once that many exist the last item
   holds the entire remainder of s, separators included. An empty sep is
   treated as bogus and gives an empty vector. An empty s gives a single
   empty item. */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
                                     unsigned int maxsplit)
{
   std::vector<std::string> pieces;

   // there is nothing sensible to split on without a separator
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);
      pieces.push_back(s.substr(from, hit - from));

      // limit reached: the newest item swallows the rest of the string
      if (pieces.size() >= maxsplit)
      {
         pieces.back() = s.substr(from);
         break;
      }

      // no further separator, so the item just stored was the last one
      if (hit == std::string::npos)
         break;
      from = hit + sep.size();
   }
   return pieces;
}
1235 /*}}}*/
1236 // RegexChoice - Simple regex list/list matcher /*{{{*/
1237 // ---------------------------------------------------------------------
1238 /* */
1239 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1240 const char **ListEnd)
1241 {
1242 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1243 R->Hit = false;
1244
1245 unsigned long Hits = 0;
1246 for (; ListBegin < ListEnd; ++ListBegin)
1247 {
1248 // Check if the name is a regex
1249 const char *I;
1250 bool Regex = true;
1251 for (I = *ListBegin; *I != 0; I++)
1252 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1253 break;
1254 if (*I == 0)
1255 Regex = false;
1256
1257 // Compile the regex pattern
1258 regex_t Pattern;
1259 if (Regex == true)
1260 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1261 REG_NOSUB) != 0)
1262 Regex = false;
1263
1264 // Search the list
1265 bool Done = false;
1266 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1267 {
1268 if (R->Str[0] == 0)
1269 continue;
1270
1271 if (strcasecmp(R->Str,*ListBegin) != 0)
1272 {
1273 if (Regex == false)
1274 continue;
1275 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1276 continue;
1277 }
1278 Done = true;
1279
1280 if (R->Hit == false)
1281 Hits++;
1282
1283 R->Hit = true;
1284 }
1285
1286 if (Regex == true)
1287 regfree(&Pattern);
1288
1289 if (Done == false)
1290 _error->Warning(_("Selection %s not found"),*ListBegin);
1291 }
1292
1293 return Hits;
1294 }
1295 /*}}}*/
1296 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1297 // ---------------------------------------------------------------------
1298 /* This is used to make the internationalization strings easier to translate
1299 and to allow reordering of parameters */
/* One formatting attempt with a scratch buffer of size bytes.
   On success the formatted text is written to out and true is returned.
   If the buffer was too small nothing is written, size is updated to the
   size to try next (the exact need if vsnprintf() reported it, double the
   current size otherwise) and false is returned. args is consumed by the
   call either way, so the caller has to restart it before trying again.
   The buffer is owned by a vector, so it is released on every path and an
   allocation failure raises std::bad_alloc instead of handing a null
   pointer to vsnprintf(). */
static bool iovprintf(std::ostream &out, const char *format,
		      va_list &args, ssize_t &size) {
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(&buffer[0], size, format, args);
   if (n > -1 && n < size) {
      out << &buffer[0];
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
/* printf-style formatting appended to the stream out. The output buffer
   starts at 400 bytes and grows until the text fits. */
void ioprintf(std::ostream &out,const char *format,...)
{
   ssize_t size = 400;
   while (true) {
      va_list args;
      va_start(args,format);
      bool const done = iovprintf(out, format, args, size);
      // every va_start needs its va_end, also on the successful attempt
      va_end(args);
      if (done == true)
	 return;
   }
}
/* printf-style formatting into the string out, replacing its previous
   content. The output buffer starts at 400 bytes and grows until the text
   fits. */
void strprintf(std::string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   while (true) {
      va_list args;
      va_start(args,format);
      bool const done = iovprintf(outstr, format, args, size);
      // every va_start needs its va_end, also on the successful attempt
      va_end(args);
      if (done == true)
	 break;
   }
   out = outstr.str();
}
1341 /*}}}*/
1342 // safe_snprintf - Safer snprintf /*{{{*/
1343 // ---------------------------------------------------------------------
1344 /* This is a snprintf that will never (ever) go past 'End' and returns a
1345 pointer to the end of the new string. The returned string is always null
1346 terminated unless Buffer == End. This is a better alternative to using
1347 consecutive snprintfs. */
/* Format into the range [Buffer,End) and return where the text stops.
   The result is the position of the terminating NUL, which is where a
   follow-up call should continue, or End if the output was cut short or
   formatting failed. With no room at all (End <= Buffer) nothing is written
   and End is returned. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No space left, not even for the terminator
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Written = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // An error or more text than there was room for: report a full buffer
   bool const Failed = (Written < 0);
   if (Failed == true || Written > End - Buffer)
      return End;

   return Buffer + Written;
}
1363 /*}}}*/
1364 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1365 // ---------------------------------------------------------------------
/* Return VerStr without its epoch, i.e. everything after the first colon.
   A version string without a colon is returned unchanged. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   return (Colon == std::string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1373 /*}}}*/
1374 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1375 // ---------------------------------------------------------------------
1376 /* This little function is the most called method we have and therefore
1377 tries to do the absolute minimum - it is notably faster than the
1378 standard tolower/toupper and as a bonus avoids problems with different
1379 locales - we only operate on ascii chars anyway. */
/* Lower-case c if it is one of the ASCII letters 'A'..'Z'; every other
   value is handed back unchanged. The locale is never consulted. */
int tolower_ascii(int const c)
{
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1386 /*}}}*/
1387
1388 // CheckDomainList - See if Host is in a comma separated list /*{{{*/
1389 // ---------------------------------------------------------------------
1390 /* The domain list is a comma separated list of domains that are suffix
1391 matched against the argument */
1392 bool CheckDomainList(const string &Host,const string &List)
1393 {
1394 string::const_iterator Start = List.begin();
1395 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1396 {
1397 if (Cur < List.end() && *Cur != ',')
1398 continue;
1399
1400 // Match the end of the string..
1401 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1402 Cur - Start != 0 &&
1403 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1404 return true;
1405
1406 Start = Cur + 1;
1407 }
1408 return false;
1409 }
1410 /*}}}*/
1411 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1412 // ---------------------------------------------------------------------
1413 /* */
/* Count the entries of str_array in front of its terminating NULL pointer.
   The array must be NULL terminated. */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return cursor - str_array;
}
1422
1423 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1424 // ---------------------------------------------------------------------
1425 /* */
/* Undo backslash escaping in input and return the result.
   Understood are "\\" (a single backslash), "\0XX" (two octal digits) and
   "\xXX" (two hex digits). Anything else following a backslash is dropped
   together with the backslash, as is a backslash at the very end. An octal
   or hex escape that is cut short by the end of the string loses its
   backslash and marker; what is left of it is copied as ordinary text. */
std::string DeEscapeString(const std::string &input)
{
   std::string output;
   std::string::const_iterator const end = input.end();
   for (std::string::const_iterator it = input.begin(); it != end; ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // a backslash with nothing behind it is dropped
      if (it + 1 == end)
         continue;

      // move on to the character selecting the kind of escape
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
         output += '\\';
         continue;
      }

      int base = 0;
      switch (*it)
      {
         case '0':
            base = 8;
            break;
         case 'x':
            base = 16;
            break;
         default:
            // FIXME: raise exception here?
            break;
      }

      /* Both digits have to be inside the string. Compare the remaining
         length instead of advancing the iterator: it + 2 may lie beyond
         end(), and accepting it + 2 == end() would read past the last
         character and leave the loop iterator behind end() for good. */
      if (base == 0 || end - it <= 2)
         continue;

      char const digits[3] = { it[1], it[2], 0 };
      output += (char)strtol(digits, 0, base);
      it += 2;
   }
   return output;
}
1484 /*}}}*/
1485 // URI::CopyFrom - Copy from an object /*{{{*/
1486 // ---------------------------------------------------------------------
1487 /* This parses the URI into all of its components */
1488 void URI::CopyFrom(const string &U)
1489 {
1490 string::const_iterator I = U.begin();
1491
1492 // Locate the first colon, this separates the scheme
1493 for (; I < U.end() && *I != ':' ; ++I);
1494 string::const_iterator FirstColon = I;
1495
1496 /* Determine if this is a host type URI with a leading double //
1497 and then search for the first single / */
1498 string::const_iterator SingleSlash = I;
1499 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1500 SingleSlash += 3;
1501
1502 /* Find the / indicating the end of the hostname, ignoring /'s in the
1503 square brackets */
1504 bool InBracket = false;
1505 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1506 {
1507 if (*SingleSlash == '[')
1508 InBracket = true;
1509 if (InBracket == true && *SingleSlash == ']')
1510 InBracket = false;
1511 }
1512
1513 if (SingleSlash > U.end())
1514 SingleSlash = U.end();
1515
1516 // We can now write the access and path specifiers
1517 Access.assign(U.begin(),FirstColon);
1518 if (SingleSlash != U.end())
1519 Path.assign(SingleSlash,U.end());
1520 if (Path.empty() == true)
1521 Path = "/";
1522
1523 // Now we attempt to locate a user:pass@host fragment
1524 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1525 FirstColon += 3;
1526 else
1527 FirstColon += 1;
1528 if (FirstColon >= U.end())
1529 return;
1530
1531 if (FirstColon > SingleSlash)
1532 FirstColon = SingleSlash;
1533
1534 // Find the colon...
1535 I = FirstColon + 1;
1536 if (I > SingleSlash)
1537 I = SingleSlash;
1538 for (; I < SingleSlash && *I != ':'; ++I);
1539 string::const_iterator SecondColon = I;
1540
1541 // Search for the @ after the colon
1542 for (; I < SingleSlash && *I != '@'; ++I);
1543 string::const_iterator At = I;
1544
1545 // Now write the host and user/pass
1546 if (At == SingleSlash)
1547 {
1548 if (FirstColon < SingleSlash)
1549 Host.assign(FirstColon,SingleSlash);
1550 }
1551 else
1552 {
1553 Host.assign(At+1,SingleSlash);
1554 // username and password must be encoded (RFC 3986)
1555 User.assign(DeQuoteString(FirstColon,SecondColon));
1556 if (SecondColon < At)
1557 Password.assign(DeQuoteString(SecondColon+1,At));
1558 }
1559
1560 // Now we parse the RFC 2732 [] hostnames.
1561 unsigned long PortEnd = 0;
1562 InBracket = false;
1563 for (unsigned I = 0; I != Host.length();)
1564 {
1565 if (Host[I] == '[')
1566 {
1567 InBracket = true;
1568 Host.erase(I,1);
1569 continue;
1570 }
1571
1572 if (InBracket == true && Host[I] == ']')
1573 {
1574 InBracket = false;
1575 Host.erase(I,1);
1576 PortEnd = I;
1577 continue;
1578 }
1579 I++;
1580 }
1581
1582 // Tsk, weird.
1583 if (InBracket == true)
1584 {
1585 Host.clear();
1586 return;
1587 }
1588
1589 // Now we parse off a port number from the hostname
1590 Port = 0;
1591 string::size_type Pos = Host.rfind(':');
1592 if (Pos == string::npos || Pos < PortEnd)
1593 return;
1594
1595 Port = atoi(string(Host,Pos+1).c_str());
1596 Host.assign(Host,0,Pos);
1597 }
1598 /*}}}*/
1599 // URI::operator string - Convert the URI to a string /*{{{*/
1600 // ---------------------------------------------------------------------
1601 /* */
1602 URI::operator string()
1603 {
1604 string Res;
1605
1606 if (Access.empty() == false)
1607 Res = Access + ':';
1608
1609 if (Host.empty() == false)
1610 {
1611 if (Access.empty() == false)
1612 Res += "//";
1613
1614 if (User.empty() == false)
1615 {
1616 // FIXME: Technically userinfo is permitted even less
1617 // characters than these, but this is not conveniently
1618 // expressed with a blacklist.
1619 Res += QuoteString(User, ":/?#[]@");
1620 if (Password.empty() == false)
1621 Res += ":" + QuoteString(Password, ":/?#[]@");
1622 Res += "@";
1623 }
1624
1625 // Add RFC 2732 escaping characters
1626 if (Access.empty() == false &&
1627 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1628 Res += '[' + Host + ']';
1629 else
1630 Res += Host;
1631
1632 if (Port != 0)
1633 {
1634 char S[30];
1635 sprintf(S,":%u",Port);
1636 Res += S;
1637 }
1638 }
1639
1640 if (Path.empty() == false)
1641 {
1642 if (Path[0] != '/')
1643 Res += "/" + Path;
1644 else
1645 Res += Path;
1646 }
1647
1648 return Res;
1649 }
1650 /*}}}*/
1651 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1652 // ---------------------------------------------------------------------
1653 /* */
1654 string URI::SiteOnly(const string &URI)
1655 {
1656 ::URI U(URI);
1657 U.User.clear();
1658 U.Password.clear();
1659 U.Path.clear();
1660 return U;
1661 }
1662 /*}}}*/
1663 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1664 // ---------------------------------------------------------------------
1665 /* */
1666 string URI::NoUserPassword(const string &URI)
1667 {
1668 ::URI U(URI);
1669 U.User.clear();
1670 U.Password.clear();
1671 return U;
1672 }
1673 /*}}}*/