]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
some CXXFLAGS housekeeping
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Returns a copy of str without the whitespace (as classified by isspace)
   at its front and back. An empty or all-whitespace input yields "". */
std::string Strip(const std::string &str)
{
   // ensure we have at least one character
   if (str.empty() == true)
      return str;

   // the <ctype.h> classifiers are only defined for unsigned char values
   // (and EOF), so each char is converted before it is tested
   char const * const s = str.c_str();
   size_t start = 0;
   while (isspace(static_cast<unsigned char>(s[start])) != 0)
      ++start; // find the first not-space

   // string contains only whitespaces
   if (s[start] == '\0')
      return "";

   // s[start] is a non-space, so this scan stops at start at the latest
   size_t end = str.length() - 1;
   while (isspace(static_cast<unsigned char>(s[end])) != 0)
      --end; // find the last not-space

   return str.substr(start, end - start + 1);
}

/* Returns true if s finishes with the characters of end. */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   // compare in place instead of building a temporary substring
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}

/* Returns true if s begins with the characters of start. */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   // compare in place instead of building a temporary substring
   return s.compare(0, start.size(), start) == 0;
}

}
}
86 /*}}}*/
87 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This is handy to use before displaying some information to the end user */
90 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
91 {
92 iconv_t cd;
93 const char *inbuf;
94 char *inptr, *outbuf;
95 size_t insize, bufsize;
96 dest->clear();
97
98 cd = iconv_open(codeset, "UTF-8");
99 if (cd == (iconv_t)(-1)) {
100 // Something went wrong
101 if (errno == EINVAL)
102 _error->Error("conversion from 'UTF-8' to '%s' not available",
103 codeset);
104 else
105 perror("iconv_open");
106
107 return false;
108 }
109
110 insize = bufsize = orig.size();
111 inbuf = orig.data();
112 inptr = (char *)inbuf;
113 outbuf = new char[bufsize];
114 size_t lastError = -1;
115
116 while (insize != 0)
117 {
118 char *outptr = outbuf;
119 size_t outsize = bufsize;
120 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
121 dest->append(outbuf, outptr - outbuf);
122 if (err == (size_t)(-1))
123 {
124 switch (errno)
125 {
126 case EILSEQ:
127 insize--;
128 inptr++;
129 // replace a series of unknown multibytes with a single "?"
130 if (lastError != insize) {
131 lastError = insize - 1;
132 dest->append("?");
133 }
134 break;
135 case EINVAL:
136 insize = 0;
137 break;
138 case E2BIG:
139 if (outptr == outbuf)
140 {
141 bufsize *= 2;
142 delete[] outbuf;
143 outbuf = new char[bufsize];
144 }
145 break;
146 }
147 }
148 }
149
150 delete[] outbuf;
151
152 iconv_close(cd);
153
154 return true;
155 }
156 /*}}}*/
157 // strstrip - Remove white space from the front and back of a string /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This is handy to use when parsing a file. It also removes \n's left
160 over from fgets and company */
161 char *_strstrip(char *String)
162 {
163 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
164
165 if (*String == 0)
166 return String;
167 return _strrstrip(String);
168 }
169 /*}}}*/
170 // strrstrip - Remove white space from the back of a string /*{{{*/
171 // ---------------------------------------------------------------------
/* Cuts trailing blanks, tabs, newlines and carriage returns off String by
   writing a terminator after the last other character. Returns String. */
char *_strrstrip(char *String)
{
   // work with a length instead of a pointer so that an empty or
   // all-whitespace string never needs a pointer in front of the buffer
   size_t Length = strlen(String);
   while (Length != 0 &&
	  (String[Length - 1] == ' ' || String[Length - 1] == '\t' ||
	   String[Length - 1] == '\n' || String[Length - 1] == '\r'))
      --Length;

   String[Length] = 0;
   return String;
}
181 /*}}}*/
182 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
183 // ---------------------------------------------------------------------
184 /* */
/* Expands the tabs of the NUL terminated String in place: every tab is
   replaced by the number of blanks needed to reach the next column that
   is a multiple of 8, counting from the start of the string.

   Len is the size of the buffer String lives in. If an expansion
   (including the terminator) would not fit, the string is cut off at
   that tab. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   if (Len == 0)
      return String;

   // current length of the text, kept up to date while it grows
   size_t Used = strlen(String);
   for (char *I = String; *I != 0; ++I)
   {
      if (*I != '\t')
	 continue;

      /* Assume the start of the string is column 0 and find the next 8
         char division */
      size_t const Column = I - String;
      size_t const Spaces = 8 - (Column % 8);

      // one tab becomes Spaces blanks: the text grows by Spaces - 1 and
      // still needs room for its terminator
      if (Used + Spaces > Len)
      {
	 *I = 0;
	 return String;
      }

      // shift everything behind the tab, terminator included
      memmove(I + Spaces,I + 1,Used - Column);
      memset(I,' ',Spaces);
      Used += Spaces - 1;

      // leave I on the last blank; the loop increment moves on to the
      // first character behind the expansion
      I += Spaces - 1;
   }
   return String;
}
216 /*}}}*/
217 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This grabs a single word, converts any % escaped characters to their
220 proper values and advances the pointer. Double quotes are understood
221 and stripped out as well. This is for URI/URL parsing. It can also
222 understand [] brackets. */
/* Extracts the next word from String into Res and moves String behind
   the word and the whitespace following it.

   A word ends at the first whitespace that is not inside "..." or [...].
   %XX escapes (two hex digits) are decoded and double quotes are dropped;
   brackets are kept. Returns false, leaving String and Res untouched, if
   only blanks are left or a quote/bracket is not closed. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the next word; the <ctype.h> classifiers are only defined
   // for unsigned char values, hence the casts
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; ++C)
   {
      if (*C == '"')
      {
	 C = strchr(C + 1, '"');
	 if (C == NULL)
	    return false;
      }
      if (*C == '[')
      {
	 C = strchr(C + 1, ']');
	 if (C == NULL)
	    return false;
      }
   }

   // Now de-quote characters. The copy starts at String, so blanks
   // skipped above end up in the result. A std::string is used so that
   // words of any length fit.
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C &&
	  isxdigit(static_cast<unsigned char>(Start[1])) != 0 &&
	  isxdigit(static_cast<unsigned char>(Start[2])) != 0)
      {
	 char Tmp[3];
	 Tmp[0] = Start[1];
	 Tmp[1] = Start[2];
	 Tmp[2] = 0;
	 Word += (char)strtol(Tmp,0,16);
	 Start += 3;
	 continue;
      }
      if (*Start != '"')
	 Word += *Start;
      ++Start;
   }
   // assigning through c_str() keeps the long-standing behaviour that a
   // decoded NUL (%00) ends the result
   Res = Word.c_str();

   // Skip ending white space
   while (*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0)
      ++C;
   String = C;
   return true;
}
279 /*}}}*/
280 // ParseCWord - Parses a string like a C "" expression /*{{{*/
281 // ---------------------------------------------------------------------
282 /* This expects a series of space separated strings enclosed in ""'s.
283 It concatenates the ""'s into a single string. */
/* Concatenates a series of whitespace separated "..." strings into Res,
   putting a single blank where the input has whitespace between them, and
   moves String to the end of the input.

   Returns false if only blanks are left, if the input is 1024 characters
   or longer, if a quote is not closed, or if anything other than
   whitespace is found outside the quotes. String and Res are untouched
   in that case. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // inputs that did not fit the former 1024 byte buffer are still refused
   if (strlen(String) >= 1024)
      return false;

   std::string Word;
   for (; *C != 0; ++C)
   {
      if (*C == '"')
      {
	 // copy the quoted part verbatim
	 for (++C; *C != 0 && *C != '"'; ++C)
	    Word += *C;

	 if (*C == 0)
	    return false;

	 continue;
      }

      // outside of quotes only whitespace is allowed (the <ctype.h>
      // classifiers are only defined for unsigned char values)
      if (isspace(static_cast<unsigned char>(*C)) == 0)
	 return false;
      // a run of whitespace contributes one blank only
      if (C != String && isspace(static_cast<unsigned char>(C[-1])) != 0)
	 continue;
      Word += ' ';
   }
   Res = Word;
   String = C;
   return true;
}
321 /*}}}*/
322 // QuoteString - Convert a string into quoted form /*{{{*/
323 // ---------------------------------------------------------------------
324 /* */
/* Returns Str with every byte that is listed in Bad, is not printable,
   is a '%', or lies outside 0x21..0x7E replaced by '%' followed by its
   two digit lower-case hexadecimal value. */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   static char const Hex[] = "0123456789abcdef";

   std::string Res;
   Res.reserve(Str.size());
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // classify and print the byte as unsigned: a plain char may be
      // negative, which is undefined for isprint and would otherwise
      // sign-extend into an escape like "%ffffffe9"
      unsigned char const Ch = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
	  Ch == 0x25 || // percent '%' char
	  Ch <= 0x20 || Ch >= 0x7F) // control chars
      {
	 Res += '%';
	 Res += Hex[Ch >> 4];
	 Res += Hex[Ch & 0x0F];
      }
      else
	 Res += *I;
   }
   return Res;
}
341 /*}}}*/
342 // DeQuoteString - Convert a string from quoted form /*{{{*/
343 // ---------------------------------------------------------------------
344 /* This undoes QuoteString */
/* Returns the characters in [begin,end) with every %XX escape (two hex
   digits) replaced by the byte it encodes. A '%' that is not followed by
   two hex digits inside the range is copied unchanged. */
std::string DeQuoteString(std::string::const_iterator const &begin,
			std::string::const_iterator const &end)
{
   std::string Res;
   Res.reserve(end - begin);
   for (std::string::const_iterator I = begin; I != end; ++I)
   {
      // the <ctype.h> classifiers are only defined for unsigned char
      // values, hence the casts
      if (*I == '%' && I + 2 < end &&
	  isxdigit(static_cast<unsigned char>(I[1])) != 0 &&
	  isxdigit(static_cast<unsigned char>(I[2])) != 0)
      {
	 char Tmp[3];
	 Tmp[0] = I[1];
	 Tmp[1] = I[2];
	 Tmp[2] = 0;
	 Res += (char)strtol(Tmp,0,16);
	 // the loop increment steps over the third character
	 I += 2;
	 continue;
      }
      Res += *I;
   }
   return Res;
}
/* Same as above for a whole string. */
std::string DeQuoteString(const std::string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
371
372 /*}}}*/
373 // SizeToStr - Convert a long into a human readable size /*{{{*/
374 // ---------------------------------------------------------------------
375 /* A max of 4 digits are shown before conversion to the next highest unit.
376 The max length of the string will be 5 chars unless the size is > 10
377 YottaBytes (E24) */
378 string SizeToStr(double Size)
379 {
380 double ASize;
381 if (Size >= 0)
382 ASize = Size;
383 else
384 ASize = -1*Size;
385
386 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
387 ExaBytes, ZettaBytes, YottaBytes */
388 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
389 int I = 0;
390 while (I <= 8)
391 {
392 if (ASize < 100 && I != 0)
393 {
394 std::string S;
395 strprintf(S, "%'.1f %c", ASize, Ext[I]);
396 return S;
397 }
398
399 if (ASize < 10000)
400 {
401 std::string S;
402 strprintf(S, "%'.0f %c", ASize, Ext[I]);
403 return S;
404 }
405 ASize /= 1000.0;
406 I++;
407 }
408 return "";
409 }
410 /*}}}*/
411 // TimeToStr - Convert the time into a string /*{{{*/
412 // ---------------------------------------------------------------------
413 /* Converts a number of seconds to a hms format */
414 string TimeToStr(unsigned long Sec)
415 {
416 std::string S;
417 if (Sec > 60*60*24)
418 {
419 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
420 strprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
421 }
422 else if (Sec > 60*60)
423 {
424 //TRANSLATOR: h means hours, min means minutes, s means seconds
425 strprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
426 }
427 else if (Sec > 60)
428 {
429 //TRANSLATOR: min means minutes, s means seconds
430 strprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
431 }
432 else
433 {
434 //TRANSLATOR: s means seconds
435 strprintf(S,_("%lis"),Sec);
436 }
437 return S;
438 }
439 /*}}}*/
440 // SubstVar - Substitute a string for another string /*{{{*/
441 // ---------------------------------------------------------------------
442 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with every non-overlapping occurrence of Subst, found
   from left to right, replaced by Contents. Str is returned as is if
   Subst is empty or does not occur. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty() == true)
      return Str;

   std::string::size_type Hit = Str.find(Subst);
   if (Hit == std::string::npos)
      return Str;

   std::string Out;
   std::string::size_type Cursor = 0;
   do
   {
      // text between the previous match and this one, then the replacement
      Out.append(Str, Cursor, Hit - Cursor);
      Out.append(Contents);
      Cursor = Hit + Subst.length();
      Hit = Str.find(Subst, Cursor);
   } while (Hit != std::string::npos);

   // whatever follows the last match
   Out.append(Str, Cursor, std::string::npos);
   return Out;
}
469 string SubstVar(string Str,const struct SubstVar *Vars)
470 {
471 for (; Vars->Subst != 0; Vars++)
472 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
473 return Str;
474 }
475 /*}}}*/
476 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
477 // ---------------------------------------------------------------------
478 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string consisting of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Indent;
   unsigned long Remaining = Depth + 1;
   while (Remaining != 0)
   {
      Indent += Separator;
      --Remaining;
   }
   return Indent;
}
486 /*}}}*/
487 // URItoFileName - Convert the uri into a unique file name /*{{{*/
488 // ---------------------------------------------------------------------
489 /* This converts a URI into a safe filename. It quotes all unsafe characters
490 and converts / to _ and removes the scheme identifier. The resulting
491 file name should be unique and never occur again for a different file */
492 string URItoFileName(const string &URI)
493 {
494 // Nuke 'sensitive' items
495 ::URI U(URI);
496 U.User.clear();
497 U.Password.clear();
498 U.Access.clear();
499
500 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
501 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
502 replace(NewURI.begin(),NewURI.end(),'/','_');
503 return NewURI;
504 }
505 /*}}}*/
506 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
507 // ---------------------------------------------------------------------
508 /* This routine performs a base64 transformation on a string. It was ripped
509 from wget and then patched and bug fixed.
510
511 This spec can be found in rfc2045 */
/* Returns the base64 representation of the bytes of S, padded with '='
   to a multiple of four characters. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static char const tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. Positions are used instead of iterators so that nothing is
      ever advanced beyond the end of S. */
   std::string::size_type const Length = S.length();
   for (std::string::size_type Pos = 0; Pos < Length; Pos += 3)
   {
      std::string::size_type const Left = Length - Pos;

      // unsigned, so that bytes >= 0x80 do not turn into negative table
      // indices; missing bytes of the last group count as zero
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = static_cast<unsigned char>(S[Pos]);
      if (Left > 1)
	 Bits[1] = static_cast<unsigned char>(S[Pos + 1]);
      if (Left > 2)
	 Bits[2] = static_cast<unsigned char>(S[Pos + 2]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Left == 1)
      {
	 Final += "==";
	 break;
      }

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      if (Left == 2)
      {
	 Final += '=';
	 break;
      }

      Final += tbl[Bits[2] & 0x3f];
   }

   return Final;
}
562 /*}}}*/
563 // stringcmp - Arbitrary string compare /*{{{*/
564 // ---------------------------------------------------------------------
565 /* This safely compares two non-null terminated strings of arbitrary
566 length */
/* Compares the character ranges [A,AEnd) and [B,BEnd). Returns 0 if
   they are equal, -1 or 1 according to the first differing character,
   and - if one range is a prefix of the other - 1 when A is the shorter
   one and -1 when B is. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
583
584 #if __GNUC__ >= 3
/* Same comparison with the first range given as string iterators: 0 for
   equal ranges, -1 or 1 according to the first differing character, 1 if
   A ends first and -1 if B ends first. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
	      const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same comparison with both ranges given as string iterators: 0 for
   equal ranges, -1 or 1 according to the first differing character, 1 if
   A ends first and -1 if B ends first. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
	      std::string::const_iterator B,std::string::const_iterator BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
619 #endif
620 /*}}}*/
621 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
622 // ---------------------------------------------------------------------
623 /* */
624 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
625 {
626 for (; A != AEnd && B != BEnd; A++, B++)
627 if (tolower_ascii(*A) != tolower_ascii(*B))
628 break;
629
630 if (A == AEnd && B == BEnd)
631 return 0;
632 if (A == AEnd)
633 return 1;
634 if (B == BEnd)
635 return -1;
636 if (tolower_ascii(*A) < tolower_ascii(*B))
637 return -1;
638 return 1;
639 }
640 #if __GNUC__ >= 3
641 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
642 const char *B,const char *BEnd)
643 {
644 for (; A != AEnd && B != BEnd; A++, B++)
645 if (tolower_ascii(*A) != tolower_ascii(*B))
646 break;
647
648 if (A == AEnd && B == BEnd)
649 return 0;
650 if (A == AEnd)
651 return 1;
652 if (B == BEnd)
653 return -1;
654 if (tolower_ascii(*A) < tolower_ascii(*B))
655 return -1;
656 return 1;
657 }
658 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
659 string::const_iterator B,string::const_iterator BEnd)
660 {
661 for (; A != AEnd && B != BEnd; A++, B++)
662 if (tolower_ascii(*A) != tolower_ascii(*B))
663 break;
664
665 if (A == AEnd && B == BEnd)
666 return 0;
667 if (A == AEnd)
668 return 1;
669 if (B == BEnd)
670 return -1;
671 if (tolower_ascii(*A) < tolower_ascii(*B))
672 return -1;
673 return 1;
674 }
675 #endif
676 /*}}}*/
677 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
678 // ---------------------------------------------------------------------
679 /* The format is like those used in package files and the method
680 communication system */
681 string LookupTag(const string &Message,const char *Tag,const char *Default)
682 {
683 // Look for a matching tag.
684 int Length = strlen(Tag);
685 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
686 {
687 // Found the tag
688 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
689 {
690 // Find the end of line and strip the leading/trailing spaces
691 string::const_iterator J;
692 I += Length + 1;
693 for (; isspace(*I) != 0 && I < Message.end(); ++I);
694 for (J = I; *J != '\n' && J < Message.end(); ++J);
695 for (; J > I && isspace(J[-1]) != 0; --J);
696
697 return string(I,J);
698 }
699
700 for (; *I != '\n' && I < Message.end(); ++I);
701 }
702
703 // Failed to find a match
704 if (Default == 0)
705 return string();
706 return Default;
707 }
708 /*}}}*/
709 // StringToBool - Converts a string into a boolean /*{{{*/
710 // ---------------------------------------------------------------------
711 /* This inspects the string to see if it is true or if it is false and
712 then returns the result. Several variants of true/false are checked. */
/* Interprets Text as a boolean. Returns 0 or 1 if Text is, as a whole,
   a number with that value or (ignoring case) one of the words listed
   below; returns Default for everything else. */
int StringToBool(const std::string &Text,int Default)
{
   char const * const Str = Text.c_str();

   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted.
   // The value is kept as long: narrowing it to int before the range
   // check would let large numbers wrap around to 0 or 1.
   char *ParseEnd;
   long const Res = strtol(Str,&ParseEnd,0);
   if (ParseEnd == Str + Text.size() && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   static const char * const Negatives[] = {"no","false","without","off","disable"};
   static const char * const Positives[] = {"yes","true","with","on","enable"};

   // Check for negatives
   for (size_t I = 0; I != sizeof(Negatives)/sizeof(Negatives[0]); ++I)
      if (strcasecmp(Str,Negatives[I]) == 0)
	 return 0;

   // Check for positives
   for (size_t I = 0; I != sizeof(Positives)/sizeof(Positives[0]); ++I)
      if (strcasecmp(Str,Positives[I]) == 0)
	 return 1;

   return Default;
}
741 /*}}}*/
742 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
743 // ---------------------------------------------------------------------
744 /* This converts a time_t into a string time representation that is
745 year 2000 compliant and timezone neutral */
/* Formats Date as "Day, DD Mon YYYY HH:MM:SS GMT" using the broken-down
   UTC time from gmtime_r and fixed English names. Returns an empty
   string if gmtime_r fails. */
std::string TimeRFC1123(time_t Date)
{
   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
					      "Aug","Sep","Oct","Nov","Dec"};

   struct tm Utc;
   if (gmtime_r(&Date, &Utc) == NULL)
      return "";

   char Text[300];
   snprintf(Text, sizeof(Text), "%s, %02i %s %i %02i:%02i:%02i GMT",
	    DayNames[Utc.tm_wday], Utc.tm_mday, MonthNames[Utc.tm_mon],
	    Utc.tm_year+1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
   return std::string(Text);
}
762 /*}}}*/
763 // ReadMessages - Read messages from the FD /*{{{*/
764 // ---------------------------------------------------------------------
765 /* This pulls full messages from the input FD into the message buffer.
766 It assumes that messages will not pause during transit so no
767 fancy buffering is used.
768
769 In particular: this reads blocks from the input until it believes
770 that it's run out of input text. Each block is terminated by a
771 double newline ('\n' followed by '\n').
772 */
773 bool ReadMessages(int Fd, vector<string> &List)
774 {
775 char Buffer[64000];
776 // Represents any left-over from the previous iteration of the
777 // parse loop. (i.e., if a message is split across the end
778 // of the buffer, it goes here)
779 string PartialMessage;
780
781 do {
782 int const Res = read(Fd, Buffer, sizeof(Buffer));
783 if (Res < 0 && errno == EINTR)
784 continue;
785
786 // process we read from has died
787 if (Res == 0)
788 return false;
789
790 // No data
791 if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
792 return true;
793 if (Res < 0)
794 return false;
795
796 // extract the message(s) from the buffer
797 char const *Start = Buffer;
798 char const * const End = Buffer + Res;
799
800 char const * NL = (char const *) memchr(Start, '\n', End - Start);
801 if (NL == NULL)
802 {
803 // end of buffer: store what we have so far and read new data in
804 PartialMessage.append(Start, End - Start);
805 Start = End;
806 }
807 else
808 ++NL;
809
810 if (PartialMessage.empty() == false && Start < End)
811 {
812 // if we start with a new line, see if the partial message we have ended with one
813 // so that we properly detect records ending between two read() runs
814 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
815 // the case \r|\n\r\n is handled by the usual double-newline handling
816 if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
817 {
818 if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
819 {
820 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
821 List.push_back(PartialMessage);
822 PartialMessage.clear();
823 while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
824 Start = NL;
825 }
826 }
827 }
828
829 while (Start < End) {
830 char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
831 if (NL2 == NULL)
832 {
833 // end of buffer: store what we have so far and read new data in
834 PartialMessage.append(Start, End - Start);
835 break;
836 }
837 ++NL2;
838
839 // did we find a double newline?
840 if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
841 {
842 PartialMessage.append(Start, NL2 - Start);
843 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
844 List.push_back(PartialMessage);
845 PartialMessage.clear();
846 while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
847 Start = NL2;
848 }
849 NL = NL2;
850 }
851
852 // we have read at least one complete message and nothing left
853 if (PartialMessage.empty() == true)
854 return true;
855
856 if (WaitFd(Fd) == false)
857 return false;
858 } while (true);
859 }
860 /*}}}*/
861 // MonthConv - Converts a month string into a number /*{{{*/
862 // ---------------------------------------------------------------------
863 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
864 Made it a bit more robust with a few tolower_ascii though. */
865 static int MonthConv(char *Month)
866 {
867 switch (tolower_ascii(*Month))
868 {
869 case 'a':
870 return tolower_ascii(Month[1]) == 'p'?3:7;
871 case 'd':
872 return 11;
873 case 'f':
874 return 1;
875 case 'j':
876 if (tolower_ascii(Month[1]) == 'a')
877 return 0;
878 return tolower_ascii(Month[2]) == 'n'?5:6;
879 case 'm':
880 return tolower_ascii(Month[2]) == 'r'?2:4;
881 case 'n':
882 return 10;
883 case 'o':
884 return 9;
885 case 's':
886 return 8;
887
888 // Pretend it is January..
889 default:
890 return 0;
891 }
892 }
893 /*}}}*/
894 // timegm - Internal timegm if the gnu version is not available /*{{{*/
895 // ---------------------------------------------------------------------
896 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
897 than local timezone (mktime assumes the latter).
898
899 This function is a nonstandard GNU extension that is also present on
900 the BSDs and maybe other systems. For others we follow the advice of
901 the manpage of timegm and use his portable replacement. */
902 #ifndef HAVE_TIMEGM
903 static time_t timegm(struct tm *t)
904 {
905 char *tz = getenv("TZ");
906 setenv("TZ", "", 1);
907 tzset();
908 time_t ret = mktime(t);
909 if (tz)
910 setenv("TZ", tz, 1);
911 else
912 unsetenv("TZ");
913 tzset();
914 return ret;
915 }
916 #endif
917 /*}}}*/
918 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
919 // ---------------------------------------------------------------------
920 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
921 with one exception: All timezones (%Z) are accepted but the protocol
922 says that it MUST be GMT, but this one is equal to UTC which we will
923 encounter from time to time (e.g. in Release files) so we accept all
924 here and just assume it is GMT (or UTC) later on */
/* Parses str with strptime in one of the three formats listed below and
   stores the result of timegm in time. Returns false, leaving time
   untouched, if none of the formats matches.

   The names of days and months are parsed in the "C" locale; the locale
   that was active on entry is restored before returning. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   static const char * const Formats[] = {
      // Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
      "%a, %d %b %Y %H:%M:%S %Z",
      // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
      "%A, %d-%b-%y %H:%M:%S %Z",
      // Sun Nov  6 08:49:37 1994       ; ANSI C's asctime() format
      "%a %b %d %H:%M:%S %Y",
   };

   // remember the active locale; the string returned by setlocale may
   // be overwritten by the next call, so it is copied
   char const * const Current = setlocale(LC_ALL, NULL);
   std::string const Saved = (Current != NULL) ? Current : "";
   setlocale (LC_ALL,"C");

   struct tm Tm;
   bool Parsed = false;
   for (size_t I = 0; Parsed == false && I != sizeof(Formats)/sizeof(Formats[0]); ++I)
   {
      // start every attempt from a clean structure: strptime only writes
      // the fields it has parsed
      memset(&Tm, 0, sizeof(Tm));
      Parsed = (strptime(str, Formats[I], &Tm) != NULL);
   }

   // an empty Saved (locale could not be queried) selects the locale of
   // the environment
   setlocale (LC_ALL,Saved.c_str());
   if (Parsed == false)
      return false;

   time = timegm(&Tm);
   return true;
}
943 /*}}}*/
944 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
945 // ---------------------------------------------------------------------
946 /* */
/* Parses str as YYYYMMDDHHMMSS with strptime and stores the result of
   timegm in time. Returns false, leaving time untouched, if str does
   not match. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime only writes the fields it has parsed; clear the rest so
   // that timegm never looks at indeterminate values
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM includes no whitespaces, but they are recommended in the
   // format and ignored by strptime when the input has none
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
957 /*}}}*/
958 // StrToTime - Converts a string into a time_t /*{{{*/
959 // ---------------------------------------------------------------------
960 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
961 and the C library asctime format. It requires the GNU library function
962 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
963 reason the C library does not provide any such function :< This also
964 handles the weird, but unambiguous FTP time format */
965 bool StrToTime(const string &Val,time_t &Result)
966 {
967 struct tm Tm;
968 char Month[10];
969
970 // Skip the day of the week
971 const char *I = strchr(Val.c_str(), ' ');
972
973 // Handle RFC 1123 time
974 Month[0] = 0;
975 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
976 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
977 {
978 // Handle RFC 1036 time
979 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
980 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
981 Tm.tm_year += 1900;
982 else
983 {
984 // asctime format
985 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
986 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
987 {
988 // 'ftp' time
989 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
990 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
991 return false;
992 Tm.tm_mon--;
993 }
994 }
995 }
996
997 Tm.tm_isdst = 0;
998 if (Month[0] != 0)
999 Tm.tm_mon = MonthConv(Month);
1000 else
1001 Tm.tm_mon = 0; // we don't have a month, so pick something
1002 Tm.tm_year -= 1900;
1003
1004 // Convert to local time and then to GMT
1005 Result = timegm(&Tm);
1006 return true;
1007 }
1008 /*}}}*/
1009 // StrToNum - Convert a fixed length string to a number /*{{{*/
1010 // ---------------------------------------------------------------------
1011 /* This is used in decoding the crazy fixed length string headers in
1012 tar and ar files. */
/* Converts the first Len characters of Str, which need not be NUL
   terminated, to a number in the given Base and stores it in Res.

   Returns false if Len is 30 or more or if strtoul cannot convert
   anything. A field made up of blanks only (or an empty one) is accepted
   as 0. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // terminated private copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *Stop;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
1035 /*}}}*/
1036 // StrToNum - Convert a fixed length string to a number /*{{{*/
1037 // ---------------------------------------------------------------------
1038 /* This is used in decoding the crazy fixed length string headers in
1039 tar and ar files. */
/* Converts the first Len characters of Str, which need not be NUL
   terminated, to a number in the given Base and stores it in Res.

   Returns false if Len is 30 or more or if strtoull cannot convert
   anything. A field made up of blanks only (or an empty one) is accepted
   as 0. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   // terminated private copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *Stop;
   Res = strtoull(Field,&Stop,Base);
   return Stop != Field;
}
1062 /*}}}*/
1063
1064 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1065 // ---------------------------------------------------------------------
1066 /* This is used in decoding the 256bit encoded fixed length fields in
1067 tar files */
/* Decodes the Len bytes at Str as a big-endian base-256 number whose
   first byte carries a marker in its top bit. Returns false, leaving
   Res untouched, if Len is 0 or the marker bit is not set. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   // the marker bit is not part of the value
   Res = Str[0] & 0x7F;
   // every byte counts as 0..255: a plain char may be signed and would
   // otherwise subtract from the sum for bytes >= 0x80
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res<<8) + static_cast<unsigned char>(Str[i]);
   return true;
}
									/*}}}*/
// Base256ToNum - Convert a fixed length binary to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files. Same as above for an unsigned long result: additionally
   returns false if the decoded number does not fit. Res is untouched if
   the field is not base-256 encoded at all. */
bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
{
   unsigned long long Num = 0;
   if (Base256ToNum(Str, Num, Len) == false)
      return false;

   // detect values that are cut off by the narrower type
   Res = Num;
   return Res == Num;
}
1097 /*}}}*/
1098 // HexDigit - Convert a hex character into an integer /*{{{*/
1099 // ---------------------------------------------------------------------
1100 /* Helper for Hex2Num */
/* Returns the value (0-15) of the hexadecimal digit c, in either case.
   Any other character yields 0. */
static int HexDigit(int c)
{
   bool const IsDecimal = ('0' <= c && c <= '9');
   bool const IsLower = ('a' <= c && c <= 'f');
   bool const IsUpper = ('A' <= c && c <= 'F');

   return IsDecimal ? c - '0' :
	  IsLower ? c - 'a' + 10 :
	  IsUpper ? c - 'A' + 10 :
	  0;
}
1111 /*}}}*/
1112 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1113 // ---------------------------------------------------------------------
1114 /* The length of the buffer must be exactly 1/2 the length of the string. */
1115 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1116 {
1117 if (Str.length() != Length*2)
1118 return false;
1119
1120 // Convert each digit. We store it in the same order as the string
1121 int J = 0;
1122 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
1123 {
1124 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
1125 return false;
1126
1127 Num[J] = HexDigit(I[0]) << 4;
1128 Num[J] += HexDigit(I[1]);
1129 }
1130
1131 return true;
1132 }
1133 /*}}}*/
1134 // TokSplitString - Split a string up by a given token /*{{{*/
1135 // ---------------------------------------------------------------------
/* This is intended to be a faster splitter; it does not use dynamic
   memory. Input is changed to insert nulls at each token location. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   /* Splits Input in place at every Tok. List receives pointers to the
      start of each piece and is terminated with a null entry. Returns
      false once the pieces (plus terminator) no longer fit into ListMax
      slots; the last used slot is turned into the terminator then. */
   char * const Stop = Input + strlen(Input);

   // Strip any leading spaces
   char *Field = Input;
   while (*Field != 0 && isspace(*Field) != 0)
      ++Field;

   unsigned long Used = 0;
   for (char *Cursor = Field; Cursor != Stop; Field = Cursor)
   {
      // Run forward to the next token (or the end of the input)
      while (Cursor != Stop && *Cursor != Tok)
         ++Cursor;

      // Walk back over trailing tokens/spaces and cut the piece there
      char *Tail = Cursor;
      while (Tail > Field && (Tail[-1] == Tok || isspace(Tail[-1]) != 0))
         --Tail;
      *Tail = 0;

      List[Used] = Field;
      ++Used;
      if (Used >= ListMax)
      {
         List[Used-1] = 0;
         return false;
      }

      // Step over the separator, following spaces and nulls written above
      while (Cursor != Stop &&
             (*Cursor == Tok || isspace(*Cursor) != 0 || *Cursor == 0))
         ++Cursor;
   }

   List[Used] = 0;
   return true;
}
1173 /*}}}*/
1174 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1175 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get an iterable vector */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   /* Cuts haystack at every occurrence of split and returns the pieces
      in order. Empty pieces between two separators are kept, but an
      empty piece after a trailing separator is not produced and an
      empty haystack gives an empty vector. */
   vector<string> exploded;
   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (start != last)
   {
      string::const_iterator const end = std::find(start, last, split);
      exploded.push_back(string(start, end));
      // Only step over a separator that really exists, the iterator
      // must never be moved beyond the end of the string
      if (end == last)
         break;
      start = end + 1;
   }
   return exploded;
}
1193 /*}}}*/
1194 // StringSplit - split a string into a string vector by token /*{{{*/
1195 // ---------------------------------------------------------------------
1196 /* See header for details.
1197 */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   /* Splits s at every occurrence of sep. At most maxsplit elements are
      returned, the last one then carries the unsplit remainder. An empty
      separator is considered bogus and yields an empty vector. */
   vector<string> pieces;
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   while (true)
   {
      // The element about to be added is the last one allowed: it takes
      // everything that is left, separators included
      if (pieces.size() + 1 >= maxsplit)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      size_t const hit = s.find(sep, from);
      pieces.push_back(s.substr(from, hit - from));
      if (hit == string::npos)
         break;
      from = hit + sep.size();
   }
   return pieces;
}
1224 /*}}}*/
1225 // RegexChoice - Simple regex list/list matcher /*{{{*/
1226 // ---------------------------------------------------------------------
1227 /* */
1228 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1229 const char **ListEnd)
1230 {
1231 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1232 R->Hit = false;
1233
1234 unsigned long Hits = 0;
1235 for (; ListBegin < ListEnd; ++ListBegin)
1236 {
1237 // Check if the name is a regex
1238 const char *I;
1239 bool Regex = true;
1240 for (I = *ListBegin; *I != 0; I++)
1241 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1242 break;
1243 if (*I == 0)
1244 Regex = false;
1245
1246 // Compile the regex pattern
1247 regex_t Pattern;
1248 if (Regex == true)
1249 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1250 REG_NOSUB) != 0)
1251 Regex = false;
1252
1253 // Search the list
1254 bool Done = false;
1255 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1256 {
1257 if (R->Str[0] == 0)
1258 continue;
1259
1260 if (strcasecmp(R->Str,*ListBegin) != 0)
1261 {
1262 if (Regex == false)
1263 continue;
1264 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1265 continue;
1266 }
1267 Done = true;
1268
1269 if (R->Hit == false)
1270 Hits++;
1271
1272 R->Hit = true;
1273 }
1274
1275 if (Regex == true)
1276 regfree(&Pattern);
1277
1278 if (Done == false)
1279 _error->Warning(_("Selection %s not found"),*ListBegin);
1280 }
1281
1282 return Hits;
1283 }
1284 /*}}}*/
1285 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1286 // ---------------------------------------------------------------------
1287 /* This is used to make the internationalization strings easier to translate
1288 and to allow reordering of parameters */
/* One formatting attempt into a scratch buffer of 'size' bytes.
   Returns true if the caller is done: either the formatted text fitted
   and was written to 'out', or no scratch buffer could be allocated (in
   which case nothing is written). Returns false if the buffer was too
   small; 'size' is then enlarged for the next attempt. 'args' is consumed
   by the attempt, so the caller has to va_start it again for a retry. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   char * const S = static_cast<char*>(malloc(size));
   // Never hand a NULL buffer to vsnprintf; reporting "done" also stops
   // the retry loops of the callers instead of spinning without memory
   if (S == NULL)
      return true;

   ssize_t const n = vsnprintf(S, size, format, args);
   bool const fits = (n > -1 && n < size);
   if (fits == true)
      out << S;
   else if (n > -1)
      size = n + 1;   // the libc told us exactly how much is needed
   else
      size *= 2;      // no length reported, just try a bigger buffer

   free(S);
   return fits;
}
/* printf-style formatting appended to the stream 'out' */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}
/* printf-style formatting, the result replaces the content of 'out' */
void strprintf(string &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1334 /*}}}*/
1335 // safe_snprintf - Safer snprintf /*{{{*/
1336 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   /* Formats into [Buffer,End) and returns the position of the written
      terminator, ready to be used as Buffer of a follow-up call. End is
      returned if there is no room at all, the output was truncated or
      formatting failed. */
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // vsnprintf reports the length it wanted to write, which can be far
   // larger than the buffer. Compare lengths instead of computing
   // Buffer + Did, a pointer that may lie well outside of the buffer.
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1356 /*}}}*/
1357 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1358 // ---------------------------------------------------------------------
string StripEpoch(const string &VerStr)
{
   // Everything up to and including the first colon is the epoch;
   // a version without a colon is handed back untouched
   string::size_type const Colon = VerStr.find(':');
   if (Colon != string::npos)
      return VerStr.substr(Colon + 1);
   return VerStr;
}
1366 /*}}}*/
1367 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1368 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
int tolower_ascii(int const c)
{
   // Only the 26 ASCII capitals are touched, everything else passes through
   bool const IsUpper = ('A' <= c && c <= 'Z');
   return IsUpper ? c + ('a' - 'A') : c;
}
1379 /*}}}*/
1380
1381 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1382 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
   matched against the argument */
1385 bool CheckDomainList(const string &Host,const string &List)
1386 {
1387 string::const_iterator Start = List.begin();
1388 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1389 {
1390 if (Cur < List.end() && *Cur != ',')
1391 continue;
1392
1393 // Match the end of the string..
1394 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1395 Cur - Start != 0 &&
1396 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1397 return true;
1398
1399 Start = Cur + 1;
1400 }
1401 return false;
1402 }
1403 /*}}}*/
1404 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1405 // ---------------------------------------------------------------------
1406 /* */
size_t strv_length(const char **str_array)
{
   // Count the entries in front of the terminating NULL pointer
   size_t count = 0;
   while (str_array[count] != NULL)
      ++count;
   return count;
}
1415 /*}}}*/
1416 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1417 // ---------------------------------------------------------------------
1418 /* */
string DeEscapeString(const string &input)
{
   /* Returns a copy of input with the escapes resolved:
        \\   becomes a single backslash
        \0NN becomes the character with the octal value NN
        \xNN becomes the character with the hexadecimal value NN
      Any other escaped character is dropped together with its backslash,
      as is a lone backslash at the very end. A \0 or \x that is not
      followed by two more characters is dropped as well, the characters
      behind it are then copied like ordinary text. */
   string output;
   string::size_type const len = input.size();
   for (string::size_type i = 0; i < len; ++i)
   {
      // just copy non-escape chars
      if (input[i] != '\\')
      {
         output += input[i];
         continue;
      }

      // ensure we have a char to read
      if (i + 1 >= len)
         break;
      ++i;

      // deal with double escape
      if (input[i] == '\\')
      {
         output += '\\';
         continue;
      }

      int base;
      if (input[i] == '0')
         base = 8;
      else if (input[i] == 'x')
         base = 16;
      else
         continue; // FIXME: raise exception here?

      // Both digits have to be inside of the string. Reading (and then
      // skipping) beyond the end made the loop miss its end condition.
      if (i + 2 < len)
      {
         char const digits[3] = { input[i + 1], input[i + 2], '\0' };
         output += (char)strtol(digits, 0, base);
         i += 2;
      }
   }
   return output;
}
1477 /*}}}*/
1478 // URI::CopyFrom - Copy from an object /*{{{*/
1479 // ---------------------------------------------------------------------
1480 /* This parses the URI into all of its components */
void URI::CopyFrom(const string &U)
{
   /* Splits U into Access, User, Password, Host, Port and Path.
      Members are only written where the matching part is found, with
      two exceptions: Access is always assigned and an empty Path ends
      up as "/". Port is only reset once the host has been parsed, so
      the early returns below leave it untouched. */
   string::const_iterator I = U.begin();

   // Locate the first colon, this separates the scheme
   // (FirstColon is U.end() if there is no colon at all)
   for (; I < U.end() && *I != ':' ; ++I);
   string::const_iterator FirstColon = I;

   /* Determine if this is a host type URI with a leading double //
      and then search for the first single / */
   string::const_iterator SingleSlash = I;
   if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
      SingleSlash += 3;

   /* Find the / indicating the end of the hostname, ignoring /'s in the
      square brackets */
   bool InBracket = false;
   for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
   {
      if (*SingleSlash == '[')
         InBracket = true;
      if (InBracket == true && *SingleSlash == ']')
         InBracket = false;
   }

   if (SingleSlash > U.end())
      SingleSlash = U.end();

   // We can now write the access and path specifiers
   // (the path starts at the slash found above and runs to the end)
   Access.assign(U.begin(),FirstColon);
   if (SingleSlash != U.end())
      Path.assign(SingleSlash,U.end());
   if (Path.empty() == true)
      Path = "/";

   // Now we attempt to locate a user:pass@host fragment
   // FirstColon is moved behind "://" (or just behind the colon) so that
   // it marks the start of the authority part from here on.
   // NOTE(review): with FirstColon + 2 == U.end() the check below still
   // reads FirstColon[2], i.e. the position at U.end() - confirm this is
   // acceptable for the string implementation in use.
   if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
      FirstColon += 3;
   else
      FirstColon += 1;
   if (FirstColon >= U.end())
      return;

   if (FirstColon > SingleSlash)
      FirstColon = SingleSlash;

   // Find the colon...
   // (searched between the start of the authority and the path slash)
   I = FirstColon + 1;
   if (I > SingleSlash)
      I = SingleSlash;
   for (; I < SingleSlash && *I != ':'; ++I);
   string::const_iterator SecondColon = I;

   // Search for the @ after the colon
   for (; I < SingleSlash && *I != '@'; ++I);
   string::const_iterator At = I;

   // Now write the host and user/pass
   if (At == SingleSlash)
   {
      // No @ found: the whole authority is the host
      if (FirstColon < SingleSlash)
         Host.assign(FirstColon,SingleSlash);
   }
   else
   {
      Host.assign(At+1,SingleSlash);
      // username and password must be encoded (RFC 3986)
      User.assign(DeQuoteString(FirstColon,SecondColon));
      if (SecondColon < At)
         Password.assign(DeQuoteString(SecondColon+1,At));
   }

   // Now we parse the RFC 2732 [] hostnames.
   // The brackets are removed from Host; PortEnd remembers the position
   // right behind the closing bracket.
   unsigned long PortEnd = 0;
   InBracket = false;
   for (unsigned I = 0; I != Host.length();)
   {
      if (Host[I] == '[')
      {
         InBracket = true;
         Host.erase(I,1);
         continue;
      }

      if (InBracket == true && Host[I] == ']')
      {
         InBracket = false;
         Host.erase(I,1);
         PortEnd = I;
         continue;
      }
      I++;
   }

   // Tsk, weird.
   // (an opening bracket was never closed: give up on the host)
   if (InBracket == true)
   {
      Host.clear();
      return;
   }

   // Now we parse off a port number from the hostname
   // A colon in front of PortEnd belonged to the bracketed part and is
   // therefore not a port separator.
   Port = 0;
   string::size_type Pos = Host.rfind(':');
   if (Pos == string::npos || Pos < PortEnd)
      return;

   Port = atoi(string(Host,Pos+1).c_str());
   Host.assign(Host,0,Pos);
}
1591 /*}}}*/
1592 // URI::operator string - Convert the URI to a string /*{{{*/
1593 // ---------------------------------------------------------------------
1594 /* */
1595 URI::operator string()
1596 {
1597 std::stringstream Res;
1598
1599 if (Access.empty() == false)
1600 Res << Access << ':';
1601
1602 if (Host.empty() == false)
1603 {
1604 if (Access.empty() == false)
1605 Res << "//";
1606
1607 if (User.empty() == false)
1608 {
1609 // FIXME: Technically userinfo is permitted even less
1610 // characters than these, but this is not conveniently
1611 // expressed with a blacklist.
1612 Res << QuoteString(User, ":/?#[]@");
1613 if (Password.empty() == false)
1614 Res << ":" << QuoteString(Password, ":/?#[]@");
1615 Res << "@";
1616 }
1617
1618 // Add RFC 2732 escaping characters
1619 if (Access.empty() == false && Host.find_first_of("/:") != string::npos)
1620 Res << '[' << Host << ']';
1621 else
1622 Res << Host;
1623
1624 if (Port != 0)
1625 Res << ':' << Port;
1626 }
1627
1628 if (Path.empty() == false)
1629 {
1630 if (Path[0] != '/')
1631 Res << "/" << Path;
1632 else
1633 Res << Path;
1634 }
1635
1636 return Res.str();
1637 }
1638 /*}}}*/
1639 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1640 string URI::SiteOnly(const string &URI)
1641 {
1642 ::URI U(URI);
1643 U.User.clear();
1644 U.Password.clear();
1645 U.Path.clear();
1646 return U;
1647 }
1648 /*}}}*/
1649 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1650 string URI::ArchiveOnly(const string &URI)
1651 {
1652 ::URI U(URI);
1653 U.User.clear();
1654 U.Password.clear();
1655 if (U.Path.empty() == false && U.Path[U.Path.length() - 1] == '/')
1656 U.Path.erase(U.Path.length() - 1);
1657 return U;
1658 }
1659 /*}}}*/
1660 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1661 string URI::NoUserPassword(const string &URI)
1662 {
1663 ::URI U(URI);
1664 U.User.clear();
1665 U.Password.clear();
1666 return U;
1667 }
1668 /*}}}*/