]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
26e303263deb6c21cbd08729ef6d4a5697bbdbbe
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <stddef.h>
25 #include <stdlib.h>
26 #include <time.h>
27 #include <string>
28 #include <vector>
29 #include <ctype.h>
30 #include <string.h>
31 #include <sstream>
32 #include <stdio.h>
33 #include <algorithm>
34 #include <unistd.h>
35 #include <regex.h>
36 #include <errno.h>
37 #include <stdarg.h>
38 #include <iconv.h>
39
40 #include <apti18n.h>
41 /*}}}*/
42 using namespace std;
43
44 // Strip - Remove white space from the front and back of a string /*{{{*/
45 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/** \brief Return a copy of \a str without leading and trailing whitespace.
 *
 *  Whitespace is whatever isspace() reports. Scanning works on the
 *  NUL-terminated c_str() view, so an embedded NUL byte ends the leading
 *  scan just like the end of the string does.
 *
 *  \param str input string
 *  \return the stripped copy; empty if \a str is empty or only whitespace
 */
std::string Strip(const std::string &str)
{
   // ensure we have at least one character
   if (str.empty() == true)
      return str;

   char const * const s = str.c_str();

   // isspace() is only defined for unsigned char values (and EOF), so cast
   // to avoid undefined behaviour on bytes >= 0x80 where char is signed
   size_t start = 0;
   while (isspace(static_cast<unsigned char>(s[start])) != 0)
      ++start;

   // string contains only whitespaces
   if (s[start] == '\0')
      return "";

   // s[start] is not a space, so this backwards scan stops at start at the latest
   size_t end = str.length() - 1;
   while (isspace(static_cast<unsigned char>(s[end])) != 0)
      --end;

   return str.substr(start, end - start + 1);
}

/** \brief Check whether \a s ends with \a end (an empty \a end always matches). */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   // compare in place instead of building a temporary substring
   return s.compare(s.size() - end.size(), end.size(), end) == 0;
}

/** \brief Check whether \a s begins with \a start (an empty \a start always matches). */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   return s.compare(0, start.size(), start) == 0;
}

   }
}
86 /*}}}*/
87 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
88 // ---------------------------------------------------------------------
89 /* This is handy to use before display some information for enduser */
90 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
91 {
92 iconv_t cd;
93 const char *inbuf;
94 char *inptr, *outbuf;
95 size_t insize, bufsize;
96 dest->clear();
97
98 cd = iconv_open(codeset, "UTF-8");
99 if (cd == (iconv_t)(-1)) {
100 // Something went wrong
101 if (errno == EINVAL)
102 _error->Error("conversion from 'UTF-8' to '%s' not available",
103 codeset);
104 else
105 perror("iconv_open");
106
107 return false;
108 }
109
110 insize = bufsize = orig.size();
111 inbuf = orig.data();
112 inptr = (char *)inbuf;
113 outbuf = new char[bufsize];
114 size_t lastError = -1;
115
116 while (insize != 0)
117 {
118 char *outptr = outbuf;
119 size_t outsize = bufsize;
120 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
121 dest->append(outbuf, outptr - outbuf);
122 if (err == (size_t)(-1))
123 {
124 switch (errno)
125 {
126 case EILSEQ:
127 insize--;
128 inptr++;
129 // replace a series of unknown multibytes with a single "?"
130 if (lastError != insize) {
131 lastError = insize - 1;
132 dest->append("?");
133 }
134 break;
135 case EINVAL:
136 insize = 0;
137 break;
138 case E2BIG:
139 if (outptr == outbuf)
140 {
141 bufsize *= 2;
142 delete[] outbuf;
143 outbuf = new char[bufsize];
144 }
145 break;
146 }
147 }
148 }
149
150 delete[] outbuf;
151
152 iconv_close(cd);
153
154 return true;
155 }
156 /*}}}*/
157 // strstrip - Remove white space from the front and back of a string /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This is handy to use when parsing a file. It also removes \n's left
160 over from fgets and company */
161 char *_strstrip(char *String)
162 {
163 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
164
165 if (*String == 0)
166 return String;
167 return _strrstrip(String);
168 }
169 /*}}}*/
170 // strrstrip - Remove white space from the back of a string /*{{{*/
171 // ---------------------------------------------------------------------
/* Remove trailing spaces, tabs, newlines and carriage returns from String
   in place by writing a new terminator. Returns String. */
char *_strrstrip(char *String)
{
   // End points one past the last kept character; checking End[-1] avoids
   // forming a pointer before the start of the buffer for empty input
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' || End[-1] == '\n' || End[-1] == '\r'))
      --End;
   *End = 0;
   return String;
}
181 /*}}}*/
182 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
183 // ---------------------------------------------------------------------
184 /* */
/* Expand every tab in the NUL-terminated String in place, padding with
   spaces up to the next multiple-of-8 column (column 0 is the start of the
   string).

   Len is the size of the buffer holding String. If expanding a tab would not
   fit into Len bytes (terminator included), the string is cut off at that
   tab. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   char * const BufEnd = String + Len;
   for (char *I = String; I != BufEnd && *I != 0; ++I)
   {
      if (*I != '\t')
         continue;

      size_t const Col = I - String;
      size_t const Pad = 8 - (Col % 8);          // 1..8 spaces replace the tab
      size_t const Tail = strlen(I + 1) + 1;     // rest of the string incl. NUL

      // not enough room for the expanded string: truncate at the tab
      if (Col + Pad + Tail > Len)
      {
         *I = 0;
         return String;
      }

      memmove(I + Pad, I + 1, Tail);
      memset(I, ' ', Pad);
      // continue right behind the padding (the loop increment adds one)
      I += Pad - 1;
   }
   return String;
}
216 /*}}}*/
217 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
218 // ---------------------------------------------------------------------
219 /* This grabs a single word, converts any % escaped characters to their
220 proper values and advances the pointer. Double quotes are understood
221 and stripped out as well. This is for URI/URL parsing. It also can
222 understand [] brackets.*/
/* Parse one word from String into Res and advance String behind the word
   and the whitespace following it.

   A word ends at the first whitespace that is neither inside "..." nor
   inside [...]. Double quotes are removed and %XX escapes (two hex digits)
   are decoded; brackets are kept. The word is copied starting at the
   original String position, so skipped leading blanks are part of Res.

   Returns false (leaving String and Res untouched) if there is no word or a
   quote/bracket is not closed. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; ++C)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters; a std::string has no fixed size limit
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && C - Start > 2 &&
          isxdigit(static_cast<unsigned char>(Start[1])) &&
          isxdigit(static_cast<unsigned char>(Start[2])))
      {
         char const Tmp[3] = {Start[1], Start[2], 0};
         Word += static_cast<char>(strtol(Tmp,0,16));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }
   Res = Word;

   // Skip ending white space
   while (isspace(static_cast<unsigned char>(*C)) != 0)
      ++C;
   String = C;
   return true;
}
279 /*}}}*/
280 // ParseCWord - Parses a string like a C "" expression /*{{{*/
281 // ---------------------------------------------------------------------
282 /* This expects a series of space separated strings enclosed in ""'s.
283 It concatenates the ""'s into a single string. */
/* Parse a sequence of "quoted" parts separated by whitespace, as in C
   string literal concatenation, into Res. The contents of the quoted parts
   are copied verbatim and each run of whitespace between parts becomes a
   single space.

   On success String is advanced to the end of the input. Returns false
   (leaving String and Res untouched) if the input is empty, contains
   anything other than whitespace outside of quotes, or has an unterminated
   quote. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   std::string Word;
   Word.reserve(strlen(String));

   for (; *C != 0; ++C)
   {
      if (*C == '"')
      {
         // copy everything up to the closing quote
         for (++C; *C != 0 && *C != '"'; ++C)
            Word += *C;

         if (*C == 0)
            return false;

         continue;
      }

      bool const IsSpace = isspace(static_cast<unsigned char>(*C)) != 0;
      // collapse runs of whitespace into one blank
      if (C != String && IsSpace && isspace(static_cast<unsigned char>(C[-1])) != 0)
         continue;
      // only whitespace is allowed outside of quotes
      if (IsSpace == false)
         return false;
      Word += ' ';
   }

   Res = Word;
   String = C;
   return true;
}
321 /*}}}*/
322 // QuoteString - Convert a string into quoted from /*{{{*/
323 // ---------------------------------------------------------------------
324 /* */
/* Return Str with every "unsafe" byte replaced by a %xx escape (two
   lowercase hex digits).

   A byte is escaped if it is listed in Bad, is '%', is not printable, or
   lies outside the range 0x21..0x7E (control characters, space and all
   bytes >= 0x7F). */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   static const char HexDigits[] = "0123456789abcdef";

   std::string Res;
   Res.reserve(Str.size());
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // work on the unsigned value: the ctype functions are undefined for
      // negative arguments and bytes >= 0x80 must not compare as negative
      unsigned char const Ch = static_cast<unsigned char>(*I);
      bool const Quote = strchr(Bad, *I) != 0 || isprint(Ch) == 0 ||
                         Ch == 0x25 ||              // percent '%' char
                         Ch <= 0x20 || Ch >= 0x7F;  // control chars
      if (Quote == false)
      {
         Res += *I;
         continue;
      }
      Res += '%';
      Res += HexDigits[Ch >> 4];
      Res += HexDigits[Ch & 0x0F];
   }
   return Res;
}
341 /*}}}*/
342 // DeQuoteString - Convert a string from quoted from /*{{{*/
343 // ---------------------------------------------------------------------
344 /* This undoes QuoteString */
/* Decode the %XX escapes (two hex digits) in the range [begin, end) and
   return the result. A '%' that is not followed by two hex digits inside
   the range is copied unchanged. */
std::string DeQuoteString(std::string::const_iterator const &begin,
                          std::string::const_iterator const &end)
{
   std::string Res;
   for (std::string::const_iterator I = begin; I != end; ++I)
   {
      // "end - I > 2" instead of "I + 2 < end": never form an iterator
      // beyond end; the casts keep isxdigit() defined for bytes >= 0x80
      if (*I == '%' && end - I > 2 &&
          isxdigit(static_cast<unsigned char>(I[1])) &&
          isxdigit(static_cast<unsigned char>(I[2])))
      {
         char const Tmp[3] = {I[1], I[2], 0};
         Res += static_cast<char>(strtol(Tmp,0,16));
         // skip the two digits, the loop increment skips the '%'
         I += 2;
         continue;
      }
      Res += *I;
   }
   return Res;
}
/* Decode the %XX escapes in Str; see the iterator overload. */
std::string DeQuoteString(const std::string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
371
372 /*}}}*/
373 // SizeToStr - Convert a long into a human readable size /*{{{*/
374 // ---------------------------------------------------------------------
375 /* A max of 4 digits are shown before conversion to the next highest unit.
376 The max length of the string will be 5 chars unless the size is > 10
377 YottaBytes (E24) */
378 string SizeToStr(double Size)
379 {
380 double ASize;
381 if (Size >= 0)
382 ASize = Size;
383 else
384 ASize = -1*Size;
385
386 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
387 ExaBytes, ZettaBytes, YottaBytes */
388 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
389 int I = 0;
390 while (I <= 8)
391 {
392 if (ASize < 100 && I != 0)
393 {
394 std::string S;
395 strprintf(S, "%'.1f %c", ASize, Ext[I]);
396 return S;
397 }
398
399 if (ASize < 10000)
400 {
401 std::string S;
402 strprintf(S, "%'.0f %c", ASize, Ext[I]);
403 return S;
404 }
405 ASize /= 1000.0;
406 I++;
407 }
408 return "";
409 }
410 /*}}}*/
411 // TimeToStr - Convert the time into a string /*{{{*/
412 // ---------------------------------------------------------------------
413 /* Converts a number of seconds to a hms format */
414 string TimeToStr(unsigned long Sec)
415 {
416 std::string S;
417 if (Sec > 60*60*24)
418 {
419 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
420 strprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
421 }
422 else if (Sec > 60*60)
423 {
424 //TRANSLATOR: h means hours, min means minutes, s means seconds
425 strprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
426 }
427 else if (Sec > 60)
428 {
429 //TRANSLATOR: min means minutes, s means seconds
430 strprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
431 }
432 else
433 {
434 //TRANSLATOR: s means seconds
435 strprintf(S,_("%lis"),Sec);
436 }
437 return S;
438 }
439 /*}}}*/
440 // SubstVar - Substitute a string for another string /*{{{*/
441 // ---------------------------------------------------------------------
442 /* This replaces all occurrences of Subst with Contents in Str. */
/* Return a copy of Str in which every non-overlapping occurrence of Subst
   (searched left to right) is replaced by Contents. An empty Subst leaves
   Str unchanged. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty())
      return Str;

   std::string Out;
   std::string::size_type Cursor = 0;   // first character not yet copied

   while (Cursor < Str.length())
   {
      std::string::size_type const Hit = Str.find(Subst, Cursor);
      if (Hit == std::string::npos)
         break;

      // text between the previous match and this one, then the replacement
      Out.append(Str, Cursor, Hit - Cursor);
      Out.append(Contents);
      Cursor = Hit + Subst.length();
   }

   // the cursor never moved: no match at all
   if (Cursor == 0)
      return Str;

   // copy whatever follows the last match
   if (Cursor < Str.length())
      Out.append(Str, Cursor, std::string::npos);
   return Out;
}
471 string SubstVar(string Str,const struct SubstVar *Vars)
472 {
473 for (; Vars->Subst != 0; Vars++)
474 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
475 return Str;
476 }
477 /*}}}*/
478 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
479 // ---------------------------------------------------------------------
480 /* Returns a string with the supplied separator depth + 1 times in it */
/* Build a string made of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string output;
   unsigned long remaining = Depth + 1;
   while (remaining != 0)
   {
      output.append(Separator);
      --remaining;
   }
   return output;
}
488 /*}}}*/
489 // URItoFileName - Convert the uri into a unique file name /*{{{*/
490 // ---------------------------------------------------------------------
491 /* This converts a URI into a safe filename. It quotes all unsafe characters
492 and converts / to _ and removes the scheme identifier. The resulting
493 file name should be unique and never occur again for a different file */
494 string URItoFileName(const string &URI)
495 {
496 // Nuke 'sensitive' items
497 ::URI U(URI);
498 U.User.clear();
499 U.Password.clear();
500 U.Access.clear();
501
502 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
503 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
504 replace(NewURI.begin(),NewURI.end(),'/','_');
505 return NewURI;
506 }
507 /*}}}*/
508 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
509 // ---------------------------------------------------------------------
510 /* This routine performs a base64 transformation on a string. It was ripped
511 from wget and then patched and bug fixed.
512
513 This spec can be found in rfc2045 */
/* Return the base64 encoding (RFC 2045 alphabet, '=' padding, no line
   breaks) of the bytes in S. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

   // Pre-allocate the exact output size: 4 characters per started 3 bytes
   std::string Final;
   Final.reserve(((S.length() + 2) / 3) * 4);

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The bytes
      are handled as unsigned char: with a signed char, values >= 0x80 would
      shift to negative table indices. */
   for (std::string::size_type Pos = 0; Pos < S.length(); Pos += 3)
   {
      std::string::size_type const Avail = S.length() - Pos;
      unsigned char const B0 = static_cast<unsigned char>(S[Pos]);
      unsigned char const B1 = Avail > 1 ? static_cast<unsigned char>(S[Pos + 1]) : 0;
      unsigned char const B2 = Avail > 2 ? static_cast<unsigned char>(S[Pos + 2]) : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
564 /*}}}*/
565 // stringcmp - Arbitrary string compare /*{{{*/
566 // ---------------------------------------------------------------------
567 /* This safely compares two non-null terminated strings of arbitrary
568 length */
/* Compare the ranges [A, AEnd) and [B, BEnd) character by character.
   Returns 0 if both are equal. If one range ends while the other continues
   the result is 1 when A is the one that ended and -1 when B ended;
   otherwise -1 or 1 depending on whether the first differing character of A
   is smaller than that of B. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone)
      return BDone ? 0 : 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
585
586 #if __GNUC__ >= 3
/* Same comparison as the pointer overload, with the first range given by
   string iterators: 0 for equal ranges, 1 if A ends first, -1 if B ends
   first, else -1/1 by the first differing character. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone)
      return BDone ? 0 : 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same comparison as the pointer overload, with both ranges given by
   string iterators: 0 for equal ranges, 1 if A ends first, -1 if B ends
   first, else -1/1 by the first differing character. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone)
      return BDone ? 0 : 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
621 #endif
622 /*}}}*/
623 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
624 // ---------------------------------------------------------------------
625 /* */
626 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
627 {
628 for (; A != AEnd && B != BEnd; A++, B++)
629 if (tolower_ascii(*A) != tolower_ascii(*B))
630 break;
631
632 if (A == AEnd && B == BEnd)
633 return 0;
634 if (A == AEnd)
635 return 1;
636 if (B == BEnd)
637 return -1;
638 if (tolower_ascii(*A) < tolower_ascii(*B))
639 return -1;
640 return 1;
641 }
642 #if __GNUC__ >= 3
643 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
644 const char *B,const char *BEnd)
645 {
646 for (; A != AEnd && B != BEnd; A++, B++)
647 if (tolower_ascii(*A) != tolower_ascii(*B))
648 break;
649
650 if (A == AEnd && B == BEnd)
651 return 0;
652 if (A == AEnd)
653 return 1;
654 if (B == BEnd)
655 return -1;
656 if (tolower_ascii(*A) < tolower_ascii(*B))
657 return -1;
658 return 1;
659 }
660 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
661 string::const_iterator B,string::const_iterator BEnd)
662 {
663 for (; A != AEnd && B != BEnd; A++, B++)
664 if (tolower_ascii(*A) != tolower_ascii(*B))
665 break;
666
667 if (A == AEnd && B == BEnd)
668 return 0;
669 if (A == AEnd)
670 return 1;
671 if (B == BEnd)
672 return -1;
673 if (tolower_ascii(*A) < tolower_ascii(*B))
674 return -1;
675 return 1;
676 }
677 #endif
678 /*}}}*/
679 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
680 // ---------------------------------------------------------------------
681 /* The format is like those used in package files and the method
682 communication system */
683 string LookupTag(const string &Message,const char *Tag,const char *Default)
684 {
685 // Look for a matching tag.
686 int Length = strlen(Tag);
687 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
688 {
689 // Found the tag
690 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
691 {
692 // Find the end of line and strip the leading/trailing spaces
693 string::const_iterator J;
694 I += Length + 1;
695 for (; isspace_ascii(*I) != 0 && I < Message.end(); ++I);
696 for (J = I; *J != '\n' && J < Message.end(); ++J);
697 for (; J > I && isspace_ascii(J[-1]) != 0; --J);
698
699 return string(I,J);
700 }
701
702 for (; *I != '\n' && I < Message.end(); ++I);
703 }
704
705 // Failed to find a match
706 if (Default == 0)
707 return string();
708 return Default;
709 }
710 /*}}}*/
711 // StringToBool - Converts a string into a boolean /*{{{*/
712 // ---------------------------------------------------------------------
713 /* This inspects the string to see if it is true or if it is false and
714 then returns the result. Several variants on true/false are checked. */
/* Interpret Text as a boolean.

   Returns 0 or 1 if Text is entirely a number with that value (as parsed by
   strtol with base 0; an empty string parses as 0), 0 for
   no/false/without/off/disable and 1 for yes/true/with/on/enable (compared
   case-insensitively). Anything else yields Default. */
int StringToBool(const std::string &Text,int Default)
{
   char const * const Str = Text.c_str();

   // keep the full long result: narrowing it to int first could turn a
   // large number into 0 or 1
   char *ParseEnd;
   long const Res = strtol(Str,&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   const char * const TextEnd = Str + Text.size();
   if (ParseEnd == TextEnd && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   static const char * const Negatives[] = {"no","false","without","off","disable"};
   for (size_t N = 0; N < sizeof(Negatives)/sizeof(Negatives[0]); ++N)
      if (strcasecmp(Str,Negatives[N]) == 0)
         return 0;

   // Check for positives
   static const char * const Positives[] = {"yes","true","with","on","enable"};
   for (size_t P = 0; P < sizeof(Positives)/sizeof(Positives[0]); ++P)
      if (strcasecmp(Str,Positives[P]) == 0)
         return 1;

   return Default;
}
743 /*}}}*/
744 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
745 // ---------------------------------------------------------------------
746 /* This converts a time_t into a string time representation that is
747 year 2000 compliant and timezone neutral */
/* Format Date as "Sun, 06 Nov 1994 08:49:37 GMT" using the UTC broken-down
   time from gmtime_r() and fixed English day/month names. Returns "" if
   gmtime_r() fails. */
std::string TimeRFC1123(time_t Date)
{
   static const char * const DayNames[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const MonthNames[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                             "Aug","Sep","Oct","Nov","Dec"};

   struct tm Utc;
   if (gmtime_r(&Date, &Utc) == NULL)
      return "";

   char Text[300];
   snprintf(Text, sizeof(Text), "%s, %02i %s %i %02i:%02i:%02i GMT",
            DayNames[Utc.tm_wday], Utc.tm_mday, MonthNames[Utc.tm_mon],
            Utc.tm_year + 1900, Utc.tm_hour, Utc.tm_min, Utc.tm_sec);
   return Text;
}
764 /*}}}*/
765 // ReadMessages - Read messages from the FD /*{{{*/
766 // ---------------------------------------------------------------------
767 /* This pulls full messages from the input FD into the message buffer.
768 It assumes that messages will not pause during transit so no
769 fancy buffering is used.
770
771 In particular: this reads blocks from the input until it believes
772 that it's run out of input text. Each block is terminated by a
773 double newline ('\n' followed by '\n').
774 */
775 bool ReadMessages(int Fd, vector<string> &List)
776 {
777 char Buffer[64000];
778 // Represents any left-over from the previous iteration of the
779 // parse loop. (i.e., if a message is split across the end
780 // of the buffer, it goes here)
781 string PartialMessage;
782
783 do {
784 int const Res = read(Fd, Buffer, sizeof(Buffer));
785 if (Res < 0 && errno == EINTR)
786 continue;
787
788 // process we read from has died
789 if (Res == 0)
790 return false;
791
792 // No data
793 #if EAGAIN != EWOULDBLOCK
794 if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
795 #else
796 if (Res < 0 && errno == EAGAIN)
797 #endif
798 return true;
799 if (Res < 0)
800 return false;
801
802 // extract the message(s) from the buffer
803 char const *Start = Buffer;
804 char const * const End = Buffer + Res;
805
806 char const * NL = (char const *) memchr(Start, '\n', End - Start);
807 if (NL == NULL)
808 {
809 // end of buffer: store what we have so far and read new data in
810 PartialMessage.append(Start, End - Start);
811 Start = End;
812 }
813 else
814 ++NL;
815
816 if (PartialMessage.empty() == false && Start < End)
817 {
818 // if we start with a new line, see if the partial message we have ended with one
819 // so that we properly detect records ending between two read() runs
820 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
821 // the case \r|\n\r\n is handled by the usual double-newline handling
822 if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
823 {
824 if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
825 {
826 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
827 List.push_back(PartialMessage);
828 PartialMessage.clear();
829 while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
830 Start = NL;
831 }
832 }
833 }
834
835 while (Start < End) {
836 char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
837 if (NL2 == NULL)
838 {
839 // end of buffer: store what we have so far and read new data in
840 PartialMessage.append(Start, End - Start);
841 break;
842 }
843 ++NL2;
844
845 // did we find a double newline?
846 if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
847 {
848 PartialMessage.append(Start, NL2 - Start);
849 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
850 List.push_back(PartialMessage);
851 PartialMessage.clear();
852 while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
853 Start = NL2;
854 }
855 NL = NL2;
856 }
857
858 // we have read at least one complete message and nothing left
859 if (PartialMessage.empty() == true)
860 return true;
861
862 if (WaitFd(Fd) == false)
863 return false;
864 } while (true);
865 }
866 /*}}}*/
867 // MonthConv - Converts a month string into a number /*{{{*/
868 // ---------------------------------------------------------------------
869 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
870 Made it a bit more robust with a few tolower_ascii though. */
871 static int MonthConv(char *Month)
872 {
873 switch (tolower_ascii(*Month))
874 {
875 case 'a':
876 return tolower_ascii(Month[1]) == 'p'?3:7;
877 case 'd':
878 return 11;
879 case 'f':
880 return 1;
881 case 'j':
882 if (tolower_ascii(Month[1]) == 'a')
883 return 0;
884 return tolower_ascii(Month[2]) == 'n'?5:6;
885 case 'm':
886 return tolower_ascii(Month[2]) == 'r'?2:4;
887 case 'n':
888 return 10;
889 case 'o':
890 return 9;
891 case 's':
892 return 8;
893
894 // Pretend it is January..
895 default:
896 return 0;
897 }
898 }
899 /*}}}*/
900 // timegm - Internal timegm if the gnu version is not available /*{{{*/
901 // ---------------------------------------------------------------------
902 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
903 than local timezone (mktime assumes the latter).
904
905 This function is a nonstandard GNU extension that is also present on
906 the BSDs and maybe other systems. For others we follow the advice of
907 the manpage of timegm and use his portable replacement. */
908 #ifndef HAVE_TIMEGM
909 static time_t timegm(struct tm *t)
910 {
911 char *tz = getenv("TZ");
912 setenv("TZ", "", 1);
913 tzset();
914 time_t ret = mktime(t);
915 if (tz)
916 setenv("TZ", tz, 1);
917 else
918 unsetenv("TZ");
919 tzset();
920 return ret;
921 }
922 #endif
923 /*}}}*/
924 // FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
925 // ---------------------------------------------------------------------
926 /* tries to parses a full date as specified in RFC2616 Section 3.3.1
927 with one exception: All timezones (%Z) are accepted but the protocol
928 says that it MUST be GMT, but this one is equal to UTC which we will
929 encounter from time to time (e.g. in Release files) so we accept all
930 here and just assume it is GMT (or UTC) later on */
/* Parse str in one of the three HTTP date formats (RFC 1123, RFC 850,
   asctime) and store the result, interpreted as UTC, in time.

   Parsing is done in the "C" locale so the English day and month names
   match; the locale that was active before is put back afterwards. Returns
   false (leaving time untouched) if no format matches. */
bool RFC1123StrToTime(const char* const str,time_t &time)
{
   static const char * const Formats[] = {
      // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
      "%a, %d %b %Y %H:%M:%S %Z",
      // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
      "%A, %d-%b-%y %H:%M:%S %Z",
      // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
      "%a %b %d %H:%M:%S %Y",
   };

   // remember the current locale so it can be restored; fall back to the
   // environment's locale ("") if it cannot be queried
   char const * const Current = setlocale(LC_ALL, NULL);
   std::string const Saved = (Current != NULL) ? Current : "";
   setlocale (LC_ALL,"C");

   struct tm Tm;
   bool Parsed = false;
   for (size_t F = 0; F < sizeof(Formats)/sizeof(Formats[0]) && Parsed == false; ++F)
   {
      // start from a clean struct: strptime() only sets the fields it
      // parsed and a failed attempt may leave partial results behind
      memset(&Tm, 0, sizeof(Tm));
      Parsed = (strptime(str, Formats[F], &Tm) != NULL);
   }

   setlocale (LC_ALL,Saved.c_str());
   if (Parsed == false)
      return false;

   time = timegm(&Tm);
   return true;
}
949 /*}}}*/
950 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
951 // ---------------------------------------------------------------------
952 /* */
/* Parse an FTP MDTM timestamp (YYYYMMDDHHMMSS) and store it, interpreted
   as UTC, in time. Returns false (leaving time untouched) if str does not
   match. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // zero the struct first: strptime() only sets the fields it parsed
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));

   // MDTM includes no whitespaces but recommend and ignored by strptime
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
963 /*}}}*/
964 // StrToTime - Converts a string into a time_t /*{{{*/
965 // ---------------------------------------------------------------------
966 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
967 and the C library asctime format. It requires the GNU library function
968 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
969 reason the C library does not provide any such function :< This also
970 handles the weird, but unambiguous FTP time format*/
971 bool StrToTime(const string &Val,time_t &Result)
972 {
973 struct tm Tm;
974 char Month[10];
975
976 // Skip the day of the week
977 const char *I = strchr(Val.c_str(), ' ');
978
979 // Handle RFC 1123 time
980 Month[0] = 0;
981 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
982 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
983 {
984 // Handle RFC 1036 time
985 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
986 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
987 Tm.tm_year += 1900;
988 else
989 {
990 // asctime format
991 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
992 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
993 {
994 // 'ftp' time
995 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
996 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
997 return false;
998 Tm.tm_mon--;
999 }
1000 }
1001 }
1002
1003 Tm.tm_isdst = 0;
1004 if (Month[0] != 0)
1005 Tm.tm_mon = MonthConv(Month);
1006 else
1007 Tm.tm_mon = 0; // we don't have a month, so pick something
1008 Tm.tm_year -= 1900;
1009
1010 // Convert to local time and then to GMT
1011 Result = timegm(&Tm);
1012 return true;
1013 }
1014 /*}}}*/
1015 // StrToNum - Convert a fixed length string to a number /*{{{*/
1016 // ---------------------------------------------------------------------
1017 /* This is used in decoding the crazy fixed length string headers in
1018 tar and ar files. */
/* Convert the first Len characters of Str (which need not be
   NUL-terminated) to a number in the given Base using strtoul().

   A field that consists only of spaces yields 0 and true. Returns false if
   Len is 30 or more or if strtoul() could not convert anything. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // work on a terminated copy of the fixed length field
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *Stop;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
1041 /*}}}*/
1042 // StrToNum - Convert a fixed length string to a number /*{{{*/
1043 // ---------------------------------------------------------------------
1044 /* This is used in decoding the crazy fixed length string headers in
1045 tar and ar files. */
/* Convert the first Len characters of Str (which need not be
   NUL-terminated) to a number in the given Base using strtoull().

   A field that consists only of spaces yields 0 and true. Returns false if
   Len is 30 or more or if strtoull() could not convert anything. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // work on a terminated copy of the fixed length field
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *Stop;
   Res = strtoull(Field,&Stop,Base);
   return Stop != Field;
}
1068 /*}}}*/
1069
1070 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1071 // ---------------------------------------------------------------------
1072 /* This is used in decoding the base-256 encoded fixed length fields in
1073 tar files */
/* Decode a big-endian base-256 number of Len bytes into Res.

   The most significant bit of the first byte must be set, otherwise false
   is returned and Res is left untouched; the remaining seven bits of that
   byte are the most significant bits of the value. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if ((Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
      // add the byte as an unsigned value: a plain (signed) char >= 0x80
      // would be sign-extended and corrupt the sum
      Res = (Res<<8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1086 /*}}}*/
1087 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1088 // ---------------------------------------------------------------------
1089 /* This is used in decoding the base-256 encoded fixed length fields in
1090 tar files */
1091 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1092 {
1093 unsigned long long Num = 0;
1094 bool rc;
1095
1096 rc = Base256ToNum(Str, Num, Len);
1097 // rudimentary check for overflow (Res = ulong, Num = ulonglong)
1098 Res = Num;
1099 if (Res != Num)
1100 return false;
1101
1102 return rc;
1103 }
1104 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: maps '0'-'9', 'a'-'f' and 'A'-'F' to their value
   0-15 and every other input to -1. */
static int HexDigit(int c)
{
   if ('0' <= c && c <= '9')
      return c - '0';

   // Setting bit 0x20 folds the ASCII upper case letters onto their lower
   // case twins, so a single range test covers both spellings
   int const folded = c | 0x20;
   if ('a' <= folded && folded <= 'f')
      return 10 + (folded - 'a');

   return -1;
}
1118 /*}}}*/
1119 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1120 // ---------------------------------------------------------------------
1121 /* The length of the buffer must be exactly 1/2 the length of the string. */
1122 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1123 {
1124 return Hex2Num(APT::StringView(Str), Num, Length);
1125 }
1126
1127 bool Hex2Num(const APT::StringView Str,unsigned char *Num,unsigned int Length)
1128 {
1129 if (Str.length() != Length*2)
1130 return false;
1131
1132 // Convert each digit. We store it in the same order as the string
1133 int J = 0;
1134 for (auto I = Str.begin(); I != Str.end();J++, I += 2)
1135 {
1136 int first_half = HexDigit(I[0]);
1137 int second_half;
1138 if (first_half < 0)
1139 return false;
1140
1141 second_half = HexDigit(I[1]);
1142 if (second_half < 0)
1143 return false;
1144 Num[J] = first_half << 4;
1145 Num[J] += second_half;
1146 }
1147
1148 return true;
1149 }
1150 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memories. Input is changed to insert nulls at each token location.

   Tok is the separator, Input the NUL terminated string to split in
   place. List receives pointers into Input, one per item, with white
   space around each item removed, followed by a terminating null pointer.
   ListMax is the number of slots in List including the one needed for
   the terminator.

   Returns true if everything fit. Returns false if List is too small; in
   that case the last slot is overwritten with the terminator, or - for
   ListMax == 0 - nothing is touched at all. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Without room for even the terminating entry nothing can be stored
   if (ListMax == 0)
      return false;

   // isspace() is only defined for values representable as unsigned char
   // (and EOF), so a plain char which might be negative is converted first
   auto const IsSpace = [](char const C) {
      return isspace(static_cast<unsigned char>(C)) != 0;
   };

   // Strip any leading spaces
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   for (; *Start != 0 && IsSpace(*Start); ++Start);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; ++Pos);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok || IsSpace(End[-1])); --End);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // the slot just filled is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos; the nulls written above are skipped as well
      for (; Pos != Stop && (*Pos == Tok || IsSpace(*Pos) || *Pos == 0); ++Pos);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1190 /*}}}*/
// VectorizeString - Split a string up into a vector of strings	/*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.

   haystack is cut at every occurrence of split. Empty items in front of
   or between separators are kept, but a separator at the very end of the
   string does not produce a trailing empty item. An empty haystack gives
   an empty vector. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (start != last)
   {
      // find the end of the current item
      string::const_iterator stop = start;
      while (stop != last && *stop != split)
         ++stop;
      exploded.push_back(string(start, stop));

      // Only step over a separator which is really there: moving an
      // iterator beyond end() is undefined behaviour
      if (stop == last)
         break;
      start = stop + 1;
   }
   return exploded;
}
1210 /*}}}*/
// StringSplit - split a string into a string vector by token		/*{{{*/
// ---------------------------------------------------------------------
/* See header for details.

   s is cut at every occurrence of the (multi character) separator sep.
   An empty sep is bogus and gives an empty vector. At most maxsplit items
   are produced; once that limit is reached the last item holds the whole
   unsplit remainder of s. A maxsplit of zero behaves like one. */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> pieces;
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   while (true)
   {
      size_t const hit = s.find(sep, from);

      // Either nothing is left to split on or the next item is the last
      // one allowed: the remainder goes in as it is
      if (hit == string::npos || pieces.size() + 1 >= maxsplit)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1241 /*}}}*/
1242 // RegexChoice - Simple regex list/list matcher /*{{{*/
1243 // ---------------------------------------------------------------------
1244 /* */
1245 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1246 const char **ListEnd)
1247 {
1248 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1249 R->Hit = false;
1250
1251 unsigned long Hits = 0;
1252 for (; ListBegin < ListEnd; ++ListBegin)
1253 {
1254 // Check if the name is a regex
1255 const char *I;
1256 bool Regex = true;
1257 for (I = *ListBegin; *I != 0; I++)
1258 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1259 break;
1260 if (*I == 0)
1261 Regex = false;
1262
1263 // Compile the regex pattern
1264 regex_t Pattern;
1265 if (Regex == true)
1266 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1267 REG_NOSUB) != 0)
1268 Regex = false;
1269
1270 // Search the list
1271 bool Done = false;
1272 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1273 {
1274 if (R->Str[0] == 0)
1275 continue;
1276
1277 if (strcasecmp(R->Str,*ListBegin) != 0)
1278 {
1279 if (Regex == false)
1280 continue;
1281 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1282 continue;
1283 }
1284 Done = true;
1285
1286 if (R->Hit == false)
1287 Hits++;
1288
1289 R->Hit = true;
1290 }
1291
1292 if (Regex == true)
1293 regfree(&Pattern);
1294
1295 if (Done == false)
1296 _error->Warning(_("Selection %s not found"),*ListBegin);
1297 }
1298
1299 return Hits;
1300 }
1301 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* One formatting attempt with a scratch buffer of size bytes.

   Returns true if no further attempt is needed: either the text fit and
   was written to out, or vsnprintf() reported an error, in which case
   nothing is written because a bigger buffer would not help. Returns
   false if the buffer was too small; size is then set to the size the
   next attempt needs. args is consumed, so the caller has to restart it
   before trying again. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // the vector releases the buffer on every path and reports a failed
   // allocation as an exception instead of handing NULL to vsnprintf()
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(buffer.data(), buffer.size(), format, args);
   if (n < 0)
      return true;
   if (n < size) {
      out << buffer.data();
      return true;
   }
   // n excludes the terminating NUL
   size = n + 1;
   return false;
}

/* printf() to a C++ stream: formats the arguments according to format and
   writes the result to out. */
void ioprintf(ostream &out,const char *format,...)
{
   ssize_t size = 400;
   bool done = false;
   while (done == false) {
      // each attempt needs its own fresh argument list
      va_list args;
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}

/* sprintf() to a C++ string: formats the arguments according to format
   and replaces the content of out with the result. */
void strprintf(string &out,const char *format,...)
{
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   while (done == false) {
      // each attempt needs its own fresh argument list
      va_list args;
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1351 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string, so the result can be fed straight
   into the next call as Buffer. The returned string is always null
   terminated unless Buffer == End. If the text had to be cut short or
   formatting failed End is returned. This is a better alternative to
   using consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   // No room at all, not even for the terminator
   if (Buffer >= End)
      return End;

   va_list Args;
   va_start(Args,Format);
   int const Written = vsnprintf(Buffer,End - Buffer,Format,Args);
   va_end(Args);

   // Written is the length the complete text would have had
   bool const Overrun = Written < 0 || Written > End - Buffer;
   return Overrun ? End : Buffer + Written;
}
1373 /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Returns everything behind the first colon of VerStr, or VerStr itself
   if it does not contain a colon. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1383 /*}}}*/
1384
// tolower_ascii - tolower() function that ignores the locale		/*{{{*/
// ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ASCII chars anyway.

   This is the out-of-line definition; it simply forwards its argument to
   tolower_ascii_inline() and returns the result. */
// NOTE(review): the #undef presumably gets rid of a macro of the same name
// from the header so that a real function can be defined here - confirm
// against strutl.h
#undef tolower_ascii
int tolower_ascii(int const c) APT_CONST APT_COLD;
int tolower_ascii(int const c)
{
   return tolower_ascii_inline(c);
}
1397 /*}}}*/
1398
// isspace_ascii - isspace() function that ignores the locale		/*{{{*/
// ---------------------------------------------------------------------
/* This little function is one of the most called methods we have and
   therefore tries to do the absolute minimum - it is notably faster than
   the standard isspace() and as a bonus avoids problems with different
   locales - we only operate on ASCII chars anyway.

   This is the out-of-line definition; it simply forwards its argument to
   isspace_ascii_inline() and returns the result. */
// NOTE(review): the #undef presumably gets rid of a macro of the same name
// from the header so that a real function can be defined here - confirm
// against strutl.h
#undef isspace_ascii
int isspace_ascii(int const c) APT_CONST APT_COLD;
int isspace_ascii(int const c)
{
   return isspace_ascii_inline(c);
}
1411 /*}}}*/
1412
1413 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1414 // ---------------------------------------------------------------------
1415 /* The domain list is a comma separate list of domains that are suffix
1416 matched against the argument */
1417 bool CheckDomainList(const string &Host,const string &List)
1418 {
1419 string::const_iterator Start = List.begin();
1420 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1421 {
1422 if (Cur < List.end() && *Cur != ',')
1423 continue;
1424
1425 // Match the end of the string..
1426 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1427 Cur - Start != 0 &&
1428 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1429 return true;
1430
1431 Start = Cur + 1;
1432 }
1433 return false;
1434 }
1435 /*}}}*/
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries of str_array in front of the first NULL entry; the
   terminator itself is not counted. */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return static_cast<size_t>(cursor - str_array);
}
1447 /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which
     \\    is replaced by a single backslash,
     \0XX  by the character with the octal code XX and
     \xXX  by the character with the hexadecimal code XX.
   A backslash in front of any other character is dropped together with
   that character. The same goes for a \0 or \x which is not followed by
   two more characters: only the two character escape is dropped and what
   follows is copied as it is. A single backslash at the very end of the
   input is dropped as well. */
string DeEscapeString(const string &input)
{
   string output;
   string::size_type const size = input.size();
   for (string::size_type pos = 0; pos < size; ++pos)
   {
      // just copy non-escape chars
      char const c = input[pos];
      if (c != '\\')
      {
         output += c;
         continue;
      }

      // ensure we have a char to read
      if (pos + 1 >= size)
         break;
      char const kind = input[++pos];

      // deal with double escape
      if (kind == '\\')
      {
         output += '\\';
         continue;
      }

      int base;
      if (kind == '0')
         base = 8;
      else if (kind == 'x')
         base = 16;
      else
         continue;   // FIXME: raise exception here?

      // Both digits have to be inside the string. Accepting pos + 2 == size
      // here would read the terminator as a digit and move the position
      // beyond the end of the input.
      if (size - pos <= 2)
         continue;

      char const digits[3] = { input[pos + 1], input[pos + 2], '\0' };
      output += static_cast<char>(strtol(digits, NULL, base));
      pos += 2;
   }
   return output;
}
1509 /*}}}*/
// URI::CopyFrom - Copy from an object					/*{{{*/
// ---------------------------------------------------------------------
/* This parses the URI into all of its components: Access (everything in
   front of the first colon), User, Password, Host, Port and Path.

   Members are only written on the paths which reach them: Access is
   always assigned, Path is assigned if the URI has one and defaulted to
   "/" if it is empty afterwards, the remaining members are left alone
   if the function returns before getting to them. */
void URI::CopyFrom(const string &U)
{
   string::const_iterator I = U.begin();

   // Locate the first colon, this separates the scheme
   for (; I < U.end() && *I != ':' ; ++I);
   string::const_iterator FirstColon = I;

   /* Determine if this is a host type URI with a leading double //
      and then search for the first single / */
   string::const_iterator SingleSlash = I;
   if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
      SingleSlash += 3;

   /* Find the / indicating the end of the hostname, ignoring /'s in the
      square brackets */
   bool InBracket = false;
   for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
   {
      if (*SingleSlash == '[')
         InBracket = true;
      if (InBracket == true && *SingleSlash == ']')
         InBracket = false;
   }

   // Clamp in case the search position ended up behind the end
   if (SingleSlash > U.end())
      SingleSlash = U.end();

   // We can now write the access and path specifiers
   Access.assign(U.begin(),FirstColon);
   // Path is not cleared if the URI has none; it only falls back to "/"
   // if it is empty at this point
   if (SingleSlash != U.end())
      Path.assign(SingleSlash,U.end());
   if (Path.empty() == true)
      Path = "/";

   // Now we attempt to locate a user:pass@host fragment
   // FirstColon is moved behind the "://" or ":" and from here on marks
   // the first character following the scheme separator
   if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
      FirstColon += 3;
   else
      FirstColon += 1;
   // Nothing behind the separator: Host, User, Password and Port are not
   // touched at all
   if (FirstColon >= U.end())
      return;

   if (FirstColon > SingleSlash)
      FirstColon = SingleSlash;

   // Find the colon...
   // I is the lower bound for the searches below, one character behind
   // FirstColon but never behind SingleSlash
   I = FirstColon + 1;
   if (I > SingleSlash)
      I = SingleSlash;

   // Search for the @ separating user:pass from host
   // The range [I,SingleSlash) is walked backwards, so the last '@' wins.
   // base() of the match points one behind the '@'; base() == I is what
   // std::find hands back for "not found" and is mapped to SingleSlash.
   auto const RevAt = std::find(
         std::string::const_reverse_iterator(SingleSlash),
         std::string::const_reverse_iterator(I), '@');
   string::const_iterator const At = RevAt.base() == I ? SingleSlash : std::prev(RevAt.base());
   // and then look for the colon between user and pass
   string::const_iterator const SecondColon = std::find(I, At, ':');

   // Now write the host and user/pass
   if (At == SingleSlash)
   {
      // no user info, the whole range is the host (possibly with a port)
      if (FirstColon < SingleSlash)
         Host.assign(FirstColon,SingleSlash);
   }
   else
   {
      Host.assign(At+1,SingleSlash);
      // username and password must be encoded (RFC 3986)
      User.assign(DeQuoteString(FirstColon,SecondColon));
      // SecondColon == At means there was no colon and hence no password
      if (SecondColon < At)
         Password.assign(DeQuoteString(SecondColon+1,At));
   }

   // Now we parse the RFC 2732 [] hostnames.
   // The brackets are removed from Host in place. PortEnd remembers the
   // index directly behind the last removed ']' so that colons inside the
   // brackets are not mistaken for the port separator further down.
   unsigned long PortEnd = 0;
   InBracket = false;
   for (unsigned I = 0; I != Host.length();)
   {
      if (Host[I] == '[')
      {
         InBracket = true;
         Host.erase(I,1);
         // do not advance: the next character moved to index I
         continue;
      }

      if (InBracket == true && Host[I] == ']')
      {
         InBracket = false;
         Host.erase(I,1);
         PortEnd = I;
         continue;
      }
      I++;
   }

   // Tsk, weird. A '[' without its ']': give up on the host, Port is
   // left as it was
   if (InBracket == true)
   {
      Host.clear();
      return;
   }

   // Now we parse off a port number from the hostname
   Port = 0;
   string::size_type Pos = Host.rfind(':');
   // no colon at all, or the last colon belongs to the bracketed part
   if (Pos == string::npos || Pos < PortEnd)
      return;

   // everything behind the last colon is the port, the rest stays the host
   Port = atoi(string(Host,Pos+1).c_str());
   Host.assign(Host,0,Pos);
}
1625 /*}}}*/
1626 // URI::operator string - Convert the URI to a string /*{{{*/
1627 // ---------------------------------------------------------------------
1628 /* */
1629 URI::operator string()
1630 {
1631 std::stringstream Res;
1632
1633 if (Access.empty() == false)
1634 Res << Access << ':';
1635
1636 if (Host.empty() == false)
1637 {
1638 if (Access.empty() == false)
1639 Res << "//";
1640
1641 if (User.empty() == false)
1642 {
1643 // FIXME: Technically userinfo is permitted even less
1644 // characters than these, but this is not conveniently
1645 // expressed with a blacklist.
1646 Res << QuoteString(User, ":/?#[]@");
1647 if (Password.empty() == false)
1648 Res << ":" << QuoteString(Password, ":/?#[]@");
1649 Res << "@";
1650 }
1651
1652 // Add RFC 2732 escaping characters
1653 if (Access.empty() == false && Host.find_first_of("/:") != string::npos)
1654 Res << '[' << Host << ']';
1655 else
1656 Res << Host;
1657
1658 if (Port != 0)
1659 Res << ':' << std::to_string(Port);
1660 }
1661
1662 if (Path.empty() == false)
1663 {
1664 if (Path[0] != '/')
1665 Res << "/" << Path;
1666 else
1667 Res << Path;
1668 }
1669
1670 return Res.str();
1671 }
1672 /*}}}*/
1673 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1674 string URI::SiteOnly(const string &URI)
1675 {
1676 ::URI U(URI);
1677 U.User.clear();
1678 U.Password.clear();
1679 U.Path.clear();
1680 return U;
1681 }
1682 /*}}}*/
1683 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1684 string URI::ArchiveOnly(const string &URI)
1685 {
1686 ::URI U(URI);
1687 U.User.clear();
1688 U.Password.clear();
1689 if (U.Path.empty() == false && U.Path[U.Path.length() - 1] == '/')
1690 U.Path.erase(U.Path.length() - 1);
1691 return U;
1692 }
1693 /*}}}*/
1694 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1695 string URI::NoUserPassword(const string &URI)
1696 {
1697 ::URI U(URI);
1698 U.User.clear();
1699 U.Password.clear();
1700 return U;
1701 }
1702 /*}}}*/