]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Revert "if the FileFd failed already following calls should fail, too"
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <algorithm>
25 #include <iomanip>
26 #include <locale>
27 #include <sstream>
28 #include <string>
29 #include <vector>
30
31 #include <stddef.h>
32 #include <stdlib.h>
33 #include <time.h>
34 #include <ctype.h>
35 #include <string.h>
36 #include <stdio.h>
37 #include <unistd.h>
38 #include <regex.h>
39 #include <errno.h>
40 #include <stdarg.h>
41 #include <iconv.h>
42
43 #include <apti18n.h>
44 /*}}}*/
45 using namespace std;
46
47 // Strip - Remove white space from the front and back of a string /*{{{*/
48 // ---------------------------------------------------------------------
49 namespace APT {
50 namespace String {
/* Returns a copy of str without leading and trailing whitespace (as
   classified by isspace). An empty or all-whitespace input yields "". */
std::string Strip(const std::string &str)
{
   // isspace() is only defined for values representable as unsigned char
   // (or EOF); a plain char holding a non-ASCII byte may be negative.
   auto const is_space = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };

   size_t const length = str.length();
   size_t start = 0;
   while (start < length && is_space(str[start]))
      ++start;

   // nothing but whitespace (an embedded NUL ends the scan as it always did)
   if (start == length || str[start] == '\0')
      return "";

   // str[start] is not a space, so this scan cannot run below start
   size_t end = length - 1;
   while (is_space(str[end]))
      --end;

   return str.substr(start, end - start + 1);
}
72
/* True if s finishes with the characters of end (an empty end always
   matches). */
bool Endswith(const std::string &s, const std::string &end)
{
   size_t const suffixLen = end.size();
   // compare the tail of s in place
   return suffixLen <= s.size() &&
      s.compare(s.size() - suffixLen, suffixLen, end) == 0;
}
79
/* True if s begins with the characters of start (an empty start always
   matches). */
bool Startswith(const std::string &s, const std::string &start)
{
   size_t const prefixLen = start.size();
   // compare the head of s in place
   return prefixLen <= s.size() && s.compare(0, prefixLen, start) == 0;
}
86
87 }
88 }
89 /*}}}*/
90 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
91 // ---------------------------------------------------------------------
92 /* This is handy to use before display some information for enduser */
93 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
94 {
95 iconv_t cd;
96 const char *inbuf;
97 char *inptr, *outbuf;
98 size_t insize, bufsize;
99 dest->clear();
100
101 cd = iconv_open(codeset, "UTF-8");
102 if (cd == (iconv_t)(-1)) {
103 // Something went wrong
104 if (errno == EINVAL)
105 _error->Error("conversion from 'UTF-8' to '%s' not available",
106 codeset);
107 else
108 perror("iconv_open");
109
110 return false;
111 }
112
113 insize = bufsize = orig.size();
114 inbuf = orig.data();
115 inptr = (char *)inbuf;
116 outbuf = new char[bufsize];
117 size_t lastError = -1;
118
119 while (insize != 0)
120 {
121 char *outptr = outbuf;
122 size_t outsize = bufsize;
123 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
124 dest->append(outbuf, outptr - outbuf);
125 if (err == (size_t)(-1))
126 {
127 switch (errno)
128 {
129 case EILSEQ:
130 insize--;
131 inptr++;
132 // replace a series of unknown multibytes with a single "?"
133 if (lastError != insize) {
134 lastError = insize - 1;
135 dest->append("?");
136 }
137 break;
138 case EINVAL:
139 insize = 0;
140 break;
141 case E2BIG:
142 if (outptr == outbuf)
143 {
144 bufsize *= 2;
145 delete[] outbuf;
146 outbuf = new char[bufsize];
147 }
148 break;
149 }
150 }
151 }
152
153 delete[] outbuf;
154
155 iconv_close(cd);
156
157 return true;
158 }
159 /*}}}*/
160 // strstrip - Remove white space from the front and back of a string /*{{{*/
161 // ---------------------------------------------------------------------
162 /* This is handy to use when parsing a file. It also removes \n's left
163 over from fgets and company */
164 char *_strstrip(char *String)
165 {
166 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
167
168 if (*String == 0)
169 return String;
170 return _strrstrip(String);
171 }
172 /*}}}*/
173 // strrstrip - Remove white space from the back of a string /*{{{*/
174 // ---------------------------------------------------------------------
/* Removes trailing blanks, tabs, newlines and carriage returns from String
   by writing a terminator over the first of them. Returns String. */
char *_strrstrip(char *String)
{
   // Work with a length rather than a pointer so that empty and
   // all-whitespace input never forms a pointer before the buffer start.
   size_t Len = strlen(String);
   while (Len > 0 && (String[Len - 1] == ' ' || String[Len - 1] == '\t' ||
                      String[Len - 1] == '\n' || String[Len - 1] == '\r'))
      --Len;
   String[Len] = 0;
   return String;
}
184 /*}}}*/
185 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
186 // ---------------------------------------------------------------------
187 /* */
/* Replaces tab characters in String by runs of spaces, in place.

   Len is treated as the capacity of the buffer String lives in (the existing
   truncation check already used it that way - NOTE(review): confirm against
   the callers). If an expansion does not fit, the string is cut at that tab.
   Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   // the scan is bounded by the buffer as well as by the terminator
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      size_t const Offset = I - String;
      if (Len - Offset < 8)
      {
         *I = 0;
         return String;
      }

      /* Width of the replacement. The arithmetic is kept exactly as it has
         always been: a single space for a tab at the very start, otherwise
         6 + (offset % 8) spaces. */
      int Pad;
      if (String == I)
         Pad = 1;
      else
         Pad = 8 + static_cast<int>(Offset % 8);
      Pad -= 2;
      if (Pad <= 0)
      {
         *I = ' ';
         continue;
      }

      // The text after the tab moves right by Pad - 1 bytes; make sure it and
      // its terminator still fit before touching anything.
      size_t const Tail = strlen(I + 1);
      if (Offset + Pad + Tail + 1 > Len)
      {
         *I = 0;
         return String;
      }

      // Tail + 1 bytes: the remaining text plus its terminator, nothing more
      memmove(I + Pad,I + 1,Tail + 1);
      memset(I,' ',Pad);

      // Stop on the last inserted space so the loop increment continues with
      // the first character after the tab instead of skipping it.
      I += Pad - 1;
   }
   return String;
}
219 /*}}}*/
220 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
221 // ---------------------------------------------------------------------
222 /* This grabs a single word, converts any % escaped characters to their
223 proper values and advances the pointer. Double quotes are understood
224 and striped out as well. This is for URI/URL parsing. It also can
225 understand [] brackets.*/
/* Extracts the next word from String into Res and advances String past the
   word and the whitespace following it.

   Inside the word %XX escapes are decoded and double quotes are removed;
   quoted ("...") and bracketed ([...]) sections may contain whitespace.
   Returns false - leaving String and Res untouched - if only blanks are left
   or a quote/bracket is not closed. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // the ctype functions must not be fed negative char values
   auto const is_space = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };
   auto const is_hex = [](char const c) {
      return isxdigit(static_cast<unsigned char>(c)) != 0;
   };

   // Skip leading blanks
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the end of the word
   for (; *C != 0 && is_space(*C) == false; ++C)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters. The word is collected in a string instead of a
   // fixed 1024 byte array, whose terminator ended up behind the array for
   // long words.
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && C - Start > 2 &&
          is_hex(Start[1]) && is_hex(Start[2]))
      {
         char const Hex[3] = { Start[1], Start[2], 0 };
         Word.push_back(static_cast<char>(strtol(Hex,0,16)));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word.push_back(*Start);
      ++Start;
   }
   // assigning through c_str() keeps the old behaviour of ending the result
   // at a decoded NUL (%00)
   Res = Word.c_str();

   // Skip ending white space
   while (*C != 0 && is_space(*C))
      ++C;
   String = C;
   return true;
}
282 /*}}}*/
283 // ParseCWord - Parses a string like a C "" expression /*{{{*/
284 // ---------------------------------------------------------------------
285 /* This expects a series of space separated strings enclosed in ""'s.
286 It concatenates the ""'s into a single string. */
/* Parses a sequence of whitespace separated "..." literals and joins their
   contents with single spaces into Res; String is moved to the end of the
   input on success.

   Returns false - leaving String and Res untouched - if the input is blank,
   is 1024 characters or longer, contains anything but whitespace outside of
   the quotes or ends inside a quote. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // isspace() must not be fed negative char values
   auto const is_space = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };

   // Skip leading blanks
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // the output is never longer than the input, so this bounds Buffer
   char Buffer[1024];
   if (strlen(String) >= sizeof(Buffer))
      return false;

   char *Out = Buffer;
   for (; *C != 0; ++C)
   {
      if (*C == '"')
      {
         // copy the literal's content, leaving C on the closing quote
         for (++C; *C != 0 && *C != '"'; ++C)
            *Out++ = *C;

         if (*C == 0)
            return false;

         continue;
      }

      // only whitespace is allowed between two literals
      if (is_space(*C) == false)
         return false;
      // a run of whitespace produces a single separator
      if (C != String && is_space(C[-1]))
         continue;
      *Out++ = ' ';
   }
   *Out = 0;
   Res = Buffer;
   String = C;
   return true;
}
324 /*}}}*/
325 // QuoteString - Convert a string into quoted form /*{{{*/
326 // ---------------------------------------------------------------------
327 /* */
/* Returns Str with every "unsafe" byte replaced by %xx (two lowercase hex
   digits). Unsafe are: the characters listed in Bad, '%', everything up to
   and including space (0x20), and everything from 0x7F upwards. */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   static char const Hex[] = "0123456789abcdef";

   std::string Res;
   Res.reserve(Str.size());
   for (char const Ch : Str)
   {
      // classify by the unsigned value: isprint() is undefined for negative
      // arguments and the range checks should not depend on char signedness
      unsigned char const Byte = static_cast<unsigned char>(Ch);
      bool const Unsafe = strchr(Bad,Ch) != 0 || isprint(Byte) == 0 ||
         Byte == 0x25 ||                 // percent '%' char
         Byte <= 0x20 || Byte >= 0x7F;   // control chars and non-ASCII
      if (Unsafe == false)
      {
         Res += Ch;
         continue;
      }
      Res += '%';
      Res += Hex[Byte >> 4];
      Res += Hex[Byte & 0x0F];
   }
   return Res;
}
344 /*}}}*/
345 // DeQuoteString - Convert a string from quoted form /*{{{*/
346 // ---------------------------------------------------------------------
347 /* This undoes QuoteString */
/* Decodes the %XX escapes in [begin, end). A '%' that is not followed by two
   hex digits is copied unchanged. */
std::string DeQuoteString(std::string::const_iterator const &begin,
                          std::string::const_iterator const &end)
{
   std::string Res;
   for (std::string::const_iterator I = begin; I != end; ++I)
   {
      // "end - I > 2" checks that two more characters exist without moving
      // an iterator beyond end; isxdigit() needs non-negative values
      if (*I == '%' && end - I > 2 &&
          isxdigit(static_cast<unsigned char>(I[1])) &&
          isxdigit(static_cast<unsigned char>(I[2])))
      {
         char const Hex[3] = { I[1], I[2], 0 };
         Res += static_cast<char>(strtol(Hex,0,16));
         I += 2;   // the loop increment consumes the third character
         continue;
      }
      Res += *I;
   }
   return Res;
}

/* This undoes QuoteString for a whole string */
std::string DeQuoteString(const std::string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
374
375 /*}}}*/
376 // SizeToStr - Convert a long into a human readable size /*{{{*/
377 // ---------------------------------------------------------------------
378 /* A max of 4 digits are shown before conversion to the next highest unit.
379 The max length of the string will be 5 chars unless the size is > 10
380 YottaBytes (E24) */
381 string SizeToStr(double Size)
382 {
383 double ASize;
384 if (Size >= 0)
385 ASize = Size;
386 else
387 ASize = -1*Size;
388
389 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
390 ExaBytes, ZettaBytes, YottaBytes */
391 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
392 int I = 0;
393 while (I <= 8)
394 {
395 if (ASize < 100 && I != 0)
396 {
397 std::string S;
398 strprintf(S, "%'.1f %c", ASize, Ext[I]);
399 return S;
400 }
401
402 if (ASize < 10000)
403 {
404 std::string S;
405 strprintf(S, "%'.0f %c", ASize, Ext[I]);
406 return S;
407 }
408 ASize /= 1000.0;
409 I++;
410 }
411 return "";
412 }
413 /*}}}*/
414 // TimeToStr - Convert the time into a string /*{{{*/
415 // ---------------------------------------------------------------------
416 /* Converts a number of seconds to a hms format */
417 string TimeToStr(unsigned long Sec)
418 {
419 std::string S;
420 if (Sec > 60*60*24)
421 {
422 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
423 strprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
424 }
425 else if (Sec > 60*60)
426 {
427 //TRANSLATOR: h means hours, min means minutes, s means seconds
428 strprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
429 }
430 else if (Sec > 60)
431 {
432 //TRANSLATOR: min means minutes, s means seconds
433 strprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
434 }
435 else
436 {
437 //TRANSLATOR: s means seconds
438 strprintf(S,_("%lis"),Sec);
439 }
440 return S;
441 }
442 /*}}}*/
443 // SubstVar - Substitute a string for another string /*{{{*/
444 // ---------------------------------------------------------------------
445 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with every occurrence of Subst replaced by Contents. The
   search continues after each replacement, so inserted text is not scanned
   again. An empty Subst leaves Str unchanged. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty())
      return Str;

   std::string Result;
   std::string::size_type Cursor = 0;   // first character not yet copied
   std::string::size_type Hit = 0;
   while (Cursor < Str.length() &&
          (Hit = Str.find(Subst,Cursor)) != std::string::npos)
   {
      // text before the match, then the replacement
      Result.append(Str, Cursor, Hit - Cursor);
      Result.append(Contents);
      Cursor = Hit + Subst.length();
   }

   // the cursor never moved: nothing was replaced
   if (Cursor == 0)
      return Str;

   // copy what follows the last match, if anything
   if (Cursor < Str.length())
      Result.append(Str, Cursor, std::string::npos);
   return Result;
}
474 string SubstVar(string Str,const struct SubstVar *Vars)
475 {
476 for (; Vars->Subst != 0; Vars++)
477 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
478 return Str;
479 }
480 /*}}}*/
481 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
482 // ---------------------------------------------------------------------
483 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string consisting of Separator repeated Depth + 1 times */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string output;
   for (unsigned long remaining = Depth + 1; remaining != 0; --remaining)
      output += Separator;
   return output;
}
491 /*}}}*/
492 // URItoFileName - Convert the uri into a unique file name /*{{{*/
493 // ---------------------------------------------------------------------
494 /* This converts a URI into a safe filename. It quotes all unsafe characters
495 and converts / to _ and removes the scheme identifier. The resulting
496 file name should be unique and never occur again for a different file */
497 string URItoFileName(const string &URI)
498 {
499 // Nuke 'sensitive' items
500 ::URI U(URI);
501 U.User.clear();
502 U.Password.clear();
503 U.Access.clear();
504
505 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
506 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
507 replace(NewURI.begin(),NewURI.end(),'/','_');
508 return NewURI;
509 }
510 /*}}}*/
511 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
512 // ---------------------------------------------------------------------
513 /* This routine performs a base64 transformation on a string. It was ripped
514 from wget and then patched and bug fixed.
515
516 This spec can be found in rfc2045 */
/* Returns the base64 (RFC 2045 alphabet) encoding of S, padded with '=' to a
   multiple of four characters. No line breaks are inserted. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static char const tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The bytes
      are handled as unsigned char: with a signed char every byte >= 0x80
      would produce a negative table index. */
   size_t const Len = S.length();
   for (size_t Pos = 0; Pos < Len; Pos += 3)
   {
      size_t const Avail = Len - Pos;   // bytes left, at least 1
      unsigned char const B0 = S[Pos];
      unsigned char const B1 = (Avail > 1) ? S[Pos + 1] : 0;
      unsigned char const B2 = (Avail > 2) ? S[Pos + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* The padding tells the remote end how many bytes to discard */
      if (Avail == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
567 /*}}}*/
568 // stringcmp - Arbitrary string compare /*{{{*/
569 // ---------------------------------------------------------------------
570 /* This safely compares two non-null terminated strings of arbitrary
571 length */
/* Compares the ranges [A, AEnd) and [B, BEnd). Returns 0 if they are equal,
   -1 or 1 according to the first differing character, 1 if A is a proper
   prefix of B and -1 if B is a proper prefix of A. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone)
      return BDone ? 0 : 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
588
#if __GNUC__ >= 3
/* Shared worker of the iterator overloads below: 0 for equal ranges, -1 or 1
   according to the first differing character, 1 if the first range is a
   proper prefix of the second and -1 in the opposite case. */
template <typename IterA, typename IterB>
static int stringcmp_range(IterA A, IterA const AEnd, IterB B, IterB const BEnd)
{
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}

/* stringcmp for a string iterator range against a character pointer range */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   return stringcmp_range(A, AEnd, B, BEnd);
}

/* stringcmp for two string iterator ranges */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   return stringcmp_range(A, AEnd, B, BEnd);
}
#endif
625 /*}}}*/
626 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
627 // ---------------------------------------------------------------------
628 /* */
629 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
630 {
631 for (; A != AEnd && B != BEnd; A++, B++)
632 if (tolower_ascii(*A) != tolower_ascii(*B))
633 break;
634
635 if (A == AEnd && B == BEnd)
636 return 0;
637 if (A == AEnd)
638 return 1;
639 if (B == BEnd)
640 return -1;
641 if (tolower_ascii(*A) < tolower_ascii(*B))
642 return -1;
643 return 1;
644 }
645 #if __GNUC__ >= 3
646 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
647 const char *B,const char *BEnd)
648 {
649 for (; A != AEnd && B != BEnd; A++, B++)
650 if (tolower_ascii(*A) != tolower_ascii(*B))
651 break;
652
653 if (A == AEnd && B == BEnd)
654 return 0;
655 if (A == AEnd)
656 return 1;
657 if (B == BEnd)
658 return -1;
659 if (tolower_ascii(*A) < tolower_ascii(*B))
660 return -1;
661 return 1;
662 }
663 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
664 string::const_iterator B,string::const_iterator BEnd)
665 {
666 for (; A != AEnd && B != BEnd; A++, B++)
667 if (tolower_ascii(*A) != tolower_ascii(*B))
668 break;
669
670 if (A == AEnd && B == BEnd)
671 return 0;
672 if (A == AEnd)
673 return 1;
674 if (B == BEnd)
675 return -1;
676 if (tolower_ascii(*A) < tolower_ascii(*B))
677 return -1;
678 return 1;
679 }
680 #endif
681 /*}}}*/
682 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
683 // ---------------------------------------------------------------------
684 /* The format is like those used in package files and the method
685 communication system */
686 string LookupTag(const string &Message,const char *Tag,const char *Default)
687 {
688 // Look for a matching tag.
689 int Length = strlen(Tag);
690 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
691 {
692 // Found the tag
693 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
694 {
695 // Find the end of line and strip the leading/trailing spaces
696 string::const_iterator J;
697 I += Length + 1;
698 for (; isspace_ascii(*I) != 0 && I < Message.end(); ++I);
699 for (J = I; *J != '\n' && J < Message.end(); ++J);
700 for (; J > I && isspace_ascii(J[-1]) != 0; --J);
701
702 return string(I,J);
703 }
704
705 for (; *I != '\n' && I < Message.end(); ++I);
706 }
707
708 // Failed to find a match
709 if (Default == 0)
710 return string();
711 return Default;
712 }
713 /*}}}*/
714 // StringToBool - Converts a string into a boolean /*{{{*/
715 // ---------------------------------------------------------------------
716 /* This inspects the string to see if it is true or if it is false and
717 then returns the result. Several variants on true/false are checked. */
/* Interprets Text as a boolean: returns 1 for "1", "yes", "true", "with",
   "on" and "enable", 0 for "0", "no", "false", "without", "off" and
   "disable" (words are matched case-insensitively) and Default for anything
   else. */
int StringToBool(const std::string &Text,int Default)
{
   char *ParseEnd;
   // keep the full long so that an out of range number is not truncated
   // into 0 or 1
   long const Res = strtol(Text.c_str(),&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   const char * const TextEnd = Text.c_str()+Text.size();
   if (ParseEnd == TextEnd && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"off") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"on") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
746 /*}}}*/
747 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
748 // ---------------------------------------------------------------------
749 /* This converts a time_t into a string time representation that is
750 year 2000 compliant and timezone neutral */
/* Formats Date as "Thu, 01 Jan 1970 00:00:00 GMT". The stream is imbued with
   the classic locale, so day and month names are not localized. Returns ""
   if gmtime_r fails. */
std::string TimeRFC1123(time_t Date)
{
   struct tm Broken;
   if (gmtime_r(&Date, &Broken) == NULL)
      return "";

   std::ostringstream Out;
   Out.imbue(std::locale::classic());
   Out << std::put_time(&Broken, "%a, %d %b %Y %H:%M:%S GMT");
   return Out.str();
}
763 /*}}}*/
764 // ReadMessages - Read messages from the FD /*{{{*/
765 // ---------------------------------------------------------------------
766 /* This pulls full messages from the input FD into the message buffer.
767 It assumes that messages will not pause during transit so no
768 fancy buffering is used.
769
770 In particular: this reads blocks from the input until it believes
771 that it's run out of input text. Each block is terminated by a
772 double newline ('\n' followed by '\n').
773 */
774 bool ReadMessages(int Fd, vector<string> &List)
775 {
776 char Buffer[64000];
777 // Represents any left-over from the previous iteration of the
778 // parse loop. (i.e., if a message is split across the end
779 // of the buffer, it goes here)
780 string PartialMessage;
781
782 do {
783 int const Res = read(Fd, Buffer, sizeof(Buffer));
784 if (Res < 0 && errno == EINTR)
785 continue;
786
787 // process we read from has died
788 if (Res == 0)
789 return false;
790
791 // No data
792 #if EAGAIN != EWOULDBLOCK
793 if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
794 #else
795 if (Res < 0 && errno == EAGAIN)
796 #endif
797 return true;
798 if (Res < 0)
799 return false;
800
801 // extract the message(s) from the buffer
802 char const *Start = Buffer;
803 char const * const End = Buffer + Res;
804
805 char const * NL = (char const *) memchr(Start, '\n', End - Start);
806 if (NL == NULL)
807 {
808 // end of buffer: store what we have so far and read new data in
809 PartialMessage.append(Start, End - Start);
810 Start = End;
811 }
812 else
813 ++NL;
814
815 if (PartialMessage.empty() == false && Start < End)
816 {
817 // if we start with a new line, see if the partial message we have ended with one
818 // so that we properly detect records ending between two read() runs
819 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
820 // the case \r|\n\r\n is handled by the usual double-newline handling
821 if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
822 {
823 if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
824 {
825 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
826 List.push_back(PartialMessage);
827 PartialMessage.clear();
828 while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
829 Start = NL;
830 }
831 }
832 }
833
834 while (Start < End) {
835 char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
836 if (NL2 == NULL)
837 {
838 // end of buffer: store what we have so far and read new data in
839 PartialMessage.append(Start, End - Start);
840 break;
841 }
842 ++NL2;
843
844 // did we find a double newline?
845 if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
846 {
847 PartialMessage.append(Start, NL2 - Start);
848 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
849 List.push_back(PartialMessage);
850 PartialMessage.clear();
851 while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
852 Start = NL2;
853 }
854 NL = NL2;
855 }
856
857 // we have read at least one complete message and nothing left
858 if (PartialMessage.empty() == true)
859 return true;
860
861 if (WaitFd(Fd) == false)
862 return false;
863 } while (true);
864 }
865 /*}}}*/
866 // MonthConv - Converts a month string into a number /*{{{*/
867 // ---------------------------------------------------------------------
868 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
869 Made it a bit more robust with a few tolower_ascii though. */
870 static int MonthConv(char const * const Month)
871 {
872 switch (tolower_ascii(*Month))
873 {
874 case 'a':
875 return tolower_ascii(Month[1]) == 'p'?3:7;
876 case 'd':
877 return 11;
878 case 'f':
879 return 1;
880 case 'j':
881 if (tolower_ascii(Month[1]) == 'a')
882 return 0;
883 return tolower_ascii(Month[2]) == 'n'?5:6;
884 case 'm':
885 return tolower_ascii(Month[2]) == 'r'?2:4;
886 case 'n':
887 return 10;
888 case 'o':
889 return 9;
890 case 's':
891 return 8;
892
893 // Pretend it is January..
894 default:
895 return 0;
896 }
897 }
898 /*}}}*/
899 // timegm - Internal timegm if the gnu version is not available /*{{{*/
900 // ---------------------------------------------------------------------
901 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
902 than local timezone (mktime assumes the latter).
903
904 This function is a nonstandard GNU extension that is also present on
905 the BSDs and maybe other systems. For others we follow the advice of
906 the manpage of timegm and use his portable replacement. */
907 #ifndef HAVE_TIMEGM
908 static time_t timegm(struct tm *t)
909 {
910 char *tz = getenv("TZ");
911 setenv("TZ", "", 1);
912 tzset();
913 time_t ret = mktime(t);
914 if (tz)
915 setenv("TZ", tz, 1);
916 else
917 unsetenv("TZ");
918 tzset();
919 return ret;
920 }
921 #endif
922 /*}}}*/
923 // RFC1123StrToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
924 // ---------------------------------------------------------------------
925 /* tries to parses a full date as specified in RFC7231 §7.1.1.1
926 with one exception: HTTP/1.1 valid dates need to have GMT as timezone.
927 As we encounter dates from UTC or with a numeric timezone in other places,
928 we allow them here to be able to reuse the method. Either way, a date
929 must be in UTC or parsing will fail. Previous implementations of this
930 method used to ignore the timezone and assume always UTC. */
931 bool RFC1123StrToTime(const char* const str,time_t &time)
932 {
933 unsigned short day = 0;
934 signed int year = 0; // yes, Y23K problem – we gonna worry then…
935 std::string weekday, month, datespec, timespec, zone;
936 std::istringstream ss(str);
937 auto const &posix = std::locale::classic();
938 ss.imbue(posix);
939 ss >> weekday;
940 // we only superficially check weekday, mostly to avoid accepting localized
941 // weekdays here and take only its length to decide which datetime format we
942 // encounter here. The date isn't stored.
943 std::transform(weekday.begin(), weekday.end(), weekday.begin(), ::tolower);
944 std::array<char const * const, 7> c_weekdays = {{ "sun", "mon", "tue", "wed", "thu", "fri", "sat" }};
945 if (std::find(c_weekdays.begin(), c_weekdays.end(), weekday.substr(0,3)) == c_weekdays.end())
946 return false;
947
948 switch (weekday.length())
949 {
950 case 4:
951 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
952 if (weekday[3] != ',')
953 return false;
954 ss >> day >> month >> year >> timespec >> zone;
955 break;
956 case 3:
957 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
958 ss >> month >> day >> timespec >> year;
959 zone = "UTC";
960 break;
961 case 0:
962 case 1:
963 case 2:
964 return false;
965 default:
966 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
967 if (weekday[weekday.length() - 1] != ',')
968 return false;
969 ss >> datespec >> timespec >> zone;
970 auto const expldate = VectorizeString(datespec, '-');
971 if (expldate.size() != 3)
972 return false;
973 try {
974 size_t pos;
975 day = std::stoi(expldate[0], &pos);
976 if (pos != expldate[0].length())
977 return false;
978 year = 1900 + std::stoi(expldate[2], &pos);
979 if (pos != expldate[2].length())
980 return false;
981 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(expldate[1].c_str()) + 1, day);
982 } catch (...) {
983 return false;
984 }
985 break;
986 }
987
988 if (ss.fail() || ss.bad() || !ss.eof())
989 return false;
990
991 if (zone != "GMT" && zone != "UTC" && zone != "Z") // RFC 822
992 {
993 // numeric timezones as a should of RFC 1123 and generally preferred
994 try {
995 size_t pos;
996 auto const z = std::stoi(zone, &pos);
997 if (z != 0 || pos != zone.length())
998 return false;
999 } catch (...) {
1000 return false;
1001 }
1002 }
1003
1004 if (datespec.empty())
1005 {
1006 if (month.empty())
1007 return false;
1008 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(month.c_str()) + 1, day);
1009 }
1010
1011 std::string const datetime = datespec + ' ' + timespec;
1012 struct tm Tm;
1013 if (strptime(datetime.c_str(), "%Y-%m-%d %H:%M:%S", &Tm) == nullptr)
1014 return false;
1015 time = timegm(&Tm);
1016 return true;
1017 }
1018 /*}}}*/
1019 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
1020 // ---------------------------------------------------------------------
1021 /* */
/* Parses an FTP MDTM timestamp (YYYYMMDDHHMMSS) as UTC and stores it in
   time. Returns false, leaving time untouched, if str does not match. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime() only stores the fields named in the format; start from a
   // zeroed structure so that e.g. tm_isdst is not left indeterminate
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));
   // MDTM includes no whitespaces but recommend and ignored by strptime
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
1032 /*}}}*/
1033 // StrToTime - Converts a string into a time_t /*{{{*/
1034 // ---------------------------------------------------------------------
1035 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
1036 and the C library asctime format. It requires the GNU library function
1037 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
1038 reason the C library does not provide any such function :< This also
1039 handles the weird, but unambiguous FTP time format*/
1040 bool StrToTime(const string &Val,time_t &Result)
1041 {
1042 struct tm Tm;
1043 char Month[10];
1044
1045 // Skip the day of the week
1046 const char *I = strchr(Val.c_str(), ' ');
1047
1048 // Handle RFC 1123 time
1049 Month[0] = 0;
1050 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
1051 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1052 {
1053 // Handle RFC 1036 time
1054 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
1055 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
1056 Tm.tm_year += 1900;
1057 else
1058 {
1059 // asctime format
1060 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
1061 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
1062 {
1063 // 'ftp' time
1064 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
1065 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1066 return false;
1067 Tm.tm_mon--;
1068 }
1069 }
1070 }
1071
1072 Tm.tm_isdst = 0;
1073 if (Month[0] != 0)
1074 Tm.tm_mon = MonthConv(Month);
1075 else
1076 Tm.tm_mon = 0; // we don't have a month, so pick something
1077 Tm.tm_year -= 1900;
1078
1079 // Convert to local time and then to GMT
1080 Result = timegm(&Tm);
1081 return true;
1082 }
1083 /*}}}*/
1084 // StrToNum - Convert a fixed length string to a number /*{{{*/
1085 // ---------------------------------------------------------------------
1086 /* This is used in decoding the crazy fixed length string headers in
1087 tar and ar files. */
/* Converts the first Len characters of Str (a fixed-width field that need
   not be terminated) to a number in the given Base. A field consisting only
   of spaces counts as 0. Returns false if Len is 30 or more or if no digits
   could be converted. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // work on a terminated copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *ParseEnd;
   Res = strtoul(Field,&ParseEnd,Base);
   return ParseEnd != Field;
}
1110 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* Same as the unsigned long flavour, but for unsigned long long results:
   the first Len bytes of Str are copied, terminated and run through
   strtoull. Fields of spaces only yield zero; too long fields and fields
   strtoull cannot convert anything of yield false. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Copy[30];
   if (Len >= sizeof(Copy))
      return false;
   memcpy(Copy,Str,Len);
   Copy[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *FirstNonSpace = Copy;
   while (*FirstNonSpace == ' ')
      ++FirstNonSpace;
   if (*FirstNonSpace == 0)
      return true;

   char *End = NULL;
   Res = strtoull(Copy,&End,Base);
   if (End != Copy)
      return true;
   return false;
}
									/*}}}*/
1138
// Base256ToNum - Convert a fixed length binary to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files. The most significant bit of the first byte marks the field as
   base-256; without it (or without any byte at all) false is returned and
   Res is left alone. The remaining seven bits of the first byte and all
   following bytes form the number, most significant byte first. Fields
   holding more bits than Res can take silently lose the uppermost ones. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   // no marker byte to look at
   if (Len == 0)
      return false;

   // char may be signed: read the field as unsigned bytes so that values
   // >= 0x80 are not sign extended while being widened
   unsigned char const * const Bytes = reinterpret_cast<unsigned char const *>(Str);
   if ((Bytes[0] & 0x80) == 0)
      return false;

   Res = Bytes[0] & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) | Bytes[i];
   return true;
}
									/*}}}*/
1156 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1157 // ---------------------------------------------------------------------
1158 /* This is used in decoding the 256bit encoded fixed length fields in
1159 tar files */
1160 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1161 {
1162 unsigned long long Num = 0;
1163 bool rc;
1164
1165 rc = Base256ToNum(Str, Num, Len);
1166 // rudimentary check for overflow (Res = ulong, Num = ulonglong)
1167 Res = Num;
1168 if (Res != Num)
1169 return false;
1170
1171 return rc;
1172 }
1173 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: returns the value (0-15) of the hexadecimal digit c,
   accepting both upper and lower case letters, or -1 if c is no such
   digit. */
static int HexDigit(int c)
{
   switch (c)
   {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
         return c - '0';
      case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
         return 10 + (c - 'a');
      case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
         return 10 + (c - 'A');
      default:
         return -1;
   }
}
									/*}}}*/
1188 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1189 // ---------------------------------------------------------------------
1190 /* The length of the buffer must be exactly 1/2 the length of the string. */
1191 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1192 {
1193 return Hex2Num(APT::StringView(Str), Num, Length);
1194 }
1195
1196 bool Hex2Num(const APT::StringView Str,unsigned char *Num,unsigned int Length)
1197 {
1198 if (Str.length() != Length*2)
1199 return false;
1200
1201 // Convert each digit. We store it in the same order as the string
1202 int J = 0;
1203 for (auto I = Str.begin(); I != Str.end();J++, I += 2)
1204 {
1205 int first_half = HexDigit(I[0]);
1206 int second_half;
1207 if (first_half < 0)
1208 return false;
1209
1210 second_half = HexDigit(I[1]);
1211 if (second_half < 0)
1212 return false;
1213 Num[J] = first_half << 4;
1214 Num[J] += second_half;
1215 }
1216
1217 return true;
1218 }
1219 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at each token location and List
   is filled with pointers into Input, terminated by a null pointer.
   Whitespace around the pieces is dropped. ListMax is the number of entries
   List has room for, including the terminator: false is returned if that
   is not enough, in which case List holds the pieces which did fit. With a
   ListMax of zero List is not touched at all. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Not even the terminating entry fits, so we must not write to List
   if (ListMax == 0)
      return false;

   // isspace is only defined for unsigned char values (and EOF), while
   // plain char might well be negative for non-ASCII bytes
   auto const IsSpace = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };

   // Strip any leading spaces
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   for (; *Start != 0 && IsSpace(*Start); ++Start);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; ++Pos);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok || IsSpace(End[-1])); --End);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // the last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos: beside tokens and spaces the null we might have just
      // written over the token has to be skipped as well
      for (; Pos != Stop && (*Pos == Tok || IsSpace(*Pos) || *Pos == 0); ++Pos);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
									/*}}}*/
// VectorizeString - Split a string up into a vector of strings	/*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector. Empty pieces
   are kept, with the exception of the one after a trailing split
   character; an empty haystack results in an empty vector. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   // working with positions avoids forming iterators beyond end()
   string::size_type start = 0;
   while (start < haystack.size())
   {
      string::size_type const end = haystack.find(split, start);
      if (end == string::npos)
      {
         // the rest of the string is the last piece
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));
      start = end + 1;
   }
   return exploded;
}
									/*}}}*/
// StringSplit - split a string into a string vector by token		/*{{{*/
// ---------------------------------------------------------------------
/* See header for details. In short: s is cut at each occurrence of sep
   until the result has maxsplit items, the last of which takes whatever is
   left of s. An empty sep yields an empty vector.
 */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);

      // the remaining string is the last item if maxsplit is about to be
      // reached or if there is simply no further separator
      if (pieces.size() + 1 >= maxsplit || hit == string::npos)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
									/*}}}*/
1311 // RegexChoice - Simple regex list/list matcher /*{{{*/
1312 // ---------------------------------------------------------------------
1313 /* */
1314 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1315 const char **ListEnd)
1316 {
1317 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1318 R->Hit = false;
1319
1320 unsigned long Hits = 0;
1321 for (; ListBegin < ListEnd; ++ListBegin)
1322 {
1323 // Check if the name is a regex
1324 const char *I;
1325 bool Regex = true;
1326 for (I = *ListBegin; *I != 0; I++)
1327 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1328 break;
1329 if (*I == 0)
1330 Regex = false;
1331
1332 // Compile the regex pattern
1333 regex_t Pattern;
1334 if (Regex == true)
1335 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1336 REG_NOSUB) != 0)
1337 Regex = false;
1338
1339 // Search the list
1340 bool Done = false;
1341 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1342 {
1343 if (R->Str[0] == 0)
1344 continue;
1345
1346 if (strcasecmp(R->Str,*ListBegin) != 0)
1347 {
1348 if (Regex == false)
1349 continue;
1350 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1351 continue;
1352 }
1353 Done = true;
1354
1355 if (R->Hit == false)
1356 Hits++;
1357
1358 R->Hit = true;
1359 }
1360
1361 if (Regex == true)
1362 regfree(&Pattern);
1363
1364 if (Done == false)
1365 _error->Warning(_("Selection %s not found"),*ListBegin);
1366 }
1367
1368 return Hits;
1369 }
1370 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters.
   iovprintf does a single formatting attempt with a buffer of size bytes:
   if the result fits it is written to out and true is returned. Otherwise
   nothing is written, size is set to what the next attempt should use and
   false is returned. args has been used by then, so the caller has to
   start it again before retrying. */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   // a vector frees itself on every way out and reports a failed
   // allocation by exception instead of handing vsnprintf a null pointer
   std::vector<char> Buffer(size);
   ssize_t const n = vsnprintf(Buffer.data(), Buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << Buffer.data();
      return true;
   }

   if (n > -1)
      // we were told how long the result is: add room for the terminator
      size = n + 1;
   else
      // no length was reported, so all we can do is to try a bigger buffer
      size *= 2;
   return false;
}
1392 void ioprintf(ostream &out,const char *format,...)
1393 {
1394 va_list args;
1395 ssize_t size = 400;
1396 while (true) {
1397 bool ret;
1398 va_start(args,format);
1399 ret = iovprintf(out, format, args, size);
1400 va_end(args);
1401 if (ret == true)
1402 return;
1403 }
1404 }
1405 void strprintf(string &out,const char *format,...)
1406 {
1407 va_list args;
1408 ssize_t size = 400;
1409 std::ostringstream outstr;
1410 while (true) {
1411 bool ret;
1412 va_start(args,format);
1413 ret = iovprintf(outstr, format, args, size);
1414 va_end(args);
1415 if (ret == true)
1416 break;
1417 }
1418 out = outstr.str();
1419 }
1420 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs: the result can be used as Buffer of the next
   call. End is returned if there is no room at all, if the output had to
   be truncated or if vsnprintf reported an error. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Did is the length of the untruncated output, so compare lengths:
   // Buffer + Did could point (far) beyond End, which we must not compute
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
									/*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Returns what follows the first colon of VerStr or VerStr itself if it
   has no colon. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon != string::npos) ? VerStr.substr(Colon + 1) : VerStr;
}
									/*}}}*/
1453
1454 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1455 // ---------------------------------------------------------------------
1456 /* This little function is the most called method we have and tries
1457 therefore to do the absolut minimum - and is notable faster than
1458 standard tolower/toupper and as a bonus avoids problems with different
1459 locales - we only operate on ascii chars anyway. */
1460 #undef tolower_ascii
1461 int tolower_ascii(int const c) APT_CONST APT_COLD;
1462 int tolower_ascii(int const c)
1463 {
1464 return tolower_ascii_inline(c);
1465 }
1466 /*}}}*/
1467
1468 // isspace_ascii - isspace() function that ignores the locale /*{{{*/
1469 // ---------------------------------------------------------------------
1470 /* This little function is one of the most called methods we have and tries
1471 therefore to do the absolut minimum - and is notable faster than
1472 standard isspace() and as a bonus avoids problems with different
1473 locales - we only operate on ascii chars anyway. */
1474 #undef isspace_ascii
1475 int isspace_ascii(int const c) APT_CONST APT_COLD;
1476 int isspace_ascii(int const c)
1477 {
1478 return isspace_ascii_inline(c);
1479 }
1480 /*}}}*/
1481
1482 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1483 // ---------------------------------------------------------------------
1484 /* The domain list is a comma separate list of domains that are suffix
1485 matched against the argument */
1486 bool CheckDomainList(const string &Host,const string &List)
1487 {
1488 string::const_iterator Start = List.begin();
1489 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1490 {
1491 if (Cur < List.end() && *Cur != ',')
1492 continue;
1493
1494 // Match the end of the string..
1495 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1496 Cur - Start != 0 &&
1497 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1498 return true;
1499
1500 Start = Cur + 1;
1501 }
1502 return false;
1503 }
1504 /*}}}*/
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries of str_array in front of the first NULL entry. */
size_t strv_length(const char **str_array)
{
   const char **last = str_array;
   while (*last != NULL)
      ++last;
   return last - str_array;
}
									/*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which \\ is replaced by a single backslash,
   \0XX by the character with the octal value XX and \xXX by the character
   with the hexadecimal value XX. A backslash at the very end is dropped as
   are unknown escapes (backslash and the following character). Of a \0 or
   \x not followed by two more characters only the backslash and the 0
   respectively x are dropped. */
string DeEscapeString(const string &input)
{
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read
      if (it + 1 == input.end())
         continue;

      // read it
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
         output += '\\';
         continue;
      }

      int Base = 0;
      switch (*it)
      {
         case '0':
            Base = 8;
            break;
         case 'x':
            Base = 16;
            break;
         default:
            // FIXME: raise exception here?
            break;
      }

      // Both digits must be inside the string: accepting a single one
      // would read the end of the string and move the iterator past it,
      // so that the loop never sees its end condition
      if (Base != 0 && input.end() - it > 2)
      {
         char const tmp[3] = { it[1], it[2], '\0' };
         output += (char)strtol(tmp, NULL, Base);
         it += 2;
      }
   }
   return output;
}
									/*}}}*/
1579 // URI::CopyFrom - Copy from an object /*{{{*/
1580 // ---------------------------------------------------------------------
1581 /* This parses the URI into all of its components */
1582 void URI::CopyFrom(const string &U)
1583 {
1584 string::const_iterator I = U.begin();
1585
1586 // Locate the first colon, this separates the scheme
1587 for (; I < U.end() && *I != ':' ; ++I);
1588 string::const_iterator FirstColon = I;
1589
1590 /* Determine if this is a host type URI with a leading double //
1591 and then search for the first single / */
1592 string::const_iterator SingleSlash = I;
1593 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1594 SingleSlash += 3;
1595
1596 /* Find the / indicating the end of the hostname, ignoring /'s in the
1597 square brackets */
1598 bool InBracket = false;
1599 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1600 {
1601 if (*SingleSlash == '[')
1602 InBracket = true;
1603 if (InBracket == true && *SingleSlash == ']')
1604 InBracket = false;
1605 }
1606
1607 if (SingleSlash > U.end())
1608 SingleSlash = U.end();
1609
1610 // We can now write the access and path specifiers
1611 Access.assign(U.begin(),FirstColon);
1612 if (SingleSlash != U.end())
1613 Path.assign(SingleSlash,U.end());
1614 if (Path.empty() == true)
1615 Path = "/";
1616
1617 // Now we attempt to locate a user:pass@host fragment
1618 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1619 FirstColon += 3;
1620 else
1621 FirstColon += 1;
1622 if (FirstColon >= U.end())
1623 return;
1624
1625 if (FirstColon > SingleSlash)
1626 FirstColon = SingleSlash;
1627
1628 // Find the colon...
1629 I = FirstColon + 1;
1630 if (I > SingleSlash)
1631 I = SingleSlash;
1632
1633 // Search for the @ separating user:pass from host
1634 auto const RevAt = std::find(
1635 std::string::const_reverse_iterator(SingleSlash),
1636 std::string::const_reverse_iterator(I), '@');
1637 string::const_iterator const At = RevAt.base() == I ? SingleSlash : std::prev(RevAt.base());
1638 // and then look for the colon between user and pass
1639 string::const_iterator const SecondColon = std::find(I, At, ':');
1640
1641 // Now write the host and user/pass
1642 if (At == SingleSlash)
1643 {
1644 if (FirstColon < SingleSlash)
1645 Host.assign(FirstColon,SingleSlash);
1646 }
1647 else
1648 {
1649 Host.assign(At+1,SingleSlash);
1650 // username and password must be encoded (RFC 3986)
1651 User.assign(DeQuoteString(FirstColon,SecondColon));
1652 if (SecondColon < At)
1653 Password.assign(DeQuoteString(SecondColon+1,At));
1654 }
1655
1656 // Now we parse the RFC 2732 [] hostnames.
1657 unsigned long PortEnd = 0;
1658 InBracket = false;
1659 for (unsigned I = 0; I != Host.length();)
1660 {
1661 if (Host[I] == '[')
1662 {
1663 InBracket = true;
1664 Host.erase(I,1);
1665 continue;
1666 }
1667
1668 if (InBracket == true && Host[I] == ']')
1669 {
1670 InBracket = false;
1671 Host.erase(I,1);
1672 PortEnd = I;
1673 continue;
1674 }
1675 I++;
1676 }
1677
1678 // Tsk, weird.
1679 if (InBracket == true)
1680 {
1681 Host.clear();
1682 return;
1683 }
1684
1685 // Now we parse off a port number from the hostname
1686 Port = 0;
1687 string::size_type Pos = Host.rfind(':');
1688 if (Pos == string::npos || Pos < PortEnd)
1689 return;
1690
1691 Port = atoi(string(Host,Pos+1).c_str());
1692 Host.assign(Host,0,Pos);
1693 }
1694 /*}}}*/
1695 // URI::operator string - Convert the URI to a string /*{{{*/
1696 // ---------------------------------------------------------------------
1697 /* */
1698 URI::operator string()
1699 {
1700 std::stringstream Res;
1701
1702 if (Access.empty() == false)
1703 Res << Access << ':';
1704
1705 if (Host.empty() == false)
1706 {
1707 if (Access.empty() == false)
1708 Res << "//";
1709
1710 if (User.empty() == false)
1711 {
1712 // FIXME: Technically userinfo is permitted even less
1713 // characters than these, but this is not conveniently
1714 // expressed with a blacklist.
1715 Res << QuoteString(User, ":/?#[]@");
1716 if (Password.empty() == false)
1717 Res << ":" << QuoteString(Password, ":/?#[]@");
1718 Res << "@";
1719 }
1720
1721 // Add RFC 2732 escaping characters
1722 if (Access.empty() == false && Host.find_first_of("/:") != string::npos)
1723 Res << '[' << Host << ']';
1724 else
1725 Res << Host;
1726
1727 if (Port != 0)
1728 Res << ':' << std::to_string(Port);
1729 }
1730
1731 if (Path.empty() == false)
1732 {
1733 if (Path[0] != '/')
1734 Res << "/" << Path;
1735 else
1736 Res << Path;
1737 }
1738
1739 return Res.str();
1740 }
1741 /*}}}*/
1742 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1743 string URI::SiteOnly(const string &URI)
1744 {
1745 ::URI U(URI);
1746 U.User.clear();
1747 U.Password.clear();
1748 U.Path.clear();
1749 return U;
1750 }
1751 /*}}}*/
1752 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1753 string URI::ArchiveOnly(const string &URI)
1754 {
1755 ::URI U(URI);
1756 U.User.clear();
1757 U.Password.clear();
1758 if (U.Path.empty() == false && U.Path[U.Path.length() - 1] == '/')
1759 U.Path.erase(U.Path.length() - 1);
1760 return U;
1761 }
1762 /*}}}*/
1763 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1764 string URI::NoUserPassword(const string &URI)
1765 {
1766 ::URI U(URI);
1767 U.User.clear();
1768 U.Password.clear();
1769 return U;
1770 }
1771 /*}}}*/