]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Merge branch 'feature/apt-dpkg-comm'
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <algorithm>
25 #include <iomanip>
26 #include <locale>
27 #include <sstream>
28 #include <string>
29 #include <vector>
30
31 #include <stddef.h>
32 #include <stdlib.h>
33 #include <time.h>
34 #include <ctype.h>
35 #include <string.h>
36 #include <stdio.h>
37 #include <unistd.h>
38 #include <regex.h>
39 #include <errno.h>
40 #include <stdarg.h>
41 #include <iconv.h>
42
43 #include <apti18n.h>
44 /*}}}*/
45 using namespace std;
46
47 // Strip - Remove white space from the front and back of a string /*{{{*/
48 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Strip - return a copy of str without leading and trailing whitespace.

   Whitespace is whatever isspace() accepts. The scan works on the
   NUL-terminated c_str() view, so a NUL directly after the leading
   whitespace makes the string count as "only whitespace". */
std::string Strip(const std::string &str)
{
   // ensure we have at least one character
   if (str.empty() == true)
      return str;

   // isspace() is only defined for EOF and values representable as
   // unsigned char, so every plain char is converted before the call
   char const * const s = str.c_str();
   size_t start = 0;
   while (isspace(static_cast<unsigned char>(s[start])) != 0)
      ++start; // find the first not-space

   // string contains only whitespaces
   if (s[start] == '\0')
      return "";

   // s[start] is known to be a non-space, so this scan stops at start at the latest
   size_t end = str.length() - 1;
   while (isspace(static_cast<unsigned char>(s[end])) != 0)
      --end; // find the last not-space

   return str.substr(start, end - start + 1);
}

/* Endswith - true if the last end.size() characters of s equal end */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   return (s.compare(s.size() - end.size(), end.size(), end) == 0);
}

/* Startswith - true if the first start.size() characters of s equal start */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   return (s.compare(0, start.size(), start) == 0);
}

   }
}
89 /*}}}*/
90 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
91 // ---------------------------------------------------------------------
92 /* This is handy to use before display some information for enduser */
93 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
94 {
95 iconv_t cd;
96 const char *inbuf;
97 char *inptr, *outbuf;
98 size_t insize, bufsize;
99 dest->clear();
100
101 cd = iconv_open(codeset, "UTF-8");
102 if (cd == (iconv_t)(-1)) {
103 // Something went wrong
104 if (errno == EINVAL)
105 _error->Error("conversion from 'UTF-8' to '%s' not available",
106 codeset);
107 else
108 perror("iconv_open");
109
110 return false;
111 }
112
113 insize = bufsize = orig.size();
114 inbuf = orig.data();
115 inptr = (char *)inbuf;
116 outbuf = new char[bufsize];
117 size_t lastError = -1;
118
119 while (insize != 0)
120 {
121 char *outptr = outbuf;
122 size_t outsize = bufsize;
123 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
124 dest->append(outbuf, outptr - outbuf);
125 if (err == (size_t)(-1))
126 {
127 switch (errno)
128 {
129 case EILSEQ:
130 insize--;
131 inptr++;
132 // replace a series of unknown multibytes with a single "?"
133 if (lastError != insize) {
134 lastError = insize - 1;
135 dest->append("?");
136 }
137 break;
138 case EINVAL:
139 insize = 0;
140 break;
141 case E2BIG:
142 if (outptr == outbuf)
143 {
144 bufsize *= 2;
145 delete[] outbuf;
146 outbuf = new char[bufsize];
147 }
148 break;
149 }
150 }
151 }
152
153 delete[] outbuf;
154
155 iconv_close(cd);
156
157 return true;
158 }
159 /*}}}*/
160 // strstrip - Remove white space from the front and back of a string /*{{{*/
161 // ---------------------------------------------------------------------
162 /* This is handy to use when parsing a file. It also removes \n's left
163 over from fgets and company */
164 char *_strstrip(char *String)
165 {
166 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
167
168 if (*String == 0)
169 return String;
170 return _strrstrip(String);
171 }
172 /*}}}*/
173 // strrstrip - Remove white space from the back of a string /*{{{*/
174 // ---------------------------------------------------------------------
/* _strrstrip - cut trailing spaces, tabs, '\n' and '\r' off String in place.

   Returns String itself. Working with a length instead of an end pointer
   avoids forming a pointer in front of the buffer for empty input. */
char *_strrstrip(char *String)
{
   size_t Keep = strlen(String);
   while (Keep > 0 &&
          (String[Keep - 1] == ' ' || String[Keep - 1] == '\t' ||
           String[Keep - 1] == '\n' || String[Keep - 1] == '\r'))
      --Keep;

   String[Keep] = 0;
   return String;
}
184 /*}}}*/
185 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
186 // ---------------------------------------------------------------------
187 /* */
/* _strtabexpand - replace tabs in String with runs of spaces, in place.

   String  NUL-terminated text inside a buffer of Len bytes
   Len     size of that buffer

   Returns String. If a tab sits in the last 8 bytes of the buffer, or the
   expanded text would not fit into Len bytes, the string is cut off at
   that tab instead.

   NOTE(review): the run lengths are kept exactly as before: one space for
   a tab at offset 0, otherwise 6 + (offset % 8) spaces. That does not look
   like real tab-stop alignment - confirm the intent before changing it. */
char *_strtabexpand(char *String,size_t Len)
{
   // the scan is bounded by the buffer end, not only by the terminator
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      size_t const Offset = I - String;
      if (Len - Offset < 8)
      {
         *I = 0;
         return String;
      }

      // a leading tab collapses into a single space
      if (Offset == 0)
      {
         *I = ' ';
         continue;
      }

      size_t const Pad = 6 + (Offset % 8);
      // bytes after the tab, including the terminator
      size_t const Tail = strlen(I + 1) + 1;
      if (Offset + Pad + Tail > Len)
      {
         *I = 0;
         return String;
      }

      memmove(I + Pad,I + 1,Tail);
      memset(I,' ',Pad);
      // stop on the last inserted space so the loop increment lands on the
      // first character after the run (it may be another tab or the NUL)
      I += Pad - 1;
   }
   return String;
}
219 /*}}}*/
220 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
221 // ---------------------------------------------------------------------
222 /* This grabs a single word, converts any % escaped characters to their
223 proper values and advances the pointer. Double quotes are understood
224 and stripped out as well. This is for URI/URL parsing. It also can
225 understand [] brackets.*/
/* ParseQuoteWord - pull one word off the front of String.

   String  in: text to parse; out: position after the word and the
           whitespace following it (only updated on success)
   Res     receives the word with %XX escapes decoded and double quotes
           removed (only updated on success)

   A word ends at the first whitespace that is not inside "..." or [...].
   Returns false if nothing but spaces is left or a quote/bracket is never
   closed. The word is built in a std::string, so there is no length limit
   and a decoded %00 does not cut the result short. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (;*C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; C++)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters; as before the copy starts at String, so
   // leading spaces that were skipped above end up in the result
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && C - Start > 2 &&
          isxdigit(static_cast<unsigned char>(Start[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(Start[2])) != 0)
      {
         char const Tmp[3] = {Start[1], Start[2], 0};
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }
   Res = Word;

   // Skip ending white space
   while (*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0)
      ++C;
   String = C;
   return true;
}
282 /*}}}*/
283 // ParseCWord - Parses a string like a C "" expression /*{{{*/
284 // ---------------------------------------------------------------------
285 /* This expects a series of space separated strings enclosed in ""'s.
286 It concatenates the ""'s into a single string. */
/* ParseCWord - concatenate a series of "quoted" strings.

   String  in: text to parse; out: end of the text (only updated on success)
   Res     receives the concatenation (only updated on success)

   Whitespace between two quoted parts contributes one space to the result.
   Returns false if only spaces are left, a quote is not closed, or a
   non-whitespace character shows up outside of quotes. The result is built
   in a std::string, so the input length is not limited. */
bool ParseCWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      ++C;
   if (*C == 0)
      return false;

   string Joined;
   Joined.reserve(strlen(C));
   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         // copy everything up to the closing quote
         for (C++; *C != 0 && *C != '"'; C++)
            Joined += *C;

         if (*C == 0)
            return false;

         continue;
      }

      // outside of quotes only whitespace is acceptable
      if (isspace(static_cast<unsigned char>(*C)) == 0)
         return false;
      // a run of whitespace counts once
      if (C != String && isspace(static_cast<unsigned char>(C[-1])) != 0)
         continue;
      Joined += ' ';
   }

   Res = Joined;
   String = C;
   return true;
}
324 /*}}}*/
325 // QuoteString - Convert a string into quoted form /*{{{*/
326 // ---------------------------------------------------------------------
327 /* */
/* QuoteString - percent-encode the unsafe characters of Str.

   Str  text to encode
   Bad  NUL-terminated list of additional characters to encode

   A byte is written as %xx (two lowercase hex digits) if it is listed in
   Bad, is '%', is <= 0x20 or >= 0x7F, or is not printable. Everything
   else is copied unchanged. */
string QuoteString(const string &Str, const char *Bad)
{
   string Res;
   Res.reserve(Str.size());
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // classify the unsigned value: isprint() is undefined for negative chars
      unsigned char const Byte = static_cast<unsigned char>(*I);
      bool const MustQuote =
         Byte <= 0x20 || Byte >= 0x7F ||   // control chars and non-ASCII
         Byte == 0x25 ||                   // percent '%' char
         isprint(Byte) == 0 ||
         strchr(Bad,*I) != 0;

      if (MustQuote == false)
      {
         Res += *I;
         continue;
      }

      char Hex[4];
      snprintf(Hex, sizeof(Hex), "%%%02x", static_cast<unsigned int>(Byte));
      Res.append(Hex, 3);
   }
   return Res;
}
344 /*}}}*/
345 // DeQuoteString - Convert a string from quoted form /*{{{*/
346 // ---------------------------------------------------------------------
347 /* This undoes QuoteString */
348 string DeQuoteString(const string &Str)
349 {
350 return DeQuoteString(Str.begin(),Str.end());
351 }
/* DeQuoteString - decode the %XX escapes in the range [begin, end).

   A '%' followed by two hex digits and at least one further character is
   replaced by the byte it encodes; a '%' not followed by that is copied
   literally. Returns the decoded text. */
string DeQuoteString(string::const_iterator const &begin,
                     string::const_iterator const &end)
{
   string Res;
   for (string::const_iterator I = begin; I != end; ++I)
   {
      // compare distances instead of forming I + 2, which may lie past end
      if (*I == '%' && end - I > 2 &&
          isxdigit(static_cast<unsigned char>(I[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(I[2])) != 0)
      {
         char const Tmp[3] = {I[1], I[2], 0};
         Res += (char)strtol(Tmp,0,16);
         // the loop increment steps over the second digit
         I += 2;
         continue;
      }
      Res += *I;
   }
   return Res;
}
374
375 /*}}}*/
376 // SizeToStr - Convert a long into a human readable size /*{{{*/
377 // ---------------------------------------------------------------------
378 /* A max of 4 digits are shown before conversion to the next highest unit.
379 The max length of the string will be 5 chars unless the size is > 10
380 YottaBytes (E24) */
381 string SizeToStr(double Size)
382 {
383 double ASize;
384 if (Size >= 0)
385 ASize = Size;
386 else
387 ASize = -1*Size;
388
389 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
390 ExaBytes, ZettaBytes, YottaBytes */
391 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
392 int I = 0;
393 while (I <= 8)
394 {
395 if (ASize < 100 && I != 0)
396 {
397 std::string S;
398 strprintf(S, "%'.1f %c", ASize, Ext[I]);
399 return S;
400 }
401
402 if (ASize < 10000)
403 {
404 std::string S;
405 strprintf(S, "%'.0f %c", ASize, Ext[I]);
406 return S;
407 }
408 ASize /= 1000.0;
409 I++;
410 }
411 return "";
412 }
413 /*}}}*/
414 // TimeToStr - Convert the time into a string /*{{{*/
415 // ---------------------------------------------------------------------
416 /* Converts a number of seconds to a hms format */
417 string TimeToStr(unsigned long Sec)
418 {
419 std::string S;
420 if (Sec > 60*60*24)
421 {
422 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
423 strprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
424 }
425 else if (Sec > 60*60)
426 {
427 //TRANSLATOR: h means hours, min means minutes, s means seconds
428 strprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
429 }
430 else if (Sec > 60)
431 {
432 //TRANSLATOR: min means minutes, s means seconds
433 strprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
434 }
435 else
436 {
437 //TRANSLATOR: s means seconds
438 strprintf(S,_("%lis"),Sec);
439 }
440 return S;
441 }
442 /*}}}*/
443 // SubstVar - Substitute a string for another string /*{{{*/
444 // ---------------------------------------------------------------------
445 /* This replaces all occurrences of Subst with Contents in Str. */
/* SubstVar - return Str with every occurrence of Subst replaced by Contents.

   Matches are searched left to right and never overlap; inserted text is
   not searched again. An empty Subst or a Str without a match yields Str
   unchanged. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   if (Subst.empty())
      return Str;

   string::size_type Hit = Str.find(Subst);
   if (Hit == string::npos)
      return Str;

   string Result;
   string::size_type Cursor = 0;
   do
   {
      // text between the previous match and this one, then the replacement
      Result.append(Str, Cursor, Hit - Cursor);
      Result.append(Contents);
      Cursor = Hit + Subst.length();

      if (Cursor < Str.length())
         Hit = Str.find(Subst, Cursor);
      else
         Hit = string::npos;
   } while (Hit != string::npos);

   // whatever follows the last match
   if (Cursor < Str.length())
      Result.append(Str, Cursor, string::npos);
   return Result;
}
474 string SubstVar(string Str,const struct SubstVar *Vars)
475 {
476 for (; Vars->Subst != 0; Vars++)
477 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
478 return Str;
479 }
480 /*}}}*/
481 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
482 // ---------------------------------------------------------------------
483 /* Returns a string with the supplied separator depth + 1 times in it */
/* OutputInDepth - build an indentation prefix.

   Returns Separator repeated Depth + 1 times. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Indent;
   unsigned long Remaining = Depth + 1;
   while (Remaining > 0)
   {
      Indent += Separator;
      --Remaining;
   }
   return Indent;
}
491 /*}}}*/
492 // URItoFileName - Convert the uri into a unique file name /*{{{*/
493 // ---------------------------------------------------------------------
494 /* This converts a URI into a safe filename. It quotes all unsafe characters
495 and converts / to _ and removes the scheme identifier. The resulting
496 file name should be unique and never occur again for a different file */
497 string URItoFileName(const string &URI)
498 {
499 // Nuke 'sensitive' items
500 ::URI U(URI);
501 U.User.clear();
502 U.Password.clear();
503 U.Access.clear();
504
505 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
506 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
507 replace(NewURI.begin(),NewURI.end(),'/','_');
508 return NewURI;
509 }
510 /*}}}*/
511 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
512 // ---------------------------------------------------------------------
513 /* This routine performs a base64 transformation on a string. It was ripped
514 from wget and then patched and bug fixed.
515
516 This spec can be found in rfc2045 */
/* Base64Encode - return the base64 encoding of S, padded with '='.

   Bytes are handled as unsigned values so that input bytes >= 0x80 index
   the alphabet correctly. */
string Base64Encode(const string &S)
{
   // Conversion table.
   static char const tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

   string::size_type const Size = S.length();

   // Pre-allocate some space: four output characters per started group of three bytes
   string Final;
   Final.reserve(4 * ((Size + 2) / 3));

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (string::size_type Pos = 0; Pos < Size; Pos += 3)
   {
      bool const HasSecond = (Pos + 1 < Size);
      bool const HasThird = (Pos + 2 < Size);
      unsigned char const B0 = static_cast<unsigned char>(S[Pos]);
      unsigned char const B1 = HasSecond ? static_cast<unsigned char>(S[Pos + 1]) : 0;
      unsigned char const B2 = HasThird ? static_cast<unsigned char>(S[Pos + 2]) : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      if (HasSecond == false)
         break;

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (HasThird == false)
         break;

      Final += tbl[B2 & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Size % 3 == 2)
      Final += '=';
   if (Size % 3 == 1)
      Final += "==";

   return Final;
}
567 /*}}}*/
568 // stringcmp - Arbitrary string compare /*{{{*/
569 // ---------------------------------------------------------------------
570 /* This safely compares two non-null terminated strings of arbitrary
571 length */
/* stringcmp - compare the ranges [A,AEnd) and [B,BEnd) byte by byte.

   Returns 0 if both ranges are equal. If one range runs out first the
   result is 1 when that is A and -1 when it is B; otherwise the first
   differing pair decides: -1 if *A < *B, else 1. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const DoneA = (A == AEnd);
   bool const DoneB = (B == BEnd);
   if (DoneA)
      return DoneB ? 0 : 1;
   if (DoneB)
      return -1;
   return (*A < *B) ? -1 : 1;
}
588
589 #if __GNUC__ >= 3
/* stringcmp - compare a string iterator range with a char pointer range.

   Same result rules as the pointer overload: 0 if equal, 1 if A runs out
   first, -1 if B runs out first, otherwise -1 if *A < *B and 1 if not. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const DoneA = (A == AEnd);
   bool const DoneB = (B == BEnd);
   if (DoneA)
      return DoneB ? 0 : 1;
   if (DoneB)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* stringcmp - compare two string iterator ranges.

   Same result rules as the pointer overload: 0 if equal, 1 if A runs out
   first, -1 if B runs out first, otherwise -1 if *A < *B and 1 if not. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const DoneA = (A == AEnd);
   bool const DoneB = (B == BEnd);
   if (DoneA)
      return DoneB ? 0 : 1;
   if (DoneB)
      return -1;
   return (*A < *B) ? -1 : 1;
}
624 #endif
625 /*}}}*/
626 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
627 // ---------------------------------------------------------------------
628 /* */
629 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
630 {
631 for (; A != AEnd && B != BEnd; A++, B++)
632 if (tolower_ascii(*A) != tolower_ascii(*B))
633 break;
634
635 if (A == AEnd && B == BEnd)
636 return 0;
637 if (A == AEnd)
638 return 1;
639 if (B == BEnd)
640 return -1;
641 if (tolower_ascii(*A) < tolower_ascii(*B))
642 return -1;
643 return 1;
644 }
645 #if __GNUC__ >= 3
646 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
647 const char *B,const char *BEnd)
648 {
649 for (; A != AEnd && B != BEnd; A++, B++)
650 if (tolower_ascii(*A) != tolower_ascii(*B))
651 break;
652
653 if (A == AEnd && B == BEnd)
654 return 0;
655 if (A == AEnd)
656 return 1;
657 if (B == BEnd)
658 return -1;
659 if (tolower_ascii(*A) < tolower_ascii(*B))
660 return -1;
661 return 1;
662 }
663 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
664 string::const_iterator B,string::const_iterator BEnd)
665 {
666 for (; A != AEnd && B != BEnd; A++, B++)
667 if (tolower_ascii(*A) != tolower_ascii(*B))
668 break;
669
670 if (A == AEnd && B == BEnd)
671 return 0;
672 if (A == AEnd)
673 return 1;
674 if (B == BEnd)
675 return -1;
676 if (tolower_ascii(*A) < tolower_ascii(*B))
677 return -1;
678 return 1;
679 }
680 #endif
681 /*}}}*/
682 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
683 // ---------------------------------------------------------------------
684 /* The format is like those used in package files and the method
685 communication system */
686 string LookupTag(const string &Message,const char *Tag,const char *Default)
687 {
688 // Look for a matching tag.
689 int Length = strlen(Tag);
690 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
691 {
692 // Found the tag
693 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
694 {
695 // Find the end of line and strip the leading/trailing spaces
696 string::const_iterator J;
697 I += Length + 1;
698 for (; isspace_ascii(*I) != 0 && I < Message.end(); ++I);
699 for (J = I; *J != '\n' && J < Message.end(); ++J);
700 for (; J > I && isspace_ascii(J[-1]) != 0; --J);
701
702 return string(I,J);
703 }
704
705 for (; *I != '\n' && I < Message.end(); ++I);
706 }
707
708 // Failed to find a match
709 if (Default == 0)
710 return string();
711 return Default;
712 }
713 /*}}}*/
714 // StringToBool - Converts a string into a boolean /*{{{*/
715 // ---------------------------------------------------------------------
716 /* This inspects the string to see if it is true or if it is false and
717 then returns the result. Several variants on true/false are checked. */
/* StringToBool - interpret Text as a boolean.

   Returns 1 for "1", "yes", "true", "with", "on", "enable" and 0 for "0",
   "no", "false", "without", "off", "disable" (words are case-insensitive).
   Anything else yields Default.

   Note: an empty Text is consumed completely by strtol() with value 0 and
   therefore yields 0, not Default. */
int StringToBool(const string &Text,int Default)
{
   char const * const Raw = Text.c_str();

   // keep the full long value: narrowing it to int first would let numbers
   // like 4294967296 pass as 0 where long is wider than int
   char *ParseEnd;
   long const Number = strtol(Raw,&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   if (ParseEnd == Raw + Text.size() && Number >= 0 && Number <= 1)
      return static_cast<int>(Number);

   // Check for negatives
   static char const * const Negatives[] = {"no", "false", "without", "off", "disable"};
   for (char const * const Word : Negatives)
      if (strcasecmp(Raw,Word) == 0)
         return 0;

   // Check for positives
   static char const * const Positives[] = {"yes", "true", "with", "on", "enable"};
   for (char const * const Word : Positives)
      if (strcasecmp(Raw,Word) == 0)
         return 1;

   return Default;
}
746 /*}}}*/
747 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
748 // ---------------------------------------------------------------------
749 /* This converts a time_t into a string time representation that is
750 year 2000 compliant and timezone neutral */
751 string TimeRFC1123(time_t Date)
752 {
753 return TimeRFC1123(Date, false);
754 }
/* TimeRFC1123 - format Date as "Sun, 06 Nov 1994 08:49:37 GMT".

   Date             seconds since the epoch
   NumericTimezone  append " +0000" instead of " GMT"

   Returns "" if gmtime_r() cannot convert Date. Day and month names come
   from the classic "C" locale, which always exists; constructing a named
   locale such as "C.UTF-8" throws on systems where it is not installed. */
string TimeRFC1123(time_t Date, bool const NumericTimezone)
{
   struct tm Conv;
   if (gmtime_r(&Date, &Conv) == NULL)
      return "";

   std::locale const &posix = std::locale::classic();
   std::ostringstream datestr;
   datestr.imbue(posix);
   static char const fmt[] = "%a, %d %b %Y %H:%M:%S";
   std::use_facet<std::time_put<char>>(posix).put(
      std::ostreambuf_iterator<char>(datestr),
      datestr, ' ', &Conv, fmt, fmt + sizeof(fmt) - 1);
   if (NumericTimezone)
      datestr << " +0000";
   else
      datestr << " GMT";
   return datestr.str();
}
774 /*}}}*/
775 // ReadMessages - Read messages from the FD /*{{{*/
776 // ---------------------------------------------------------------------
777 /* This pulls full messages from the input FD into the message buffer.
778 It assumes that messages will not pause during transit so no
779 fancy buffering is used.
780
781 In particular: this reads blocks from the input until it believes
782 that it's run out of input text. Each block is terminated by a
783 double newline ('\n' followed by '\n').
784 */
785 bool ReadMessages(int Fd, vector<string> &List)
786 {
787 char Buffer[64000];
788 // Represents any left-over from the previous iteration of the
789 // parse loop. (i.e., if a message is split across the end
790 // of the buffer, it goes here)
791 string PartialMessage;
792
793 do {
794 int const Res = read(Fd, Buffer, sizeof(Buffer));
795 if (Res < 0 && errno == EINTR)
796 continue;
797
798 // process we read from has died
799 if (Res == 0)
800 return false;
801
802 // No data
803 #if EAGAIN != EWOULDBLOCK
804 if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
805 #else
806 if (Res < 0 && errno == EAGAIN)
807 #endif
808 return true;
809 if (Res < 0)
810 return false;
811
812 // extract the message(s) from the buffer
813 char const *Start = Buffer;
814 char const * const End = Buffer + Res;
815
816 char const * NL = (char const *) memchr(Start, '\n', End - Start);
817 if (NL == NULL)
818 {
819 // end of buffer: store what we have so far and read new data in
820 PartialMessage.append(Start, End - Start);
821 Start = End;
822 }
823 else
824 ++NL;
825
826 if (PartialMessage.empty() == false && Start < End)
827 {
828 // if we start with a new line, see if the partial message we have ended with one
829 // so that we properly detect records ending between two read() runs
830 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
831 // the case \r|\n\r\n is handled by the usual double-newline handling
832 if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
833 {
834 if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
835 {
836 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
837 List.push_back(PartialMessage);
838 PartialMessage.clear();
839 while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
840 Start = NL;
841 }
842 }
843 }
844
845 while (Start < End) {
846 char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
847 if (NL2 == NULL)
848 {
849 // end of buffer: store what we have so far and read new data in
850 PartialMessage.append(Start, End - Start);
851 break;
852 }
853 ++NL2;
854
855 // did we find a double newline?
856 if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
857 {
858 PartialMessage.append(Start, NL2 - Start);
859 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
860 List.push_back(PartialMessage);
861 PartialMessage.clear();
862 while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
863 Start = NL2;
864 }
865 NL = NL2;
866 }
867
868 // we have read at least one complete message and nothing left
869 if (PartialMessage.empty() == true)
870 return true;
871
872 if (WaitFd(Fd) == false)
873 return false;
874 } while (true);
875 }
876 /*}}}*/
877 // MonthConv - Converts a month string into a number /*{{{*/
878 // ---------------------------------------------------------------------
879 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
880 Made it a bit more robust with a few tolower_ascii though. */
881 static int MonthConv(char const * const Month)
882 {
883 switch (tolower_ascii(*Month))
884 {
885 case 'a':
886 return tolower_ascii(Month[1]) == 'p'?3:7;
887 case 'd':
888 return 11;
889 case 'f':
890 return 1;
891 case 'j':
892 if (tolower_ascii(Month[1]) == 'a')
893 return 0;
894 return tolower_ascii(Month[2]) == 'n'?5:6;
895 case 'm':
896 return tolower_ascii(Month[2]) == 'r'?2:4;
897 case 'n':
898 return 10;
899 case 'o':
900 return 9;
901 case 's':
902 return 8;
903
904 // Pretend it is January..
905 default:
906 return 0;
907 }
908 }
909 /*}}}*/
910 // timegm - Internal timegm if the gnu version is not available /*{{{*/
911 // ---------------------------------------------------------------------
912 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
913 than local timezone (mktime assumes the latter).
914
915 This function is a nonstandard GNU extension that is also present on
916 the BSDs and maybe other systems. For others we follow the advice of
917 the manpage of timegm and use his portable replacement. */
918 #ifndef HAVE_TIMEGM
919 static time_t timegm(struct tm *t)
920 {
921 char *tz = getenv("TZ");
922 setenv("TZ", "", 1);
923 tzset();
924 time_t ret = mktime(t);
925 if (tz)
926 setenv("TZ", tz, 1);
927 else
928 unsetenv("TZ");
929 tzset();
930 return ret;
931 }
932 #endif
933 /*}}}*/
934 // RFC1123StrToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
935 // ---------------------------------------------------------------------
936 /* Tries to parse a full date as specified in RFC7231 §7.1.1.1
937 with one exception: HTTP/1.1 valid dates need to have GMT as timezone.
938 As we encounter dates from UTC or with a numeric timezone in other places,
939 we allow them here to be able to reuse the method. Either way, a date
940 must be in UTC or parsing will fail. Previous implementations of this
941 method used to ignore the timezone and assume always UTC. */
942 bool RFC1123StrToTime(const char* const str,time_t &time)
943 {
944 unsigned short day = 0;
945 signed int year = 0; // yes, Y23K problem – we gonna worry then…
946 std::string weekday, month, datespec, timespec, zone;
947 std::istringstream ss(str);
948 auto const &posix = std::locale("C.UTF-8");
949 ss.imbue(posix);
950 ss >> weekday;
951 // we only superficially check weekday, mostly to avoid accepting localized
952 // weekdays here and take only its length to decide which datetime format we
953 // encounter here. The date isn't stored.
954 std::transform(weekday.begin(), weekday.end(), weekday.begin(), ::tolower);
955 std::array<char const * const, 7> c_weekdays = {{ "sun", "mon", "tue", "wed", "thu", "fri", "sat" }};
956 if (std::find(c_weekdays.begin(), c_weekdays.end(), weekday.substr(0,3)) == c_weekdays.end())
957 return false;
958
959 switch (weekday.length())
960 {
961 case 4:
962 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
963 if (weekday[3] != ',')
964 return false;
965 ss >> day >> month >> year >> timespec >> zone;
966 break;
967 case 3:
968 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
969 ss >> month >> day >> timespec >> year;
970 zone = "UTC";
971 break;
972 case 0:
973 case 1:
974 case 2:
975 return false;
976 default:
977 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
978 if (weekday[weekday.length() - 1] != ',')
979 return false;
980 ss >> datespec >> timespec >> zone;
981 auto const expldate = VectorizeString(datespec, '-');
982 if (expldate.size() != 3)
983 return false;
984 try {
985 size_t pos;
986 day = std::stoi(expldate[0], &pos);
987 if (pos != expldate[0].length())
988 return false;
989 year = 1900 + std::stoi(expldate[2], &pos);
990 if (pos != expldate[2].length())
991 return false;
992 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(expldate[1].c_str()) + 1, day);
993 } catch (...) {
994 return false;
995 }
996 break;
997 }
998
999 if (ss.fail() || ss.bad() || !ss.eof())
1000 return false;
1001
1002 if (zone != "GMT" && zone != "UTC" && zone != "Z") // RFC 822
1003 {
1004 // numeric timezones as a should of RFC 1123 and generally preferred
1005 try {
1006 size_t pos;
1007 auto const z = std::stoi(zone, &pos);
1008 if (z != 0 || pos != zone.length())
1009 return false;
1010 } catch (...) {
1011 return false;
1012 }
1013 }
1014
1015 if (datespec.empty())
1016 {
1017 if (month.empty())
1018 return false;
1019 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(month.c_str()) + 1, day);
1020 }
1021
1022 std::string const datetime = datespec + ' ' + timespec;
1023 struct tm Tm;
1024 if (strptime(datetime.c_str(), "%Y-%m-%d %H:%M:%S", &Tm) == nullptr)
1025 return false;
1026 time = timegm(&Tm);
1027 return true;
1028 }
1029 /*}}}*/
1030 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
1031 // ---------------------------------------------------------------------
1032 /* */
/* FTPMDTMStrToTime - parse an FTP MDTM timestamp (YYYYMMDDHHMMSS).

   str   timestamp text
   time  receives the result; only written on success

   Returns false if strptime() rejects the text. The struct tm is zeroed
   first so that fields strptime() does not fill in have a defined value
   when handed to timegm(). */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));
   // MDTM has no whitespace between the fields; whitespace in the format is
   // the recommended way to separate conversions and is skipped by strptime
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
1043 /*}}}*/
1044 // StrToTime - Converts a string into a time_t /*{{{*/
1045 // ---------------------------------------------------------------------
1046 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
1047 and the C library asctime format. It requires the GNU library function
1048 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
1049 reason the C library does not provide any such function :< This also
1050 handles the weird, but unambiguous FTP time format*/
1051 bool StrToTime(const string &Val,time_t &Result)
1052 {
1053 struct tm Tm;
1054 char Month[10];
1055
1056 // Skip the day of the week
1057 const char *I = strchr(Val.c_str(), ' ');
1058
1059 // Handle RFC 1123 time
1060 Month[0] = 0;
1061 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
1062 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1063 {
1064 // Handle RFC 1036 time
1065 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
1066 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
1067 Tm.tm_year += 1900;
1068 else
1069 {
1070 // asctime format
1071 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
1072 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
1073 {
1074 // 'ftp' time
1075 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
1076 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1077 return false;
1078 Tm.tm_mon--;
1079 }
1080 }
1081 }
1082
1083 Tm.tm_isdst = 0;
1084 if (Month[0] != 0)
1085 Tm.tm_mon = MonthConv(Month);
1086 else
1087 Tm.tm_mon = 0; // we don't have a month, so pick something
1088 Tm.tm_year -= 1900;
1089
1090 // Convert to local time and then to GMT
1091 Result = timegm(&Tm);
1092 return true;
1093 }
1094 /*}}}*/
// StrToNum - Convert a fixed length string to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the fixed length string headers in tar and ar
   files. Exactly Len bytes are taken from Str, which does not need to be
   null terminated. A field consisting only of blanks yields zero.
   Returns false if the field is 30 bytes or longer, if it does not start
   with a number in the given Base, or if the number does not fit into
   an unsigned long. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I;
   for (I = 0; S[I] == ' '; I++);
   if (S[I] == 0)
      return true;

   /* strtoul reports overflow only through errno, so clear it for the
      call and put the caller's value back afterwards */
   int const SavedErrno = errno;
   errno = 0;
   char *End;
   Res = strtoul(S,&End,Base);
   bool const Overflow = (errno == ERANGE);
   errno = SavedErrno;

   if (End == S || Overflow == true)
      return false;

   return true;
}
1121 /*}}}*/
// StrToNum - Convert a fixed length string to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the fixed length string headers in tar and ar
   files. Exactly Len bytes are taken from Str, which does not need to be
   null terminated. A field consisting only of blanks yields zero.
   Returns false if the field is 30 bytes or longer, if it does not start
   with a number in the given Base, or if the number does not fit into
   an unsigned long long. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char S[30];
   if (Len >= sizeof(S))
      return false;
   memcpy(S,Str,Len);
   S[Len] = 0;

   // All spaces is a zero
   Res = 0;
   unsigned I;
   for (I = 0; S[I] == ' '; I++);
   if (S[I] == 0)
      return true;

   /* strtoull reports overflow only through errno, so clear it for the
      call and put the caller's value back afterwards */
   int const SavedErrno = errno;
   errno = 0;
   char *End;
   Res = strtoull(S,&End,Base);
   bool const Overflow = (errno == ERANGE);
   errno = SavedErrno;

   if (End == S || Overflow == true)
      return false;

   return true;
}
1148 /*}}}*/
1149
// Base256ToNum - Convert a fixed length binary to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files. The field must have the top bit of its first byte set; the
   remaining 7 bits of that byte and all following bytes form a big-endian
   number.
   Returns false if the field is empty, the marker bit is missing, or the
   number does not fit into an unsigned long long. Res is only written on
   success. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   // the marker bit lives in the first byte, so there has to be one
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   unsigned long long Value = static_cast<unsigned char>(Str[0]) & 0x7F;
   for (unsigned int i = 1; i < Len; ++i)
   {
      // another shift would push set bits out of the top: refuse
      if (Value > (~0ULL >> 8))
	 return false;
      /* go through unsigned char: a plain char may be signed and bytes
	 >= 0x80 would otherwise be sign-extended into the result */
      Value = (Value << 8) | static_cast<unsigned char>(Str[i]);
   }

   Res = Value;
   return true;
}
1166 /*}}}*/
1167 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1168 // ---------------------------------------------------------------------
1169 /* This is used in decoding the 256bit encoded fixed length fields in
1170 tar files */
1171 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1172 {
1173 unsigned long long Num;
1174 bool rc;
1175
1176 rc = Base256ToNum(Str, Num, Len);
1177 Res = Num;
1178 if (Res != Num)
1179 return false;
1180
1181 return rc;
1182 }
1183 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: maps '0'-'9', 'a'-'f' and 'A'-'F' to 0-15 and
   everything else to -1 */
static int HexDigit(int c)
{
   if (c >= 'A' && c <= 'F')
      return 10 + (c - 'A');
   if (c >= 'a' && c <= 'f')
      return 10 + (c - 'a');
   if (c < '0' || c > '9')
      return -1;
   return c - '0';
}
1197 /*}}}*/
1198 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1199 // ---------------------------------------------------------------------
1200 /* The length of the buffer must be exactly 1/2 the length of the string. */
1201 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1202 {
1203 return Hex2Num(APT::StringView(Str), Num, Length);
1204 }
1205
1206 bool Hex2Num(const APT::StringView Str,unsigned char *Num,unsigned int Length)
1207 {
1208 if (Str.length() != Length*2)
1209 return false;
1210
1211 // Convert each digit. We store it in the same order as the string
1212 int J = 0;
1213 for (auto I = Str.begin(); I != Str.end();J++, I += 2)
1214 {
1215 int first_half = HexDigit(I[0]);
1216 int second_half;
1217 if (first_half < 0)
1218 return false;
1219
1220 second_half = HexDigit(I[1]);
1221 if (second_half < 0)
1222 return false;
1223 Num[J] = first_half << 4;
1224 Num[J] += second_half;
1225 }
1226
1227 return true;
1228 }
1229 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed in place: nulls are written at the end of
   each item and List receives pointers into Input, followed by a null
   entry. White space around items is dropped.
   ListMax is the capacity of List including the terminating null entry.
   Returns false if List is too small; the entries that did fit are still
   null terminated (unless ListMax is 0, in which case List is not
   touched at all). */
bool TokSplitString(char Tok,char *Input,char **List,
		    unsigned long ListMax)
{
   // Without room for even the terminating entry nothing can be stored
   if (ListMax == 0)
      return false;

   /* isspace must not be handed a negative value, so every character is
      passed through unsigned char first */

   // Strip any leading spaces
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
	 // no slot left for the terminator: sacrifice the last item for it
	 List[Count-1] = 0;
	 return false;
      }

      // Advance pos over the separator, the null just written and spaces
      for (; Pos != Stop && (*Pos == Tok || isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1269 /*}}}*/
// VectorizeString - Split a string up into a vector of strings	/*{{{*/
// ---------------------------------------------------------------------
/* Splits haystack at every occurrence of split and returns the pieces in
   order. Empty pieces between two separators (and before a leading one)
   are kept, but a separator at the very end does not produce a trailing
   empty piece. An empty haystack gives an empty vector. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;

   // Work on offsets so that nothing beyond the end is ever formed
   string::size_type start = 0;
   while (start < haystack.size())
   {
      string::size_type const hit = haystack.find(split, start);
      if (hit == string::npos)
      {
	 // no further separator: the remainder is the last piece
	 exploded.push_back(haystack.substr(start));
	 break;
      }
      exploded.push_back(haystack.substr(start, hit - start));
      start = hit + 1;
   }
   return exploded;
}
1289 /*}}}*/
// StringSplit - split a string into a string vector by token		/*{{{*/
// ---------------------------------------------------------------------
/* Cuts s at every occurrence of the (possibly multi-character) separator
   sep. At most maxsplit items are produced; once that limit is reached
   the unsplit remainder becomes the final item. An empty separator
   yields an empty vector. */
vector<string> StringSplit(std::string const &s, std::string const &sep,
			   unsigned int maxsplit)
{
   vector<string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);

      // the item about to be added is the last one allowed: take the rest
      if (pieces.size() + 1 >= maxsplit)
      {
	 pieces.push_back(s.substr(from));
	 break;
      }

      // with hit == npos the count is huge and substr runs to the end
      pieces.push_back(s.substr(from, hit - from));
      if (hit == string::npos)
	 break;
      from = hit + sep.size();
   }
   return pieces;
}
1320 /*}}}*/
1321 // RegexChoice - Simple regex list/list matcher /*{{{*/
1322 // ---------------------------------------------------------------------
1323 /* */
1324 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1325 const char **ListEnd)
1326 {
1327 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1328 R->Hit = false;
1329
1330 unsigned long Hits = 0;
1331 for (; ListBegin < ListEnd; ++ListBegin)
1332 {
1333 // Check if the name is a regex
1334 const char *I;
1335 bool Regex = true;
1336 for (I = *ListBegin; *I != 0; I++)
1337 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1338 break;
1339 if (*I == 0)
1340 Regex = false;
1341
1342 // Compile the regex pattern
1343 regex_t Pattern;
1344 if (Regex == true)
1345 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1346 REG_NOSUB) != 0)
1347 Regex = false;
1348
1349 // Search the list
1350 bool Done = false;
1351 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1352 {
1353 if (R->Str[0] == 0)
1354 continue;
1355
1356 if (strcasecmp(R->Str,*ListBegin) != 0)
1357 {
1358 if (Regex == false)
1359 continue;
1360 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1361 continue;
1362 }
1363 Done = true;
1364
1365 if (R->Hit == false)
1366 Hits++;
1367
1368 R->Hit = true;
1369 }
1370
1371 if (Regex == true)
1372 regfree(&Pattern);
1373
1374 if (Done == false)
1375 _error->Warning(_("Selection %s not found"),*ListBegin);
1376 }
1377
1378 return Hits;
1379 }
1380 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters.

   iovprintf makes one formatting attempt into a buffer of 'size' bytes.
   On success the text is written to out and true is returned. Otherwise
   nothing is written, size is raised (to the exact requirement if
   vsnprintf reported it, doubled if not) and false is returned so that
   the caller can retry with a freshly started va_list.

   ioprintf writes the formatted text to a stream, strprintf replaces the
   content of a string with it. */
static bool iovprintf(ostream &out, const char *format,
		      va_list &args, ssize_t &size) {
   // owned buffer: released on every path, throws instead of handing a
   // null pointer to vsnprintf if the allocation fails
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(buffer.data(), buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << buffer.data();
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   while (true) {
      // the argument list is consumed by each attempt, so restart it
      va_start(args,format);
      bool const ret = iovprintf(out, format, args, size);
      va_end(args);
      if (ret == true)
	 return;
   }
}
void strprintf(string &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   std::ostringstream outstr;
   while (true) {
      // the argument list is consumed by each attempt, so restart it
      va_start(args,format);
      bool const ret = iovprintf(outstr, format, args, size);
      va_end(args);
      if (ret == true)
	 break;
   }
   out = outstr.str();
}
1430 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* Formats into [Buffer,End) and returns a pointer to the end of the new
   string, so that calls can be chained by feeding the result back in as
   Buffer. Nothing is ever written at or beyond End. End is returned if
   there is no room at all, if formatting fails, or if the output had to
   be cut short. The buffer is null terminated unless Buffer == End. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Written = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // vsnprintf reports the length it wanted to write, which may exceed the room
   bool const Fits = (Written >= 0 && Written <= End - Buffer);
   return Fits ? Buffer + Written : End;
}
1452 /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Returns everything after the first colon, or the unchanged string if
   it contains no colon */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return Colon == string::npos ? VerStr : VerStr.substr(Colon + 1);
}
1462 /*}}}*/
1463
1464 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1465 // ---------------------------------------------------------------------
1466 /* This little function is the most called method we have and tries
1467 therefore to do the absolut minimum - and is notable faster than
1468 standard tolower/toupper and as a bonus avoids problems with different
1469 locales - we only operate on ascii chars anyway. */
1470 #undef tolower_ascii
1471 int tolower_ascii(int const c) APT_CONST APT_COLD;
1472 int tolower_ascii(int const c)
1473 {
1474 return tolower_ascii_inline(c);
1475 }
1476 /*}}}*/
1477
1478 // isspace_ascii - isspace() function that ignores the locale /*{{{*/
1479 // ---------------------------------------------------------------------
1480 /* This little function is one of the most called methods we have and tries
1481 therefore to do the absolut minimum - and is notable faster than
1482 standard isspace() and as a bonus avoids problems with different
1483 locales - we only operate on ascii chars anyway. */
1484 #undef isspace_ascii
1485 int isspace_ascii(int const c) APT_CONST APT_COLD;
1486 int isspace_ascii(int const c)
1487 {
1488 return isspace_ascii_inline(c);
1489 }
1490 /*}}}*/
1491
1492 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1493 // ---------------------------------------------------------------------
1494 /* The domain list is a comma separate list of domains that are suffix
1495 matched against the argument */
1496 bool CheckDomainList(const string &Host,const string &List)
1497 {
1498 string::const_iterator Start = List.begin();
1499 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1500 {
1501 if (Cur < List.end() && *Cur != ',')
1502 continue;
1503
1504 // Match the end of the string..
1505 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1506 Cur - Start != 0 &&
1507 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1508 return true;
1509
1510 Start = Cur + 1;
1511 }
1512 return false;
1513 }
1514 /*}}}*/
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries in front of the terminating NULL entry */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return cursor - str_array;
}
1526 /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which
     \\     becomes a single backslash,
     \0XX   becomes the character with the octal value XX,
     \xXX   becomes the character with the hexadecimal value XX.
   Exactly two characters are consumed as digits; characters which are no
   valid digits are not diagnosed. A backslash followed by anything else
   is dropped together with that character, as is a backslash at the very
   end. A \0 or \x without two characters behind it is dropped as well
   and whatever follows is copied literally. */
string DeEscapeString(const string &input)
{
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
	 output += *it;
	 continue;
      }

      // ensure we have a char to read behind the backslash
      if (input.end() - it < 2)
	 continue;

      // step onto the character selecting the kind of escape
      ++it;

      // deal with double escape
      if (*it == '\\')
      {
	 output += '\\';
	 continue;
      }

      int base;
      switch (*it)
      {
	 case '0':
	 base = 8;
	 break;
	 case 'x':
	 base = 16;
	 break;
	 default:
	 // FIXME: raise exception here?
	 continue;
      }

      /* both digits must really be there: reading them blindly would
	 touch the end of the string and move 'it' beyond it */
      if (input.end() - it <= 2)
	 continue;

      char const digits[3] = { it[1], it[2], 0 };
      output += static_cast<char>(strtol(digits, 0, base));
      it += 2;
   }
   return output;
}
1588 /*}}}*/
1589 // URI::CopyFrom - Copy from an object /*{{{*/
1590 // ---------------------------------------------------------------------
1591 /* This parses the URI into all of its components */
1592 void URI::CopyFrom(const string &U)
1593 {
1594 string::const_iterator I = U.begin();
1595
1596 // Locate the first colon, this separates the scheme
1597 for (; I < U.end() && *I != ':' ; ++I);
1598 string::const_iterator FirstColon = I;
1599
1600 /* Determine if this is a host type URI with a leading double //
1601 and then search for the first single / */
1602 string::const_iterator SingleSlash = I;
1603 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1604 SingleSlash += 3;
1605
1606 /* Find the / indicating the end of the hostname, ignoring /'s in the
1607 square brackets */
1608 bool InBracket = false;
1609 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1610 {
1611 if (*SingleSlash == '[')
1612 InBracket = true;
1613 if (InBracket == true && *SingleSlash == ']')
1614 InBracket = false;
1615 }
1616
1617 if (SingleSlash > U.end())
1618 SingleSlash = U.end();
1619
1620 // We can now write the access and path specifiers
1621 Access.assign(U.begin(),FirstColon);
1622 if (SingleSlash != U.end())
1623 Path.assign(SingleSlash,U.end());
1624 if (Path.empty() == true)
1625 Path = "/";
1626
1627 // Now we attempt to locate a user:pass@host fragment
1628 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1629 FirstColon += 3;
1630 else
1631 FirstColon += 1;
1632 if (FirstColon >= U.end())
1633 return;
1634
1635 if (FirstColon > SingleSlash)
1636 FirstColon = SingleSlash;
1637
1638 // Find the colon...
1639 I = FirstColon + 1;
1640 if (I > SingleSlash)
1641 I = SingleSlash;
1642
1643 // Search for the @ separating user:pass from host
1644 auto const RevAt = std::find(
1645 std::string::const_reverse_iterator(SingleSlash),
1646 std::string::const_reverse_iterator(I), '@');
1647 string::const_iterator const At = RevAt.base() == I ? SingleSlash : std::prev(RevAt.base());
1648 // and then look for the colon between user and pass
1649 string::const_iterator const SecondColon = std::find(I, At, ':');
1650
1651 // Now write the host and user/pass
1652 if (At == SingleSlash)
1653 {
1654 if (FirstColon < SingleSlash)
1655 Host.assign(FirstColon,SingleSlash);
1656 }
1657 else
1658 {
1659 Host.assign(At+1,SingleSlash);
1660 // username and password must be encoded (RFC 3986)
1661 User.assign(DeQuoteString(FirstColon,SecondColon));
1662 if (SecondColon < At)
1663 Password.assign(DeQuoteString(SecondColon+1,At));
1664 }
1665
1666 // Now we parse the RFC 2732 [] hostnames.
1667 unsigned long PortEnd = 0;
1668 InBracket = false;
1669 for (unsigned I = 0; I != Host.length();)
1670 {
1671 if (Host[I] == '[')
1672 {
1673 InBracket = true;
1674 Host.erase(I,1);
1675 continue;
1676 }
1677
1678 if (InBracket == true && Host[I] == ']')
1679 {
1680 InBracket = false;
1681 Host.erase(I,1);
1682 PortEnd = I;
1683 continue;
1684 }
1685 I++;
1686 }
1687
1688 // Tsk, weird.
1689 if (InBracket == true)
1690 {
1691 Host.clear();
1692 return;
1693 }
1694
1695 // Now we parse off a port number from the hostname
1696 Port = 0;
1697 string::size_type Pos = Host.rfind(':');
1698 if (Pos == string::npos || Pos < PortEnd)
1699 return;
1700
1701 Port = atoi(string(Host,Pos+1).c_str());
1702 Host.assign(Host,0,Pos);
1703 }
1704 /*}}}*/
1705 // URI::operator string - Convert the URI to a string /*{{{*/
1706 // ---------------------------------------------------------------------
1707 /* */
1708 URI::operator string()
1709 {
1710 std::stringstream Res;
1711
1712 if (Access.empty() == false)
1713 Res << Access << ':';
1714
1715 if (Host.empty() == false)
1716 {
1717 if (Access.empty() == false)
1718 Res << "//";
1719
1720 if (User.empty() == false)
1721 {
1722 // FIXME: Technically userinfo is permitted even less
1723 // characters than these, but this is not conveniently
1724 // expressed with a blacklist.
1725 Res << QuoteString(User, ":/?#[]@");
1726 if (Password.empty() == false)
1727 Res << ":" << QuoteString(Password, ":/?#[]@");
1728 Res << "@";
1729 }
1730
1731 // Add RFC 2732 escaping characters
1732 if (Access.empty() == false && Host.find_first_of("/:") != string::npos)
1733 Res << '[' << Host << ']';
1734 else
1735 Res << Host;
1736
1737 if (Port != 0)
1738 Res << ':' << std::to_string(Port);
1739 }
1740
1741 if (Path.empty() == false)
1742 {
1743 if (Path[0] != '/')
1744 Res << "/" << Path;
1745 else
1746 Res << Path;
1747 }
1748
1749 return Res.str();
1750 }
1751 /*}}}*/
1752 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1753 string URI::SiteOnly(const string &URI)
1754 {
1755 ::URI U(URI);
1756 U.User.clear();
1757 U.Password.clear();
1758 U.Path.clear();
1759 return U;
1760 }
1761 /*}}}*/
1762 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1763 string URI::ArchiveOnly(const string &URI)
1764 {
1765 ::URI U(URI);
1766 U.User.clear();
1767 U.Password.clear();
1768 if (U.Path.empty() == false && U.Path[U.Path.length() - 1] == '/')
1769 U.Path.erase(U.Path.length() - 1);
1770 return U;
1771 }
1772 /*}}}*/
1773 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1774 string URI::NoUserPassword(const string &URI)
1775 {
1776 ::URI U(URI);
1777 U.User.clear();
1778 U.Password.clear();
1779 return U;
1780 }
1781 /*}}}*/