]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Use C locale instead of C.UTF-8 for protocol strings
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <array>
25 #include <algorithm>
26 #include <iomanip>
27 #include <locale>
28 #include <sstream>
29 #include <string>
30 #include <vector>
31
32 #include <stddef.h>
33 #include <stdlib.h>
34 #include <time.h>
35 #include <ctype.h>
36 #include <string.h>
37 #include <stdio.h>
38 #include <unistd.h>
39 #include <regex.h>
40 #include <errno.h>
41 #include <stdarg.h>
42 #include <iconv.h>
43
44 #include <apti18n.h>
45 /*}}}*/
46 using namespace std;
47
48 // Strip - Remove white space from the front and back of a string /*{{{*/
49 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Strip - return a copy of str without leading and trailing whitespace.

   Whitespace is whatever isspace() reports. A string that is empty or
   consists only of whitespace yields an empty string. */
std::string Strip(const std::string &str)
{
   // ensure we have at least one character
   if (str.empty() == true)
      return str;

   // isspace() is only defined for values representable as unsigned char,
   // so bytes >= 0x80 must not be passed as (possibly negative) plain char
   auto const is_space = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };

   char const * const s = str.c_str();
   size_t start = 0;
   while (is_space(s[start]))
      ++start; // find the first not-space; the terminating NUL stops the scan

   // string contains only whitespaces
   if (s[start] == '\0')
      return "";

   size_t end = str.length() - 1;
   while (is_space(s[end]))
      --end; // find the last not-space; s[start] is a non-space, so this stops

   return str.substr(start, end - start + 1);
}

/* Endswith - true if s ends with the complete string end
   (an empty end matches every s) */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   return (s.compare(s.size() - end.size(), end.size(), end) == 0);
}

/* Startswith - true if s begins with the complete string start
   (an empty start matches every s) */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   return (s.compare(0, start.size(), start) == 0);
}

}
}
90 /*}}}*/
91 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
92 // ---------------------------------------------------------------------
/* This is handy to use before displaying some information to the end user */
94 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
95 {
96 iconv_t cd;
97 const char *inbuf;
98 char *inptr, *outbuf;
99 size_t insize, bufsize;
100 dest->clear();
101
102 cd = iconv_open(codeset, "UTF-8");
103 if (cd == (iconv_t)(-1)) {
104 // Something went wrong
105 if (errno == EINVAL)
106 _error->Error("conversion from 'UTF-8' to '%s' not available",
107 codeset);
108 else
109 perror("iconv_open");
110
111 return false;
112 }
113
114 insize = bufsize = orig.size();
115 inbuf = orig.data();
116 inptr = (char *)inbuf;
117 outbuf = new char[bufsize];
118 size_t lastError = -1;
119
120 while (insize != 0)
121 {
122 char *outptr = outbuf;
123 size_t outsize = bufsize;
124 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
125 dest->append(outbuf, outptr - outbuf);
126 if (err == (size_t)(-1))
127 {
128 switch (errno)
129 {
130 case EILSEQ:
131 insize--;
132 inptr++;
133 // replace a series of unknown multibytes with a single "?"
134 if (lastError != insize) {
135 lastError = insize - 1;
136 dest->append("?");
137 }
138 break;
139 case EINVAL:
140 insize = 0;
141 break;
142 case E2BIG:
143 if (outptr == outbuf)
144 {
145 bufsize *= 2;
146 delete[] outbuf;
147 outbuf = new char[bufsize];
148 }
149 break;
150 }
151 }
152 }
153
154 delete[] outbuf;
155
156 iconv_close(cd);
157
158 return true;
159 }
160 /*}}}*/
161 // strstrip - Remove white space from the front and back of a string /*{{{*/
162 // ---------------------------------------------------------------------
163 /* This is handy to use when parsing a file. It also removes \n's left
164 over from fgets and company */
165 char *_strstrip(char *String)
166 {
167 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
168
169 if (*String == 0)
170 return String;
171 return _strrstrip(String);
172 }
173 /*}}}*/
174 // strrstrip - Remove white space from the back of a string /*{{{*/
175 // ---------------------------------------------------------------------
/* Cuts trailing spaces, tabs, newlines and carriage returns off String
   by writing a NUL terminator in place. Returns String itself. */
char *_strrstrip(char *String)
{
   // Work with a length instead of a pointer walking backwards, so that we
   // never form a pointer in front of the buffer for empty/all-blank input
   size_t Len = strlen(String);
   while (Len != 0 && (String[Len - 1] == ' ' || String[Len - 1] == '\t' ||
                       String[Len - 1] == '\n' || String[Len - 1] == '\r'))
      --Len;
   String[Len] = 0;
   return String;
}
185 /*}}}*/
186 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
187 // ---------------------------------------------------------------------
188 /* */
/* Replaces tab characters in String by runs of spaces, in place.

   String - NUL terminated text to modify
   Len    - size of the buffer String lives in; nothing at or beyond
            String + Len is inspected
   Returns String. If a tab is found with fewer than 8 bytes of buffer
   left, the string is cut off at that tab instead.

   NOTE(review): the width computed for each tab is kept exactly as found.
   It does not look like it pads to multiples of 8 (a leading tab becomes a
   single space, and the character following an expanded tab is skipped) -
   confirm the intended behaviour before relying on column alignment. */
char *_strtabexpand(char *String,size_t Len)
{
   // bound the scan by the buffer: the previous test compared I with
   // I + Len, which can never end the loop
   for (char *I = String; I < String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;
      if (I + 8 > String + Len)
      {
         *I = 0;
         return String;
      }

      /* Assume the start of the string is 0 and find the next 8 char
         division */
      int Fill;
      if (String == I)
         Fill = 1;
      else
         Fill = 8 - ((String - I) % 8);
      Fill -= 2;
      if (Fill <= 0)
      {
         *I = ' ';
         continue;
      }

      // shift the text behind the tab (including its NUL) to make room;
      // strlen(I) counts the tab itself, so this is exactly rest + NUL
      memmove(I + Fill,I + 1,strlen(I));
      for (char *J = I; J + Fill != I; *I = ' ', I++);
   }
   return String;
}
220 /*}}}*/
221 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
222 // ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It can also
   understand [] brackets. */
/* Extracts the next word from String into Res.

   A word ends at the first whitespace that is not inside "..." or [...].
   %XX escapes are decoded and double quotes removed; brackets are kept.
   Everything from the start of String up to the end of the word ends up
   in Res, so blanks in front of the word are part of the result.

   Returns false (leaving String and Res untouched) if only blanks are
   left or a quote/bracket is not closed. On success String is advanced
   behind the word and the whitespace following it. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; C++);
   if (*C == 0)
      return false;

   // Jump to the next word
   // (ctype functions need unsigned char values, plain char may be negative)
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; C++)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters. A std::string grows as needed, so long words
   // are neither truncated nor able to overrun a fixed buffer.
   std::string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && Start + 2 < C &&
          isxdigit(static_cast<unsigned char>(Start[1])) &&
          isxdigit(static_cast<unsigned char>(Start[2])))
      {
         char const Tmp[3] = { Start[1], Start[2], 0 };
         Word.push_back(static_cast<char>(strtol(Tmp,0,16)));
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word.push_back(*Start);
      Start++;
   }
   // the result used to be read back as a C string: keep ending it at a
   // decoded NUL (%00) so callers see the same value as before
   std::string::size_type const Nul = Word.find('\0');
   if (Nul != std::string::npos)
      Word.erase(Nul);
   Res = Word;

   // Skip ending white space
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) != 0; C++);
   String = C;
   return true;
}
283 /*}}}*/
284 // ParseCWord - Parses a string like a C "" expression /*{{{*/
285 // ---------------------------------------------------------------------
286 /* This expects a series of space separated strings enclosed in ""'s.
287 It concatenates the ""'s into a single string. */
/* Joins a sequence of "..." strings into Res.

   The text inside each pair of double quotes is copied verbatim; a run of
   whitespace between them becomes a single blank. Returns false if String
   holds only blanks, is 1024 characters or longer, contains an unclosed
   quote or any non-whitespace character outside of quotes. On success
   String points to the terminating NUL. */
bool ParseCWord(const char *&String,std::string &Res)
{
   // Step over the blanks in front of the first token
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == '\0')
      return false;

   // Every input character yields at most one output character, so
   // refusing over-long input keeps Buffer safe
   char Buffer[1024];
   if (strlen(String) >= sizeof(Buffer))
      return false;

   char *Out = Buffer;
   while (*Cur != '\0')
   {
      if (*Cur == '"')
      {
         // copy everything up to the closing quote
         ++Cur;
         while (*Cur != '\0' && *Cur != '"')
            *Out++ = *Cur++;
         if (*Cur == '\0')
            return false;
         ++Cur; // step over the closing quote
         continue;
      }

      // outside of quotes only whitespace is acceptable
      if (isspace(*Cur) == 0)
         return false;

      // the first blank of a run is kept, the following ones are dropped
      if (Cur == String || isspace(Cur[-1]) == 0)
         *Out++ = ' ';
      ++Cur;
   }
   *Out = '\0';
   Res = Buffer;
   String = Cur;
   return true;
}
325 /*}}}*/
// QuoteString - Convert a string into quoted form /*{{{*/
327 // ---------------------------------------------------------------------
328 /* */
/* Returns a copy of Str in which every byte that is listed in Bad, is a
   percent sign, is not printable, or lies outside of 0x21-0x7E is
   replaced by %XX (two lower case hex digits of the byte value). */
std::string QuoteString(const std::string &Str, const char *Bad)
{
   static char const HexDigits[] = "0123456789abcdef";

   std::string Res;
   Res.reserve(Str.size());
   for (char const Ch : Str)
   {
      // classify the unsigned byte value: isprint() is undefined for
      // negative arguments, which plain char produces for bytes >= 0x80
      unsigned char const Byte = static_cast<unsigned char>(Ch);
      if (strchr(Bad,Ch) != 0 || isprint(Byte) == 0 ||
          Byte == 0x25 || // percent '%' char
          Byte <= 0x20 || Byte >= 0x7F) // control chars
      {
         Res += '%';
         Res += HexDigits[Byte >> 4];
         Res += HexDigits[Byte & 0x0F];
      }
      else
         Res += Ch;
   }
   return Res;
}
345 /*}}}*/
// DeQuoteString - Convert a string from quoted form /*{{{*/
347 // ---------------------------------------------------------------------
348 /* This undoes QuoteString */
349 string DeQuoteString(const string &Str)
350 {
351 return DeQuoteString(Str.begin(),Str.end());
352 }
/* Decodes the range [begin, end): every '%' followed by two hex digits
   (with at least one more position before end, as tested by I + 2 < end)
   is replaced by the byte with that value, everything else is copied
   unchanged. */
std::string DeQuoteString(std::string::const_iterator const &begin,
                          std::string::const_iterator const &end)
{
   std::string Decoded;
   std::string::const_iterator Pos = begin;
   while (Pos != end)
   {
      bool const Escape = *Pos == '%' && Pos + 2 < end &&
                          isxdigit(Pos[1]) && isxdigit(Pos[2]);
      if (Escape == false)
      {
         // ordinary character (or a '%' not starting a valid escape)
         Decoded += *Pos;
         ++Pos;
         continue;
      }

      char const Hex[3] = { Pos[1], Pos[2], 0 };
      Decoded += (char)strtol(Hex,0,16);
      Pos += 3; // the '%' and both digits
   }
   return Decoded;
}
375
376 /*}}}*/
377 // SizeToStr - Convert a long into a human readable size /*{{{*/
378 // ---------------------------------------------------------------------
379 /* A max of 4 digits are shown before conversion to the next highest unit.
380 The max length of the string will be 5 chars unless the size is > 10
381 YottaBytes (E24) */
382 string SizeToStr(double Size)
383 {
384 double ASize;
385 if (Size >= 0)
386 ASize = Size;
387 else
388 ASize = -1*Size;
389
390 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
391 ExaBytes, ZettaBytes, YottaBytes */
392 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
393 int I = 0;
394 while (I <= 8)
395 {
396 if (ASize < 100 && I != 0)
397 {
398 std::string S;
399 strprintf(S, "%'.1f %c", ASize, Ext[I]);
400 return S;
401 }
402
403 if (ASize < 10000)
404 {
405 std::string S;
406 strprintf(S, "%'.0f %c", ASize, Ext[I]);
407 return S;
408 }
409 ASize /= 1000.0;
410 I++;
411 }
412 return "";
413 }
414 /*}}}*/
415 // TimeToStr - Convert the time into a string /*{{{*/
416 // ---------------------------------------------------------------------
417 /* Converts a number of seconds to a hms format */
418 string TimeToStr(unsigned long Sec)
419 {
420 std::string S;
421 if (Sec > 60*60*24)
422 {
423 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
424 strprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
425 }
426 else if (Sec > 60*60)
427 {
428 //TRANSLATOR: h means hours, min means minutes, s means seconds
429 strprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
430 }
431 else if (Sec > 60)
432 {
433 //TRANSLATOR: min means minutes, s means seconds
434 strprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
435 }
436 else
437 {
438 //TRANSLATOR: s means seconds
439 strprintf(S,_("%lis"),Sec);
440 }
441 return S;
442 }
443 /*}}}*/
444 // SubstVar - Substitute a string for another string /*{{{*/
445 // ---------------------------------------------------------------------
446 /* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with every non-overlapping occurrence of Subst (searched
   left to right) replaced by Contents. Str is returned unchanged if
   Subst is empty or does not occur. */
std::string SubstVar(const std::string &Str,const std::string &Subst,const std::string &Contents)
{
   if (Subst.empty())
      return Str;

   std::string::size_type Hit = Str.find(Subst);
   if (Hit == std::string::npos)
      return Str;

   std::string Out;
   std::string::size_type Cursor = 0;
   do
   {
      // the untouched text in front of the match, then the replacement
      Out.append(Str, Cursor, Hit - Cursor);
      Out.append(Contents);
      Cursor = Hit + Subst.length();
      Hit = Str.find(Subst, Cursor);
   } while (Hit != std::string::npos);

   // whatever follows the last match (possibly nothing)
   Out.append(Str, Cursor, std::string::npos);
   return Out;
}
475 string SubstVar(string Str,const struct SubstVar *Vars)
476 {
477 for (; Vars->Subst != 0; Vars++)
478 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
479 return Str;
480 }
481 /*}}}*/
482 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
483 // ---------------------------------------------------------------------
484 /* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string consisting of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Indent;
   unsigned long Remaining = Depth + 1;
   while (Remaining != 0)
   {
      Indent += Separator;
      --Remaining;
   }
   return Indent;
}
492 /*}}}*/
493 // URItoFileName - Convert the uri into a unique file name /*{{{*/
494 // ---------------------------------------------------------------------
495 /* This converts a URI into a safe filename. It quotes all unsafe characters
496 and converts / to _ and removes the scheme identifier. The resulting
497 file name should be unique and never occur again for a different file */
498 string URItoFileName(const string &URI)
499 {
500 // Nuke 'sensitive' items
501 ::URI U(URI);
502 U.User.clear();
503 U.Password.clear();
504 U.Access.clear();
505
506 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
507 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
508 replace(NewURI.begin(),NewURI.end(),'/','_');
509 return NewURI;
510 }
511 /*}}}*/
512 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
513 // ---------------------------------------------------------------------
514 /* This routine performs a base64 transformation on a string. It was ripped
515 from wget and then patched and bug fixed.
516
517 This spec can be found in rfc2045 */
/* Returns the base64 representation (RFC 2045 alphabet, '=' padded, no
   line breaks) of the bytes in S. */
std::string Base64Encode(const std::string &S)
{
   // Conversion table.
   static char const tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   std::string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. The bytes are handled as unsigned values: with a signed char
      the shifts below would yield negative table indexes for bytes >= 0x80.
      Indexing by position also avoids moving an iterator beyond end(). */
   std::string::size_type const Size = S.length();
   for (std::string::size_type Pos = 0; Pos < Size; Pos += 3)
   {
      std::string::size_type const Left = Size - Pos;
      unsigned char const B0 = static_cast<unsigned char>(S[Pos]);
      unsigned char const B1 = (Left > 1) ? static_cast<unsigned char>(S[Pos + 1]) : 0;
      unsigned char const B2 = (Left > 2) ? static_cast<unsigned char>(S[Pos + 2]) : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) + (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Left == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) + (B2 >> 6)];

      if (Left == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
568 /*}}}*/
569 // stringcmp - Arbitrary string compare /*{{{*/
570 // ---------------------------------------------------------------------
571 /* This safely compares two non-null terminated strings of arbitrary
572 length */
/* Compares the ranges [A, AEnd) and [B, BEnd) character by character.

   Returns 0 if both are equal. If one range is a proper prefix of the
   other, 1 is returned when A is the shorter one and -1 when B is.
   Otherwise the first differing characters decide: -1 if *A < *B, else 1. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const LeftDone = (A == AEnd);
   bool const RightDone = (B == BEnd);
   if (LeftDone)
      return RightDone ? 0 : 1;
   if (RightDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
589
590 #if __GNUC__ >= 3
/* Same comparison as the pointer variant, with the left side given as
   string iterators: 0 on equality, 1 if A ends first, -1 if B ends
   first, otherwise -1/1 by the first differing characters. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // walk over the common prefix
   while (A != AEnd && B != BEnd)
   {
      if (*A != *B)
         break;
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same comparison as the pointer variant, with both sides given as
   string iterators: 0 on equality, 1 if A ends first, -1 if B ends
   first, otherwise -1/1 by the first differing characters. */
int stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd,
              std::string::const_iterator B,std::string::const_iterator BEnd)
{
   // walk over the common prefix
   for (;;)
   {
      if (A == AEnd || B == BEnd || *A != *B)
         break;
      ++A;
      ++B;
   }

   bool const LeftDone = (A == AEnd);
   bool const RightDone = (B == BEnd);
   if (LeftDone && RightDone)
      return 0;
   if (LeftDone)
      return 1;
   if (RightDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}
625 #endif
626 /*}}}*/
627 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
628 // ---------------------------------------------------------------------
629 /* */
630 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
631 {
632 for (; A != AEnd && B != BEnd; A++, B++)
633 if (tolower_ascii(*A) != tolower_ascii(*B))
634 break;
635
636 if (A == AEnd && B == BEnd)
637 return 0;
638 if (A == AEnd)
639 return 1;
640 if (B == BEnd)
641 return -1;
642 if (tolower_ascii(*A) < tolower_ascii(*B))
643 return -1;
644 return 1;
645 }
646 #if __GNUC__ >= 3
647 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
648 const char *B,const char *BEnd)
649 {
650 for (; A != AEnd && B != BEnd; A++, B++)
651 if (tolower_ascii(*A) != tolower_ascii(*B))
652 break;
653
654 if (A == AEnd && B == BEnd)
655 return 0;
656 if (A == AEnd)
657 return 1;
658 if (B == BEnd)
659 return -1;
660 if (tolower_ascii(*A) < tolower_ascii(*B))
661 return -1;
662 return 1;
663 }
664 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
665 string::const_iterator B,string::const_iterator BEnd)
666 {
667 for (; A != AEnd && B != BEnd; A++, B++)
668 if (tolower_ascii(*A) != tolower_ascii(*B))
669 break;
670
671 if (A == AEnd && B == BEnd)
672 return 0;
673 if (A == AEnd)
674 return 1;
675 if (B == BEnd)
676 return -1;
677 if (tolower_ascii(*A) < tolower_ascii(*B))
678 return -1;
679 return 1;
680 }
681 #endif
682 /*}}}*/
// LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
684 // ---------------------------------------------------------------------
685 /* The format is like those used in package files and the method
686 communication system */
687 string LookupTag(const string &Message,const char *Tag,const char *Default)
688 {
689 // Look for a matching tag.
690 int Length = strlen(Tag);
691 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
692 {
693 // Found the tag
694 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
695 {
696 // Find the end of line and strip the leading/trailing spaces
697 string::const_iterator J;
698 I += Length + 1;
699 for (; isspace_ascii(*I) != 0 && I < Message.end(); ++I);
700 for (J = I; *J != '\n' && J < Message.end(); ++J);
701 for (; J > I && isspace_ascii(J[-1]) != 0; --J);
702
703 return string(I,J);
704 }
705
706 for (; *I != '\n' && I < Message.end(); ++I);
707 }
708
709 // Failed to find a match
710 if (Default == 0)
711 return string();
712 return Default;
713 }
714 /*}}}*/
715 // StringToBool - Converts a string into a boolean /*{{{*/
716 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants of true/false are checked. */
/* Interprets Text as a boolean.

   Returns 0 or 1 if Text is completely consumed by strtol() and has that
   value (this includes the empty string, which yields 0), 0 for
   no/false/without/off/disable, 1 for yes/true/with/on/enable (all
   compared ignoring case) and Default for everything else. */
int StringToBool(const std::string &Text,int Default)
{
   char *ParseEnd;
   // keep the full long: squeezing it into an int first would let values
   // like 4294967296 wrap around to 0 and pass the range check below
   long const Res = strtol(Text.c_str(),&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   const char * const TextEnd = Text.c_str()+Text.size();
   if (ParseEnd == TextEnd && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"off") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"on") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
747 /*}}}*/
748 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
749 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral */
752 string TimeRFC1123(time_t Date)
753 {
754 return TimeRFC1123(Date, false);
755 }
/* Formats Date (broken down as UTC by gmtime_r) as
   "Sun, 06 Nov 1994 08:49:37" using the classic "C" locale, followed by
   " +0000" if NumericTimezone is set and " GMT" otherwise.
   Returns an empty string if gmtime_r() fails. */
std::string TimeRFC1123(time_t Date, bool const NumericTimezone)
{
   struct tm Conv;
   if (gmtime_r(&Date, &Conv) == NULL)
      return "";

   // day and month names must not be translated: format with the C locale
   // independent of the global one
   auto const posix = std::locale::classic();
   std::ostringstream datestr;
   datestr.imbue(posix);

   char const fmt[] = "%a, %d %b %Y %H:%M:%S";
   char const * const fmtend = fmt + sizeof(fmt) - 1; // without the NUL
   std::use_facet<std::time_put<char>>(posix).put(
      std::ostreambuf_iterator<char>(datestr),
      datestr, ' ', &Conv, fmt, fmtend);

   datestr << (NumericTimezone ? " +0000" : " GMT");
   return datestr.str();
}
775 /*}}}*/
776 // ReadMessages - Read messages from the FD /*{{{*/
777 // ---------------------------------------------------------------------
778 /* This pulls full messages from the input FD into the message buffer.
779 It assumes that messages will not pause during transit so no
780 fancy buffering is used.
781
782 In particular: this reads blocks from the input until it believes
783 that it's run out of input text. Each block is terminated by a
784 double newline ('\n' followed by '\n').
785 */
/* Reads from Fd and appends every complete message found to List.

   A message ends at an empty line ("\n\n", or with "\r\n" line ends);
   the newlines at the end of a message are not stored.

   Returns true if read() reports EAGAIN/EWOULDBLOCK, or once a read has
   been parsed without leaving an unfinished message behind. Returns false
   if read() returns 0, fails with any other error than EINTR, or if
   WaitFd() fails while an unfinished message is pending. */
bool ReadMessages(int Fd, vector<string> &List)
{
   char Buffer[64000];
   // Represents any left-over from the previous iteration of the
   // parse loop. (i.e., if a message is split across the end
   // of the buffer, it goes here)
   string PartialMessage;

   do {
      int const Res = read(Fd, Buffer, sizeof(Buffer));
      // interrupted by a signal: simply try again
      if (Res < 0 && errno == EINTR)
         continue;

      // process we read from has died
      if (Res == 0)
         return false;

      // No data
#if EAGAIN != EWOULDBLOCK
      if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
#else
      if (Res < 0 && errno == EAGAIN)
#endif
         return true;
      if (Res < 0)
         return false;

      // extract the message(s) from the buffer
      // Start: first byte not yet moved into a message or PartialMessage
      // NL:    position right behind the most recently found newline
      char const *Start = Buffer;
      char const * const End = Buffer + Res;

      char const * NL = (char const *) memchr(Start, '\n', End - Start);
      if (NL == NULL)
      {
         // end of buffer: store what we have so far and read new data in
         PartialMessage.append(Start, End - Start);
         Start = End;
      }
      else
         ++NL;

      // NL is only NULL here if Start == End, so the blocks below never
      // touch a NULL pointer
      if (PartialMessage.empty() == false && Start < End)
      {
         // if we start with a new line, see if the partial message we have ended with one
         // so that we properly detect records ending between two read() runs
         // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
         // the case \r|\n\r\n is handled by the usual double-newline handling
         if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
         {
            if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
            {
               // drop the line ends and hand over the completed message
               PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
               List.push_back(PartialMessage);
               PartialMessage.clear();
               // skip any further line ends in front of the next message
               while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
               Start = NL;
            }
         }
      }

      while (Start < End) {
         // look for the newline following the one NL stands behind
         char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
         if (NL2 == NULL)
         {
            // end of buffer: store what we have so far and read new data in
            PartialMessage.append(Start, End - Start);
            break;
         }
         ++NL2;

         // did we find a double newline?
         if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
         {
            PartialMessage.append(Start, NL2 - Start);
            PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
            List.push_back(PartialMessage);
            PartialMessage.clear();
            // skip any further line ends in front of the next message
            while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
            Start = NL2;
         }
         NL = NL2;
      }

      // we have read at least one complete message and nothing left
      if (PartialMessage.empty() == true)
         return true;

      // an unfinished message is pending: wait for more input on Fd
      if (WaitFd(Fd) == false)
         return false;
   } while (true);
}
877 /*}}}*/
878 // MonthConv - Converts a month string into a number /*{{{*/
879 // ---------------------------------------------------------------------
880 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
881 Made it a bit more robust with a few tolower_ascii though. */
882 static int MonthConv(char const * const Month)
883 {
884 switch (tolower_ascii(*Month))
885 {
886 case 'a':
887 return tolower_ascii(Month[1]) == 'p'?3:7;
888 case 'd':
889 return 11;
890 case 'f':
891 return 1;
892 case 'j':
893 if (tolower_ascii(Month[1]) == 'a')
894 return 0;
895 return tolower_ascii(Month[2]) == 'n'?5:6;
896 case 'm':
897 return tolower_ascii(Month[2]) == 'r'?2:4;
898 case 'n':
899 return 10;
900 case 'o':
901 return 9;
902 case 's':
903 return 8;
904
905 // Pretend it is January..
906 default:
907 return 0;
908 }
909 }
910 /*}}}*/
911 // timegm - Internal timegm if the gnu version is not available /*{{{*/
912 // ---------------------------------------------------------------------
913 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
914 than local timezone (mktime assumes the latter).
915
916 This function is a nonstandard GNU extension that is also present on
917 the BSDs and maybe other systems. For others we follow the advice of
918 the manpage of timegm and use his portable replacement. */
919 #ifndef HAVE_TIMEGM
920 static time_t timegm(struct tm *t)
921 {
922 char *tz = getenv("TZ");
923 setenv("TZ", "", 1);
924 tzset();
925 time_t ret = mktime(t);
926 if (tz)
927 setenv("TZ", tz, 1);
928 else
929 unsetenv("TZ");
930 tzset();
931 return ret;
932 }
933 #endif
934 /*}}}*/
935 // RFC1123StrToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
936 // ---------------------------------------------------------------------
/* Tries to parse a full date as specified in RFC7231 §7.1.1.1
   with one exception: HTTP/1.1 valid dates need to have GMT as timezone.
   As we encounter dates from UTC or with a numeric timezone in other places,
   we allow them here to be able to reuse the method. Either way, a date
   must be in UTC or parsing will fail. Previous implementations of this
   method used to ignore the timezone and always assumed UTC. */
943 bool RFC1123StrToTime(const char* const str,time_t &time)
944 {
945 unsigned short day = 0;
946 signed int year = 0; // yes, Y23K problem – we gonna worry then…
947 std::string weekday, month, datespec, timespec, zone;
948 std::istringstream ss(str);
949 auto const &posix = std::locale::classic();
950 ss.imbue(posix);
951 ss >> weekday;
952 // we only superficially check weekday, mostly to avoid accepting localized
953 // weekdays here and take only its length to decide which datetime format we
954 // encounter here. The date isn't stored.
955 std::transform(weekday.begin(), weekday.end(), weekday.begin(), ::tolower);
956 std::array<char const * const, 7> c_weekdays = {{ "sun", "mon", "tue", "wed", "thu", "fri", "sat" }};
957 if (std::find(c_weekdays.begin(), c_weekdays.end(), weekday.substr(0,3)) == c_weekdays.end())
958 return false;
959
960 switch (weekday.length())
961 {
962 case 4:
963 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
964 if (weekday[3] != ',')
965 return false;
966 ss >> day >> month >> year >> timespec >> zone;
967 break;
968 case 3:
969 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
970 ss >> month >> day >> timespec >> year;
971 zone = "UTC";
972 break;
973 case 0:
974 case 1:
975 case 2:
976 return false;
977 default:
978 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
979 if (weekday[weekday.length() - 1] != ',')
980 return false;
981 ss >> datespec >> timespec >> zone;
982 auto const expldate = VectorizeString(datespec, '-');
983 if (expldate.size() != 3)
984 return false;
985 try {
986 size_t pos;
987 day = std::stoi(expldate[0], &pos);
988 if (pos != expldate[0].length())
989 return false;
990 year = 1900 + std::stoi(expldate[2], &pos);
991 if (pos != expldate[2].length())
992 return false;
993 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(expldate[1].c_str()) + 1, day);
994 } catch (...) {
995 return false;
996 }
997 break;
998 }
999
1000 if (ss.fail() || ss.bad() || !ss.eof())
1001 return false;
1002
1003 if (zone != "GMT" && zone != "UTC" && zone != "Z") // RFC 822
1004 {
1005 // numeric timezones as a should of RFC 1123 and generally preferred
1006 try {
1007 size_t pos;
1008 auto const z = std::stoi(zone, &pos);
1009 if (z != 0 || pos != zone.length())
1010 return false;
1011 } catch (...) {
1012 return false;
1013 }
1014 }
1015
1016 if (datespec.empty())
1017 {
1018 if (month.empty())
1019 return false;
1020 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(month.c_str()) + 1, day);
1021 }
1022
1023 std::string const datetime = datespec + ' ' + timespec;
1024 struct tm Tm;
1025 if (strptime(datetime.c_str(), "%Y-%m-%d %H:%M:%S", &Tm) == nullptr)
1026 return false;
1027 time = timegm(&Tm);
1028 return true;
1029 }
1030 /*}}}*/
1031 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
1032 // ---------------------------------------------------------------------
1033 /* */
/* Parses a timestamp of the form YYYYMMDDHHMMSS and stores it,
   interpreted as UTC, in time. Returns false (time untouched) if
   strptime() cannot parse str. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime() only stores the fields named in the format; start from a
   // zeroed struct so that timegm() does not see indeterminate values
   struct tm Tm = {};
   // MDTM includes no whitespaces but recommend and ignored by strptime
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
1044 /*}}}*/
1045 // StrToTime - Converts a string into a time_t /*{{{*/
1046 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :< This also
   handles the weird, but unambiguous FTP time format. */
1052 bool StrToTime(const string &Val,time_t &Result)
1053 {
1054 struct tm Tm;
1055 char Month[10];
1056
1057 // Skip the day of the week
1058 const char *I = strchr(Val.c_str(), ' ');
1059
1060 // Handle RFC 1123 time
1061 Month[0] = 0;
1062 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
1063 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1064 {
1065 // Handle RFC 1036 time
1066 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
1067 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
1068 Tm.tm_year += 1900;
1069 else
1070 {
1071 // asctime format
1072 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
1073 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
1074 {
1075 // 'ftp' time
1076 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
1077 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1078 return false;
1079 Tm.tm_mon--;
1080 }
1081 }
1082 }
1083
1084 Tm.tm_isdst = 0;
1085 if (Month[0] != 0)
1086 Tm.tm_mon = MonthConv(Month);
1087 else
1088 Tm.tm_mon = 0; // we don't have a month, so pick something
1089 Tm.tm_year -= 1900;
1090
1091 // Convert to local time and then to GMT
1092 Result = timegm(&Tm);
1093 return true;
1094 }
1095 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. Str does not need to be NUL terminated, only the
   first Len characters are looked at. A field consisting only of spaces
   (or an empty one) counts as zero. Returns false if the field is 30
   characters or longer, or if no digits could be converted. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // work on a terminated private copy of the field
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   char const *FirstNonSpace = Field;
   while (*FirstNonSpace == ' ')
      ++FirstNonSpace;
   if (*FirstNonSpace == '\0')
      return true;

   // strtoul leaves the end pointer at the start if nothing was parsed
   char *Stop = NULL;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
1122 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. Same as the unsigned long variant, but for values
   which need an unsigned long long: only the first Len characters of
   Str are looked at, an all-space (or empty) field counts as zero and
   false is returned for fields of 30 or more characters or if no digits
   could be converted. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // work on a terminated private copy of the field
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   char const *FirstNonSpace = Field;
   while (*FirstNonSpace == ' ')
      ++FirstNonSpace;
   if (*FirstNonSpace == '\0')
      return true;

   // strtoull leaves the end pointer at the start if nothing was parsed
   char *Stop = NULL;
   Res = strtoull(Field,&Stop,Base);
   return Stop != Field;
}
1149 /*}}}*/
1150
// Base256ToNum - Convert a fixed length binary to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files. Such a field is marked by the top bit of its first byte;
   the remaining 7 bits and all following bytes form a big endian number.
   Returns false (leaving Res untouched) if the field is empty or the
   marker bit is not set. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   // an empty field has no marker byte we could look at
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   /* Go through unsigned char: plain char may be signed, in which case
      every byte >= 0x80 would be sign extended and corrupt the sum */
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1167 /*}}}*/
1168 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1169 // ---------------------------------------------------------------------
1170 /* This is used in decoding the 256bit encoded fixed length fields in
1171 tar files */
1172 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1173 {
1174 unsigned long long Num;
1175 bool rc;
1176
1177 rc = Base256ToNum(Str, Num, Len);
1178 Res = Num;
1179 if (Res != Num)
1180 return false;
1181
1182 return rc;
1183 }
1184 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: returns the value (0-15) of the hex digit c,
   accepting both lower and upper case letters, or -1 if c is not a
   hex digit at all. */
static int HexDigit(int c)
{
   bool const IsDecimal = ('0' <= c && c <= '9');
   bool const IsLower = ('a' <= c && c <= 'f');
   bool const IsUpper = ('A' <= c && c <= 'F');

   if (IsDecimal)
      return c - '0';
   if (IsLower)
      return 10 + (c - 'a');
   if (IsUpper)
      return 10 + (c - 'A');
   return -1;
}
1198 /*}}}*/
1199 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1200 // ---------------------------------------------------------------------
1201 /* The length of the buffer must be exactly 1/2 the length of the string. */
1202 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1203 {
1204 return Hex2Num(APT::StringView(Str), Num, Length);
1205 }
1206
1207 bool Hex2Num(const APT::StringView Str,unsigned char *Num,unsigned int Length)
1208 {
1209 if (Str.length() != Length*2)
1210 return false;
1211
1212 // Convert each digit. We store it in the same order as the string
1213 int J = 0;
1214 for (auto I = Str.begin(); I != Str.end();J++, I += 2)
1215 {
1216 int first_half = HexDigit(I[0]);
1217 int second_half;
1218 if (first_half < 0)
1219 return false;
1220
1221 second_half = HexDigit(I[1]);
1222 if (second_half < 0)
1223 return false;
1224 Num[J] = first_half << 4;
1225 Num[J] += second_half;
1226 }
1227
1228 return true;
1229 }
1230 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at each token location and
   List is filled with pointers into Input, one per item, with the
   whitespace around the items stripped. List is terminated by a null
   entry, so it can hold at most ListMax-1 items: false is returned if
   there are more (List then holds the first ListMax-1 items) or if
   ListMax is zero (List is not touched at all). */
bool TokSplitString(char Tok,char *Input,char **List,
		    unsigned long ListMax)
{
   // Without room for even the terminating entry there is nothing to do
   if (ListMax == 0)
      return false;

   /* isspace() is only defined for values representable as unsigned
      char (and EOF), so every char is converted before being passed. */

   // Strip any leading spaces
   char *Start = Input;
   char *Stop = Start + strlen(Start);
   while (*Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0)
      ++Start;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
	 ++Pos;

      // Back remove spaces
      char *End = Pos;
      while (End > Start &&
	     (End[-1] == Tok || isspace(static_cast<unsigned char>(End[-1])) != 0))
	 --End;
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
	 // the last slot is needed for the terminator
	 List[Count-1] = 0;
	 return false;
      }

      // Advance pos (the null check skips a terminator we just wrote)
      while (Pos != Stop &&
	     (*Pos == Tok || isspace(static_cast<unsigned char>(*Pos)) != 0 || *Pos == 0))
	 ++Pos;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1270 /*}}}*/
// VectorizeString - Split a string up into a vector of strings	/*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.
   Empty items are kept, with two exceptions: an empty haystack results
   in an empty vector and a single trailing split character does not
   produce a trailing empty item. */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   vector<string> exploded;
   if (haystack.empty() == true)
      return exploded;

   string::const_iterator const last = haystack.end();
   string::const_iterator start = haystack.begin();
   while (true)
   {
      string::const_iterator const end = std::find(start, last, split);
      exploded.push_back(string(start, end));

      // no further split character: that was the final item
      if (end == last)
	 break;

      // step over the split character; nothing after it means we are done
      start = end + 1;
      if (start == last)
	 break;
   }
   return exploded;
}
1290 /*}}}*/
// StringSplit - split a string into a string vector by token		/*{{{*/
// ---------------------------------------------------------------------
/* See header for details.
   Splits s at every occurrence of sep. At most maxsplit items are
   produced: once that many would be reached the unsplit remainder of s
   becomes the last item (a maxsplit of 0 behaves like 1). An empty sep
   is bogus and results in an empty vector.
 */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   vector<string> parts;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return parts;

   size_t from = 0;
   while (true)
   {
      size_t const hit = s.find(sep, from);

      // the rest of the string is the final item if there is no further
      // separator or if this item is the last one we are allowed to add
      bool const final_item = (hit == string::npos) ||
			      (parts.size() + 1 >= maxsplit);
      if (final_item == true)
      {
	 parts.push_back(s.substr(from));
	 break;
      }

      parts.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return parts;
}
1321 /*}}}*/
1322 // RegexChoice - Simple regex list/list matcher /*{{{*/
1323 // ---------------------------------------------------------------------
1324 /* */
1325 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1326 const char **ListEnd)
1327 {
1328 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1329 R->Hit = false;
1330
1331 unsigned long Hits = 0;
1332 for (; ListBegin < ListEnd; ++ListBegin)
1333 {
1334 // Check if the name is a regex
1335 const char *I;
1336 bool Regex = true;
1337 for (I = *ListBegin; *I != 0; I++)
1338 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1339 break;
1340 if (*I == 0)
1341 Regex = false;
1342
1343 // Compile the regex pattern
1344 regex_t Pattern;
1345 if (Regex == true)
1346 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1347 REG_NOSUB) != 0)
1348 Regex = false;
1349
1350 // Search the list
1351 bool Done = false;
1352 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1353 {
1354 if (R->Str[0] == 0)
1355 continue;
1356
1357 if (strcasecmp(R->Str,*ListBegin) != 0)
1358 {
1359 if (Regex == false)
1360 continue;
1361 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1362 continue;
1363 }
1364 Done = true;
1365
1366 if (R->Hit == false)
1367 Hits++;
1368
1369 R->Hit = true;
1370 }
1371
1372 if (Regex == true)
1373 regfree(&Pattern);
1374
1375 if (Done == false)
1376 _error->Warning(_("Selection %s not found"),*ListBegin);
1377 }
1378
1379 return Hits;
1380 }
1381 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* Helper doing a single formatting attempt with a buffer of size bytes.
   Returns true after writing the formatted text to out. Returns false if
   the buffer was too small, in which case size has been changed to the
   size to use for the next attempt: the exact size needed if vsnprintf
   reported it, otherwise twice the current size. args is consumed by the
   attempt, so the caller has to restart it before trying again. */
static bool iovprintf(ostream &out, const char *format,
		      va_list &args, ssize_t &size) {
   // owns the memory, so no exit path can leak it and a failed
   // allocation throws instead of handing a NULL buffer to vsnprintf
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(buffer.data(), buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << buffer.data();
      return true;
   }

   if (n > -1)
      size = n + 1;
   else
      size *= 2;
   return false;
}
/* Formats the arguments printf-style according to format and writes the
   result to out. */
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   bool done = false;
   // retry with the adjusted size until the result fits
   while (done == false) {
      va_start(args,format);
      done = iovprintf(out, format, args, size);
      va_end(args);
   }
}
/* Formats the arguments printf-style according to format and replaces
   the content of out with the result. */
void strprintf(string &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   std::ostringstream outstr;
   bool done = false;
   // retry with the adjusted size until the result fits
   while (done == false) {
      va_start(args,format);
      done = iovprintf(outstr, format, args, size);
      va_end(args);
   }
   out = outstr.str();
}
1431 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs. End is returned if there was no room at all, if
   the output had to be truncated or if formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   va_list args;
   int Did;

   if (End <= Buffer)
      return End;
   va_start(args,Format);
   Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   /* Did is the length the complete output needs, which can be far more
      than the room we have, so compare lengths instead of forming a
      pointer which might be way beyond the buffer. */
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1453 /*}}}*/
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Returns everything after the first colon of VerStr, or VerStr as it
   is if it contains no colon. */
string StripEpoch(const string &VerStr)
{
   string::size_type const Colon = VerStr.find(':');
   return (Colon == string::npos) ? VerStr : VerStr.substr(Colon + 1);
}
1463 /*}}}*/
1464
1465 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1466 // ---------------------------------------------------------------------
1467 /* This little function is the most called method we have and tries
1468 therefore to do the absolut minimum - and is notable faster than
1469 standard tolower/toupper and as a bonus avoids problems with different
1470 locales - we only operate on ascii chars anyway. */
1471 #undef tolower_ascii
1472 int tolower_ascii(int const c) APT_CONST APT_COLD;
1473 int tolower_ascii(int const c)
1474 {
1475 return tolower_ascii_inline(c);
1476 }
1477 /*}}}*/
1478
1479 // isspace_ascii - isspace() function that ignores the locale /*{{{*/
1480 // ---------------------------------------------------------------------
1481 /* This little function is one of the most called methods we have and tries
1482 therefore to do the absolut minimum - and is notable faster than
1483 standard isspace() and as a bonus avoids problems with different
1484 locales - we only operate on ascii chars anyway. */
1485 #undef isspace_ascii
1486 int isspace_ascii(int const c) APT_CONST APT_COLD;
1487 int isspace_ascii(int const c)
1488 {
1489 return isspace_ascii_inline(c);
1490 }
1491 /*}}}*/
1492
1493 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1494 // ---------------------------------------------------------------------
1495 /* The domain list is a comma separate list of domains that are suffix
1496 matched against the argument */
1497 bool CheckDomainList(const string &Host,const string &List)
1498 {
1499 string::const_iterator Start = List.begin();
1500 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1501 {
1502 if (Cur < List.end() && *Cur != ',')
1503 continue;
1504
1505 // Match the end of the string..
1506 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1507 Cur - Start != 0 &&
1508 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1509 return true;
1510
1511 Start = Cur + 1;
1512 }
1513 return false;
1514 }
1515 /*}}}*/
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries of str_array in front of the terminating NULL
   entry, which itself is not counted. */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return cursor - str_array;
}
1527 /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of input in which the escape sequences are resolved:
   \\ becomes a single backslash, \0XX the character with the octal
   value XX and \xXX the character with the hexadecimal value XX.
   Everything else which starts with a backslash is not understood: the
   backslash and the character following it are dropped, as is the
   introducer of a \0 or \x sequence which is not followed by two more
   characters. */
string DeEscapeString(const string &input)
{
   string output;
   string::size_type const len = input.size();
   for (string::size_type i = 0; i < len; ++i)
   {
      // just copy non-escape chars
      if (input[i] != '\\')
      {
	 output += input[i];
	 continue;
      }

      // a lone backslash at the very end escapes nothing
      if (i + 1 == len)
	 break;

      // look at the character selecting the kind of escape
      char const kind = input[++i];

      // deal with double escape
      if (kind == '\\')
      {
	 output += '\\';
	 continue;
      }

      int base;
      if (kind == '0')
	 base = 8;
      else if (kind == 'x')
	 base = 16;
      else
	 continue; // FIXME: raise exception here?

      /* Both digits have to be inside the string: accepting a sequence
	 which is cut short by the end of the input would read the
	 terminator as a digit and move the position beyond the end. */
      if (len - i <= 2)
	 continue;

      char const digits[3] = { input[i + 1], input[i + 2], '\0' };
      output += static_cast<char>(strtol(digits, 0, base));
      i += 2;
   }
   return output;
}
1589 /*}}}*/
1590 // URI::CopyFrom - Copy from an object /*{{{*/
1591 // ---------------------------------------------------------------------
1592 /* This parses the URI into all of its components */
1593 void URI::CopyFrom(const string &U)
1594 {
1595 string::const_iterator I = U.begin();
1596
1597 // Locate the first colon, this separates the scheme
1598 for (; I < U.end() && *I != ':' ; ++I);
1599 string::const_iterator FirstColon = I;
1600
1601 /* Determine if this is a host type URI with a leading double //
1602 and then search for the first single / */
1603 string::const_iterator SingleSlash = I;
1604 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1605 SingleSlash += 3;
1606
1607 /* Find the / indicating the end of the hostname, ignoring /'s in the
1608 square brackets */
1609 bool InBracket = false;
1610 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
1611 {
1612 if (*SingleSlash == '[')
1613 InBracket = true;
1614 if (InBracket == true && *SingleSlash == ']')
1615 InBracket = false;
1616 }
1617
1618 if (SingleSlash > U.end())
1619 SingleSlash = U.end();
1620
1621 // We can now write the access and path specifiers
1622 Access.assign(U.begin(),FirstColon);
1623 if (SingleSlash != U.end())
1624 Path.assign(SingleSlash,U.end());
1625 if (Path.empty() == true)
1626 Path = "/";
1627
1628 // Now we attempt to locate a user:pass@host fragment
1629 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1630 FirstColon += 3;
1631 else
1632 FirstColon += 1;
1633 if (FirstColon >= U.end())
1634 return;
1635
1636 if (FirstColon > SingleSlash)
1637 FirstColon = SingleSlash;
1638
1639 // Find the colon...
1640 I = FirstColon + 1;
1641 if (I > SingleSlash)
1642 I = SingleSlash;
1643
1644 // Search for the @ separating user:pass from host
1645 auto const RevAt = std::find(
1646 std::string::const_reverse_iterator(SingleSlash),
1647 std::string::const_reverse_iterator(I), '@');
1648 string::const_iterator const At = RevAt.base() == I ? SingleSlash : std::prev(RevAt.base());
1649 // and then look for the colon between user and pass
1650 string::const_iterator const SecondColon = std::find(I, At, ':');
1651
1652 // Now write the host and user/pass
1653 if (At == SingleSlash)
1654 {
1655 if (FirstColon < SingleSlash)
1656 Host.assign(FirstColon,SingleSlash);
1657 }
1658 else
1659 {
1660 Host.assign(At+1,SingleSlash);
1661 // username and password must be encoded (RFC 3986)
1662 User.assign(DeQuoteString(FirstColon,SecondColon));
1663 if (SecondColon < At)
1664 Password.assign(DeQuoteString(SecondColon+1,At));
1665 }
1666
1667 // Now we parse the RFC 2732 [] hostnames.
1668 unsigned long PortEnd = 0;
1669 InBracket = false;
1670 for (unsigned I = 0; I != Host.length();)
1671 {
1672 if (Host[I] == '[')
1673 {
1674 InBracket = true;
1675 Host.erase(I,1);
1676 continue;
1677 }
1678
1679 if (InBracket == true && Host[I] == ']')
1680 {
1681 InBracket = false;
1682 Host.erase(I,1);
1683 PortEnd = I;
1684 continue;
1685 }
1686 I++;
1687 }
1688
1689 // Tsk, weird.
1690 if (InBracket == true)
1691 {
1692 Host.clear();
1693 return;
1694 }
1695
1696 // Now we parse off a port number from the hostname
1697 Port = 0;
1698 string::size_type Pos = Host.rfind(':');
1699 if (Pos == string::npos || Pos < PortEnd)
1700 return;
1701
1702 Port = atoi(string(Host,Pos+1).c_str());
1703 Host.assign(Host,0,Pos);
1704 }
1705 /*}}}*/
1706 // URI::operator string - Convert the URI to a string /*{{{*/
1707 // ---------------------------------------------------------------------
1708 /* */
1709 URI::operator string()
1710 {
1711 std::stringstream Res;
1712
1713 if (Access.empty() == false)
1714 Res << Access << ':';
1715
1716 if (Host.empty() == false)
1717 {
1718 if (Access.empty() == false)
1719 Res << "//";
1720
1721 if (User.empty() == false)
1722 {
1723 // FIXME: Technically userinfo is permitted even less
1724 // characters than these, but this is not conveniently
1725 // expressed with a blacklist.
1726 Res << QuoteString(User, ":/?#[]@");
1727 if (Password.empty() == false)
1728 Res << ":" << QuoteString(Password, ":/?#[]@");
1729 Res << "@";
1730 }
1731
1732 // Add RFC 2732 escaping characters
1733 if (Access.empty() == false && Host.find_first_of("/:") != string::npos)
1734 Res << '[' << Host << ']';
1735 else
1736 Res << Host;
1737
1738 if (Port != 0)
1739 Res << ':' << std::to_string(Port);
1740 }
1741
1742 if (Path.empty() == false)
1743 {
1744 if (Path[0] != '/')
1745 Res << "/" << Path;
1746 else
1747 Res << Path;
1748 }
1749
1750 return Res.str();
1751 }
1752 /*}}}*/
1753 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1754 string URI::SiteOnly(const string &URI)
1755 {
1756 ::URI U(URI);
1757 U.User.clear();
1758 U.Password.clear();
1759 U.Path.clear();
1760 return U;
1761 }
1762 /*}}}*/
1763 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1764 string URI::ArchiveOnly(const string &URI)
1765 {
1766 ::URI U(URI);
1767 U.User.clear();
1768 U.Password.clear();
1769 if (U.Path.empty() == false && U.Path[U.Path.length() - 1] == '/')
1770 U.Path.erase(U.Path.length() - 1);
1771 return U;
1772 }
1773 /*}}}*/
1774 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1775 string URI::NoUserPassword(const string &URI)
1776 {
1777 ::URI U(URI);
1778 U.User.clear();
1779 U.Password.clear();
1780 return U;
1781 }
1782 /*}}}*/