]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
d0bc938e44c6adad5b3449e266c24b6d21b888bc
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <config.h>
19
20 #include <apt-pkg/strutl.h>
21 #include <apt-pkg/fileutl.h>
22 #include <apt-pkg/error.h>
23
24 #include <algorithm>
25 #include <iomanip>
26 #include <locale>
27 #include <sstream>
28 #include <string>
29 #include <vector>
30
31 #include <stddef.h>
32 #include <stdlib.h>
33 #include <time.h>
34 #include <ctype.h>
35 #include <string.h>
36 #include <stdio.h>
37 #include <unistd.h>
38 #include <regex.h>
39 #include <errno.h>
40 #include <stdarg.h>
41 #include <iconv.h>
42
43 #include <apti18n.h>
44 /*}}}*/
45 using namespace std;
46
47 // Strip - Remove white space from the front and back of a string /*{{{*/
48 // ---------------------------------------------------------------------
namespace APT {
   namespace String {
/* Strip - return a copy of str without leading and trailing whitespace.
   Whitespace is whatever isspace() reports. An empty or all-whitespace
   input yields an empty string. */
std::string Strip(const std::string &str)
{
   // ensure we have at least one character
   if (str.empty() == true)
      return str;

   char const * const s = str.c_str();
   size_t start = 0;
   // classify through unsigned char: handing a negative char value to
   // isspace() is undefined behaviour
   for (; isspace(static_cast<unsigned char>(s[start])) != 0; ++start)
      ; // find the first not-space

   // string contains only whitespaces
   if (s[start] == '\0')
      return "";

   // s[start] is known to be a non-space, so this backwards scan stops
   // at 'start' at the latest
   size_t end = str.length() - 1;
   for (; isspace(static_cast<unsigned char>(s[end])) != 0; --end)
      ; // find the last not-space

   return str.substr(start, end - start + 1);
}

/* Endswith - true if s finishes with the characters of end. */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   return (s.compare(s.size() - end.size(), end.size(), end) == 0);
}

/* Startswith - true if s begins with the characters of start. */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   return (s.compare(0, start.size(), start) == 0);
}

}
}
89 /*}}}*/
90 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
91 // ---------------------------------------------------------------------
92 /* This is handy to use before display some information for enduser */
93 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
94 {
95 iconv_t cd;
96 const char *inbuf;
97 char *inptr, *outbuf;
98 size_t insize, bufsize;
99 dest->clear();
100
101 cd = iconv_open(codeset, "UTF-8");
102 if (cd == (iconv_t)(-1)) {
103 // Something went wrong
104 if (errno == EINVAL)
105 _error->Error("conversion from 'UTF-8' to '%s' not available",
106 codeset);
107 else
108 perror("iconv_open");
109
110 return false;
111 }
112
113 insize = bufsize = orig.size();
114 inbuf = orig.data();
115 inptr = (char *)inbuf;
116 outbuf = new char[bufsize];
117 size_t lastError = -1;
118
119 while (insize != 0)
120 {
121 char *outptr = outbuf;
122 size_t outsize = bufsize;
123 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
124 dest->append(outbuf, outptr - outbuf);
125 if (err == (size_t)(-1))
126 {
127 switch (errno)
128 {
129 case EILSEQ:
130 insize--;
131 inptr++;
132 // replace a series of unknown multibytes with a single "?"
133 if (lastError != insize) {
134 lastError = insize - 1;
135 dest->append("?");
136 }
137 break;
138 case EINVAL:
139 insize = 0;
140 break;
141 case E2BIG:
142 if (outptr == outbuf)
143 {
144 bufsize *= 2;
145 delete[] outbuf;
146 outbuf = new char[bufsize];
147 }
148 break;
149 }
150 }
151 }
152
153 delete[] outbuf;
154
155 iconv_close(cd);
156
157 return true;
158 }
159 /*}}}*/
160 // strstrip - Remove white space from the front and back of a string /*{{{*/
161 // ---------------------------------------------------------------------
162 /* This is handy to use when parsing a file. It also removes \n's left
163 over from fgets and company */
164 char *_strstrip(char *String)
165 {
166 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
167
168 if (*String == 0)
169 return String;
170 return _strrstrip(String);
171 }
172 /*}}}*/
173 // strrstrip - Remove white space from the back of a string /*{{{*/
174 // ---------------------------------------------------------------------
/* Remove trailing blanks, tabs, newlines and carriage returns in place by
   writing a new terminator. Returns String. */
char *_strrstrip(char *String)
{
   // End is kept one past the last character we keep, so that we never
   // have to form a pointer in front of the buffer
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' || End[-1] == '\n' ||
           End[-1] == '\r'))
      --End;
   *End = 0;
   return String;
}
184 /*}}}*/
185 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
186 // ---------------------------------------------------------------------
187 /* */
/* Replace the tabs in String by runs of spaces, in place.
   String - NUL terminated text stored in a buffer of Len bytes
   Len    - size of that buffer, including the room for the terminator
   If an expansion does not fit into the buffer the string is cut off at
   the tab in question. The width of each run follows the historic
   computation of this function and is kept unchanged. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      size_t const Offset = I - String;
      // fewer than 8 bytes left in the buffer: truncate instead of expanding
      if (Len - Offset < 8)
      {
         *I = 0;
         return String;
      }

      /* Assume the start of the string is 0 and find the next 8 char
         division */
      int Pad;
      if (String == I)
         Pad = 1;
      else
         Pad = 8 - static_cast<int>((String - I) % 8);
      Pad -= 2;
      if (Pad <= 0)
      {
         *I = ' ';
         continue;
      }

      // Bytes following the tab, terminator included
      size_t const Tail = strlen(I);
      // the tab becomes Pad spaces followed by the tail; refuse to write
      // beyond the buffer
      if (Offset + Pad + Tail > Len)
      {
         *I = 0;
         return String;
      }

      memmove(I + Pad,I + 1,Tail);
      memset(I,' ',Pad);
      // park on the last space so that the loop increment continues with
      // the first character of the tail (which may be the terminator)
      I += Pad - 1;
   }
   return String;
}
219 /*}}}*/
220 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
221 // ---------------------------------------------------------------------
222 /* This grabs a single word, converts any % escaped characters to their
223 proper values and advances the pointer. Double quotes are understood
224 and striped out as well. This is for URI/URL parsing. It also can
225 understand [] brackets.*/
/* Grab the next word from String.
   String - in: text to parse; out: advanced past the word and the
            whitespace following it
   Res    - receives the word with %XX escapes decoded and double quotes
            removed; only assigned on success
   A word ends at the first whitespace that is not inside "" or [].
   Returns false if there is no word left or a quote/bracket is not
   closed. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; C++);
   if (*C == 0)
      return false;

   // Jump to the next word
   for (;*C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; C++)
   {
      if (*C == '"')
      {
         C = strchr(C + 1, '"');
         if (C == NULL)
            return false;
      }
      if (*C == '[')
      {
         C = strchr(C + 1, ']');
         if (C == NULL)
            return false;
      }
   }

   // Now de-quote characters. This deliberately starts at String (not at
   // the first non-blank) as it always did. The word is collected in a
   // local string so that neither its length is limited nor Res is
   // touched while String is still being read.
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C; )
   {
      if (*Start == '%' && C - Start > 2 &&
          isxdigit(static_cast<unsigned char>(Start[1])) &&
          isxdigit(static_cast<unsigned char>(Start[2])))
      {
         char const Tmp[3] = {Start[1], Start[2], 0};
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      ++Start;
   }

   // The result used to be handed over as a C string, so a decoded NUL
   // (%00) ended the word; keep it that way
   string::size_type const Nul = Word.find('\0');
   if (Nul != string::npos)
      Word.erase(Nul);
   Res = Word;

   // Skip ending white space
   for (;*C != 0 && isspace(static_cast<unsigned char>(*C)) != 0; C++);
   String = C;
   return true;
}
282 /*}}}*/
283 // ParseCWord - Parses a string like a C "" expression /*{{{*/
284 // ---------------------------------------------------------------------
285 /* This expects a series of space separated strings enclosed in ""'s.
286 It concatenates the ""'s into a single string. */
/* Parse a series of whitespace separated "" strings and join them.
   String - in: text to parse; out: set to the end of the text on success
   Res    - receives the contents of the quoted strings, each run of
            whitespace between them becoming a single space; only
            assigned on success
   Returns false for an empty/blank input, an unterminated quote or any
   non-whitespace character outside of quotes. */
bool ParseCWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; C++);
   if (*C == 0)
      return false;

   // Collected in a local string: no fixed size limit, and Res stays
   // untouched if parsing fails half way through
   string Joined;
   Joined.reserve(strlen(String));

   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++)
            Joined += *C;

         if (*C == 0)
            return false;

         continue;
      }

      bool const Space = isspace(static_cast<unsigned char>(*C)) != 0;
      // collapse a run of whitespace into the one space already emitted
      if (C != String && Space == true &&
          isspace(static_cast<unsigned char>(C[-1])) != 0)
         continue;
      if (Space == false)
         return false;
      Joined += ' ';
   }

   Res = Joined;
   String = C;
   return true;
}
324 /*}}}*/
325 // QuoteString - Convert a string into quoted form /*{{{*/
326 // ---------------------------------------------------------------------
327 /* */
/* Percent-encode Str.
   Str - text to encode
   Bad - NUL terminated set of additional characters to encode
   Every byte that is listed in Bad, is '%', is not printable or lies
   outside of 0x21-0x7E is replaced by %xx (two lower case hex digits);
   all other bytes are copied verbatim. */
string QuoteString(const string &Str, const char *Bad)
{
   static char const Hex[] = "0123456789abcdef";

   string Res;
   Res.reserve(Str.size());
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // work on the unsigned value: isprint() is undefined for negative
      // chars and the range checks must not depend on char signedness
      unsigned char const C = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 || isprint(C) == 0 ||
          C == 0x25 ||              // percent '%' char
          C <= 0x20 || C >= 0x7F)   // control chars
      {
         Res += '%';
         Res += Hex[C >> 4];
         Res += Hex[C & 0x0F];
      }
      else
         Res += *I;
   }
   return Res;
}
344 /*}}}*/
345 // DeQuoteString - Convert a string from quoted form /*{{{*/
346 // ---------------------------------------------------------------------
347 /* This undoes QuoteString */
/* Decode the %XX escapes in [begin,end). A '%' that is not followed by
   two hex digits inside the range is copied unchanged. */
string DeQuoteString(string::const_iterator const &begin,
                     string::const_iterator const &end)
{
   string Res;
   for (string::const_iterator I = begin; I != end; ++I)
   {
      // compare distances rather than forming I + 2, which may lie beyond
      // the end of the range
      if (*I == '%' && end - I > 2 &&
          isxdigit(static_cast<unsigned char>(I[1])) &&
          isxdigit(static_cast<unsigned char>(I[2])))
      {
         char const Tmp[3] = {I[1], I[2], 0};
         Res += (char)strtol(Tmp,0,16);
         // the loop increment steps over the second hex digit
         I += 2;
         continue;
      }
      Res += *I;
   }
   return Res;
}
/* Decode the %XX escapes in Str; this undoes QuoteString. */
string DeQuoteString(const string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
374
375 /*}}}*/
376 // SizeToStr - Convert a long into a human readable size /*{{{*/
377 // ---------------------------------------------------------------------
378 /* A max of 4 digits are shown before conversion to the next highest unit.
379 The max length of the string will be 5 chars unless the size is > 10
380 YottaBytes (E24) */
381 string SizeToStr(double Size)
382 {
383 double ASize;
384 if (Size >= 0)
385 ASize = Size;
386 else
387 ASize = -1*Size;
388
389 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
390 ExaBytes, ZettaBytes, YottaBytes */
391 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
392 int I = 0;
393 while (I <= 8)
394 {
395 if (ASize < 100 && I != 0)
396 {
397 std::string S;
398 strprintf(S, "%'.1f %c", ASize, Ext[I]);
399 return S;
400 }
401
402 if (ASize < 10000)
403 {
404 std::string S;
405 strprintf(S, "%'.0f %c", ASize, Ext[I]);
406 return S;
407 }
408 ASize /= 1000.0;
409 I++;
410 }
411 return "";
412 }
413 /*}}}*/
414 // TimeToStr - Convert the time into a string /*{{{*/
415 // ---------------------------------------------------------------------
416 /* Converts a number of seconds to a hms format */
417 string TimeToStr(unsigned long Sec)
418 {
419 std::string S;
420 if (Sec > 60*60*24)
421 {
422 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
423 strprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
424 }
425 else if (Sec > 60*60)
426 {
427 //TRANSLATOR: h means hours, min means minutes, s means seconds
428 strprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
429 }
430 else if (Sec > 60)
431 {
432 //TRANSLATOR: min means minutes, s means seconds
433 strprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
434 }
435 else
436 {
437 //TRANSLATOR: s means seconds
438 strprintf(S,_("%lis"),Sec);
439 }
440 return S;
441 }
442 /*}}}*/
443 // SubstVar - Substitute a string for another string /*{{{*/
444 // ---------------------------------------------------------------------
445 /* This replaces all occurrences of Subst with Contents in Str. */
/* Return Str with every non-overlapping occurrence of Subst replaced by
   Contents, scanning left to right. An empty Subst or a Str without any
   occurrence gives back Str unchanged. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   if (Subst.empty() == true)
      return Str;

   string::size_type Hit = Str.find(Subst);
   if (Hit == string::npos)
      return Str;

   string Out;
   string::size_type Cursor = 0;
   do
   {
      // text between the previous match and this one, then the replacement
      Out.append(Str, Cursor, Hit - Cursor);
      Out.append(Contents);
      Cursor = Hit + Subst.length();
      Hit = Str.find(Subst, Cursor);
   } while (Hit != string::npos);

   // whatever follows the last match (possibly nothing)
   Out.append(Str, Cursor, string::npos);
   return Out;
}
474 string SubstVar(string Str,const struct SubstVar *Vars)
475 {
476 for (; Vars->Subst != 0; Vars++)
477 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
478 return Str;
479 }
480 /*}}}*/
481 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
482 // ---------------------------------------------------------------------
483 /* Returns a string with the supplied separator depth + 1 times in it */
/* Build a string consisting of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Indent;
   unsigned long Remaining = Depth + 1;
   while (Remaining != 0)
   {
      Indent += Separator;
      --Remaining;
   }
   return Indent;
}
491 /*}}}*/
492 // URItoFileName - Convert the uri into a unique file name /*{{{*/
493 // ---------------------------------------------------------------------
494 /* This converts a URI into a safe filename. It quotes all unsafe characters
495 and converts / to _ and removes the scheme identifier. The resulting
496 file name should be unique and never occur again for a different file */
497 string URItoFileName(const string &URI)
498 {
499 // Nuke 'sensitive' items
500 ::URI U(URI);
501 U.User.clear();
502 U.Password.clear();
503 U.Access.clear();
504
505 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
506 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
507 replace(NewURI.begin(),NewURI.end(),'/','_');
508 return NewURI;
509 }
510 /*}}}*/
511 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
512 // ---------------------------------------------------------------------
513 /* This routine performs a base64 transformation on a string. It was ripped
514 from wget and then patched and bug fixed.
515
516 This spec can be found in rfc2045 */
/* Encode S as base64 (RFC 2045 alphabet) including the '=' padding. No
   line breaks are inserted. */
string Base64Encode(const string &S)
{
   // Conversion table.
   static char tbl[64] = {'A','B','C','D','E','F','G','H',
                          'I','J','K','L','M','N','O','P',
                          'Q','R','S','T','U','V','W','X',
                          'Y','Z','a','b','c','d','e','f',
                          'g','h','i','j','k','l','m','n',
                          'o','p','q','r','s','t','u','v',
                          'w','x','y','z','0','1','2','3',
                          '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   string::size_type const Len = S.length();
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      // The bytes have to be unsigned: with a signed char every byte
      // >= 0x80 would shift to a negative table index
      unsigned char const B0 = S[Pos];
      unsigned char const B1 = (Pos + 1 < Len) ? S[Pos + 1] : 0;
      unsigned char const B2 = (Pos + 2 < Len) ? S[Pos + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) + (B1 >> 4)];

      if (Pos + 1 >= Len)
         break;

      Final += tbl[((B1 & 0xf) << 2) + (B2 >> 6)];

      if (Pos + 2 >= Len)
         break;

      Final += tbl[B2 & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
567 /*}}}*/
568 // stringcmp - Arbitrary string compare /*{{{*/
569 // ---------------------------------------------------------------------
570 /* This safely compares two non-null terminated strings of arbitrary
571 length */
/* Shared worker of the stringcmp overloads: compares [A,AEnd) with
   [B,BEnd) element by element.
   Returns 0 if both ranges are equal, 1 if the first range is a proper
   prefix of the second, -1 if the second is a proper prefix of the first,
   and otherwise -1/1 depending on whether the first differing element of
   A is smaller than / not smaller than the one of B. */
template <typename IterA, typename IterB>
static int CompareRanges(IterA A, IterA AEnd, IterB B, IterB BEnd)
{
   // advance over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone && BDone)
      return 0;
   if (ADone)
      return 1;
   if (BDone)
      return -1;
   return (*A < *B) ? -1 : 1;
}

/* Compare two character ranges which need not be NUL terminated. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   return CompareRanges(A,AEnd,B,BEnd);
}

#if __GNUC__ >= 3
/* Same comparison for a string iterator range against a pointer range. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   return CompareRanges(A,AEnd,B,BEnd);
}
/* Same comparison for two string iterator ranges. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   return CompareRanges(A,AEnd,B,BEnd);
}
#endif
625 /*}}}*/
626 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
627 // ---------------------------------------------------------------------
628 /* */
629 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
630 {
631 for (; A != AEnd && B != BEnd; A++, B++)
632 if (tolower_ascii(*A) != tolower_ascii(*B))
633 break;
634
635 if (A == AEnd && B == BEnd)
636 return 0;
637 if (A == AEnd)
638 return 1;
639 if (B == BEnd)
640 return -1;
641 if (tolower_ascii(*A) < tolower_ascii(*B))
642 return -1;
643 return 1;
644 }
645 #if __GNUC__ >= 3
646 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
647 const char *B,const char *BEnd)
648 {
649 for (; A != AEnd && B != BEnd; A++, B++)
650 if (tolower_ascii(*A) != tolower_ascii(*B))
651 break;
652
653 if (A == AEnd && B == BEnd)
654 return 0;
655 if (A == AEnd)
656 return 1;
657 if (B == BEnd)
658 return -1;
659 if (tolower_ascii(*A) < tolower_ascii(*B))
660 return -1;
661 return 1;
662 }
663 int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
664 string::const_iterator B,string::const_iterator BEnd)
665 {
666 for (; A != AEnd && B != BEnd; A++, B++)
667 if (tolower_ascii(*A) != tolower_ascii(*B))
668 break;
669
670 if (A == AEnd && B == BEnd)
671 return 0;
672 if (A == AEnd)
673 return 1;
674 if (B == BEnd)
675 return -1;
676 if (tolower_ascii(*A) < tolower_ascii(*B))
677 return -1;
678 return 1;
679 }
680 #endif
681 /*}}}*/
682 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
683 // ---------------------------------------------------------------------
684 /* The format is like those used in package files and the method
685 communication system */
686 string LookupTag(const string &Message,const char *Tag,const char *Default)
687 {
688 // Look for a matching tag.
689 int Length = strlen(Tag);
690 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
691 {
692 // Found the tag
693 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
694 {
695 // Find the end of line and strip the leading/trailing spaces
696 string::const_iterator J;
697 I += Length + 1;
698 for (; isspace_ascii(*I) != 0 && I < Message.end(); ++I);
699 for (J = I; *J != '\n' && J < Message.end(); ++J);
700 for (; J > I && isspace_ascii(J[-1]) != 0; --J);
701
702 return string(I,J);
703 }
704
705 for (; *I != '\n' && I < Message.end(); ++I);
706 }
707
708 // Failed to find a match
709 if (Default == 0)
710 return string();
711 return Default;
712 }
713 /*}}}*/
714 // StringToBool - Converts a string into a boolean /*{{{*/
715 // ---------------------------------------------------------------------
716 /* This inspects the string to see if it is true or if it is false and
717 then returns the result. Several variants on true/false are checked. */
/* Interpret Text as a boolean.
   Returns 0 or 1 if Text is - in its entirety - a number with that value
   or one of the known words (compared case insensitively), and Default
   for everything else.
   Note that strtol consumes nothing of an empty string, which therefore
   counts as the number 0. */
int StringToBool(const string &Text,int Default)
{
   char const * const Str = Text.c_str();

   char *ParseEnd;
   // keep the full long: narrowing to int would make values like 2^32
   // look like 0
   long const Num = strtol(Str,&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   if (ParseEnd == Str + Text.size() && Num >= 0 && Num <= 1)
      return static_cast<int>(Num);

   // Check for negatives
   static char const * const Negatives[] = {"no","false","without","off","disable"};
   for (char const * const Word : Negatives)
      if (strcasecmp(Str,Word) == 0)
         return 0;

   // Check for positives
   static char const * const Positives[] = {"yes","true","with","on","enable"};
   for (char const * const Word : Positives)
      if (strcasecmp(Str,Word) == 0)
         return 1;

   return Default;
}
746 /*}}}*/
747 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
748 // ---------------------------------------------------------------------
749 /* This converts a time_t into a string time representation that is
750 year 2000 compliant and timezone neutral */
751 string TimeRFC1123(time_t Date)
752 {
753 struct tm Conv;
754 if (gmtime_r(&Date, &Conv) == NULL)
755 return "";
756
757 auto const posix = std::locale("C.UTF-8");
758 std::ostringstream datestr;
759 datestr.imbue(posix);
760 APT::StringView const fmt("%a, %d %b %Y %H:%M:%S GMT");
761 std::use_facet<std::time_put<char>>(posix).put(
762 std::ostreambuf_iterator<char>(datestr),
763 datestr, ' ', &Conv, fmt.data(), fmt.data() + fmt.size());
764 return datestr.str();
765 }
766 /*}}}*/
767 // ReadMessages - Read messages from the FD /*{{{*/
768 // ---------------------------------------------------------------------
769 /* This pulls full messages from the input FD into the message buffer.
770 It assumes that messages will not pause during transit so no
771 fancy buffering is used.
772
773 In particular: this reads blocks from the input until it believes
774 that it's run out of input text. Each block is terminated by a
775 double newline ('\n' followed by '\n').
776 */
/* Read from Fd and append every complete message to List.
   Fd   - file descriptor to read from
   List - receives the messages, each without its trailing "\r"/"\n"
          characters
   Returns true once the data read so far ended exactly on a message
   boundary, or if read() reports EAGAIN/EWOULDBLOCK. Returns false if
   read() returns 0, fails with any other error (EINTR is retried) or if
   WaitFd() fails while waiting for the rest of a message. */
bool ReadMessages(int Fd, vector<string> &List)
{
   char Buffer[64000];
   // Represents any left-over from the previous iteration of the
   // parse loop. (i.e., if a message is split across the end
   // of the buffer, it goes here)
   string PartialMessage;

   do {
      int const Res = read(Fd, Buffer, sizeof(Buffer));
      // interrupted by a signal: simply try again
      if (Res < 0 && errno == EINTR)
         continue;

      // process we read from has died
      if (Res == 0)
         return false;

      // No data
#if EAGAIN != EWOULDBLOCK
      if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
#else
      if (Res < 0 && errno == EAGAIN)
#endif
         return true;
      if (Res < 0)
         return false;

      // extract the message(s) from the buffer
      // Start is the first byte not yet stored in a message or in
      // PartialMessage, End is one past the last byte read
      char const *Start = Buffer;
      char const * const End = Buffer + Res;

      // NL ends up one past the first newline of this chunk
      char const * NL = (char const *) memchr(Start, '\n', End - Start);
      if (NL == NULL)
      {
         // end of buffer: store what we have so far and read new data in
         PartialMessage.append(Start, End - Start);
         Start = End;
      }
      else
         ++NL;

      if (PartialMessage.empty() == false && Start < End)
      {
         // if we start with a new line, see if the partial message we have ended with one
         // so that we properly detect records ending between two read() runs
         // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
         // the case \r|\n\r\n is handled by the usual double-newline handling
         if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
         {
            if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
            {
               // drop the line ending(s) and hand the finished message over
               PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
               List.push_back(PartialMessage);
               PartialMessage.clear();
               // skip all further line endings directly following
               while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
               Start = NL;
            }
         }
      }

      // walk the chunk line by line; NL is the start of the current line
      // and NL2 the start of the one following it
      while (Start < End) {
         char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
         if (NL2 == NULL)
         {
            // end of buffer: store what we have so far and read new data in
            PartialMessage.append(Start, End - Start);
            break;
         }
         ++NL2;

         // did we find a double newline?
         if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
         {
            PartialMessage.append(Start, NL2 - Start);
            PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
            List.push_back(PartialMessage);
            PartialMessage.clear();
            // skip all further line endings directly following
            while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
            Start = NL2;
         }
         NL = NL2;
      }

      // we have read at least one complete message and nothing left
      if (PartialMessage.empty() == true)
         return true;

      // an unfinished message is pending: wait for more data on Fd
      if (WaitFd(Fd) == false)
         return false;
   } while (true);
}
868 /*}}}*/
869 // MonthConv - Converts a month string into a number /*{{{*/
870 // ---------------------------------------------------------------------
871 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
872 Made it a bit more robust with a few tolower_ascii though. */
873 static int MonthConv(char const * const Month)
874 {
875 switch (tolower_ascii(*Month))
876 {
877 case 'a':
878 return tolower_ascii(Month[1]) == 'p'?3:7;
879 case 'd':
880 return 11;
881 case 'f':
882 return 1;
883 case 'j':
884 if (tolower_ascii(Month[1]) == 'a')
885 return 0;
886 return tolower_ascii(Month[2]) == 'n'?5:6;
887 case 'm':
888 return tolower_ascii(Month[2]) == 'r'?2:4;
889 case 'n':
890 return 10;
891 case 'o':
892 return 9;
893 case 's':
894 return 8;
895
896 // Pretend it is January..
897 default:
898 return 0;
899 }
900 }
901 /*}}}*/
902 // timegm - Internal timegm if the gnu version is not available /*{{{*/
903 // ---------------------------------------------------------------------
904 /* Converts struct tm to time_t, assuming the data in tm is UTC rather
905 than local timezone (mktime assumes the latter).
906
907 This function is a nonstandard GNU extension that is also present on
908 the BSDs and maybe other systems. For others we follow the advice of
909 the manpage of timegm and use his portable replacement. */
910 #ifndef HAVE_TIMEGM
911 static time_t timegm(struct tm *t)
912 {
913 char *tz = getenv("TZ");
914 setenv("TZ", "", 1);
915 tzset();
916 time_t ret = mktime(t);
917 if (tz)
918 setenv("TZ", tz, 1);
919 else
920 unsetenv("TZ");
921 tzset();
922 return ret;
923 }
924 #endif
925 /*}}}*/
926 // RFC1123StrToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
927 // ---------------------------------------------------------------------
928 /* tries to parses a full date as specified in RFC7231 §7.1.1.1
929 with one exception: HTTP/1.1 valid dates need to have GMT as timezone.
930 As we encounter dates from UTC or with a numeric timezone in other places,
931 we allow them here to be able to reuse the method. Either way, a date
932 must be in UTC or parsing will fail. Previous implementations of this
933 method used to ignore the timezone and assume always UTC. */
934 bool RFC1123StrToTime(const char* const str,time_t &time)
935 {
936 unsigned short day = 0;
937 signed int year = 0; // yes, Y23K problem – we gonna worry then…
938 std::string weekday, month, datespec, timespec, zone;
939 std::istringstream ss(str);
940 auto const &posix = std::locale("C.UTF-8");
941 ss.imbue(posix);
942 ss >> weekday;
943 // we only superficially check weekday, mostly to avoid accepting localized
944 // weekdays here and take only its length to decide which datetime format we
945 // encounter here. The date isn't stored.
946 std::transform(weekday.begin(), weekday.end(), weekday.begin(), ::tolower);
947 std::array<char const * const, 7> c_weekdays = {{ "sun", "mon", "tue", "wed", "thu", "fri", "sat" }};
948 if (std::find(c_weekdays.begin(), c_weekdays.end(), weekday.substr(0,3)) == c_weekdays.end())
949 return false;
950
951 switch (weekday.length())
952 {
953 case 4:
954 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
955 if (weekday[3] != ',')
956 return false;
957 ss >> day >> month >> year >> timespec >> zone;
958 break;
959 case 3:
960 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
961 ss >> month >> day >> timespec >> year;
962 zone = "UTC";
963 break;
964 case 0:
965 case 1:
966 case 2:
967 return false;
968 default:
969 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
970 if (weekday[weekday.length() - 1] != ',')
971 return false;
972 ss >> datespec >> timespec >> zone;
973 auto const expldate = VectorizeString(datespec, '-');
974 if (expldate.size() != 3)
975 return false;
976 try {
977 size_t pos;
978 day = std::stoi(expldate[0], &pos);
979 if (pos != expldate[0].length())
980 return false;
981 year = 1900 + std::stoi(expldate[2], &pos);
982 if (pos != expldate[2].length())
983 return false;
984 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(expldate[1].c_str()) + 1, day);
985 } catch (...) {
986 return false;
987 }
988 break;
989 }
990
991 if (ss.fail() || ss.bad() || !ss.eof())
992 return false;
993
994 if (zone != "GMT" && zone != "UTC" && zone != "Z") // RFC 822
995 {
996 // numeric timezones as a should of RFC 1123 and generally preferred
997 try {
998 size_t pos;
999 auto const z = std::stoi(zone, &pos);
1000 if (z != 0 || pos != zone.length())
1001 return false;
1002 } catch (...) {
1003 return false;
1004 }
1005 }
1006
1007 if (datespec.empty())
1008 {
1009 if (month.empty())
1010 return false;
1011 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(month.c_str()) + 1, day);
1012 }
1013
1014 std::string const datetime = datespec + ' ' + timespec;
1015 struct tm Tm;
1016 if (strptime(datetime.c_str(), "%Y-%m-%d %H:%M:%S", &Tm) == nullptr)
1017 return false;
1018 time = timegm(&Tm);
1019 return true;
1020 }
1021 /*}}}*/
1022 // FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
1023 // ---------------------------------------------------------------------
1024 /* */
/* Parse a timestamp consisting of year, month, day, hour, minute and
   second (e.g. "YYYYMMDDhhmmss") and interpret it as UTC.
   str  - NUL terminated timestamp
   time - receives the parsed time; only written on success
   Returns false if strptime cannot parse the string. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   // strptime only stores the fields it parses, so start from a clean
   // struct instead of handing indeterminate members to timegm
   struct tm Tm;
   memset(&Tm, 0, sizeof(Tm));
   // MDTM includes no whitespaces but recommend and ignored by strptime
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
      return false;

   time = timegm(&Tm);
   return true;
}
1035 /*}}}*/
1036 // StrToTime - Converts a string into a time_t /*{{{*/
1037 // ---------------------------------------------------------------------
1038 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
1039 and the C library asctime format. It requires the GNU library function
1040 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
1041 reason the C library does not provide any such function :< This also
1042 handles the weird, but unambiguous FTP time format*/
1043 bool StrToTime(const string &Val,time_t &Result)
1044 {
1045 struct tm Tm;
1046 char Month[10];
1047
1048 // Skip the day of the week
1049 const char *I = strchr(Val.c_str(), ' ');
1050
1051 // Handle RFC 1123 time
1052 Month[0] = 0;
1053 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
1054 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1055 {
1056 // Handle RFC 1036 time
1057 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
1058 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
1059 Tm.tm_year += 1900;
1060 else
1061 {
1062 // asctime format
1063 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
1064 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
1065 {
1066 // 'ftp' time
1067 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
1068 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1069 return false;
1070 Tm.tm_mon--;
1071 }
1072 }
1073 }
1074
1075 Tm.tm_isdst = 0;
1076 if (Month[0] != 0)
1077 Tm.tm_mon = MonthConv(Month);
1078 else
1079 Tm.tm_mon = 0; // we don't have a month, so pick something
1080 Tm.tm_year -= 1900;
1081
1082 // Convert to local time and then to GMT
1083 Result = timegm(&Tm);
1084 return true;
1085 }
1086 /*}}}*/
1087 // StrToNum - Convert a fixed length string to a number /*{{{*/
1088 // ---------------------------------------------------------------------
1089 /* This is used in decoding the crazy fixed length string headers in
1090 tar and ar files. */
/* Convert the first Len bytes of Str, which need not be NUL terminated,
   to a number in the given Base.
   Res receives the value; a field made up of blanks only counts as 0.
   Returns false if Len is 30 or more, or if strtoul cannot convert
   anything. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // work on a terminated copy of the fixed length field
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == '\0')
      return true;

   char *Stop = nullptr;
   Res = strtoul(Field,&Stop,Base);
   // strtoul leaves Stop at the start if it could not convert anything
   return Stop != Field;
}
1113 /*}}}*/
1114 // StrToNum - Convert a fixed length string to a number /*{{{*/
1115 // ---------------------------------------------------------------------
1116 /* This is used in decoding the crazy fixed length string headers in
1117 tar and ar files. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   // Work on a terminated copy, the input only has to provide Len bytes
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // A field made up of nothing but blanks (or nothing at all) is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == '\0')
      return true;

   char *Stop = NULL;
   Res = strtoull(Field,&Stop,Base);

   // It is only a failure if not a single character could be converted
   return Stop != Field;
}
1140 /*}}}*/
1141
1142 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1143 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   /* The top bit of the first byte marks a base-256 field; without it (or
      without any byte at all) this is not such a field and false is
      returned with Res untouched. Otherwise the remaining seven bits of
      the first byte and all following bytes are read most significant
      byte first. */
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // char may be signed: go via unsigned char so that bytes >= 0x80 add
   // their real value instead of a sign extended one
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1158 /*}}}*/
1159 // Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1160 // ---------------------------------------------------------------------
/* This is used in decoding the base-256 encoded fixed length fields in
   tar files */
1163 bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1164 {
1165 unsigned long long Num;
1166 bool rc;
1167
1168 rc = Base256ToNum(Str, Num, Len);
1169 Res = Num;
1170 if (Res != Num)
1171 return false;
1172
1173 return rc;
1174 }
1175 /*}}}*/
1176 // HexDigit - Convert a hex character into an integer /*{{{*/
1177 // ---------------------------------------------------------------------
1178 /* Helper for Hex2Num */
static int HexDigit(int c)
{
   // Returns the value (0-15) of the hex digit c, or -1 if c is none
   if ('0' <= c && c <= '9')
      return c - '0';

   // Setting bit 0x20 maps 'A'-'F' onto 'a'-'f'; nothing else ends up in
   // that range which is not already in it
   int const folded = c | 0x20;
   if ('a' <= folded && folded <= 'f')
      return folded - 'a' + 10;

   return -1;
}
1189 /*}}}*/
1190 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
1191 // ---------------------------------------------------------------------
1192 /* The length of the buffer must be exactly 1/2 the length of the string. */
1193 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
1194 {
1195 return Hex2Num(APT::StringView(Str), Num, Length);
1196 }
1197
1198 bool Hex2Num(const APT::StringView Str,unsigned char *Num,unsigned int Length)
1199 {
1200 if (Str.length() != Length*2)
1201 return false;
1202
1203 // Convert each digit. We store it in the same order as the string
1204 int J = 0;
1205 for (auto I = Str.begin(); I != Str.end();J++, I += 2)
1206 {
1207 int first_half = HexDigit(I[0]);
1208 int second_half;
1209 if (first_half < 0)
1210 return false;
1211
1212 second_half = HexDigit(I[1]);
1213 if (second_half < 0)
1214 return false;
1215 Num[J] = first_half << 4;
1216 Num[J] += second_half;
1217 }
1218
1219 return true;
1220 }
1221 /*}}}*/
1222 // TokSplitString - Split a string up by a given token /*{{{*/
1223 // ---------------------------------------------------------------------
1224 /* This is intended to be a faster splitter, it does not use dynamic
1225 memories. Input is changed to insert nulls at each token location. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   /* Splits Input in place at every Tok: the pieces are terminated by
      writing nulls into Input and pointers to them are stored in List,
      followed by a terminating null pointer. Whitespace around the pieces
      is dropped. Returns false if List (which has room for ListMax
      entries including the terminator) is too small; List is still
      terminated in that case, unless ListMax is 0. */

   // Without room for even the terminating entry nothing can be stored
   if (ListMax == 0)
      return false;

   // Strip any leading spaces. The casts keep characters with the high
   // bit set from reaching isspace as negative values.
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   for (; *Start != 0 && isspace(static_cast<unsigned char>(*Start)) != 0; Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok ||
                             isspace(static_cast<unsigned char>(End[-1])) != 0); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         // The last slot is needed for the terminator
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over the token, spaces and the nulls written above
      for (; Pos != Stop && (*Pos == Tok ||
                             isspace(static_cast<unsigned char>(*Pos)) != 0 ||
                             *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1261 /*}}}*/
1262 // VectorizeString - Split a string up into a vector of strings /*{{{*/
1263 // ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above. This one is a bit slower,
   but the advantage is that we get an iterable vector */
vector<string> VectorizeString(string const &haystack, char const &split)
{
   /* Returns the pieces of haystack found between occurrences of split.
      An empty haystack yields no pieces at all, empty pieces in front of
      or between separators are kept, but a separator at the very end of
      haystack does not add an empty last piece. */
   vector<string> exploded;

   // Offsets instead of iterators: no position beyond the end of the
   // string is ever formed this way
   string::size_type start = 0;
   while (start < haystack.size())
   {
      string::size_type const end = haystack.find(split, start);
      if (end == string::npos)
      {
         // no further separator, the remainder is the last piece
         exploded.push_back(haystack.substr(start));
         break;
      }
      exploded.push_back(haystack.substr(start, end - start));
      start = end + 1;
   }
   return exploded;
}
1281 /*}}}*/
1282 // StringSplit - split a string into a string vector by token /*{{{*/
1283 // ---------------------------------------------------------------------
1284 /* See header for details.
1285 */
vector<string> StringSplit(std::string const &s, std::string const &sep,
                           unsigned int maxsplit)
{
   /* Cuts s at every occurrence of sep and returns the pieces. At most
      maxsplit pieces are produced, the last one then being the unsplit
      remainder of s. An empty sep yields an empty vector. */
   vector<string> pieces;

   // no separator given, this is bogus
   if (sep.empty() == true)
      return pieces;

   size_t from = 0;
   for (;;)
   {
      size_t const hit = s.find(sep, from);

      // the rest of the string goes in as a whole if either the next piece
      // is the last one allowed or there is no separator left
      if (pieces.size() + 1 >= maxsplit || hit == string::npos)
      {
         pieces.push_back(s.substr(from));
         break;
      }

      pieces.push_back(s.substr(from, hit - from));
      from = hit + sep.size();
   }
   return pieces;
}
1312 /*}}}*/
1313 // RegexChoice - Simple regex list/list matcher /*{{{*/
1314 // ---------------------------------------------------------------------
1315 /* */
1316 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1317 const char **ListEnd)
1318 {
1319 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1320 R->Hit = false;
1321
1322 unsigned long Hits = 0;
1323 for (; ListBegin < ListEnd; ++ListBegin)
1324 {
1325 // Check if the name is a regex
1326 const char *I;
1327 bool Regex = true;
1328 for (I = *ListBegin; *I != 0; I++)
1329 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1330 break;
1331 if (*I == 0)
1332 Regex = false;
1333
1334 // Compile the regex pattern
1335 regex_t Pattern;
1336 if (Regex == true)
1337 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1338 REG_NOSUB) != 0)
1339 Regex = false;
1340
1341 // Search the list
1342 bool Done = false;
1343 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1344 {
1345 if (R->Str[0] == 0)
1346 continue;
1347
1348 if (strcasecmp(R->Str,*ListBegin) != 0)
1349 {
1350 if (Regex == false)
1351 continue;
1352 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1353 continue;
1354 }
1355 Done = true;
1356
1357 if (R->Hit == false)
1358 Hits++;
1359
1360 R->Hit = true;
1361 }
1362
1363 if (Regex == true)
1364 regfree(&Pattern);
1365
1366 if (Done == false)
1367 _error->Warning(_("Selection %s not found"),*ListBegin);
1368 }
1369
1370 return Hits;
1371 }
1372 /*}}}*/
1373 // {str,io}printf - C format string outputter to C++ strings/iostreams /*{{{*/
1374 // ---------------------------------------------------------------------
1375 /* This is used to make the internationalization strings easier to translate
1376 and to allow reordering of parameters */
static bool iovprintf(ostream &out, const char *format,
                      va_list &args, ssize_t &size) {
   /* One formatting attempt with a buffer of size bytes. If the result
      fits it is written to out and true is returned. Otherwise nothing is
      written, size is updated to what the next attempt should use and
      false is returned; the caller has to restart the va_list before
      trying again. */

   // The vector releases the buffer on every path and reports an
   // allocation failure instead of handing a null pointer to vsnprintf
   std::vector<char> buffer(size);
   ssize_t const n = vsnprintf(buffer.data(), buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << buffer.data();
      return true;
   }

   if (n > -1)
      size = n + 1;     // the exact size needed is known
   else
      size *= 2;        // no size was reported, just try a bigger buffer
   return false;
}
1394 void ioprintf(ostream &out,const char *format,...)
1395 {
1396 va_list args;
1397 ssize_t size = 400;
1398 while (true) {
1399 bool ret;
1400 va_start(args,format);
1401 ret = iovprintf(out, format, args, size);
1402 va_end(args);
1403 if (ret == true)
1404 return;
1405 }
1406 }
1407 void strprintf(string &out,const char *format,...)
1408 {
1409 va_list args;
1410 ssize_t size = 400;
1411 std::ostringstream outstr;
1412 while (true) {
1413 bool ret;
1414 va_start(args,format);
1415 ret = iovprintf(outstr, format, args, size);
1416 va_end(args);
1417 if (ret == true)
1418 break;
1419 }
1420 out = outstr.str();
1421 }
1422 /*}}}*/
1423 // safe_snprintf - Safer snprintf /*{{{*/
1424 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   /* Formats into [Buffer,End) and returns the position of the written
      terminator, so that the next call can continue from there. End is
      returned if there is no room, if the output had to be cut short or
      if formatting failed. */
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // Did is the length the complete output would have had. Lengths are
   // compared here, not pointers, so that no pointer beyond End is formed.
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1444 /*}}}*/
1445 // StripEpoch - Remove the version "epoch" from a version string /*{{{*/
1446 // ---------------------------------------------------------------------
string StripEpoch(const string &VerStr)
{
   // Everything up to and including the first colon is dropped; a version
   // without a colon is returned unchanged
   string::size_type const colon = VerStr.find(':');
   return (colon != string::npos) ? VerStr.substr(colon + 1) : VerStr;
}
1454 /*}}}*/
1455
1456 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1457 // ---------------------------------------------------------------------
/* This little function is the most called method we have and therefore
   tries to do the absolute minimum - it is notably faster than the
   standard tolower/toupper and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
1462 #undef tolower_ascii
1463 int tolower_ascii(int const c) APT_CONST APT_COLD;
1464 int tolower_ascii(int const c)
1465 {
1466 return tolower_ascii_inline(c);
1467 }
1468 /*}}}*/
1469
1470 // isspace_ascii - isspace() function that ignores the locale /*{{{*/
1471 // ---------------------------------------------------------------------
/* This little function is one of the most called methods we have and
   therefore tries to do the absolute minimum - it is notably faster than
   the standard isspace() and as a bonus avoids problems with different
   locales - we only operate on ascii chars anyway. */
1476 #undef isspace_ascii
1477 int isspace_ascii(int const c) APT_CONST APT_COLD;
1478 int isspace_ascii(int const c)
1479 {
1480 return isspace_ascii_inline(c);
1481 }
1482 /*}}}*/
1483
1484 // CheckDomainList - See if Host is in a , separate list /*{{{*/
1485 // ---------------------------------------------------------------------
/* The domain list is a comma-separated list of domains that are suffix
   matched against the argument */
1488 bool CheckDomainList(const string &Host,const string &List)
1489 {
1490 string::const_iterator Start = List.begin();
1491 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
1492 {
1493 if (Cur < List.end() && *Cur != ',')
1494 continue;
1495
1496 // Match the end of the string..
1497 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1498 Cur - Start != 0 &&
1499 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1500 return true;
1501
1502 Start = Cur + 1;
1503 }
1504 return false;
1505 }
1506 /*}}}*/
1507 // strv_length - Return the length of a NULL-terminated string array /*{{{*/
1508 // ---------------------------------------------------------------------
1509 /* */
size_t strv_length(const char **str_array)
{
   // Walk up to the terminating NULL entry, which is not counted
   const char * const *cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return static_cast<size_t>(cursor - str_array);
}
1518 /*}}}*/
1519 // DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/
1520 // ---------------------------------------------------------------------
1521 /* */
string DeEscapeString(const string &input)
{
   /* Returns input with \\ turned into a single backslash and \0XX (two
      octal digits) and \xXX (two hex digits) turned into the character of
      that value. The backslash and the character after it are dropped for
      any other escape, as is a lone backslash at the very end. An escape
      which is cut short by the end of the string loses its \0 or \x, what
      is left of the digits is copied as is. */
   char tmp[3];
   string output;
   for (string::const_iterator it = input.begin(); it != input.end(); ++it)
   {
      // just copy non-escape chars
      if (*it != '\\')
      {
         output += *it;
         continue;
      }

      // ensure we have a char to read after the backslash
      if (input.end() - it < 2)
         continue;

      // deal with double escape
      if (it[1] == '\\')
      {
         output += *it;
         // skip the second backslash as well
         ++it;
         continue;
      }

      // read the character selecting the kind of escape
      ++it;
      switch (*it)
      {
         case '0':
         case 'x':
            // Both digits have to be inside of the string. Distances are
            // compared so that no iterator beyond the end is formed, and
            // the iterator is never moved onto end() itself, which the
            // loop would then step over.
            if (input.end() - it > 2) {
               tmp[0] = it[1];
               tmp[1] = it[2];
               tmp[2] = 0;
               output += static_cast<char>(strtol(tmp, 0, *it == '0' ? 8 : 16));
               it += 2;
            }
            break;
         default:
            // FIXME: raise exception here?
            break;
      }
   }
   return output;
}
1580 /*}}}*/
1581 // URI::CopyFrom - Copy from an object /*{{{*/
1582 // ---------------------------------------------------------------------
1583 /* This parses the URI into all of its components */
void URI::CopyFrom(const string &U)
{
   /* Splits U into the Access, Path, Host, User, Password and Port members.
      NOTE(review): members are only assigned as far as the parse gets;
      whether they are cleared beforehand is up to code not visible here. */
   string::const_iterator I = U.begin();

   // Locate the first colon, this separates the scheme
   for (; I < U.end() && *I != ':' ; ++I);
   string::const_iterator FirstColon = I;

   /* Determine if this is a host type URI with a leading double //
      and then search for the first single / */
   string::const_iterator SingleSlash = I;
   if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
      SingleSlash += 3;

   /* Find the / indicating the end of the hostname, ignoring /'s in the
      square brackets */
   bool InBracket = false;
   for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
   {
      if (*SingleSlash == '[')
         InBracket = true;
      if (InBracket == true && *SingleSlash == ']')
         InBracket = false;
   }

   // Clamp to the end of the string
   if (SingleSlash > U.end())
      SingleSlash = U.end();

   // We can now write the access and path specifiers
   Access.assign(U.begin(),FirstColon);
   if (SingleSlash != U.end())
      Path.assign(SingleSlash,U.end());
   // An empty path is turned into the root path
   if (Path.empty() == true)
      Path = "/";

   // Now we attempt to locate a user:pass@host fragment
   // (FirstColon is moved behind the colon and the // following it, if any)
   if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
      FirstColon += 3;
   else
      FirstColon += 1;
   // Nothing follows the scheme. NOTE(review): Port has not been assigned
   // at this point, the assignment only happens further down.
   if (FirstColon >= U.end())
      return;

   if (FirstColon > SingleSlash)
      FirstColon = SingleSlash;

   // Find the colon...
   // (the one which would separate user and password, or host and port)
   I = FirstColon + 1;
   if (I > SingleSlash)
      I = SingleSlash;
   for (; I < SingleSlash && *I != ':'; ++I);
   string::const_iterator SecondColon = I;

   // Search for the @ after the colon
   for (; I < SingleSlash && *I != '@'; ++I);
   string::const_iterator At = I;

   // Now write the host and user/pass
   if (At == SingleSlash)
   {
      // No @ found: everything up to the path is the host
      if (FirstColon < SingleSlash)
         Host.assign(FirstColon,SingleSlash);
   }
   else
   {
      Host.assign(At+1,SingleSlash);
      // username and password must be encoded (RFC 3986)
      User.assign(DeQuoteString(FirstColon,SecondColon));
      if (SecondColon < At)
         Password.assign(DeQuoteString(SecondColon+1,At));
   }

   // Now we parse the RFC 2732 [] hostnames.
   // The brackets are removed from Host; PortEnd remembers where the last
   // closing bracket was, so that colons in front of it are not taken for
   // a port separator below.
   unsigned long PortEnd = 0;
   InBracket = false;
   for (unsigned I = 0; I != Host.length();)
   {
      if (Host[I] == '[')
      {
         InBracket = true;
         Host.erase(I,1);
         continue;
      }

      if (InBracket == true && Host[I] == ']')
      {
         InBracket = false;
         Host.erase(I,1);
         PortEnd = I;
         continue;
      }
      I++;
   }

   // Tsk, weird.
   // (an opening bracket was never closed: give up on the host;
   //  NOTE(review): Port is not assigned on this path either)
   if (InBracket == true)
   {
      Host.clear();
      return;
   }

   // Now we parse off a port number from the hostname
   Port = 0;
   string::size_type Pos = Host.rfind(':');
   if (Pos == string::npos || Pos < PortEnd)
      return;

   // Whatever follows the last colon is converted with atoi and cut off
   // from the host together with the colon
   Port = atoi(string(Host,Pos+1).c_str());
   Host.assign(Host,0,Pos);
}
1694 /*}}}*/
1695 // URI::operator string - Convert the URI to a string /*{{{*/
1696 // ---------------------------------------------------------------------
1697 /* */
1698 URI::operator string()
1699 {
1700 std::stringstream Res;
1701
1702 if (Access.empty() == false)
1703 Res << Access << ':';
1704
1705 if (Host.empty() == false)
1706 {
1707 if (Access.empty() == false)
1708 Res << "//";
1709
1710 if (User.empty() == false)
1711 {
1712 // FIXME: Technically userinfo is permitted even less
1713 // characters than these, but this is not conveniently
1714 // expressed with a blacklist.
1715 Res << QuoteString(User, ":/?#[]@");
1716 if (Password.empty() == false)
1717 Res << ":" << QuoteString(Password, ":/?#[]@");
1718 Res << "@";
1719 }
1720
1721 // Add RFC 2732 escaping characters
1722 if (Access.empty() == false && Host.find_first_of("/:") != string::npos)
1723 Res << '[' << Host << ']';
1724 else
1725 Res << Host;
1726
1727 if (Port != 0)
1728 Res << ':' << std::to_string(Port);
1729 }
1730
1731 if (Path.empty() == false)
1732 {
1733 if (Path[0] != '/')
1734 Res << "/" << Path;
1735 else
1736 Res << Path;
1737 }
1738
1739 return Res.str();
1740 }
1741 /*}}}*/
1742 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1743 string URI::SiteOnly(const string &URI)
1744 {
1745 ::URI U(URI);
1746 U.User.clear();
1747 U.Password.clear();
1748 U.Path.clear();
1749 return U;
1750 }
1751 /*}}}*/
1752 // URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1753 string URI::ArchiveOnly(const string &URI)
1754 {
1755 ::URI U(URI);
1756 U.User.clear();
1757 U.Password.clear();
1758 if (U.Path.empty() == false && U.Path[U.Path.length() - 1] == '/')
1759 U.Path.erase(U.Path.length() - 1);
1760 return U;
1761 }
1762 /*}}}*/
1763 // URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
1764 string URI::NoUserPassword(const string &URI)
1765 {
1766 ::URI U(URI);
1767 U.User.clear();
1768 U.Password.clear();
1769 return U;
1770 }
1771 /*}}}*/