]> git.saurik.com Git - apt.git/blame - apt-pkg/contrib/strutl.cc
unfuzzy various strings in manpage po's
[apt.git] / apt-pkg / contrib / strutl.cc
CommitLineData
6c139d6e
AL
1// -*- mode: cpp; mode: fold -*-
2// Description /*{{{*/
d48c6a7d 3// $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
6c139d6e
AL
4/* ######################################################################
5
b2e465d6 6 String Util - Some useful string functions.
6c139d6e 7
b2e465d6
AL
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
24231681 10 especially in APT methods.
6c139d6e
AL
11
12 This source is placed in the Public Domain, do with it what you will
24231681 13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
6c139d6e
AL
14
15 ##################################################################### */
16 /*}}}*/
17// Includes /*{{{*/
ea542140
DK
18#include <config.h>
19
cdcc6d34 20#include <apt-pkg/strutl.h>
7049d16d 21#include <apt-pkg/fileutl.h>
b2e465d6 22#include <apt-pkg/error.h>
0a8a80e5 23
24a59c62 24#include <array>
9febc2b2
DK
25#include <algorithm>
26#include <iomanip>
27#include <locale>
28#include <sstream>
29#include <string>
30#include <vector>
31
453b82a3
DK
32#include <stddef.h>
33#include <stdlib.h>
34#include <time.h>
6c139d6e
AL
35#include <ctype.h>
36#include <string.h>
37#include <stdio.h>
2b154e53 38#include <unistd.h>
b2e465d6 39#include <regex.h>
b0db36b1 40#include <errno.h>
b2e465d6 41#include <stdarg.h>
a52f938b 42#include <iconv.h>
0db4a45b 43
ea542140 44#include <apti18n.h>
6c139d6e 45 /*}}}*/
453b82a3
DK
46using namespace std;
47
65dbd5a1
MV
48// Strip - Remove white space from the front and back of a string /*{{{*/
49// ---------------------------------------------------------------------
namespace APT {
   namespace String {
/** \brief Remove whitespace from both ends of a string.
 *
 *  Whitespace is whatever isspace() accepts in the current C locale.
 *  Scanning works on the C-string view of str, so an embedded NUL byte
 *  ends the leading scan.
 *
 *  \param str text to trim
 *  \return copy of str without leading/trailing whitespace; empty if str
 *          is empty or consists of whitespace only
 */
std::string Strip(const std::string &str)
{
   // ensure we have at least one character
   if (str.empty() == true)
      return str;

   // the <ctype.h> classifiers are only defined for values representable
   // as unsigned char (or EOF), so never hand them a plain (signed) char
   auto const IsBlank = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };

   char const * const s = str.c_str();
   size_t start = 0;
   while (IsBlank(s[start]))
      ++start; // find the first not-space

   // string contains only whitespaces
   if (s[start] == '\0')
      return "";

   // s[start] is not a space, so this scan stops at start at the latest
   size_t end = str.length() - 1;
   while (IsBlank(s[end]))
      --end; // find the last not-space

   return str.substr(start, end - start + 1);
}

/** \brief Check whether s ends with the suffix end (an empty suffix matches). */
bool Endswith(const std::string &s, const std::string &end)
{
   if (end.size() > s.size())
      return false;
   return (s.compare(s.size() - end.size(), end.size(), end) == 0);
}

/** \brief Check whether s begins with the prefix start (an empty prefix matches). */
bool Startswith(const std::string &s, const std::string &start)
{
   if (start.size() > s.size())
      return false;
   return (s.compare(0, start.size(), start) == 0);
}

}
}
90 /*}}}*/
a52f938b
OS
91// UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
92// ---------------------------------------------------------------------
93/* This is handy to use before display some information for enduser */
94bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
95{
96 iconv_t cd;
97 const char *inbuf;
b39c1859
MV
98 char *inptr, *outbuf;
99 size_t insize, bufsize;
100 dest->clear();
101
a52f938b
OS
102 cd = iconv_open(codeset, "UTF-8");
103 if (cd == (iconv_t)(-1)) {
104 // Something went wrong
105 if (errno == EINVAL)
106 _error->Error("conversion from 'UTF-8' to '%s' not available",
107 codeset);
108 else
109 perror("iconv_open");
110
a52f938b
OS
111 return false;
112 }
113
b39c1859 114 insize = bufsize = orig.size();
a52f938b
OS
115 inbuf = orig.data();
116 inptr = (char *)inbuf;
b39c1859
MV
117 outbuf = new char[bufsize];
118 size_t lastError = -1;
a52f938b 119
1f99b6d3
DK
120 while (insize != 0)
121 {
b39c1859
MV
122 char *outptr = outbuf;
123 size_t outsize = bufsize;
1f99b6d3 124 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
b39c1859 125 dest->append(outbuf, outptr - outbuf);
1f99b6d3
DK
126 if (err == (size_t)(-1))
127 {
b39c1859
MV
128 switch (errno)
129 {
130 case EILSEQ:
131 insize--;
132 inptr++;
133 // replace a series of unknown multibytes with a single "?"
134 if (lastError != insize) {
135 lastError = insize - 1;
136 dest->append("?");
137 }
138 break;
139 case EINVAL:
140 insize = 0;
141 break;
142 case E2BIG:
143 if (outptr == outbuf)
144 {
145 bufsize *= 2;
146 delete[] outbuf;
147 outbuf = new char[bufsize];
148 }
149 break;
150 }
1f99b6d3
DK
151 }
152 }
a52f938b 153
a52f938b
OS
154 delete[] outbuf;
155
156 iconv_close(cd);
157
158 return true;
159}
160 /*}}}*/
6c139d6e
AL
161// strstrip - Remove white space from the front and back of a string /*{{{*/
162// ---------------------------------------------------------------------
163/* This is handy to use when parsing a file. It also removes \n's left
164 over from fgets and company */
165char *_strstrip(char *String)
166{
167 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
168
169 if (*String == 0)
170 return String;
4fb400a6
MV
171 return _strrstrip(String);
172}
173 /*}}}*/
174// strrstrip - Remove white space from the back of a string /*{{{*/
175// ---------------------------------------------------------------------
/* Cuts trailing ' ', '\t', '\n' and '\r' characters off String in place by
   moving the terminator forward. Returns String itself. */
char *_strrstrip(char *String)
{
   // count how many characters to keep; working with a length instead of an
   // end pointer avoids ever forming a pointer in front of the buffer
   size_t Keep = strlen(String);
   while (Keep != 0)
   {
      char const Last = String[Keep - 1];
      if (Last != ' ' && Last != '\t' && Last != '\n' && Last != '\r')
	 break;
      --Keep;
   }
   String[Keep] = 0;
   return String;
}
6c139d6e
AL
185 /*}}}*/
186// strtabexpand - Converts tabs into 8 spaces /*{{{*/
187// ---------------------------------------------------------------------
188/* */
/* Expands every tab in String in place to the spaces needed to reach the
   next 8 column tab stop, counting columns from the start of the string.

   String  NUL terminated text inside a writable buffer
   Len     total size of that buffer in bytes, terminator included

   If an expansion would not fit into Len bytes the string is cut off at
   that tab. A buffer without a terminator inside Len bytes is left alone.
   Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   if (String == NULL || Len == 0)
      return String;

   // never trust the terminator to be inside the buffer
   void const * const Nul = memchr(String, 0, Len);
   if (Nul == NULL)
      return String;
   size_t Length = static_cast<char const *>(Nul) - String;

   for (size_t Pos = 0; Pos < Length; ++Pos)
   {
      if (String[Pos] != '\t')
	 continue;

      // number of spaces up to the next multiple of eight (1..8)
      size_t const Fill = 8 - (Pos % 8);

      // the tab itself is reused, so the text grows by Fill - 1 and still
      // needs room for the terminator
      if (Length + Fill > Len)
      {
	 String[Pos] = 0;
	 return String;
      }

      // shift the tail (Length - Pos - 1 characters plus the terminator)
      memmove(String + Pos + Fill, String + Pos + 1, Length - Pos);
      memset(String + Pos, ' ', Fill);
      Length += Fill - 1;
      // continue right behind the spaces just written
      Pos += Fill - 1;
   }
   return String;
}
220 /*}}}*/
221// ParseQuoteWord - Parse a single word out of a string /*{{{*/
222// ---------------------------------------------------------------------
223/* This grabs a single word, converts any % escaped characters to their
224 proper values and advances the pointer. Double quotes are understood
7834cb57
AL
225 and stripped out as well. This is for URI/URL parsing. It also can
226 understand [] brackets.*/
6c139d6e
AL
/* Extracts the next word from String into Res and advances String to the
   start of the following word.

   A word ends at the first whitespace outside of "..." and [...] sections.
   In the result %XX escapes (two hex digits) are decoded and double quotes
   are removed; brackets are kept. Text is copied from the original String
   position, so spaces in front of the word end up in Res. As before, the
   result ends at the first NUL byte a %00 escape may have produced.

   Returns false - leaving String and Res untouched - if nothing but spaces
   is left or a quote/bracket is not closed. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; ++C);
   if (*C == 0)
      return false;

   // Jump to the next word; <ctype.h> needs unsigned char values
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) == 0; ++C)
   {
      if (*C == '"')
      {
	 C = strchr(C + 1, '"');
	 if (C == NULL)
	    return false;
      }
      if (*C == '[')
      {
	 C = strchr(C + 1, ']');
	 if (C == NULL)
	    return false;
      }
   }

   // Now de-quote characters; a std::string has no fixed capacity to
   // overrun and no silent cut-off for long words
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C;)
   {
      if (*Start == '%' && Start + 2 < C &&
	  isxdigit(static_cast<unsigned char>(Start[1])) &&
	  isxdigit(static_cast<unsigned char>(Start[2])))
      {
	 char const Tmp[3] = {Start[1], Start[2], 0};
	 Word += (char)strtol(Tmp,0,16);
	 Start += 3;
	 continue;
      }
      if (*Start != '"')
	 Word += *Start;
      ++Start;
   }
   // keep the historic C-string semantics of the result
   Res = Word.c_str();

   // Skip ending white space
   for (; *C != 0 && isspace(static_cast<unsigned char>(*C)) != 0; ++C);
   String = C;
   return true;
}
283 /*}}}*/
08e8f724
AL
284// ParseCWord - Parses a string like a C "" expression /*{{{*/
285// ---------------------------------------------------------------------
b2e465d6 286/* This expects a series of space separated strings enclosed in ""'s.
08e8f724 287 It concatenates the ""'s into a single string. */
/* Joins a series of "..." strings separated by whitespace into Res.

   The content of each quoted section is copied verbatim; each run of
   whitespace between sections becomes a single space. On success String is
   advanced to the end of the input.

   Returns false - leaving String and Res untouched - if only spaces are
   left, the input is 1024 bytes or longer, a quote is not closed or text
   appears outside of quotes. */
bool ParseCWord(const char *&String,string &Res)
{
   // <ctype.h> classifiers must not be fed negative plain chars
   auto const IsSpace = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };

   // Skip leading whitespace
   const char *C = String;
   for (; *C == ' '; ++C);
   if (*C == 0)
      return false;

   // historic input limit, kept so callers see the same rejections
   if (strlen(String) >= 1024)
      return false;

   string Joined;
   for (; *C != 0; ++C)
   {
      if (*C == '"')
      {
	 for (++C; *C != 0 && *C != '"'; ++C)
	    Joined += *C;

	 // ran into the end without a closing quote
	 if (*C == 0)
	    return false;

	 continue;
      }

      // collapse a run of whitespace: only its first character counts
      if (C != String && IsSpace(*C) && IsSpace(C[-1]))
	 continue;
      // anything but whitespace is not allowed between the quoted parts
      if (IsSpace(*C) == false)
	 return false;
      Joined += ' ';
   }
   Res = Joined;
   String = C;
   return true;
}
325 /*}}}*/
6d5dd02a 326// QuoteString - Convert a string into quoted form /*{{{*/
1bc849af 327// ---------------------------------------------------------------------
6d5dd02a 328/* */
/* Returns a copy of Str in which every byte that is listed in Bad, is '%',
   is a space or control character (<= 0x20) or lies outside of printable
   ASCII (>= 0x7F) is replaced by %xx, xx being the byte value as two
   lowercase hex digits. */
string QuoteString(const string &Str, const char *Bad)
{
   static char const Hex[] = "0123456789abcdef";

   string Res;
   Res.reserve(Str.size());
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      // judge the byte by its unsigned value: the range test below covers
      // everything a C-locale isprint() would reject, without calling a
      // <ctype.h> function on a possibly negative char
      unsigned char const Byte = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 ||
	  Byte == 0x25 || // percent '%' char
	  Byte <= 0x20 || Byte >= 0x7F) // control chars and non-ASCII
      {
	 Res += '%';
	 Res += Hex[Byte >> 4];
	 Res += Hex[Byte & 0x0F];
      }
      else
	 Res += *I;
   }
   return Res;
}
345 /*}}}*/
6d5dd02a 346// DeQuoteString - Convert a string from quoted form /*{{{*/
6c139d6e 347// ---------------------------------------------------------------------
6d5dd02a 348/* This undoes QuoteString */
/* Decodes the range [begin,end): every %XX with two hex digits becomes the
   byte with that value, everything else - including a '%' that is not
   followed by two hex digits - is copied unchanged. */
string DeQuoteString(string::const_iterator const &begin,
		     string::const_iterator const &end)
{
   string Res;
   for (string::const_iterator I = begin; I != end; ++I)
   {
      // compare distances instead of forming I + 2, which would be an
      // iterator beyond end for a '%' close to the end of the range
      if (*I == '%' && end - I > 2 &&
	  isxdigit(static_cast<unsigned char>(I[1])) &&
	  isxdigit(static_cast<unsigned char>(I[2])))
      {
	 char const Tmp[3] = {I[1], I[2], 0};
	 Res += (char)strtol(Tmp,0,16);
	 // the loop increment steps over the last digit
	 I += 2;
	 continue;
      }
      Res += *I;
   }
   return Res;
}
/* Convenience overload decoding all of Str. */
string DeQuoteString(const string &Str)
{
   return DeQuoteString(Str.begin(),Str.end());
}
6d5dd02a
AL
375
376 /*}}}*/
6c139d6e
AL
377// SizeToStr - Convert a long into a human readable size /*{{{*/
378// ---------------------------------------------------------------------
24231681
AL
379/* A max of 4 digits are shown before conversion to the next highest unit.
380 The max length of the string will be 5 chars unless the size is > 10
6c139d6e
AL
381 YottaBytes (E24) */
382string SizeToStr(double Size)
383{
6c139d6e
AL
384 double ASize;
385 if (Size >= 0)
386 ASize = Size;
387 else
388 ASize = -1*Size;
b8eba208 389
6c139d6e
AL
390 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
391 ExaBytes, ZettaBytes, YottaBytes */
7f25bdff 392 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
6c139d6e
AL
393 int I = 0;
394 while (I <= 8)
395 {
396 if (ASize < 100 && I != 0)
397 {
b8eba208
DK
398 std::string S;
399 strprintf(S, "%'.1f %c", ASize, Ext[I]);
400 return S;
6c139d6e 401 }
b8eba208 402
6c139d6e
AL
403 if (ASize < 10000)
404 {
b8eba208
DK
405 std::string S;
406 strprintf(S, "%'.0f %c", ASize, Ext[I]);
407 return S;
6c139d6e
AL
408 }
409 ASize /= 1000.0;
410 I++;
411 }
b8eba208 412 return "";
6c139d6e
AL
413}
414 /*}}}*/
415// TimeToStr - Convert the time into a string /*{{{*/
416// ---------------------------------------------------------------------
417/* Converts a number of seconds to a hms format */
418string TimeToStr(unsigned long Sec)
419{
b8eba208
DK
420 std::string S;
421 if (Sec > 60*60*24)
6c139d6e 422 {
b8eba208
DK
423 //TRANSLATOR: d means days, h means hours, min means minutes, s means seconds
424 strprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
425 }
426 else if (Sec > 60*60)
427 {
428 //TRANSLATOR: h means hours, min means minutes, s means seconds
429 strprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
430 }
431 else if (Sec > 60)
432 {
433 //TRANSLATOR: min means minutes, s means seconds
434 strprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
435 }
436 else
437 {
438 //TRANSLATOR: s means seconds
439 strprintf(S,_("%lis"),Sec);
6c139d6e 440 }
6c139d6e
AL
441 return S;
442}
443 /*}}}*/
444// SubstVar - Substitute a string for another string /*{{{*/
445// ---------------------------------------------------------------------
1e3f4083 446/* This replaces all occurrences of Subst with Contents in Str. */
/* Returns Str with every non-overlapping occurrence of Subst, searched
   from left to right, replaced by Contents. An empty Subst or a Str
   without any occurrence yields Str unchanged. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   // nothing to look for means nothing to replace
   if (Subst.empty())
      return Str;

   string Result;
   string::size_type Cursor = 0;
   while (Cursor < Str.length())
   {
      string::size_type const Hit = Str.find(Subst, Cursor);
      if (Hit == string::npos)
	 break;

      // text between the previous match and this one, then the replacement
      Result.append(Str, Cursor, Hit - Cursor);
      Result.append(Contents);
      Cursor = Hit + Subst.length();
   }

   // the cursor only moves on a match
   if (Cursor == 0)
      return Str;

   // whatever follows the last match
   if (Cursor < Str.length())
      Result.append(Str, Cursor, string::npos);
   return Result;
}
b2e465d6
AL
475string SubstVar(string Str,const struct SubstVar *Vars)
476{
477 for (; Vars->Subst != 0; Vars++)
478 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
479 return Str;
480}
6c139d6e 481 /*}}}*/
fa3b0945
MV
482// OutputInDepth - return a string with separator multiplied with depth /*{{{*/
483// ---------------------------------------------------------------------
484/* Returns a string with the supplied separator depth + 1 times in it */
/* Builds a string made of Depth + 1 copies of Separator. */
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   unsigned long const Copies = Depth + 1;
   std::string Indent;
   for (unsigned long Done = 0; Done != Copies; ++Done)
      Indent += Separator;
   return Indent;
}
492 /*}}}*/
ad00ae81
AL
493// URItoFileName - Convert the uri into a unique file name /*{{{*/
494// ---------------------------------------------------------------------
495/* This converts a URI into a safe filename. It quotes all unsafe characters
496 and converts / to _ and removes the scheme identifier. The resulting
497 file name should be unique and never occur again for a different file */
171c75f1 498string URItoFileName(const string &URI)
ad00ae81 499{
54cf15cb
AL
500 // Nuke 'sensitive' items
501 ::URI U(URI);
171c75f1
MV
502 U.User.clear();
503 U.Password.clear();
504 U.Access.clear();
54cf15cb 505
ad00ae81 506 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
171c75f1
MV
507 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
508 replace(NewURI.begin(),NewURI.end(),'/','_');
509 return NewURI;
ad00ae81
AL
510}
511 /*}}}*/
6c139d6e
AL
512// Base64Encode - Base64 Encoding routine for short strings /*{{{*/
513// ---------------------------------------------------------------------
514/* This routine performs a base64 transformation on a string. It was ripped
515 from wget and then patched and bug fixed.
516
517 This spec can be found in rfc2045 */
/* Returns the base64 (RFC 2045 alphabet) representation of the bytes in S,
   padded with '=' to a multiple of four characters. No line breaks are
   inserted. */
string Base64Encode(const string &S)
{
   // Conversion table.
   static char const tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   size_t const Length = S.length();
   for (size_t Pos = 0; Pos < Length; Pos += 3)
   {
      // bytes left in this group: 1, 2 or 3 (or more)
      size_t const Avail = Length - Pos;

      // unsigned on purpose: shifting a negative char would yield a
      // negative table index for every byte >= 0x80
      unsigned char const B0 = static_cast<unsigned char>(S[Pos]);
      unsigned char const B1 = Avail > 1 ? static_cast<unsigned char>(S[Pos + 1]) : 0;
      unsigned char const B2 = Avail > 2 ? static_cast<unsigned char>(S[Pos + 2]) : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* The padding tells the remote end how many bytes of the last
	 group it should discard */
      if (Avail == 1)
      {
	 Final += "==";
	 break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Avail == 2)
      {
	 Final += '=';
	 break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
568 /*}}}*/
0da8987a 569// stringcmp - Arbitrary string compare /*{{{*/
6c139d6e 570// ---------------------------------------------------------------------
7365ff46 571/* This safely compares two non-null terminated strings of arbitrary
6c139d6e
AL
572 length */
// Common walk behind all stringcmp() overloads: finds the first position at
// which the two ranges differ and classifies the outcome.
template <typename LeftIter, typename RightIter>
static int stringcmp_ranges(LeftIter A, LeftIter const AEnd,
			    RightIter B, RightIter const BEnd)
{
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   // equal only if both ranges are used up at the same time
   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}

/* Compares [A,AEnd) with [B,BEnd); neither range needs a terminator.
   Returns 0 if both are equal, 1 if the first range ends before the second,
   -1 if the second ends before the first, and otherwise -1 or 1 depending
   on whether the first differing character of A is smaller than that of B
   (compared as plain char). */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   return stringcmp_ranges(A, AEnd, B, BEnd);
}

#if __GNUC__ >= 3
// Same contract as above, for a string iterator range against a char range.
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
	      const char *B,const char *BEnd)
{
   return stringcmp_ranges(A, AEnd, B, BEnd);
}
// Same contract as above, for two string iterator ranges.
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
	      string::const_iterator B,string::const_iterator BEnd)
{
   return stringcmp_ranges(A, AEnd, B, BEnd);
}
#endif
6c139d6e 626 /*}}}*/
0da8987a 627// stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
6c139d6e
AL
628// ---------------------------------------------------------------------
629/* */
630int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
47db8997
AL
631{
632 for (; A != AEnd && B != BEnd; A++, B++)
6dc60370 633 if (tolower_ascii(*A) != tolower_ascii(*B))
47db8997
AL
634 break;
635
636 if (A == AEnd && B == BEnd)
637 return 0;
638 if (A == AEnd)
639 return 1;
640 if (B == BEnd)
641 return -1;
6dc60370 642 if (tolower_ascii(*A) < tolower_ascii(*B))
47db8997
AL
643 return -1;
644 return 1;
645}
ae0b19f5 646#if __GNUC__ >= 3
47db8997
AL
647int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
648 const char *B,const char *BEnd)
649{
650 for (; A != AEnd && B != BEnd; A++, B++)
6dc60370 651 if (tolower_ascii(*A) != tolower_ascii(*B))
47db8997
AL
652 break;
653
654 if (A == AEnd && B == BEnd)
655 return 0;
656 if (A == AEnd)
657 return 1;
658 if (B == BEnd)
659 return -1;
6dc60370 660 if (tolower_ascii(*A) < tolower_ascii(*B))
47db8997
AL
661 return -1;
662 return 1;
663}
664int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
665 string::const_iterator B,string::const_iterator BEnd)
6c139d6e
AL
666{
667 for (; A != AEnd && B != BEnd; A++, B++)
6dc60370 668 if (tolower_ascii(*A) != tolower_ascii(*B))
6c139d6e 669 break;
3b5421b4 670
6c139d6e
AL
671 if (A == AEnd && B == BEnd)
672 return 0;
673 if (A == AEnd)
674 return 1;
675 if (B == BEnd)
676 return -1;
6dc60370 677 if (tolower_ascii(*A) < tolower_ascii(*B))
6c139d6e
AL
678 return -1;
679 return 1;
680}
ae0b19f5 681#endif
6c139d6e 682 /*}}}*/
3b5421b4
AL
683// LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
684// ---------------------------------------------------------------------
685/* The format is like those used in package files and the method
686 communication system */
171c75f1 687string LookupTag(const string &Message,const char *Tag,const char *Default)
3b5421b4
AL
688{
689 // Look for a matching tag.
690 int Length = strlen(Tag);
f7f0d6c7 691 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); ++I)
3b5421b4
AL
692 {
693 // Found the tag
694 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
695 {
696 // Find the end of line and strip the leading/trailing spaces
171c75f1 697 string::const_iterator J;
3b5421b4 698 I += Length + 1;
74dedb4a 699 for (; isspace_ascii(*I) != 0 && I < Message.end(); ++I);
f7f0d6c7 700 for (J = I; *J != '\n' && J < Message.end(); ++J);
74dedb4a 701 for (; J > I && isspace_ascii(J[-1]) != 0; --J);
3b5421b4 702
0db4a45b 703 return string(I,J);
3b5421b4
AL
704 }
705
f7f0d6c7 706 for (; *I != '\n' && I < Message.end(); ++I);
3b5421b4
AL
707 }
708
709 // Failed to find a match
710 if (Default == 0)
711 return string();
712 return Default;
713}
714 /*}}}*/
715// StringToBool - Converts a string into a boolean /*{{{*/
716// ---------------------------------------------------------------------
717/* This inspects the string to see if it is true or if it is false and
718 then returns the result. Several variants of true/false are checked. */
/* Interprets Text as a boolean.

   Returns 0 or 1 if Text as a whole is a number (as parsed by strtol with
   base 0) with that value, 0 for no/false/without/off/disable, 1 for
   yes/true/with/on/enable (case is ignored) and Default for anything else.
   An empty Text parses as the number 0 and therefore yields 0. */
int StringToBool(const string &Text,int Default)
{
   char const * const Str = Text.c_str();

   // keep the full width of the result: narrowing it to int would turn
   // e.g. 4294967296 into 0 on systems with a 64 bit long
   char *ParseEnd;
   long const Res = strtol(Str,&ParseEnd,0);
   // ensure that the entire string was converted by strtol to avoid
   // failures on "apt-cache show -a 0ad" where the "0" is converted
   if (ParseEnd == Str + Text.size() && Res >= 0 && Res <= 1)
      return static_cast<int>(Res);

   // Check for negatives
   static char const * const Negatives[] = {"no", "false", "without", "off", "disable"};
   for (char const * const Word : Negatives)
      if (strcasecmp(Str, Word) == 0)
	 return 0;

   // Check for positives
   static char const * const Positives[] = {"yes", "true", "with", "on", "enable"};
   for (char const * const Word : Positives)
      if (strcasecmp(Str, Word) == 0)
	 return 1;

   return Default;
}
747 /*}}}*/
0a8a80e5
AL
748// TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
749// ---------------------------------------------------------------------
750/* This converts a time_t into a string time representation that is
751 year 2000 compliant and timezone neutral */
/* Formats Date (seconds since the epoch, interpreted as UTC) as
   "Sun, 06 Nov 1994 08:49:37" using the classic "C" locale, followed by
   " +0000" if NumericTimezone is set and " GMT" otherwise. Returns an empty
   string if the time cannot be broken down. */
string TimeRFC1123(time_t Date, bool const NumericTimezone)
{
   struct tm Broken;
   if (gmtime_r(&Date, &Broken) == NULL)
      return "";

   static char const Pattern[] = "%a, %d %b %Y %H:%M:%S";
   char const * const PatternEnd = Pattern + sizeof(Pattern) - 1;

   // day and month names must not depend on the user's locale
   auto const posix = std::locale::classic();
   std::ostringstream Out;
   Out.imbue(posix);
   std::time_put<char> const &Formatter = std::use_facet<std::time_put<char>>(posix);
   Formatter.put(std::ostreambuf_iterator<char>(Out), Out, ' ', &Broken,
		 Pattern, PatternEnd);

   Out << (NumericTimezone ? " +0000" : " GMT");
   return Out.str();
}
/* Convenience overload producing the " GMT" variant. */
string TimeRFC1123(time_t Date)
{
   return TimeRFC1123(Date, false);
}
775 /*}}}*/
776// ReadMessages - Read messages from the FD /*{{{*/
777// ---------------------------------------------------------------------
778/* This pulls full messages from the input FD into the message buffer.
779 It assumes that messages will not pause during transit so no
ffc36991
DB
780 fancy buffering is used.
781
782 In particular: this reads blocks from the input until it believes
783 that it's run out of input text. Each block is terminated by a
d8c71b3b 784 double newline ('\n' followed by '\n').
ffc36991 785 */
0a8a80e5
AL
786bool ReadMessages(int Fd, vector<string> &List)
787{
aee70518 788 char Buffer[64000];
ffc36991
DB
789 // Represents any left-over from the previous iteration of the
790 // parse loop. (i.e., if a message is split across the end
791 // of the buffer, it goes here)
792 string PartialMessage;
d8c71b3b
DK
793
794 do {
795 int const Res = read(Fd, Buffer, sizeof(Buffer));
b0db36b1
AL
796 if (Res < 0 && errno == EINTR)
797 continue;
d8c71b3b
DK
798
799 // process we read from has died
0a8a80e5
AL
800 if (Res == 0)
801 return false;
d8c71b3b 802
0a8a80e5 803 // No data
3b0e76ec 804#if EAGAIN != EWOULDBLOCK
d8c71b3b 805 if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
3b0e76ec
DK
806#else
807 if (Res < 0 && errno == EAGAIN)
808#endif
0a8a80e5 809 return true;
b2e465d6
AL
810 if (Res < 0)
811 return false;
d8c71b3b
DK
812
813 // extract the message(s) from the buffer
814 char const *Start = Buffer;
815 char const * const End = Buffer + Res;
816
817 char const * NL = (char const *) memchr(Start, '\n', End - Start);
818 if (NL == NULL)
0a8a80e5 819 {
d8c71b3b
DK
820 // end of buffer: store what we have so far and read new data in
821 PartialMessage.append(Start, End - Start);
822 Start = End;
0a8a80e5 823 }
ffc36991 824 else
d8c71b3b
DK
825 ++NL;
826
827 if (PartialMessage.empty() == false && Start < End)
828 {
829 // if we start with a new line, see if the partial message we have ended with one
830 // so that we properly detect records ending between two read() runs
831 // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n
832 // the case \r|\n\r\n is handled by the usual double-newline handling
833 if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r'))
834 {
835 if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r"))
836 {
837 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
838 List.push_back(PartialMessage);
839 PartialMessage.clear();
840 while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL;
841 Start = NL;
842 }
843 }
844 }
845
846 while (Start < End) {
847 char const * NL2 = (char const *) memchr(NL, '\n', End - NL);
848 if (NL2 == NULL)
849 {
850 // end of buffer: store what we have so far and read new data in
851 PartialMessage.append(Start, End - Start);
852 break;
853 }
854 ++NL2;
855
856 // did we find a double newline?
857 if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r'))
858 {
859 PartialMessage.append(Start, NL2 - Start);
860 PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1);
861 List.push_back(PartialMessage);
862 PartialMessage.clear();
863 while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2;
864 Start = NL2;
865 }
866 NL = NL2;
867 }
868
869 // we have read at least one complete message and nothing left
870 if (PartialMessage.empty() == true)
871 return true;
0a8a80e5
AL
872
873 if (WaitFd(Fd) == false)
874 return false;
d8c71b3b 875 } while (true);
0a8a80e5
AL
876}
877 /*}}}*/
24231681
AL
878// MonthConv - Converts a month string into a number /*{{{*/
879// ---------------------------------------------------------------------
880/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
6dc60370 881 Made it a bit more robust with a few tolower_ascii though. */
1d742e01 882static int MonthConv(char const * const Month)
24231681 883{
6dc60370 884 switch (tolower_ascii(*Month))
24231681 885 {
6dc60370
DK
886 case 'a':
887 return tolower_ascii(Month[1]) == 'p'?3:7;
888 case 'd':
24231681 889 return 11;
6dc60370 890 case 'f':
24231681 891 return 1;
6dc60370
DK
892 case 'j':
893 if (tolower_ascii(Month[1]) == 'a')
24231681 894 return 0;
6dc60370
DK
895 return tolower_ascii(Month[2]) == 'n'?5:6;
896 case 'm':
897 return tolower_ascii(Month[2]) == 'r'?2:4;
898 case 'n':
24231681 899 return 10;
6dc60370 900 case 'o':
24231681 901 return 9;
6dc60370 902 case 's':
24231681
AL
903 return 8;
904
905 // Pretend it is January..
906 default:
907 return 0;
908 }
909}
910 /*}}}*/
55089145 911// timegm - Internal timegm if the gnu version is not available /*{{{*/
6d5dd02a 912// ---------------------------------------------------------------------
55089145 913/* Converts struct tm to time_t, assuming the data in tm is UTC rather
6d5dd02a 914 than local timezone (mktime assumes the latter).
41b6caf4 915
55089145
DK
916 This function is a nonstandard GNU extension that is also present on
917 the BSDs and maybe other systems. For others we follow the advice of
918 the manpage of timegm and use his portable replacement. */
919#ifndef HAVE_TIMEGM
6d5dd02a
AL
920static time_t timegm(struct tm *t)
921{
55089145
DK
922 char *tz = getenv("TZ");
923 setenv("TZ", "", 1);
924 tzset();
925 time_t ret = mktime(t);
926 if (tz)
927 setenv("TZ", tz, 1);
928 else
929 unsetenv("TZ");
930 tzset();
931 return ret;
6d5dd02a
AL
932}
933#endif
934 /*}}}*/
9febc2b2 935// RFC1123StrToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
cd8cf88f 936// ---------------------------------------------------------------------
9febc2b2
DK
937/* tries to parses a full date as specified in RFC7231 §7.1.1.1
938 with one exception: HTTP/1.1 valid dates need to have GMT as timezone.
939 As we encounter dates from UTC or with a numeric timezone in other places,
940 we allow them here to to be able to reuse the method. Either way, a date
941 must be in UTC or parsing will fail. Previous implementations of this
942 method used to ignore the timezone and assume always UTC. */
cd8cf88f
DK
943bool RFC1123StrToTime(const char* const str,time_t &time)
944{
1d742e01
DK
945 unsigned short day = 0;
946 signed int year = 0; // yes, Y23K problem – we gonna worry then…
947 std::string weekday, month, datespec, timespec, zone;
948 std::istringstream ss(str);
0fb16c3e 949 auto const &posix = std::locale::classic();
3bdff17c 950 ss.imbue(posix);
1d742e01
DK
951 ss >> weekday;
952 // we only superficially check weekday, mostly to avoid accepting localized
953 // weekdays here and take only its length to decide which datetime format we
954 // encounter here. The date isn't stored.
955 std::transform(weekday.begin(), weekday.end(), weekday.begin(), ::tolower);
956 std::array<char const * const, 7> c_weekdays = {{ "sun", "mon", "tue", "wed", "thu", "fri", "sat" }};
957 if (std::find(c_weekdays.begin(), c_weekdays.end(), weekday.substr(0,3)) == c_weekdays.end())
958 return false;
959
960 switch (weekday.length())
961 {
962 case 4:
963 // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
964 if (weekday[3] != ',')
965 return false;
966 ss >> day >> month >> year >> timespec >> zone;
967 break;
968 case 3:
969 // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
970 ss >> month >> day >> timespec >> year;
971 zone = "UTC";
972 break;
973 case 0:
974 case 1:
975 case 2:
976 return false;
977 default:
978 // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
979 if (weekday[weekday.length() - 1] != ',')
980 return false;
981 ss >> datespec >> timespec >> zone;
982 auto const expldate = VectorizeString(datespec, '-');
983 if (expldate.size() != 3)
984 return false;
985 try {
986 size_t pos;
987 day = std::stoi(expldate[0], &pos);
988 if (pos != expldate[0].length())
9febc2b2 989 return false;
1d742e01
DK
990 year = 1900 + std::stoi(expldate[2], &pos);
991 if (pos != expldate[2].length())
992 return false;
993 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(expldate[1].c_str()) + 1, day);
994 } catch (...) {
995 return false;
9febc2b2 996 }
1d742e01
DK
997 break;
998 }
9febc2b2 999
1d742e01 1000 if (ss.fail() || ss.bad() || !ss.eof())
cd8cf88f
DK
1001 return false;
1002
1d742e01
DK
1003 if (zone != "GMT" && zone != "UTC" && zone != "Z") // RFC 822
1004 {
1005 // numeric timezones as a should of RFC 1123 and generally preferred
1006 try {
1007 size_t pos;
1008 auto const z = std::stoi(zone, &pos);
1009 if (z != 0 || pos != zone.length())
1010 return false;
1011 } catch (...) {
1012 return false;
1013 }
1014 }
1015
1016 if (datespec.empty())
1017 {
1018 if (month.empty())
1019 return false;
1020 strprintf(datespec, "%.4d-%.2d-%.2d", year, MonthConv(month.c_str()) + 1, day);
1021 }
1022
1023 std::string const datetime = datespec + ' ' + timespec;
1024 struct tm Tm;
1025 if (strptime(datetime.c_str(), "%Y-%m-%d %H:%M:%S", &Tm) == nullptr)
1026 return false;
1027 time = timegm(&Tm);
cd8cf88f
DK
1028 return true;
1029}
1030 /*}}}*/
// FTPMDTMStrToTime - Converts a ftp modification date into a time_t	/*{{{*/
// ---------------------------------------------------------------------
/* Parses the reply of the FTP MDTM command (YYYYMMDDHHMMSS, interpreted
   as UTC) and stores the resulting timestamp in 'time'.
   Returns false (leaving 'time' untouched) if the string is missing or
   does not start with a complete date-time. */
bool FTPMDTMStrToTime(const char* const str,time_t &time)
{
   if (str == nullptr)
      return false;

   // strptime() only fills in the fields named by the format, so start
   // from a zeroed structure to hand timegm() fully defined values.
   struct tm Tm = {};
   // MDTM includes no whitespace; the blanks in the format are recommended
   // as field separators and match zero or more whitespace in the input
   if (strptime(str, "%Y %m %d %H %M %S", &Tm) == nullptr)
      return false;

   time = timegm(&Tm);
   return true;
}
1044 /*}}}*/
24231681
AL
1045// StrToTime - Converts a string into a time_t /*{{{*/
1046// ---------------------------------------------------------------------
1e3f4083 1047/* This handles all 3 popular time formats including RFC 1123, RFC 1036
24231681
AL
1048 and the C library asctime format. It requires the GNU library function
1049 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
f58a97d3
AL
1050 reason the C library does not provide any such function :< This also
1051 handles the weird, but unambiguous FTP time format*/
171c75f1 1052bool StrToTime(const string &Val,time_t &Result)
24231681
AL
1053{
1054 struct tm Tm;
1055 char Month[10];
404528bd 1056
24231681 1057 // Skip the day of the week
404528bd
DK
1058 const char *I = strchr(Val.c_str(), ' ');
1059
24231681 1060 // Handle RFC 1123 time
f58a97d3 1061 Month[0] = 0;
324cbd56 1062 if (sscanf(I," %2d %3s %4d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
24231681
AL
1063 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1064 {
1065 // Handle RFC 1036 time
324cbd56 1066 if (sscanf(I," %2d-%3s-%3d %2d:%2d:%2d GMT",&Tm.tm_mday,Month,
24231681
AL
1067 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
1068 Tm.tm_year += 1900;
1069 else
1070 {
1071 // asctime format
324cbd56 1072 if (sscanf(I," %3s %2d %2d:%2d:%2d %4d",Month,&Tm.tm_mday,
24231681 1073 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
f58a97d3
AL
1074 {
1075 // 'ftp' time
7ef72446 1076 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
f58a97d3
AL
1077 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
1078 return false;
1079 Tm.tm_mon--;
1080 }
24231681
AL
1081 }
1082 }
1083
1084 Tm.tm_isdst = 0;
f58a97d3
AL
1085 if (Month[0] != 0)
1086 Tm.tm_mon = MonthConv(Month);
70e0c168
MV
1087 else
1088 Tm.tm_mon = 0; // we don't have a month, so pick something
24231681
AL
1089 Tm.tm_year -= 1900;
1090
1091 // Convert to local time and then to GMT
1092 Result = timegm(&Tm);
1093 return true;
1094}
1095 /*}}}*/
ddc1d8d0
AL
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. 'Str' does not need to be null terminated, exactly
   'Len' bytes of it are looked at. A field consisting only of spaces
   (or an empty field) yields zero. Returns false if the field is too
   long for the internal buffer or does not start with a number. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // Work on a terminated copy as the input is a raw fixed-width field
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == '\0')
      return true;

   char *ParseEnd = nullptr;
   Res = strtoul(Field,&ParseEnd,Base);
   // no digit consumed at all means there was no number
   return ParseEnd != Field;
}
1122 /*}}}*/
650faab0
DK
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. Same as the unsigned long variant, but for values
   which need an unsigned long long. A field consisting only of spaces
   (or an empty field) yields zero. Returns false if the field is too
   long for the internal buffer or does not start with a number. */
bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // Work on a terminated copy as the input is a raw fixed-width field
   memcpy(Field,Str,Len);
   Field[Len] = '\0';

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == '\0')
      return true;

   char *ParseEnd = nullptr;
   Res = strtoull(Field,&ParseEnd,Base);
   // no digit consumed at all means there was no number
   return ParseEnd != Field;
}
1149 /*}}}*/
1150
54f2f0a3
NH
// Base256ToNum - Convert a fixed length binary to a number		/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the base-256 (binary) encoded fixed length
   fields in tar files. Such a field is marked by the high bit of its
   first byte; the remaining bits form a big endian number.
   Returns false if the field is empty or does not carry the marker bit.
   Bits which do not fit into 'Res' are silently shifted out. */
bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len)
{
   if (Len == 0 || (Str[0] & 0x80) == 0)
      return false;

   Res = Str[0] & 0x7F;
   // Bytes have to be taken as unsigned values: a plain char may be signed
   // and would otherwise be sign extended, corrupting the result.
   for (unsigned int i = 1; i < Len; ++i)
      Res = (Res << 8) + static_cast<unsigned char>(Str[i]);
   return true;
}
1167 /*}}}*/
3c09d634
GJ
1168// Base256ToNum - Convert a fixed length binary to a number /*{{{*/
1169// ---------------------------------------------------------------------
1170/* This is used in decoding the 256bit encoded fixed length fields in
1171 tar files */
1172bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len)
1173{
cf7503d8 1174 unsigned long long Num = 0;
3c09d634
GJ
1175 bool rc;
1176
1177 rc = Base256ToNum(Str, Num, Len);
cf7503d8 1178 // rudimentary check for overflow (Res = ulong, Num = ulonglong)
3c09d634
GJ
1179 Res = Num;
1180 if (Res != Num)
1181 return false;
1182
1183 return rc;
1184}
1185 /*}}}*/
6e52073f
AL
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num: returns the value (0-15) of a hexadecimal digit
   given in either case, or -1 if the character is not a hex digit. */
static int HexDigit(int c)
{
   int Value = -1;
   if (c >= 'A' && c <= 'F')
      Value = 10 + (c - 'A');
   else if (c >= 'a' && c <= 'f')
      Value = 10 + (c - 'a');
   else if (c >= '0' && c <= '9')
      Value = c - '0';
   return Value;
}
1199 /*}}}*/
1200// Hex2Num - Convert a long hex number into a buffer /*{{{*/
1201// ---------------------------------------------------------------------
1202/* The length of the buffer must be exactly 1/2 the length of the string. */
171c75f1 1203bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
eff0c22e
JAK
1204{
1205 return Hex2Num(APT::StringView(Str), Num, Length);
1206}
1207
1208bool Hex2Num(const APT::StringView Str,unsigned char *Num,unsigned int Length)
6e52073f 1209{
0db4a45b 1210 if (Str.length() != Length*2)
6e52073f
AL
1211 return false;
1212
1213 // Convert each digit. We store it in the same order as the string
1214 int J = 0;
eff0c22e 1215 for (auto I = Str.begin(); I != Str.end();J++, I += 2)
6e52073f 1216 {
fc8f1c22
NT
1217 int first_half = HexDigit(I[0]);
1218 int second_half;
1219 if (first_half < 0)
6e52073f
AL
1220 return false;
1221
fc8f1c22
NT
1222 second_half = HexDigit(I[1]);
1223 if (second_half < 0)
1224 return false;
1225 Num[J] = first_half << 4;
1226 Num[J] += second_half;
6e52073f
AL
1227 }
1228
1229 return true;
1230}
1231 /*}}}*/
b2e465d6
AL
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at each token location and
   List is filled with pointers into Input, terminated by a null pointer.
   White space around the items is stripped.
   Returns false if List (which has room for ListMax pointers including
   the terminator) is too small; the items which did fit are still
   available and terminated. */
bool TokSplitString(char Tok,char *Input,char **List,
		    unsigned long ListMax)
{
   // isspace() is only defined for values representable as unsigned char
   // (and EOF), so a plain - possibly negative - char must be converted.
   auto const IsBlank = [](char const c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
   };

   // Strip any leading spaces
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   for (; *Start != 0 && IsBlank(*Start); Start++);

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      for (; Pos != Stop && *Pos != Tok; Pos++);

      // Back remove spaces
      char *End = Pos;
      for (; End > Start && (End[-1] == Tok || IsBlank(End[-1])); End--);
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
	 // no room left for the terminator: sacrifice the last item
	 List[Count-1] = 0;
	 return false;
      }

      // Advance pos beyond the token, white space and the null we inserted
      for (; Pos != Stop && (*Pos == Tok || IsBlank(*Pos) || *Pos == 0); Pos++);
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
1271 /*}}}*/
// VectorizeString - Split a string up into a vector of strings		/*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
   purpose is the same as in the method above and this one is a bit slower
   also, but the advantage is that we have an iterable vector.
   Empty items between two separators are kept, but a separator at the
   very end of the string does not produce a trailing empty item. An
   empty string results in an empty vector. */
std::vector<std::string> VectorizeString(std::string const &haystack, char const &split)
{
   std::vector<std::string> exploded;
   if (haystack.empty() == true)
      return exploded;

   std::string::size_type start = 0;
   while (true)
   {
      std::string::size_type const hit = haystack.find(split, start);
      if (hit == std::string::npos)
      {
	 // the remainder is the last item
	 exploded.push_back(haystack.substr(start));
	 break;
      }
      exploded.push_back(haystack.substr(start, hit - start));
      start = hit + 1;
      // nothing follows the separator, so we are done
      if (start == haystack.size())
	 break;
   }
   return exploded;
}
1291 /*}}}*/
// StringSplit - split a string into a string vector by token		/*{{{*/
// ---------------------------------------------------------------------
/* See header for details. An empty separator is bogus and gives an empty
   vector; otherwise at most maxsplit items are returned, the last one
   holding whatever is left of the string (separators included). */
std::vector<std::string> StringSplit(std::string const &s, std::string const &sep,
				      unsigned int maxsplit)
{
   std::vector<std::string> pieces;

   // no separator given, this is bogus
   if (sep.empty())
      return pieces;

   for (size_t begin = 0;;)
   {
      size_t const hit = s.find(sep, begin);
      pieces.push_back(s.substr(begin, hit - begin));

      // if maxsplit is reached, the remaining string is the last item
      if (pieces.size() >= maxsplit)
      {
	 pieces.back() = s.substr(begin);
	 break;
      }

      if (hit == std::string::npos)
	 break;
      begin = hit + sep.size();
   }
   return pieces;
}
1322 /*}}}*/
b2e465d6
AL
1323// RegexChoice - Simple regex list/list matcher /*{{{*/
1324// ---------------------------------------------------------------------
1325/* */
1326unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
1327 const char **ListEnd)
1328{
1329 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1330 R->Hit = false;
1331
1332 unsigned long Hits = 0;
ef74268b 1333 for (; ListBegin < ListEnd; ++ListBegin)
b2e465d6
AL
1334 {
1335 // Check if the name is a regex
1336 const char *I;
1337 bool Regex = true;
1338 for (I = *ListBegin; *I != 0; I++)
1339 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1340 break;
1341 if (*I == 0)
1342 Regex = false;
1343
1344 // Compile the regex pattern
1345 regex_t Pattern;
1346 if (Regex == true)
1347 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1348 REG_NOSUB) != 0)
1349 Regex = false;
1350
1351 // Search the list
1352 bool Done = false;
1353 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1354 {
1355 if (R->Str[0] == 0)
1356 continue;
1357
1358 if (strcasecmp(R->Str,*ListBegin) != 0)
1359 {
1360 if (Regex == false)
1361 continue;
1362 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1363 continue;
1364 }
1365 Done = true;
1366
1367 if (R->Hit == false)
1368 Hits++;
1369
1370 R->Hit = true;
1371 }
1372
1373 if (Regex == true)
1374 regfree(&Pattern);
1375
1376 if (Done == false)
1377 _error->Warning(_("Selection %s not found"),*ListBegin);
1378 }
1379
1380 return Hits;
1381}
1382 /*}}}*/
// {str,io}printf - C format string outputter to C++ strings/iostreams	/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters */

/* Formats into a temporary buffer of 'size' bytes and writes the result to
   'out' if it fits. Otherwise nothing is written, 'size' is updated to the
   size to try next and false is returned so that the caller can retry with
   a freshly started va_list. */
static bool iovprintf(std::ostream &out, const char *format,
		      va_list &args, ssize_t &size) {
   // RAII buffer: released on every path and an allocation failure is
   // reported as exception instead of formatting into a null pointer
   std::vector<char> buffer(static_cast<size_t>(size));
   ssize_t const n = vsnprintf(buffer.data(), buffer.size(), format, args);
   if (n > -1 && n < size) {
      out << buffer.data();
      return true;
   }

   if (n > -1)
      size = n + 1;	// we were told exactly how much is needed
   else
      size *= 2;	// no idea how much is needed, try a bigger buffer
   return false;
}
/* printf-style formatting with the result appended to the stream 'out' */
void ioprintf(std::ostream &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   while (true) {
      // the va_list is consumed by each attempt and needs to be restarted
      va_start(args,format);
      bool const ret = iovprintf(out, format, args, size);
      va_end(args);
      if (ret == true)
	 return;
   }
}
/* printf-style formatting with the result replacing the content of 'out' */
void strprintf(std::string &out,const char *format,...)
{
   va_list args;
   ssize_t size = 400;
   std::ostringstream outstr;
   while (true) {
      // the va_list is consumed by each attempt and needs to be restarted
      va_start(args,format);
      bool const ret = iovprintf(outstr, format, args, size);
      va_end(args);
      if (ret == true)
	 break;
   }
   out = outstr.str();
}
1432 /*}}}*/
1168596f
AL
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == End. This is a better alternative to using
   consecutive snprintfs: the returned pointer can be used as 'Buffer' of
   the next call. 'End' is returned if the output did not fit (or could
   not be produced at all). */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   // vsnprintf reports the length the complete output would have had.
   // Compare lengths rather than pointers as Buffer + Did could point
   // far beyond the buffer, which is not a valid pointer to form.
   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1454 /*}}}*/
cdb9307c
MV
// StripEpoch - Remove the version "epoch" from a version string	/*{{{*/
// ---------------------------------------------------------------------
/* Returns everything after the first colon, or the string as is if it
   has no colon at all. */
std::string StripEpoch(const std::string &VerStr)
{
   std::string::size_type const Colon = VerStr.find(':');
   return Colon == std::string::npos ? VerStr : VerStr.substr(Colon + 1);
}
69c2ecbd 1464 /*}}}*/
98eb4e9e 1465
4e86942a
MV
1466// tolower_ascii - tolower() function that ignores the locale /*{{{*/
1467// ---------------------------------------------------------------------
6dc60370 1468/* This little function is the most called method we have and tries
1e3f4083 1469 therefore to do the absolut minimum - and is notable faster than
6dc60370
DK
1470 standard tolower/toupper and as a bonus avoids problems with different
1471 locales - we only operate on ascii chars anyway. */
98eb4e9e 1472#undef tolower_ascii
390344f9 1473int tolower_ascii(int const c) APT_CONST APT_COLD;
6dc60370 1474int tolower_ascii(int const c)
4e86942a 1475{
98eb4e9e 1476 return tolower_ascii_inline(c);
4e86942a
MV
1477}
1478 /*}}}*/
1479
98b06343
JAK
1480// isspace_ascii - isspace() function that ignores the locale /*{{{*/
1481// ---------------------------------------------------------------------
1482/* This little function is one of the most called methods we have and tries
1483 therefore to do the absolut minimum - and is notable faster than
1484 standard isspace() and as a bonus avoids problems with different
1485 locales - we only operate on ascii chars anyway. */
98eb4e9e 1486#undef isspace_ascii
390344f9 1487int isspace_ascii(int const c) APT_CONST APT_COLD;
98b06343
JAK
1488int isspace_ascii(int const c)
1489{
98eb4e9e 1490 return isspace_ascii_inline(c);
98b06343
JAK
1491}
1492 /*}}}*/
1493
1e3f4083 1494// CheckDomainList - See if Host is in a , separate list /*{{{*/
f8081133 1495// ---------------------------------------------------------------------
1e3f4083 1496/* The domain list is a comma separate list of domains that are suffix
f8081133 1497 matched against the argument */
171c75f1 1498bool CheckDomainList(const string &Host,const string &List)
f8081133 1499{
47db8997 1500 string::const_iterator Start = List.begin();
f7f0d6c7 1501 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); ++Cur)
f8081133 1502 {
47db8997 1503 if (Cur < List.end() && *Cur != ',')
f8081133
AL
1504 continue;
1505
1506 // Match the end of the string..
e2c7e6b5 1507 if ((Host.size() >= (unsigned)(Cur - Start)) &&
f8081133 1508 Cur - Start != 0 &&
47db8997 1509 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
f8081133
AL
1510 return true;
1511
1512 Start = Cur + 1;
1513 }
1514 return false;
1515}
1516 /*}}}*/
b9179170
MV
// strv_length - Return the length of a NULL-terminated string array	/*{{{*/
// ---------------------------------------------------------------------
/* Counts the entries in front of the terminating NULL pointer */
size_t strv_length(const char **str_array)
{
   const char **cursor = str_array;
   while (*cursor != NULL)
      ++cursor;
   return static_cast<size_t>(cursor - str_array);
}
b8eba208 1528 /*}}}*/
// DeEscapeString - unescape (\0XX and \xXX) from a string		/*{{{*/
// ---------------------------------------------------------------------
/* Replaces \0XX (two octal digits) and \xXX (two hex digits) by the byte
   they encode and \\ by a single backslash. Any other escape is dropped
   together with its backslash, as is a lone backslash at the end of the
   input. An escape cut short by the end of the input is not decoded:
   the backslash and the type character are dropped, the rest is copied. */
std::string DeEscapeString(const std::string &input)
{
   std::string output;
   std::string::size_type const size = input.size();
   for (std::string::size_type i = 0; i < size; ++i)
   {
      // just copy non-escape chars
      if (input[i] != '\\')
      {
	 output += input[i];
	 continue;
      }

      // ensure we have a char to read
      if (i + 1 >= size)
	 continue;

      // read it
      ++i;
      switch (input[i])
      {
	 case '\\':
	    // deal with double escape
	    output += '\\';
	    break;
	 case '0':
	 case 'x':
	    // both digits have to be inside the string, otherwise we would
	    // read (and afterwards continue) beyond its end
	    if (size - i > 2) {
	       char const digits[3] = { input[i + 1], input[i + 2], '\0' };
	       int const base = (input[i] == 'x') ? 16 : 8;
	       output += static_cast<char>(strtol(digits, nullptr, base));
	       i += 2;
	    }
	    break;
	 default:
	    // FIXME: raise exception here?
	    break;
      }
   }
   return output;
}
1590 /*}}}*/
be4401bf 1591// URI::CopyFrom - Copy from an object /*{{{*/
93bf083d
AL
1592// ---------------------------------------------------------------------
1593/* This parses the URI into all of its components */
171c75f1 1594void URI::CopyFrom(const string &U)
93bf083d 1595{
5933aab2 1596 string::const_iterator I = U.begin();
93bf083d 1597
b2e465d6 1598 // Locate the first colon, this separates the scheme
f7f0d6c7 1599 for (; I < U.end() && *I != ':' ; ++I);
5933aab2 1600 string::const_iterator FirstColon = I;
93bf083d 1601
bfd22fc0
AL
1602 /* Determine if this is a host type URI with a leading double //
1603 and then search for the first single / */
5933aab2
AL
1604 string::const_iterator SingleSlash = I;
1605 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
bfd22fc0 1606 SingleSlash += 3;
67ff87bf
AL
1607
1608 /* Find the / indicating the end of the hostname, ignoring /'s in the
1609 square brackets */
1610 bool InBracket = false;
f7f0d6c7 1611 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); ++SingleSlash)
67ff87bf
AL
1612 {
1613 if (*SingleSlash == '[')
1614 InBracket = true;
1615 if (InBracket == true && *SingleSlash == ']')
1616 InBracket = false;
1617 }
1618
5933aab2
AL
1619 if (SingleSlash > U.end())
1620 SingleSlash = U.end();
93bf083d
AL
1621
1622 // We can now write the access and path specifiers
171c75f1 1623 Access.assign(U.begin(),FirstColon);
5933aab2 1624 if (SingleSlash != U.end())
171c75f1 1625 Path.assign(SingleSlash,U.end());
92e889c8
AL
1626 if (Path.empty() == true)
1627 Path = "/";
1628
93bf083d 1629 // Now we attempt to locate a user:pass@host fragment
d48c6a7d 1630 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
f46e7681
AL
1631 FirstColon += 3;
1632 else
1633 FirstColon += 1;
5933aab2 1634 if (FirstColon >= U.end())
93bf083d
AL
1635 return;
1636
1637 if (FirstColon > SingleSlash)
1638 FirstColon = SingleSlash;
1639
3856756b
AL
1640 // Find the colon...
1641 I = FirstColon + 1;
1d38d0e9
AL
1642 if (I > SingleSlash)
1643 I = SingleSlash;
a1f3ac8a
DK
1644
1645 // Search for the @ separating user:pass from host
1646 auto const RevAt = std::find(
1647 std::string::const_reverse_iterator(SingleSlash),
1648 std::string::const_reverse_iterator(I), '@');
1649 string::const_iterator const At = RevAt.base() == I ? SingleSlash : std::prev(RevAt.base());
1650 // and then look for the colon between user and pass
1651 string::const_iterator const SecondColon = std::find(I, At, ':');
1652
93bf083d
AL
1653 // Now write the host and user/pass
1654 if (At == SingleSlash)
1655 {
1656 if (FirstColon < SingleSlash)
171c75f1 1657 Host.assign(FirstColon,SingleSlash);
93bf083d
AL
1658 }
1659 else
1660 {
171c75f1 1661 Host.assign(At+1,SingleSlash);
436d7eab
DK
1662 // username and password must be encoded (RFC 3986)
1663 User.assign(DeQuoteString(FirstColon,SecondColon));
93bf083d 1664 if (SecondColon < At)
436d7eab 1665 Password.assign(DeQuoteString(SecondColon+1,At));
93bf083d
AL
1666 }
1667
67ff87bf
AL
1668 // Now we parse the RFC 2732 [] hostnames.
1669 unsigned long PortEnd = 0;
1670 InBracket = false;
1671 for (unsigned I = 0; I != Host.length();)
1672 {
1673 if (Host[I] == '[')
1674 {
1675 InBracket = true;
1676 Host.erase(I,1);
1677 continue;
1678 }
1679
1680 if (InBracket == true && Host[I] == ']')
1681 {
1682 InBracket = false;
1683 Host.erase(I,1);
1684 PortEnd = I;
1685 continue;
1686 }
1687 I++;
1688 }
1689
1690 // Tsk, weird.
1691 if (InBracket == true)
1692 {
171c75f1 1693 Host.clear();
67ff87bf
AL
1694 return;
1695 }
1696
1d38d0e9 1697 // Now we parse off a port number from the hostname
93bf083d
AL
1698 Port = 0;
1699 string::size_type Pos = Host.rfind(':');
67ff87bf 1700 if (Pos == string::npos || Pos < PortEnd)
93bf083d
AL
1701 return;
1702
1703 Port = atoi(string(Host,Pos+1).c_str());
171c75f1 1704 Host.assign(Host,0,Pos);
93bf083d
AL
1705}
1706 /*}}}*/
1707// URI::operator string - Convert the URI to a string /*{{{*/
1708// ---------------------------------------------------------------------
1709/* */
1710URI::operator string()
1711{
b8eba208
DK
1712 std::stringstream Res;
1713
54cf15cb 1714 if (Access.empty() == false)
b8eba208
DK
1715 Res << Access << ':';
1716
93bf083d 1717 if (Host.empty() == false)
b8eba208 1718 {
54cf15cb 1719 if (Access.empty() == false)
b8eba208
DK
1720 Res << "//";
1721
93bf083d
AL
1722 if (User.empty() == false)
1723 {
5b63d2a9
MV
1724 // FIXME: Technically userinfo is permitted even less
1725 // characters than these, but this is not conveniently
1726 // expressed with a blacklist.
b8eba208 1727 Res << QuoteString(User, ":/?#[]@");
93bf083d 1728 if (Password.empty() == false)
b8eba208
DK
1729 Res << ":" << QuoteString(Password, ":/?#[]@");
1730 Res << "@";
93bf083d 1731 }
b8eba208 1732
7834cb57 1733 // Add RFC 2732 escaping characters
b8eba208
DK
1734 if (Access.empty() == false && Host.find_first_of("/:") != string::npos)
1735 Res << '[' << Host << ']';
7834cb57 1736 else
b8eba208
DK
1737 Res << Host;
1738
492f957a 1739 if (Port != 0)
b58e2c7c 1740 Res << ':' << std::to_string(Port);
93bf083d 1741 }
b8eba208 1742
93bf083d 1743 if (Path.empty() == false)
492f957a
AL
1744 {
1745 if (Path[0] != '/')
b8eba208 1746 Res << "/" << Path;
492f957a 1747 else
b8eba208 1748 Res << Path;
492f957a 1749 }
b8eba208
DK
1750
1751 return Res.str();
93bf083d
AL
1752}
1753 /*}}}*/
b2e465d6 1754// URI::SiteOnly - Return the schema and site for the URI /*{{{*/
171c75f1 1755string URI::SiteOnly(const string &URI)
b2e465d6
AL
1756{
1757 ::URI U(URI);
171c75f1
MV
1758 U.User.clear();
1759 U.Password.clear();
1760 U.Path.clear();
b2e465d6
AL
1761 return U;
1762}
1763 /*}}}*/
1da3b7b8
DK
1764// URI::ArchiveOnly - Return the schema, site and cleaned path for the URI /*{{{*/
1765string URI::ArchiveOnly(const string &URI)
1766{
1767 ::URI U(URI);
1768 U.User.clear();
1769 U.Password.clear();
1770 if (U.Path.empty() == false && U.Path[U.Path.length() - 1] == '/')
1771 U.Path.erase(U.Path.length() - 1);
1772 return U;
1773}
1774 /*}}}*/
5e02df82 1775// URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
5e02df82
MV
1776string URI::NoUserPassword(const string &URI)
1777{
1778 ::URI U(URI);
1779 U.User.clear();
1780 U.Password.clear();
5e02df82
MV
1781 return U;
1782}
1783 /*}}}*/