]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
* cmdline/apt-get.cc:
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20 #include <apt-pkg/error.h>
21
22 #include <apti18n.h>
23
24 #include <ctype.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <algorithm>
28 #include <unistd.h>
29 #include <regex.h>
30 #include <errno.h>
31 #include <stdarg.h>
32 #include <iconv.h>
33
34 #include "config.h"
35
36 using namespace std;
37 /*}}}*/
38
39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
40 // ---------------------------------------------------------------------
41 /* This is handy to use before display some information for enduser */
42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
43 {
44 iconv_t cd;
45 const char *inbuf;
46 char *inptr, *outbuf, *outptr;
47 size_t insize, outsize;
48
49 cd = iconv_open(codeset, "UTF-8");
50 if (cd == (iconv_t)(-1)) {
51 // Something went wrong
52 if (errno == EINVAL)
53 _error->Error("conversion from 'UTF-8' to '%s' not available",
54 codeset);
55 else
56 perror("iconv_open");
57
58 // Clean the destination string
59 *dest = "";
60
61 return false;
62 }
63
64 insize = outsize = orig.size();
65 inbuf = orig.data();
66 inptr = (char *)inbuf;
67 outbuf = new char[insize+1];
68 outptr = outbuf;
69
70 iconv(cd, &inptr, &insize, &outptr, &outsize);
71 *outptr = '\0';
72
73 *dest = outbuf;
74 delete[] outbuf;
75
76 iconv_close(cd);
77
78 return true;
79 }
80 /*}}}*/
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* This is handy to use when parsing a file. Leading spaces and tabs are
   skipped by advancing the returned pointer; trailing spaces, tabs, \n and
   \r (as left over from fgets and company) are cut off by writing a
   terminator into the buffer.

   String must be a writable, null terminated buffer. The return value
   points into that same buffer. A string that is empty after the leading
   skip is returned without being written to. */
char *_strstrip(char *String)
{
   while (*String == ' ' || *String == '\t')
      String++;

   if (*String == 0)
      return String;

   /* Walk back from the terminator. Looking at End[-1] keeps End inside
      the string, it never moves below String. */
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' || End[-1] == '\n' ||
           End[-1] == '\r'))
      End--;
   *End = 0;
   return String;
}
99 /*}}}*/
// strtabexpand - Expands tabs to the next 8 column tab stop		/*{{{*/
// ---------------------------------------------------------------------
/* Replaces, in place, every tab in String by the number of spaces needed
   to reach the next multiple-of-8 column, taking the start of the string
   as column 0.

   String is a writable buffer of Len bytes holding a null terminated
   string. If an expansion would not fit into Len bytes (terminator
   included) the string is cut off at that tab. If no terminator is found
   within Len bytes the buffer is left untouched. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   if (String == 0 || Len == 0)
      return String;

   // Never look beyond the caller's buffer for the terminator
   const char *Nul = (const char *)memchr(String,0,Len);
   if (Nul == 0)
      return String;
   size_t Length = Nul - String;

   for (size_t Pos = 0; Pos < Length; )
   {
      if (String[Pos] != '\t')
      {
         Pos++;
         continue;
      }

      // Distance to the next tab stop, always between 1 and 8
      const size_t Width = 8 - (Pos % 8);

      // The string grows by Width - 1 and still needs its terminator
      if (Length + Width > Len)
      {
         String[Pos] = 0;
         return String;
      }

      // Shift the tail (terminator included) and fill the gap with blanks
      memmove(String + Pos + Width,String + Pos + 1,Length - Pos);
      memset(String + Pos,' ',Width);
      Length += Width - 1;
      Pos += Width;
   }
   return String;
}
134 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It also can
   understand [] brackets: white space inside "" or [] does not end the
   word.

   String is advanced past the word and the white space following it; Res
   receives the decoded word. Returns false (leaving String alone) if only
   blanks remain or a quote/bracket is not closed.

   The decoded word is limited to 1023 characters, longer words are
   truncated. Decoding starts at the incoming String position, so blanks
   skipped in front of the word are copied into Res too. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   for (;*C != 0 && *C == ' '; C++);
   if (*C == 0)
      return false;

   // Jump to the next word
   for (;*C != 0 && isspace((unsigned char)*C) == 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
      if (*C == '[')
      {
         for (C++; *C != 0 && *C != ']'; C++);
         if (*C == 0)
            return false;
      }
   }

   // Now de-quote characters, always leaving room for the terminator
   char Buffer[1024];
   char * const Limit = Buffer + sizeof(Buffer) - 1;
   char Tmp[3];
   const char *Start = String;
   char *I = Buffer;
   while (I < Limit && Start != C)
   {
      if (*Start == '%' && Start + 2 < C)
      {
         Tmp[0] = Start[1];
         Tmp[1] = Start[2];
         Tmp[2] = 0;
         *I++ = (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }

      // Quotes are dropped, everything else is copied
      if (*Start != '"')
         *I++ = *Start;
      Start++;
   }
   *I = 0;
   Res = Buffer;

   // Skip ending white space
   for (;*C != 0 && isspace((unsigned char)*C) != 0; C++);
   String = C;
   return true;
}
196 /*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* The input is a series of white space separated pieces, each enclosed in
   ""'s. The pieces are joined into Res with a single blank standing in
   for every run of white space between them. On success String is moved
   to the end of the input.

   Returns false if nothing but blanks is left, if the input is 1024
   characters or longer, if a quote is not closed or if a character other
   than white space shows up outside of quotes. */
bool ParseCWord(const char *&String,string &Res)
{
   // Step over the blanks in front
   const char *Cur = String;
   while (*Cur == ' ')
      ++Cur;
   if (*Cur == 0)
      return false;

   // Every input character yields at most one output character
   char Out[1024];
   if (strlen(String) >= sizeof(Out))
      return false;
   char *Fill = Out;

   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
         // Copy the quoted piece verbatim
         for (++Cur; *Cur != 0 && *Cur != '"'; ++Cur)
            *Fill++ = *Cur;

         if (*Cur == 0)
            return false;
      }
      else
      {
         const bool Blank = isspace(*Cur) != 0;
         const bool Repeated = Cur != String && Blank == true &&
                               isspace(Cur[-1]) != 0;
         if (Repeated == false)
         {
            // Only white space may separate the quoted pieces
            if (Blank == false)
               return false;
            *Fill++ = ' ';
         }
      }
      ++Cur;
   }

   *Fill = 0;
   Res = Out;
   String = Cur;
   return true;
}
238 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that is listed in Bad,
   is not printable, or lies outside of 0x21..0x7E is replaced by %xx,
   with xx being the two digit lower case hex value of the byte.

   The byte is formatted as an unsigned value, so characters of 0x80 and
   above come out as %80..%ff on platforms with a signed char as well.
   Note that '%' itself is only quoted when it is part of Bad. */
string QuoteString(const string &Str, const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      const unsigned char Ch = *I;
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[4];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
259 /*}}}*/
// DeQuoteString - Convert a string from quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* This undoes QuoteString: every %xx, with xx being two hex digits, is
   replaced by the byte of that value. A '%' that is not followed by two
   hex digits is copied through unchanged. */
string DeQuoteString(const string &Str)
{
   string Res;
   const string::size_type Len = Str.length();
   for (string::size_type I = 0; I < Len; ++I)
   {
      // Only decode when both digits exist and really are hex digits
      if (Str[I] == '%' && Len - I > 2 &&
          isxdigit((unsigned char)Str[I+1]) != 0 &&
          isxdigit((unsigned char)Str[I+2]) != 0)
      {
         char Tmp[3];
         Tmp[0] = Str[I+1];
         Tmp[1] = Str[I+2];
         Tmp[2] = 0;
         Res += (char)strtol(Tmp,0,16);
         I += 2;
         continue;
      }

      Res += Str[I];
   }
   return Res;
}
283
284 /*}}}*/
// SizeToStr - Convert a size into a human readable size		/*{{{*/
// ---------------------------------------------------------------------
/* The magnitude of Size is divided by 1000 until it drops below 10000 and
   is then printed followed by the matching unit letter (none, k, M, G, T,
   P, E, Z, Y). Scaled values below 100 are shown with one decimal, all
   others without decimals. The sign of Size is not shown.

   A max of 4 digits are shown before conversion to the next highest unit,
   so the result is at most 5 characters long unless the size needs more
   than 4 digits in YottaBytes (E24); such sizes are printed in full with
   the Y unit. */
string SizeToStr(double Size)
{
   char S[300];
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   const unsigned int Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   // Scale down, but never run past the largest known unit
   unsigned int I = 0;
   while (I < Last && ASize >= 10000)
   {
      ASize /= 1000.0;
      I++;
   }

   if (I != 0 && ASize < 100)
      snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);

   return S;
}
322 /*}}}*/
323 // TimeToStr - Convert the time into a string /*{{{*/
324 // ---------------------------------------------------------------------
325 /* Converts a number of seconds to a hms format */
326 string TimeToStr(unsigned long Sec)
327 {
328 char S[300];
329
330 while (1)
331 {
332 if (Sec > 60*60*24)
333 {
334 //d means days, h means hours, min means minutes, s means seconds
335 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
336 break;
337 }
338
339 if (Sec > 60*60)
340 {
341 //h means hours, min means minutes, s means seconds
342 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
343 break;
344 }
345
346 if (Sec > 60)
347 {
348 //min means minutes, s means seconds
349 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
350 break;
351 }
352
353 //s means seconds
354 sprintf(S,_("%lis"),Sec);
355 break;
356 }
357
358 return S;
359 }
360 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which all occurrences of Subst are replaced by
   Contents. The search goes from left to right and continues after each
   replaced occurrence, so replaced text is never searched again. Str is
   returned unchanged when Subst is empty or does not occur. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   // An empty pattern matches everywhere without ever advancing
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was replaced
   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
382
383 string SubstVar(string Str,const struct SubstVar *Vars)
384 {
385 for (; Vars->Subst != 0; Vars++)
386 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
387 return Str;
388 }
389 /*}}}*/
390 // URItoFileName - Convert the uri into a unique file name /*{{{*/
391 // ---------------------------------------------------------------------
392 /* This converts a URI into a safe filename. It quotes all unsafe characters
393 and converts / to _ and removes the scheme identifier. The resulting
394 file name should be unique and never occur again for a different file */
395 string URItoFileName(const string &URI)
396 {
397 // Nuke 'sensitive' items
398 ::URI U(URI);
399 U.User.clear();
400 U.Password.clear();
401 U.Access.clear();
402
403 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
404 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
405 replace(NewURI.begin(),NewURI.end(),'/','_');
406 return NewURI;
407 }
408 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string. It was ripped
   from wget and then patched and bug fixed.

   The bytes of S are handled as unsigned values, so input containing
   bytes of 0x80 and above is encoded correctly. The result is padded
   with = to a multiple of 4 characters.

   This spec can be found in rfc2045 */
string Base64Encode(const string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   // Pre-allocate some space
   const string::size_type Len = S.length();
   string Final;
   Final.reserve((4*Len + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (string::size_type I = 0; I < Len; I += 3)
   {
      // Number of input bytes available for this group (1, 2 or 3+)
      const string::size_type Left = Len - I;

      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[I];
      if (Left > 1)
         Bits[1] = S[I+1];
      if (Left > 2)
         Bits[2] = S[I+2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      if (Left == 1)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      if (Left == 2)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
465 /*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* Compares the ranges [A,AEnd) and [B,BEnd), neither of which has to be
   null terminated. Returns 0 if they are equal. At the first differing
   character the result is -1 if A's character is the smaller one and 1
   otherwise. If one range is a proper prefix of the other the result is
   1 when A is the shorter range and -1 when B is. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);
   if (DoneA == true || DoneB == true)
   {
      if (DoneA == true && DoneB == true)
         return 0;
      return (DoneA == true) ? 1 : -1;
   }

   return (*A < *B) ? -1 : 1;
}
486
487 #if __GNUC__ >= 3
/* Same comparison with A given as a string iterator range and B as a
   character pointer range. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);
   if (DoneA == true || DoneB == true)
   {
      if (DoneA == true && DoneB == true)
         return 0;
      return (DoneA == true) ? 1 : -1;
   }

   return (*A < *B) ? -1 : 1;
}
/* Same comparison with both A and B given as string iterator ranges. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);
   if (DoneA == true || DoneB == true)
   {
      if (DoneA == true && DoneB == true)
         return 0;
      return (DoneA == true) ? 1 : -1;
   }

   return (*A < *B) ? -1 : 1;
}
522 #endif
523 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* Works like stringcmp, except that both characters are passed through
   toupper before they are compared. Returns 0 for ranges that are equal
   ignoring case, -1 or 1 at the first difference, and 1 / -1 when A / B
   is a proper prefix of the other range. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the part that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);
   if (DoneA == true || DoneB == true)
   {
      if (DoneA == true && DoneB == true)
         return 0;
      return (DoneA == true) ? 1 : -1;
   }

   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
543 #if __GNUC__ >= 3
/* Same case insensitive comparison with A given as a string iterator
   range and B as a character pointer range. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  const char *B,const char *BEnd)
{
   // Step over the part that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);
   if (DoneA == true || DoneB == true)
   {
      if (DoneA == true && DoneB == true)
         return 0;
      return (DoneA == true) ? 1 : -1;
   }

   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
/* Same case insensitive comparison with both A and B given as string
   iterator ranges. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  string::const_iterator B,string::const_iterator BEnd)
{
   // Step over the part that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   const bool DoneA = (A == AEnd);
   const bool DoneB = (B == BEnd);
   if (DoneA == true || DoneB == true)
   {
      if (DoneA == true && DoneB == true)
         return 0;
      return (DoneA == true) ? 1 : -1;
   }

   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
578 #endif
579 /*}}}*/
580 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
581 // ---------------------------------------------------------------------
582 /* The format is like those used in package files and the method
583 communication system */
584 string LookupTag(const string &Message,const char *Tag,const char *Default)
585 {
586 // Look for a matching tag.
587 int Length = strlen(Tag);
588 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
589 {
590 // Found the tag
591 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
592 {
593 // Find the end of line and strip the leading/trailing spaces
594 string::const_iterator J;
595 I += Length + 1;
596 for (; isspace(*I) != 0 && I < Message.end(); I++);
597 for (J = I; *J != '\n' && J < Message.end(); J++);
598 for (; J > I && isspace(J[-1]) != 0; J--);
599
600 return string(I,J);
601 }
602
603 for (; *I != '\n' && I < Message.end(); I++);
604 }
605
606 // Failed to find a match
607 if (Default == 0)
608 return string();
609 return Default;
610 }
611 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. A leading number of 0 or 1 is returned as is.
   Otherwise several variants of true/false are checked without regard to
   case; Default is returned if none of them matches. */
int StringToBool(const string &Text,int Default)
{
   const char * const Raw = Text.c_str();

   // A number is accepted if it is 0 or 1
   char *Stop;
   const int Number = strtol(Raw,&Stop,0);
   if (Stop != Raw && Number >= 0 && Number <= 1)
      return Number;

   // Words that mean false
   static const char * const Negative[] = {"no","false","without","off","disable"};
   for (unsigned int I = 0; I != sizeof(Negative)/sizeof(Negative[0]); ++I)
      if (strcasecmp(Raw,Negative[I]) == 0)
         return 0;

   // Words that mean true
   static const char * const Positive[] = {"yes","true","with","on","enable"};
   for (unsigned int I = 0; I != sizeof(Positive)/sizeof(Positive[0]); ++I)
      if (strcasecmp(Raw,Positive[I]) == 0)
         return 1;

   return Default;
}
641 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, e.g.
   "Sun, 06 Nov 1994 08:49:37 GMT". The day and month names are fixed
   English abbreviations. An empty string is returned if gmtime() cannot
   represent Date. */
string TimeRFC1123(time_t Date)
{
   // gmtime() returns null for values it cannot break down
   const struct tm * const Broken = gmtime(&Date);
   if (Broken == 0)
      return string();
   const struct tm Conv = *Broken;

   const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                          "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
660 /*}}}*/
661 // ReadMessages - Read messages from the FD /*{{{*/
662 // ---------------------------------------------------------------------
663 /* This pulls full messages from the input FD into the message buffer.
664 It assumes that messages will not pause during transit so no
665 fancy buffering is used.
666
667 In particular: this reads blocks from the input until it believes
668 that it's run out of input text. Each block is terminated by a
669 double newline ('\n' followed by '\n'). As noted below, there is a
670 bug in this code: it assumes that all the blocks have been read if
671 it doesn't see additional text in the buffer after the last one is
672 parsed, which will cause it to lose blocks if the last block
673 coincides with the end of the buffer.
674 */
675 bool ReadMessages(int Fd, vector<string> &List)
676 {
677 char Buffer[64000];
678 char *End = Buffer;
679 // Represents any left-over from the previous iteration of the
680 // parse loop. (i.e., if a message is split across the end
681 // of the buffer, it goes here)
682 string PartialMessage;
683
684 while (1)
685 {
686 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
687 if (Res < 0 && errno == EINTR)
688 continue;
689
690 // Process is dead, this is kind of bad..
691 if (Res == 0)
692 return false;
693
694 // No data
695 if (Res < 0 && errno == EAGAIN)
696 return true;
697 if (Res < 0)
698 return false;
699
700 End += Res;
701
702 // Look for the end of the message
703 for (char *I = Buffer; I + 1 < End; I++)
704 {
705 if (I[0] != '\n' || I[1] != '\n')
706 continue;
707
708 // Pull the message out
709 string Message(Buffer,I-Buffer);
710 PartialMessage += Message;
711
712 // Fix up the buffer
713 for (; I < End && *I == '\n'; I++);
714 End -= I-Buffer;
715 memmove(Buffer,I,End-Buffer);
716 I = Buffer;
717
718 List.push_back(PartialMessage);
719 PartialMessage.clear();
720 }
721 if (End != Buffer)
722 {
723 // If there's text left in the buffer, store it
724 // in PartialMessage and throw the rest of the buffer
725 // away. This allows us to handle messages that
726 // are longer than the static buffer size.
727 PartialMessage += string(Buffer, End);
728 End = Buffer;
729 }
730 else
731 {
732 // BUG ALERT: if a message block happens to end at a
733 // multiple of 64000 characters, this will cause it to
734 // terminate early, leading to a badly formed block and
735 // probably crashing the method. However, this is the only
736 // way we have to find the end of the message block. I have
737 // an idea of how to fix this, but it will require changes
738 // to the protocol (essentially to mark the beginning and
739 // end of the block).
740 //
741 // -- dburrows 2008-04-02
742 return true;
743 }
744
745 if (WaitFd(Fd) == false)
746 return false;
747 }
748 }
749 /*}}}*/
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
   Made it a bit more robust with a few touppers though.

   Returns the zero based month number (0 = January) for an English month
   abbreviation. Only as many of the first three characters as are needed
   to tell the months apart are looked at, without regard to case. An
   unknown first letter yields 0. */
static int MonthConv(char *Month)
{
   const int First = toupper(*Month);

   // Apr or Aug
   if (First == 'A')
      return toupper(Month[1]) == 'P'?3:7;
   if (First == 'D')
      return 11;
   if (First == 'F')
      return 1;

   // Jan, Jun or Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return toupper(Month[2]) == 'N'?5:6;
   }

   // Mar or May
   if (First == 'M')
      return toupper(Month[2]) == 'R'?2:4;
   if (First == 'N')
      return 10;
   if (First == 'O')
      return 9;
   if (First == 'S')
      return 8;

   // Pretend it is January..
   return 0;
}
782 /*}}}*/
783 // timegm - Internal timegm function if gnu is not available /*{{{*/
784 // ---------------------------------------------------------------------
785 /* Ripped this evil little function from wget - I prefer the use of
786 GNU timegm if possible as this technique will have interesting problems
787 with leap seconds, timezones and other.
788
789 Converts struct tm to time_t, assuming the data in tm is UTC rather
790 than local timezone (mktime assumes the latter).
791
792 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
793 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
794
795 /* Turned it into an autoconf check, because GNU is not the only thing which
796 can provide timegm. -- 2002-09-22, Joel Baker */
797
798 #ifndef HAVE_TIMEGM // Now with autoconf!
799 static time_t timegm(struct tm *t)
800 {
801 time_t tl, tb;
802
803 tl = mktime (t);
804 if (tl == -1)
805 return -1;
806 tb = mktime (gmtime (&tl));
807 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
808 }
809 #endif
810 /*}}}*/
811 // StrToTime - Converts a string into a time_t /*{{{*/
812 // ---------------------------------------------------------------------
813 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
814 and the C library asctime format. It requires the GNU library function
815 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
816 reason the C library does not provide any such function :< This also
817 handles the weird, but unambiguous FTP time format*/
818 bool StrToTime(const string &Val,time_t &Result)
819 {
820 struct tm Tm;
821 char Month[10];
822 const char *I = Val.c_str();
823
824 // Skip the day of the week
825 for (;*I != 0 && *I != ' '; I++);
826
827 // Handle RFC 1123 time
828 Month[0] = 0;
829 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
830 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
831 {
832 // Handle RFC 1036 time
833 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
834 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
835 Tm.tm_year += 1900;
836 else
837 {
838 // asctime format
839 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
840 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
841 {
842 // 'ftp' time
843 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
844 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
845 return false;
846 Tm.tm_mon--;
847 }
848 }
849 }
850
851 Tm.tm_isdst = 0;
852 if (Month[0] != 0)
853 Tm.tm_mon = MonthConv(Month);
854 Tm.tm_year -= 1900;
855
856 // Convert to local time and then to GMT
857 Result = timegm(&Tm);
858 return true;
859 }
860 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. The first Len characters of Str, which do not have to
   be null terminated, are parsed as an unsigned number in the given Base.

   A field that is empty or consists of spaces only yields 0 and true.
   Returns false, with Res set to 0, if Len is 30 or more or if the field
   does not start with a number. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // Work on a terminated copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *Scan = Field;
   while (*Scan == ' ')
      ++Scan;
   if (*Scan == 0)
      return true;

   char *Stop;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
887 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num. Returns the value 0..15 of the hex digit c, which
   may be in either case. Anything that is not a hex digit yields 0. */
static int HexDigit(int c)
{
   if (c >= '0' && c <= '9')
      return c - '0';

   /* Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves 'a'..'f' as
      they are; no other value ends up in that range. */
   const int Folded = c | 0x20;
   if (Folded >= 'a' && Folded <= 'f')
      return Folded - 'a' + 10;

   return 0;
}
901 /*}}}*/
// Hex2Num - Convert a long hex number into a buffer			/*{{{*/
// ---------------------------------------------------------------------
/* Stores the bytes described by the hex string Str in Num, in the same
   order as they appear in the string. The length of the buffer must be
   exactly 1/2 the length of the string, otherwise false is returned and
   nothing is written. False is also returned at the first character that
   is not a hex digit; the bytes before it have been written by then. */
bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
{
   if (Str.length() != Length*2)
      return false;

   // Two digits make up one byte, the first one being the high nibble
   unsigned int Out = 0;
   string::const_iterator Cur = Str.begin();
   while (Cur != Str.end())
   {
      if (isxdigit(Cur[0]) == 0 || isxdigit(Cur[1]) == 0)
         return false;

      // Both are known to be hex digits now: 0-9, a-f or A-F
      const int High = (Cur[0] <= '9') ? Cur[0] - '0' : (Cur[0] | 0x20) - 'a' + 10;
      const int Low = (Cur[1] <= '9') ? Cur[1] - '0' : (Cur[1] | 0x20) - 'a' + 10;

      Num[Out] = High << 4;
      Num[Out] += Low;

      ++Out;
      Cur += 2;
   }

   return true;
}
923 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memory. Input is changed to insert nulls at the end of each piece and
   List is filled with pointers to the pieces, followed by a null entry.
   White space around the pieces is dropped.

   Returns false, with a null in the last slot of List, as soon as the
   number of pieces reaches ListMax. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Strip any leading spaces
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   while (*Start != 0 && isspace(*Start) != 0)
      ++Start;

   unsigned long Count = 0;
   char *Pos = Start;
   while (Pos != Stop)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         ++Pos;

      // Terminate the piece in front of any trailing spaces
      char *End = Pos;
      while (End > Start && (End[-1] == Tok || isspace(End[-1]) != 0))
         --End;
      *End = 0;

      List[Count] = Start;
      ++Count;
      if (Count >= ListMax)
      {
         List[Count-1] = 0;
         return false;
      }

      // Advance pos over tokens, spaces and the nulls written above
      while (Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0))
         ++Pos;
      Start = Pos;
   }

   List[Count] = 0;
   return true;
}
963 /*}}}*/
964 // RegexChoice - Simple regex list/list matcher /*{{{*/
965 // ---------------------------------------------------------------------
966 /* */
967 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
968 const char **ListEnd)
969 {
970 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
971 R->Hit = false;
972
973 unsigned long Hits = 0;
974 for (; ListBegin != ListEnd; ListBegin++)
975 {
976 // Check if the name is a regex
977 const char *I;
978 bool Regex = true;
979 for (I = *ListBegin; *I != 0; I++)
980 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
981 break;
982 if (*I == 0)
983 Regex = false;
984
985 // Compile the regex pattern
986 regex_t Pattern;
987 if (Regex == true)
988 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
989 REG_NOSUB) != 0)
990 Regex = false;
991
992 // Search the list
993 bool Done = false;
994 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
995 {
996 if (R->Str[0] == 0)
997 continue;
998
999 if (strcasecmp(R->Str,*ListBegin) != 0)
1000 {
1001 if (Regex == false)
1002 continue;
1003 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1004 continue;
1005 }
1006 Done = true;
1007
1008 if (R->Hit == false)
1009 Hits++;
1010
1011 R->Hit = true;
1012 }
1013
1014 if (Regex == true)
1015 regfree(&Pattern);
1016
1017 if (Done == false)
1018 _error->Warning(_("Selection %s not found"),*ListBegin);
1019 }
1020
1021 return Hits;
1022 }
1023 /*}}}*/
// ioprintf - C format string outputter to C++ iostreams		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The arguments are formatted like
   printf would and the result is written to out.

   Results of up to 4095 characters are formatted on the stack, longer ones
   in a buffer of the size reported by vsnprintf. Nothing is written if
   vsnprintf reports an error. */
void ioprintf(ostream &out,const char *format,...)
{
   char S[4096];
   va_list args;

   // sprintf the description
   va_start(args,format);
   const int Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
      return;

   if ((size_t)Needed < sizeof(S))
   {
      out << S;
      return;
   }

   // Did not fit, format again into a buffer of the right size
   string Big(Needed + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << Big.c_str();
}
1038 /*}}}*/
// strprintf - C format string outputter to C++ strings		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The arguments are formatted like
   printf would and the result replaces the contents of out.

   Results of up to 4095 characters are formatted on the stack, longer ones
   in a buffer of the size reported by vsnprintf. out is emptied if
   vsnprintf reports an error. */
void strprintf(string &out,const char *format,...)
{
   char S[4096];
   va_list args;

   // sprintf the description
   va_start(args,format);
   const int Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
   {
      out.clear();
      return;
   }

   if ((size_t)Needed < sizeof(S))
   {
      out = string(S);
      return;
   }

   // Did not fit, format again into a buffer of the right size
   string Big(Needed + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out = Big.c_str();
}
1053 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs.

   End is returned if there is no room at all (End <= Buffer), if the
   output had to be truncated or if vsnprintf reports an error. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   // Signed, so that an error return of vsnprintf can be told apart
   const int Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1075 /*}}}*/
1076
// tolower_ascii - tolower() function that ignores the locale		/*{{{*/
// ---------------------------------------------------------------------
/* Maps 'A'..'Z' to 'a'..'z' and returns every other value unchanged. */
int tolower_ascii(int c)
{
   const bool Upper = (c >= 'A' && c <= 'Z');
   return (Upper == true) ? c + 32 : c;
}
1086 /*}}}*/
1087
1088 // CheckDomainList - See if Host is in a , seperate list /*{{{*/
1089 // ---------------------------------------------------------------------
1090 /* The domain list is a comma seperate list of domains that are suffix
1091 matched against the argument */
1092 bool CheckDomainList(const string &Host,const string &List)
1093 {
1094 string::const_iterator Start = List.begin();
1095 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1096 {
1097 if (Cur < List.end() && *Cur != ',')
1098 continue;
1099
1100 // Match the end of the string..
1101 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1102 Cur - Start != 0 &&
1103 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1104 return true;
1105
1106 Start = Cur + 1;
1107 }
1108 return false;
1109 }
1110 /*}}}*/
1111
1112 // URI::CopyFrom - Copy from an object /*{{{*/
1113 // ---------------------------------------------------------------------
1114 /* This parses the URI into all of its components */
1115 void URI::CopyFrom(const string &U)
1116 {
1117 string::const_iterator I = U.begin();
1118
1119 // Locate the first colon, this separates the scheme
1120 for (; I < U.end() && *I != ':' ; I++);
1121 string::const_iterator FirstColon = I;
1122
1123 /* Determine if this is a host type URI with a leading double //
1124 and then search for the first single / */
1125 string::const_iterator SingleSlash = I;
1126 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1127 SingleSlash += 3;
1128
1129 /* Find the / indicating the end of the hostname, ignoring /'s in the
1130 square brackets */
1131 bool InBracket = false;
1132 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1133 {
1134 if (*SingleSlash == '[')
1135 InBracket = true;
1136 if (InBracket == true && *SingleSlash == ']')
1137 InBracket = false;
1138 }
1139
1140 if (SingleSlash > U.end())
1141 SingleSlash = U.end();
1142
1143 // We can now write the access and path specifiers
1144 Access.assign(U.begin(),FirstColon);
1145 if (SingleSlash != U.end())
1146 Path.assign(SingleSlash,U.end());
1147 if (Path.empty() == true)
1148 Path = "/";
1149
1150 // Now we attempt to locate a user:pass@host fragment
1151 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1152 FirstColon += 3;
1153 else
1154 FirstColon += 1;
1155 if (FirstColon >= U.end())
1156 return;
1157
1158 if (FirstColon > SingleSlash)
1159 FirstColon = SingleSlash;
1160
1161 // Find the colon...
1162 I = FirstColon + 1;
1163 if (I > SingleSlash)
1164 I = SingleSlash;
1165 for (; I < SingleSlash && *I != ':'; I++);
1166 string::const_iterator SecondColon = I;
1167
1168 // Search for the @ after the colon
1169 for (; I < SingleSlash && *I != '@'; I++);
1170 string::const_iterator At = I;
1171
1172 // Now write the host and user/pass
1173 if (At == SingleSlash)
1174 {
1175 if (FirstColon < SingleSlash)
1176 Host.assign(FirstColon,SingleSlash);
1177 }
1178 else
1179 {
1180 Host.assign(At+1,SingleSlash);
1181 User.assign(FirstColon,SecondColon);
1182 if (SecondColon < At)
1183 Password.assign(SecondColon+1,At);
1184 }
1185
1186 // Now we parse the RFC 2732 [] hostnames.
1187 unsigned long PortEnd = 0;
1188 InBracket = false;
1189 for (unsigned I = 0; I != Host.length();)
1190 {
1191 if (Host[I] == '[')
1192 {
1193 InBracket = true;
1194 Host.erase(I,1);
1195 continue;
1196 }
1197
1198 if (InBracket == true && Host[I] == ']')
1199 {
1200 InBracket = false;
1201 Host.erase(I,1);
1202 PortEnd = I;
1203 continue;
1204 }
1205 I++;
1206 }
1207
1208 // Tsk, weird.
1209 if (InBracket == true)
1210 {
1211 Host.clear();
1212 return;
1213 }
1214
1215 // Now we parse off a port number from the hostname
1216 Port = 0;
1217 string::size_type Pos = Host.rfind(':');
1218 if (Pos == string::npos || Pos < PortEnd)
1219 return;
1220
1221 Port = atoi(string(Host,Pos+1).c_str());
1222 Host.assign(Host,0,Pos);
1223 }
1224 /*}}}*/
1225 // URI::operator string - Convert the URI to a string /*{{{*/
1226 // ---------------------------------------------------------------------
1227 /* */
1228 URI::operator string()
1229 {
1230 string Res;
1231
1232 if (Access.empty() == false)
1233 Res = Access + ':';
1234
1235 if (Host.empty() == false)
1236 {
1237 if (Access.empty() == false)
1238 Res += "//";
1239
1240 if (User.empty() == false)
1241 {
1242 Res += User;
1243 if (Password.empty() == false)
1244 Res += ":" + Password;
1245 Res += "@";
1246 }
1247
1248 // Add RFC 2732 escaping characters
1249 if (Access.empty() == false &&
1250 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1251 Res += '[' + Host + ']';
1252 else
1253 Res += Host;
1254
1255 if (Port != 0)
1256 {
1257 char S[30];
1258 sprintf(S,":%u",Port);
1259 Res += S;
1260 }
1261 }
1262
1263 if (Path.empty() == false)
1264 {
1265 if (Path[0] != '/')
1266 Res += "/" + Path;
1267 else
1268 Res += Path;
1269 }
1270
1271 return Res;
1272 }
1273 /*}}}*/
1274 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1275 // ---------------------------------------------------------------------
1276 /* */
1277 string URI::SiteOnly(const string &URI)
1278 {
1279 ::URI U(URI);
1280 U.User.clear();
1281 U.Password.clear();
1282 U.Path.clear();
1283 U.Port = 0;
1284 return U;
1285 }
1286 /*}}}*/