]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
a69cf01eca77ef1d449eea7b16398e57e896274b
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20 #include <apt-pkg/error.h>
21
22 #include <apti18n.h>
23
24 #include <ctype.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <algorithm>
28 #include <unistd.h>
29 #include <regex.h>
30 #include <errno.h>
31 #include <stdarg.h>
32 #include <iconv.h>
33
34 #include "config.h"
35
36 using namespace std;
37 /*}}}*/
38
39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
40 // ---------------------------------------------------------------------
41 /* This is handy to use before display some information for enduser */
42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
43 {
44 iconv_t cd;
45 const char *inbuf;
46 char *inptr, *outbuf, *outptr;
47 size_t insize, outsize;
48
49 cd = iconv_open(codeset, "UTF-8");
50 if (cd == (iconv_t)(-1)) {
51 // Something went wrong
52 if (errno == EINVAL)
53 _error->Error("conversion from 'UTF-8' to '%s' not available",
54 codeset);
55 else
56 perror("iconv_open");
57
58 // Clean the destination string
59 *dest = "";
60
61 return false;
62 }
63
64 insize = outsize = orig.size();
65 inbuf = orig.data();
66 inptr = (char *)inbuf;
67 outbuf = new char[insize+1];
68 outptr = outbuf;
69
70 iconv(cd, &inptr, &insize, &outptr, &outsize);
71 *outptr = '\0';
72
73 *dest = outbuf;
74 delete[] outbuf;
75
76 iconv_close(cd);
77
78 return true;
79 }
80 /*}}}*/
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* This is handy to use when parsing a file. Leading spaces and tabs are
   skipped; trailing spaces, tabs, \n's and \r's (as left over from fgets
   and company) are cut off by writing a terminator into String. The
   returned pointer points into the caller's buffer. */
char *_strstrip(char *String)
{
   while (*String == ' ' || *String == '\t')
      String++;

   if (*String == 0)
      return String;

   // Walk back from the terminator; End never moves in front of String
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' ||
           End[-1] == '\n' || End[-1] == '\r'))
      End--;
   *End = 0;
   return String;
}
99 /*}}}*/
// strtabexpand - Converts tabs into 8 spaces				/*{{{*/
// ---------------------------------------------------------------------
/* Every tab in String is replaced, in place, by the number of spaces
   needed to reach the next 8 character division, taking the start of the
   string as column 0. Len is the size of the buffer holding String. If an
   expansion would not fit into the buffer the string is cut off at that
   tab instead. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   char * const BufEnd = String + Len;
   for (char *I = String; I != BufEnd && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      // Number of spaces up to the next tab stop (1..8)
      size_t const Pad = 8 - ((I - String) % 8);
      // Text following the tab, including the terminator
      size_t const Tail = strlen(I + 1) + 1;

      // No room for the padding plus the rest of the string
      if ((size_t)(BufEnd - I) < Pad + Tail)
      {
         *I = 0;
         return String;
      }

      memmove(I + Pad,I + 1,Tail);
      for (size_t J = 0; J != Pad; J++)
         I[J] = ' ';

      // Continue with the first character after the padding
      I += Pad - 1;
   }
   return String;
}
134 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing. It also can
   understand [] brackets.

   Returns false, leaving String and Res alone, if there is no word or a
   quote/bracket is not closed. On success String points behind the word
   and the white space following it. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the next word; white space inside "" and [] does not end it
   for (; *C != 0 && isspace((unsigned char)*C) == 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
      if (*C == '[')
      {
         for (C++; *C != 0 && *C != ']'; C++);
         if (*C == 0)
            return false;
      }
   }

   /* Now de-quote characters. The word is collected in a string, so there
      is no length limit. Note that this starts at String, so any leading
      spaces skipped above are part of the result. */
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C;)
   {
      if (*Start == '%' && Start + 2 < C)
      {
         char Tmp[3];
         Tmp[0] = Start[1];
         Tmp[1] = Start[2];
         Tmp[2] = 0;
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      Start++;
   }

   // As before, the result ends at the first decoded NUL character
   Res = Word.c_str();

   // Skip ending white space
   for (; *C != 0 && isspace((unsigned char)*C) != 0; C++);
   String = C;
   return true;
}
196 /*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of space separated strings enclosed in ""'s.
   It concatenates the ""'s into a single string; a run of white space
   between them becomes a single space.

   Returns false, leaving String and Res alone, if the input is empty,
   contains text outside of quotes or has an unterminated quote. On
   success String points at the end of the input. */
bool ParseCWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Collected in a string so the input may be of any length
   string Word;
   Word.reserve(strlen(String));

   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++)
            Word += *C;

         if (*C == 0)
            return false;

         continue;
      }

      // Only the first white space character of a run is turned into a space
      if (C != String && isspace((unsigned char)*C) != 0 &&
          isspace((unsigned char)C[-1]) != 0)
         continue;
      if (isspace((unsigned char)*C) == 0)
         return false;
      Word += ' ';
   }

   Res = Word;
   String = C;
   return true;
}
238 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Every character of Str that is listed in Bad, is not printable, or lies
   outside of the range 0x21..0x7E is replaced by a % followed by its two
   digit lower case hex value. All other characters are copied as is. */
string QuoteString(const string &Str, const char *Bad)
{
   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the unsigned value; a negative char would be sign extended
         by the format below and give more than two hex digits */
      unsigned char const Ch = *I;
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[10];
         sprintf(Buf,"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
259 /*}}}*/
// DeQuoteString - Convert a string from quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* This undoes QuoteString: a % that is followed by at least two more
   characters is replaced, together with those two characters, by the
   character whose hex value they give. Everything else is copied. */
string DeQuoteString(const string &Str)
{
   string Res;
   string::const_iterator I = Str.begin();
   while (I != Str.end())
   {
      bool const Escape = (*I == '%' && I + 2 < Str.end());
      if (Escape == false)
      {
         Res += *I;
         I++;
         continue;
      }

      char Hex[3];
      Hex[0] = I[1];
      Hex[1] = I[2];
      Hex[2] = 0;
      Res += (char)strtol(Hex,0,16);

      // Step over the % and both digits
      I += 3;
   }
   return Res;
}
283
284 /*}}}*/
// SizeToStr - Convert a long into a human readable size		/*{{{*/
// ---------------------------------------------------------------------
/* A max of 4 digits are shown before conversion to the next highest unit.
   The max length of the string will be 5 chars unless the size is > 10
   YottaBytes (E24). The sign of Size is dropped. Values too large even for
   the Yotta unit are printed with that unit and as many digits as needed. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   unsigned int const Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   /* Scale down until at most 4 digits are left, but never step past the
      last unit so that there is always something to print */
   unsigned int I = 0;
   while (I < Last && !(ASize < 10000))
   {
      ASize /= 1000.0;
      I++;
   }

   // Large enough even for the biggest double after scaling to Yotta
   char S[400];
   if (ASize < 100 && I != 0)
      sprintf(S,"%.1f%c",ASize,Ext[I]);
   else
      sprintf(S,"%.0f%c",ASize,Ext[I]);

   return S;
}
322 /*}}}*/
323 // TimeToStr - Convert the time into a string /*{{{*/
324 // ---------------------------------------------------------------------
325 /* Converts a number of seconds to a hms format */
326 string TimeToStr(unsigned long Sec)
327 {
328 char S[300];
329
330 while (1)
331 {
332 if (Sec > 60*60*24)
333 {
334 //d means days, h means hours, min means minutes, s means seconds
335 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
336 break;
337 }
338
339 if (Sec > 60*60)
340 {
341 //h means hours, min means minutes, s means seconds
342 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
343 break;
344 }
345
346 if (Sec > 60)
347 {
348 //min means minutes, s means seconds
349 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
350 break;
351 }
352
353 //s means seconds
354 sprintf(S,_("%lis"),Sec);
355 break;
356 }
357
358 return S;
359 }
360 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and returns
   the result. The search continues behind each replacement, so Contents
   itself is never searched. An empty Subst matches nothing. */
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   // An empty pattern would be found at every position without advancing
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was replaced
   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
382
383 string SubstVar(string Str,const struct SubstVar *Vars)
384 {
385 for (; Vars->Subst != 0; Vars++)
386 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
387 return Str;
388 }
389 /*}}}*/
390 // URItoFileName - Convert the uri into a unique file name /*{{{*/
391 // ---------------------------------------------------------------------
392 /* This converts a URI into a safe filename. It quotes all unsafe characters
393 and converts / to _ and removes the scheme identifier. The resulting
394 file name should be unique and never occur again for a different file */
395 string URItoFileName(const string &URI)
396 {
397 // Nuke 'sensitive' items
398 ::URI U(URI);
399 U.User.clear();
400 U.Password.clear();
401 U.Access.clear();
402
403 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
404 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
405 replace(NewURI.begin(),NewURI.end(),'/','_');
406 return NewURI;
407 }
408 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string. It was ripped
   from wget and then patched and bug fixed.

   The result is padded with = to a multiple of 4 characters and contains
   no line breaks. The spec can be found in rfc2045 */
string Base64Encode(const string &S)
{
   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   string::size_type const Len = S.length();
   for (string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      /* The bytes have to be unsigned: with a plain (signed) char any
         value above 0x7F would shift to a negative table index */
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = S[Pos];
      if (Pos + 1 < Len)
         Bits[1] = S[Pos + 1];
      if (Pos + 2 < Len)
         Bits[2] = S[Pos + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // Only one input byte was left
      if (Pos + 1 >= Len)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Only two input bytes were left
      if (Pos + 2 >= Len)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
465 /*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* This safely compares two non-null terminated strings of arbitrary
   length, given as [A,AEnd) and [B,BEnd). Returns 0 if they are equal,
   otherwise -1 or 1 by the first differing character. Note that if one
   string is a proper prefix of the other, 1 is returned when A is the
   shorter one and -1 when B is. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      A++;
      B++;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true && BDone == true)
      return 0;
   if (ADone == true)
      return 1;
   if (BDone == true)
      return -1;
   return (*A < *B) ? -1 : 1;
}
486
487 #if __GNUC__ >= 3
/* Same comparison for a string iterator range [A,AEnd) against a plain
   character range [B,BEnd). As a proper prefix A yields 1, B yields -1. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
/* Same comparison for two string iterator ranges [A,AEnd) and [B,BEnd).
   As a proper prefix A yields 1, B yields -1. */
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   // Find the first position at which the two ranges differ
   for (; A != AEnd && B != BEnd && *A == *B; ++A, ++B);

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   if (*A < *B)
      return -1;
   return 1;
}
522 #endif
523 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* Compares [A,AEnd) with [B,BEnd) after passing each character through
   toupper. Returns 0 if they are equal, otherwise -1 or 1 by the first
   differing character. If one string is a proper prefix of the other, 1 is
   returned when A is the shorter one and -1 when B is. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the part that is equal when ignoring case
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      A++;
      B++;
   }

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true && BDone == true)
      return 0;
   if (ADone == true)
      return 1;
   if (BDone == true)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
543 #if __GNUC__ >= 3
/* Case insensitive comparison of a string iterator range [A,AEnd) against
   a plain character range [B,BEnd). As a proper prefix A yields 1, B
   yields -1. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  const char *B,const char *BEnd)
{
   // Step over the part that is equal when ignoring case
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
/* Case insensitive comparison of two string iterator ranges [A,AEnd) and
   [B,BEnd). As a proper prefix A yields 1, B yields -1. */
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  string::const_iterator B,string::const_iterator BEnd)
{
   // Find the first position at which the ranges differ, ignoring case
   for (; A != AEnd && B != BEnd && toupper(*A) == toupper(*B); ++A, ++B);

   bool const ADone = (A == AEnd);
   bool const BDone = (B == BEnd);
   if (ADone == true)
      return (BDone == true) ? 0 : 1;
   if (BDone == true)
      return -1;
   if (toupper(*A) < toupper(*B))
      return -1;
   return 1;
}
578 #endif
579 /*}}}*/
580 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
581 // ---------------------------------------------------------------------
582 /* The format is like those used in package files and the method
583 communication system */
584 string LookupTag(const string &Message,const char *Tag,const char *Default)
585 {
586 // Look for a matching tag.
587 int Length = strlen(Tag);
588 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
589 {
590 // Found the tag
591 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
592 {
593 // Find the end of line and strip the leading/trailing spaces
594 string::const_iterator J;
595 I += Length + 1;
596 for (; isspace(*I) != 0 && I < Message.end(); I++);
597 for (J = I; *J != '\n' && J < Message.end(); J++);
598 for (; J > I && isspace(J[-1]) != 0; J--);
599
600 return string(I,J);
601 }
602
603 for (; *I != '\n' && I < Message.end(); I++);
604 }
605
606 // Failed to find a match
607 if (Default == 0)
608 return string();
609 return Default;
610 }
611 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Text starting with a number that is 0 or 1
   gives that number. Otherwise several variants of true/false are checked,
   ignoring case. Default is returned if nothing matches. */
int StringToBool(const string &Text,int Default)
{
   const char * const Str = Text.c_str();

   // A leading number counts if it is 0 or 1
   char *End;
   int const Num = strtol(Str,&End,0);
   if (End != Str && Num >= 0 && Num <= 1)
      return Num;

   static const char * const Negatives[] = {"no","false","without","off","disable"};
   static const char * const Positives[] = {"yes","true","with","on","enable"};

   // Check for negatives
   for (unsigned int I = 0; I != sizeof(Negatives)/sizeof(Negatives[0]); I++)
      if (strcasecmp(Str,Negatives[I]) == 0)
         return 0;

   // Check for positives
   for (unsigned int I = 0; I != sizeof(Positives)/sizeof(Positives[0]); I++)
      if (strcasecmp(Str,Positives[I]) == 0)
         return 1;

   return Default;
}
641 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, for example
   "Sun, 06 Nov 1994 08:49:37 GMT". An empty string is returned if Date
   can not be converted to a broken-down time. */
string TimeRFC1123(time_t Date)
{
   /* gmtime_r is used as it neither shares a static result buffer nor
      leaves us with a pointer that has to be checked before the copy */
   struct tm Conv;
   if (gmtime_r(&Date,&Conv) == NULL)
      return string();

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
           Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
           Conv.tm_min,Conv.tm_sec);
   return Buf;
}
660 /*}}}*/
661 // ReadMessages - Read messages from the FD /*{{{*/
662 // ---------------------------------------------------------------------
663 /* This pulls full messages from the input FD into the message buffer.
664 It assumes that messages will not pause during transit so no
665 fancy buffering is used.
666
667 In particular: this reads blocks from the input until it believes
668 that it's run out of input text. Each block is terminated by a
669 double newline ('\n' followed by '\n'). As noted below, there is a
670 bug in this code: it assumes that all the blocks have been read if
671 it doesn't see additional text in the buffer after the last one is
672 parsed, which will cause it to lose blocks if the last block
673 coincides with the end of the buffer.
674 */
675 bool ReadMessages(int Fd, vector<string> &List)
676 {
677 char Buffer[64000];
678 char *End = Buffer;
679 // Represents any left-over from the previous iteration of the
680 // parse loop. (i.e., if a message is split across the end
681 // of the buffer, it goes here)
682 string PartialMessage;
683
684 while (1)
685 {
686 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
687 if (Res < 0 && errno == EINTR)
688 continue;
689
690 // Process is dead, this is kind of bad..
691 if (Res == 0)
692 return false;
693
694 // No data
695 if (Res < 0 && errno == EAGAIN)
696 return true;
697 if (Res < 0)
698 return false;
699
700 End += Res;
701
702 // Look for the end of the message
703 for (char *I = Buffer; I + 1 < End; I++)
704 {
705 if (I[0] != '\n' || I[1] != '\n')
706 continue;
707
708 // Pull the message out
709 string Message(Buffer,I-Buffer);
710 PartialMessage += Message;
711
712 // Fix up the buffer
713 for (; I < End && *I == '\n'; I++);
714 End -= I-Buffer;
715 memmove(Buffer,I,End-Buffer);
716 I = Buffer;
717
718 List.push_back(PartialMessage);
719 PartialMessage.clear();
720 }
721 if (End != Buffer)
722 {
723 // If there's text left in the buffer, store it
724 // in PartialMessage and throw the rest of the buffer
725 // away. This allows us to handle messages that
726 // are longer than the static buffer size.
727 PartialMessage += string(Buffer, End);
728 End = Buffer;
729 }
730 else
731 {
732 // BUG ALERT: if a message block happens to end at a
733 // multiple of 64000 characters, this will cause it to
734 // terminate early, leading to a badly formed block and
735 // probably crashing the method. However, this is the only
736 // way we have to find the end of the message block. I have
737 // an idea of how to fix this, but it will require changes
738 // to the protocol (essentially to mark the beginning and
739 // end of the block).
740 //
741 // -- dburrows 2008-04-02
742 return true;
743 }
744
745 if (WaitFd(Fd) == false)
746 return false;
747 }
748 }
749 /*}}}*/
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
   Made it a bit more robust with a few touppers though.

   Returns the month number (0 for January up to 11 for December) for an
   English month abbreviation. Only as many of the first three characters
   are looked at as are needed to tell the months apart, ignoring case. */
static int MonthConv(char *Month)
{
   int const First = toupper(*Month);

   // Apr or Aug
   if (First == 'A')
      return (toupper(Month[1]) == 'P') ? 3 : 7;
   if (First == 'D')
      return 11;
   if (First == 'F')
      return 1;
   // Jan, Jun or Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }
   // Mar or May
   if (First == 'M')
      return (toupper(Month[2]) == 'R') ? 2 : 4;
   if (First == 'N')
      return 10;
   if (First == 'O')
      return 9;
   if (First == 'S')
      return 8;

   // Pretend it is January..
   return 0;
}
782 /*}}}*/
783 // timegm - Internal timegm function if gnu is not available /*{{{*/
784 // ---------------------------------------------------------------------
785 /* Ripped this evil little function from wget - I prefer the use of
786 GNU timegm if possible as this technique will have interesting problems
787 with leap seconds, timezones and other.
788
789 Converts struct tm to time_t, assuming the data in tm is UTC rather
790 than local timezone (mktime assumes the latter).
791
792 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
793 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
794
795 /* Turned it into an autoconf check, because GNU is not the only thing which
796 can provide timegm. -- 2002-09-22, Joel Baker */
797
798 #ifndef HAVE_TIMEGM // Now with autoconf!
799 static time_t timegm(struct tm *t)
800 {
801 time_t tl, tb;
802
803 tl = mktime (t);
804 if (tl == -1)
805 return -1;
806 tb = mktime (gmtime (&tl));
807 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
808 }
809 #endif
810 /*}}}*/
811 // StrToTime - Converts a string into a time_t /*{{{*/
812 // ---------------------------------------------------------------------
813 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
814 and the C library asctime format. It requires the GNU library function
815 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
816 reason the C library does not provide any such function :< This also
817 handles the weird, but unambiguous FTP time format*/
818 bool StrToTime(const string &Val,time_t &Result)
819 {
820 struct tm Tm;
821 char Month[10];
822 const char *I = Val.c_str();
823
824 // Skip the day of the week
825 for (;*I != 0 && *I != ' '; I++);
826
827 // Handle RFC 1123 time
828 Month[0] = 0;
829 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
830 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
831 {
832 // Handle RFC 1036 time
833 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
834 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
835 Tm.tm_year += 1900;
836 else
837 {
838 // asctime format
839 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
840 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
841 {
842 // 'ftp' time
843 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
844 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
845 return false;
846 Tm.tm_mon--;
847 }
848 }
849 }
850
851 Tm.tm_isdst = 0;
852 if (Month[0] != 0)
853 Tm.tm_mon = MonthConv(Month);
854 Tm.tm_year -= 1900;
855
856 // Convert to local time and then to GMT
857 Result = timegm(&Tm);
858 return true;
859 }
860 /*}}}*/
// StrToNum - Convert a fixed length string to a number			/*{{{*/
// ---------------------------------------------------------------------
/* This is used in decoding the crazy fixed length string headers in
   tar and ar files. The first Len characters of Str (at most 29) are
   converted with strtoul using Base and stored in Res. A field of only
   spaces gives 0. Returns false if the field is too long or does not
   start with a number. */
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   // Work on a terminated copy of the field
   char Field[30];
   if (Len >= sizeof(Field))
      return false;
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   const char *First = Field;
   while (*First == ' ')
      First++;
   if (*First == 0)
      return true;

   char *End;
   Res = strtoul(Field,&End,Base);
   return End != Field;
}
887 /*}}}*/
// HexDigit - Convert a hex character into an integer			/*{{{*/
// ---------------------------------------------------------------------
/* Helper for Hex2Num. Gives the value 0..15 of a hex digit in either case
   and 0 for any other character. */
static int HexDigit(int c)
{
   if (c >= 'A' && c <= 'F')
      return 10 + (c - 'A');
   if (c >= 'a' && c <= 'f')
      return 10 + (c - 'a');
   if (c >= '0' && c <= '9')
      return c - '0';
   return 0;
}
901 /*}}}*/
902 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
903 // ---------------------------------------------------------------------
904 /* The length of the buffer must be exactly 1/2 the length of the string. */
905 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
906 {
907 if (Str.length() != Length*2)
908 return false;
909
910 // Convert each digit. We store it in the same order as the string
911 int J = 0;
912 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
913 {
914 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
915 return false;
916
917 Num[J] = HexDigit(I[0]) << 4;
918 Num[J] += HexDigit(I[1]);
919 }
920
921 return true;
922 }
923 /*}}}*/
// TokSplitString - Split a string up by a given token			/*{{{*/
// ---------------------------------------------------------------------
/* This is intended to be a faster splitter, it does not use dynamic
   memories. Input is changed to insert nulls at each token location.
   List receives pointers into Input, one per item with the white space
   around it removed, followed by a null pointer. Returns false if the
   number of items reaches ListMax; the last entry of List is set to null
   in that case. */
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   // Strip any leading spaces
   char *Start = Input;
   char * const Stop = Start + strlen(Start);
   while (*Start != 0 && isspace(*Start) != 0)
      Start++;

   unsigned long Count = 0;
   for (char *Pos = Start; Pos != Stop; Start = Pos)
   {
      // Skip to the next Token
      while (Pos != Stop && *Pos != Tok)
         Pos++;

      // Back remove spaces
      char *End = Pos;
      while (End > Start && (End[-1] == Tok || isspace(End[-1]) != 0))
         End--;
      *End = 0;

      List[Count++] = Start;
      if (Count >= ListMax)
      {
         List[Count-1] = 0;
         return false;
      }

      // Advance pos, also over the terminator that was just written
      while (Pos != Stop && (*Pos == Tok || isspace(*Pos) != 0 || *Pos == 0))
         Pos++;
   }

   List[Count] = 0;
   return true;
}
963 /*}}}*/
964 // RegexChoice - Simple regex list/list matcher /*{{{*/
965 // ---------------------------------------------------------------------
966 /* */
967 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
968 const char **ListEnd)
969 {
970 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
971 R->Hit = false;
972
973 unsigned long Hits = 0;
974 for (; ListBegin != ListEnd; ListBegin++)
975 {
976 // Check if the name is a regex
977 const char *I;
978 bool Regex = true;
979 for (I = *ListBegin; *I != 0; I++)
980 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
981 break;
982 if (*I == 0)
983 Regex = false;
984
985 // Compile the regex pattern
986 regex_t Pattern;
987 if (Regex == true)
988 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
989 REG_NOSUB) != 0)
990 Regex = false;
991
992 // Search the list
993 bool Done = false;
994 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
995 {
996 if (R->Str[0] == 0)
997 continue;
998
999 if (strcasecmp(R->Str,*ListBegin) != 0)
1000 {
1001 if (Regex == false)
1002 continue;
1003 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1004 continue;
1005 }
1006 Done = true;
1007
1008 if (R->Hit == false)
1009 Hits++;
1010
1011 R->Hit = true;
1012 }
1013
1014 if (Regex == true)
1015 regfree(&Pattern);
1016
1017 if (Done == false)
1018 _error->Warning(_("Selection %s not found"),*ListBegin);
1019 }
1020
1021 return Hits;
1022 }
1023 /*}}}*/
// ioprintf - C format string outputter to C++ iostreams		/*{{{*/
// ---------------------------------------------------------------------
/* This is used to make the internationalization strings easier to translate
   and to allow reordering of parameters. The formatted text is written to
   out in full, whatever its length. Nothing is written if the formatting
   itself fails. */
void ioprintf(ostream &out,const char *format,...)
{
   // Most messages fit into the stack buffer
   char S[400];
   va_list args;
   va_start(args,format);
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
      return;

   if ((size_t)Needed < sizeof(S))
   {
      out << S;
      return;
   }

   /* The text was cut off: format it again into a buffer of the size
      reported by the first run. The argument list has to be restarted
      as the first run has used it up. */
   string Big(Needed + 1,'\0');
   va_start(args,format);
   vsnprintf(&Big[0],Big.size(),format,args);
   va_end(args);
   out << Big.c_str();
}
1038 /*}}}*/
// safe_snprintf - Safer snprintf					/*{{{*/
// ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
   pointer to the end of the new string. The returned string is always null
   terminated unless Buffer == end. This is a better alternative to using
   consecutive snprintfs. End is returned if there is no room at all, if
   the text had to be cut off or if the formatting failed. */
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   // Kept signed so that a failure (a negative result) can be told apart
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   if (Did < 0 || Did > End - Buffer)
      return End;
   return Buffer + Did;
}
1060 /*}}}*/
1061
1062 // CheckDomainList - See if Host is in a , seperate list /*{{{*/
1063 // ---------------------------------------------------------------------
1064 /* The domain list is a comma seperate list of domains that are suffix
1065 matched against the argument */
1066 bool CheckDomainList(const string &Host,const string &List)
1067 {
1068 string::const_iterator Start = List.begin();
1069 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1070 {
1071 if (Cur < List.end() && *Cur != ',')
1072 continue;
1073
1074 // Match the end of the string..
1075 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1076 Cur - Start != 0 &&
1077 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1078 return true;
1079
1080 Start = Cur + 1;
1081 }
1082 return false;
1083 }
1084 /*}}}*/
1085
1086 // URI::CopyFrom - Copy from an object /*{{{*/
1087 // ---------------------------------------------------------------------
1088 /* This parses the URI into all of its components */
1089 void URI::CopyFrom(const string &U)
1090 {
1091 string::const_iterator I = U.begin();
1092
1093 // Locate the first colon, this separates the scheme
1094 for (; I < U.end() && *I != ':' ; I++);
1095 string::const_iterator FirstColon = I;
1096
1097 /* Determine if this is a host type URI with a leading double //
1098 and then search for the first single / */
1099 string::const_iterator SingleSlash = I;
1100 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1101 SingleSlash += 3;
1102
1103 /* Find the / indicating the end of the hostname, ignoring /'s in the
1104 square brackets */
1105 bool InBracket = false;
1106 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1107 {
1108 if (*SingleSlash == '[')
1109 InBracket = true;
1110 if (InBracket == true && *SingleSlash == ']')
1111 InBracket = false;
1112 }
1113
1114 if (SingleSlash > U.end())
1115 SingleSlash = U.end();
1116
1117 // We can now write the access and path specifiers
1118 Access.assign(U.begin(),FirstColon);
1119 if (SingleSlash != U.end())
1120 Path.assign(SingleSlash,U.end());
1121 if (Path.empty() == true)
1122 Path = "/";
1123
1124 // Now we attempt to locate a user:pass@host fragment
1125 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1126 FirstColon += 3;
1127 else
1128 FirstColon += 1;
1129 if (FirstColon >= U.end())
1130 return;
1131
1132 if (FirstColon > SingleSlash)
1133 FirstColon = SingleSlash;
1134
1135 // Find the colon...
1136 I = FirstColon + 1;
1137 if (I > SingleSlash)
1138 I = SingleSlash;
1139 for (; I < SingleSlash && *I != ':'; I++);
1140 string::const_iterator SecondColon = I;
1141
1142 // Search for the @ after the colon
1143 for (; I < SingleSlash && *I != '@'; I++);
1144 string::const_iterator At = I;
1145
1146 // Now write the host and user/pass
1147 if (At == SingleSlash)
1148 {
1149 if (FirstColon < SingleSlash)
1150 Host.assign(FirstColon,SingleSlash);
1151 }
1152 else
1153 {
1154 Host.assign(At+1,SingleSlash);
1155 User.assign(FirstColon,SecondColon);
1156 if (SecondColon < At)
1157 Password.assign(SecondColon+1,At);
1158 }
1159
1160 // Now we parse the RFC 2732 [] hostnames.
1161 unsigned long PortEnd = 0;
1162 InBracket = false;
1163 for (unsigned I = 0; I != Host.length();)
1164 {
1165 if (Host[I] == '[')
1166 {
1167 InBracket = true;
1168 Host.erase(I,1);
1169 continue;
1170 }
1171
1172 if (InBracket == true && Host[I] == ']')
1173 {
1174 InBracket = false;
1175 Host.erase(I,1);
1176 PortEnd = I;
1177 continue;
1178 }
1179 I++;
1180 }
1181
1182 // Tsk, weird.
1183 if (InBracket == true)
1184 {
1185 Host.clear();
1186 return;
1187 }
1188
1189 // Now we parse off a port number from the hostname
1190 Port = 0;
1191 string::size_type Pos = Host.rfind(':');
1192 if (Pos == string::npos || Pos < PortEnd)
1193 return;
1194
1195 Port = atoi(string(Host,Pos+1).c_str());
1196 Host.assign(Host,0,Pos);
1197 }
1198 /*}}}*/
1199 // URI::operator string - Convert the URI to a string /*{{{*/
1200 // ---------------------------------------------------------------------
1201 /* */
1202 URI::operator string()
1203 {
1204 string Res;
1205
1206 if (Access.empty() == false)
1207 Res = Access + ':';
1208
1209 if (Host.empty() == false)
1210 {
1211 if (Access.empty() == false)
1212 Res += "//";
1213
1214 if (User.empty() == false)
1215 {
1216 Res += User;
1217 if (Password.empty() == false)
1218 Res += ":" + Password;
1219 Res += "@";
1220 }
1221
1222 // Add RFC 2732 escaping characters
1223 if (Access.empty() == false &&
1224 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1225 Res += '[' + Host + ']';
1226 else
1227 Res += Host;
1228
1229 if (Port != 0)
1230 {
1231 char S[30];
1232 sprintf(S,":%u",Port);
1233 Res += S;
1234 }
1235 }
1236
1237 if (Path.empty() == false)
1238 {
1239 if (Path[0] != '/')
1240 Res += "/" + Path;
1241 else
1242 Res += Path;
1243 }
1244
1245 return Res;
1246 }
1247 /*}}}*/
1248 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1249 // ---------------------------------------------------------------------
1250 /* */
1251 string URI::SiteOnly(const string &URI)
1252 {
1253 ::URI U(URI);
1254 U.User.clear();
1255 U.Password.clear();
1256 U.Path.clear();
1257 U.Port = 0;
1258 return U;
1259 }
1260 /*}}}*/