]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Simplified CHinese translation update
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.48 2003/07/18 14:15:11 mdz Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <apt-pkg/strutl.h>
19 #include <apt-pkg/fileutl.h>
20 #include <apt-pkg/error.h>
21
22 #include <apti18n.h>
23
24 #include <ctype.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <algorithm>
28 #include <unistd.h>
29 #include <regex.h>
30 #include <errno.h>
31 #include <stdarg.h>
32 #include <iconv.h>
33
34 #include "config.h"
35
36 using namespace std;
37 /*}}}*/
38
39 // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/
40 // ---------------------------------------------------------------------
41 /* This is handy to use before display some information for enduser */
42 bool UTF8ToCodeset(const char *codeset, const string &orig, string *dest)
43 {
44 iconv_t cd;
45 const char *inbuf;
46 char *inptr, *outbuf, *outptr;
47 size_t insize, outsize;
48
49 cd = iconv_open(codeset, "UTF-8");
50 if (cd == (iconv_t)(-1)) {
51 // Something went wrong
52 if (errno == EINVAL)
53 _error->Error("conversion from 'UTF-8' to '%s' not available",
54 codeset);
55 else
56 perror("iconv_open");
57
58 // Clean the destination string
59 *dest = "";
60
61 return false;
62 }
63
64 insize = outsize = orig.size();
65 inbuf = orig.data();
66 inptr = (char *)inbuf;
67 outbuf = new char[insize+1];
68 outptr = outbuf;
69
70 while (insize != 0)
71 {
72 size_t const err = iconv(cd, &inptr, &insize, &outptr, &outsize);
73 if (err == (size_t)(-1))
74 {
75 insize--;
76 outsize++;
77 inptr++;
78 *outptr = '?';
79 outptr++;
80 }
81 }
82
83 *outptr = '\0';
84 *dest = outbuf;
85 delete[] outbuf;
86
87 iconv_close(cd);
88
89 return true;
90 }
91 /*}}}*/
92 // strstrip - Remove white space from the front and back of a string /*{{{*/
93 // ---------------------------------------------------------------------
94 /* This is handy to use when parsing a file. It also removes \n's left
95 over from fgets and company */
// Trims String in place and returns a pointer to its first kept character.
// Leading spaces and tabs are skipped; trailing spaces, tabs, '\n' and '\r'
// are cut off by writing a terminator over the first of them. The returned
// pointer points into the caller's buffer.
char *_strstrip(char *String)
{
   // Step over leading blanks
   while (*String == ' ' || *String == '\t')
      String++;

   if (*String == 0)
      return String;

   /* Walk back from the terminator while the preceding character is
      trailing white space. End never moves before String, so no pointer
      in front of the buffer is ever formed. */
   char *End = String + strlen(String);
   while (End != String &&
          (End[-1] == ' ' || End[-1] == '\t' ||
           End[-1] == '\n' || End[-1] == '\r'))
      End--;

   *End = 0;
   return String;
}
110 /*}}}*/
111 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
112 // ---------------------------------------------------------------------
113 /* */
// Replaces tab characters in String with runs of spaces, in place.
//   String - NUL terminated text inside a writable buffer
//   Len    - total size of that buffer in bytes
// Returns String. If an expansion would not fit in the buffer the string is
// cut off at the offending tab instead.
//
// NOTE(review): the pad width computed below is kept exactly as it was
// (a tab at offset 0 becomes one space, later tabs become 6..13 spaces); it
// does not correspond to real 8-column tab stops. Likewise the character
// right after an expanded tab is not examined, so a tab directly following
// another tab is left untouched. Confirm the intended behaviour before
// changing either.
char *_strtabexpand(char *String,size_t Len)
{
   char * const BufEnd = String + Len;
   for (char *I = String; I != BufEnd && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      // Fewer than 8 bytes of buffer left at the tab: cut the string here
      if (BufEnd - I < 8)
      {
         *I = 0;
         return String;
      }

      /* Assume the start of the string is 0 and find the next 8 char
         division */
      int Pad;
      if (String == I)
         Pad = 1;
      else
         Pad = 8 - ((String - I) % 8);
      Pad -= 2;
      if (Pad <= 0)
      {
         *I = ' ';
         continue;
      }

      // Text after the tab including its terminator; it moves to I + Pad
      size_t const Tail = strlen(I + 1) + 1;
      if ((size_t)(BufEnd - I) < (size_t)Pad + Tail)
      {
         *I = 0;
         return String;
      }

      memmove(I + Pad,I + 1,Tail);
      for (char * const Stop = I + Pad; I != Stop; I++)
         *I = ' ';

      // The tab was the last character: do not scan beyond the terminator
      if (*I == 0)
         break;
   }
   return String;
}
145 /*}}}*/
146 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
147 // ---------------------------------------------------------------------
148 /* This grabs a single word, converts any % escaped characters to their
149 proper values and advances the pointer. Double quotes are understood
and stripped out as well. This is for URI/URL parsing. It can also
151 understand [] brackets.*/
// Extracts the next word from String into Res and advances String.
//   String - in/out cursor into a NUL terminated buffer; on success it is
//            moved past the word and any white space following it
//   Res    - receives the word with %XX escapes decoded and '"' removed
// A word ends at the first white space that is not inside "..." or [...].
// Returns false (leaving String and Res untouched) when only spaces remain
// or when a quote or bracket is not closed.
// Note: spaces in front of the word are copied into Res, and Res stops at
// the first decoded NUL (%00), exactly as before.
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the next word
   for (;*C != 0 && isspace((unsigned char)*C) == 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
      if (*C == '[')
      {
         for (C++; *C != 0 && *C != ']'; C++);
         if (*C == 0)
            return false;
      }
   }

   /* Now de-quote characters. The word is collected in a string so that
      its length is not limited by a fixed size buffer. */
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C;)
   {
      // A '%' needs two more characters inside the word to be an escape
      if (*Start == '%' && Start + 2 < C)
      {
         char const Tmp[3] = {Start[1],Start[2],0};
         Word += (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      Start++;
   }
   Res = Word.c_str();

   // Skip ending white space
   for (;*C != 0 && isspace((unsigned char)*C) != 0; C++);
   String = C;
   return true;
}
207 /*}}}*/
208 // ParseCWord - Parses a string like a C "" expression /*{{{*/
209 // ---------------------------------------------------------------------
210 /* This expects a series of space separated strings enclosed in ""'s.
211 It concatenates the ""'s into a single string. */
// Parses a sequence of "..." strings separated by white space and joins
// them, C string literal style.
//   String - in/out cursor; moved to the terminating NUL on success
//   Res    - receives the quoted pieces, each run of white space between
//            them reduced to one space
// Returns false (leaving String and Res untouched) if nothing but spaces is
// left, if a quote is not closed, or if any character other than white
// space appears outside of quotes.
bool ParseCWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Collected in a string, so the input length is not limited
   string Joined;
   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++)
            Joined += *C;

         if (*C == 0)
            return false;

         continue;
      }

      bool const IsSpace = isspace((unsigned char)*C) != 0;

      // Second and later characters of a white space run add nothing
      if (C != String && IsSpace == true &&
          isspace((unsigned char)C[-1]) != 0)
         continue;
      if (IsSpace == false)
         return false;
      Joined += ' ';
   }

   Res = Joined;
   String = C;
   return true;
}
249 /*}}}*/
// QuoteString - Convert a string into quoted form /*{{{*/
251 // ---------------------------------------------------------------------
252 /* */
// Returns a copy of Str in which every unsafe byte is written as %xx (two
// lower case hex digits).
//   Str - text to quote
//   Bad - additional characters that must always be quoted
// A byte is quoted when it appears in Bad, is not printable, or is outside
// the range 0x21..0x7E.
string QuoteString(const string &Str, const char *Bad)
{
   static const char Hex[] = "0123456789abcdef";

   string Res;
   for (string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the unsigned value: with a signed char, bytes >= 0x80 are
         negative, which is invalid for isprint() and used to be formatted
         as eight hex digits instead of two. */
      unsigned char const Ch = (unsigned char)*I;
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         Res += '%';
         Res += Hex[Ch >> 4];
         Res += Hex[Ch & 0xF];
      }
      else
         Res += *I;
   }
   return Res;
}
270 /*}}}*/
// DeQuoteString - Convert a string from quoted form /*{{{*/
272 // ---------------------------------------------------------------------
273 /* This undoes QuoteString */
// Reverses QuoteString(): every "%xx" with two hex digits becomes the byte
// with that value. A '%' that is not followed by two hex digits is copied
// through unchanged instead of being decoded into a garbage byte.
string DeQuoteString(const string &Str)
{
   string::size_type const Size = Str.size();

   string Res;
   Res.reserve(Size);
   for (string::size_type I = 0; I < Size; I++)
   {
      if (Str[I] == '%' && Size - I > 2 &&
          isxdigit((unsigned char)Str[I + 1]) != 0 &&
          isxdigit((unsigned char)Str[I + 2]) != 0)
      {
         char const Tmp[3] = {Str[I + 1],Str[I + 2],0};
         Res += (char)strtol(Tmp,0,16);
         I += 2;   // the loop increment skips the '%' itself
         continue;
      }
      Res += Str[I];
   }
   return Res;
}
294
295 /*}}}*/
296 // SizeToStr - Convert a long into a human readable size /*{{{*/
297 // ---------------------------------------------------------------------
298 /* A max of 4 digits are shown before conversion to the next highest unit.
299 The max length of the string will be 5 chars unless the size is > 10
300 YottaBytes (E24) */
// Formats a byte count as a short human readable string.
//   Size - number of bytes; the sign is ignored
// The value is divided by 1000 until it is below 10000 or the largest unit
// (Y) is reached. Scaled values below 100 are printed with one decimal,
// everything else without decimals. The unit letter is appended (none for
// plain bytes).
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   int const Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   /* Stop at the last unit even if the value is still large, so the
      buffer is always filled in before it is returned. */
   int I = 0;
   while (I < Last && ASize >= 10000)
   {
      ASize /= 1000.0;
      I++;
   }

   char S[300];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%'.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%'.0f%c",ASize,Ext[I]);

   return S;
}
333 /*}}}*/
334 // TimeToStr - Convert the time into a string /*{{{*/
335 // ---------------------------------------------------------------------
336 /* Converts a number of seconds to a hms format */
337 string TimeToStr(unsigned long Sec)
338 {
339 char S[300];
340
341 while (1)
342 {
343 if (Sec > 60*60*24)
344 {
345 //d means days, h means hours, min means minutes, s means seconds
346 sprintf(S,_("%lid %lih %limin %lis"),Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
347 break;
348 }
349
350 if (Sec > 60*60)
351 {
352 //h means hours, min means minutes, s means seconds
353 sprintf(S,_("%lih %limin %lis"),Sec/60/60,(Sec/60) % 60,Sec % 60);
354 break;
355 }
356
357 if (Sec > 60)
358 {
359 //min means minutes, s means seconds
360 sprintf(S,_("%limin %lis"),Sec/60,Sec % 60);
361 break;
362 }
363
364 //s means seconds
365 sprintf(S,_("%lis"),Sec);
366 break;
367 }
368
369 return S;
370 }
371 /*}}}*/
372 // SubstVar - Substitute a string for another string /*{{{*/
373 // ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str. */
// Returns a copy of Str with every occurrence of Subst replaced by Contents.
//   Str      - text to search
//   Subst    - text to look for; if empty, Str is returned unchanged
//   Contents - replacement text
// Occurrences are found left to right and do not overlap.
string SubstVar(const string &Str,const string &Subst,const string &Contents)
{
   // An empty pattern matches everywhere without advancing
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing matched at all
   if (OldPos == 0)
      return Str;

   Temp.append(Str,OldPos,string::npos);
   return Temp;
}
393
394 string SubstVar(string Str,const struct SubstVar *Vars)
395 {
396 for (; Vars->Subst != 0; Vars++)
397 Str = SubstVar(Str,Vars->Subst,*Vars->Contents);
398 return Str;
399 }
400 /*}}}*/
401 // OutputInDepth - return a string with separator multiplied with depth /*{{{*/
402 // ---------------------------------------------------------------------
403 /* Returns a string with the supplied separator depth + 1 times in it */
// Builds a string made of Separator repeated Depth + 1 times.
std::string OutputInDepth(const unsigned long Depth, const char* Separator)
{
   std::string Line;
   unsigned long Remaining = Depth + 1;
   while (Remaining != 0)
   {
      Line += Separator;
      --Remaining;
   }
   return Line;
}
411 /*}}}*/
412 // URItoFileName - Convert the uri into a unique file name /*{{{*/
413 // ---------------------------------------------------------------------
414 /* This converts a URI into a safe filename. It quotes all unsafe characters
415 and converts / to _ and removes the scheme identifier. The resulting
416 file name should be unique and never occur again for a different file */
417 string URItoFileName(const string &URI)
418 {
419 // Nuke 'sensitive' items
420 ::URI U(URI);
421 U.User.clear();
422 U.Password.clear();
423 U.Access.clear();
424
425 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
426 string NewURI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
427 replace(NewURI.begin(),NewURI.end(),'/','_');
428 return NewURI;
429 }
430 /*}}}*/
431 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
432 // ---------------------------------------------------------------------
433 /* This routine performs a base64 transformation on a string. It was ripped
434 from wget and then patched and bug fixed.
435
436 This spec can be found in rfc2045 */
// Returns the base64 (RFC 2045 alphabet) encoding of S, padded with '=' to
// a multiple of four characters. No line breaks are inserted.
string Base64Encode(const string &S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   string::size_type const Len = S.length();

   // Pre-allocate some space
   string Final;
   Final.reserve((4*Len + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. The input bytes are handled as unsigned values so that bytes
      >= 0x80 cannot turn into negative table indexes. */
   for (string::size_type I = 0; I < Len; I += 3)
   {
      string::size_type const Left = Len - I;
      unsigned char const B0 = (unsigned char)S[I];
      unsigned char const B1 = (Left > 1) ? (unsigned char)S[I + 1] : 0;
      unsigned char const B2 = (Left > 2) ? (unsigned char)S[I + 2] : 0;

      Final += tbl[B0 >> 2];
      Final += tbl[((B0 & 3) << 4) | (B1 >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Left == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((B1 & 0xf) << 2) | (B2 >> 6)];

      if (Left == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[B2 & 0x3f];
   }

   return Final;
}
487 /*}}}*/
488 // stringcmp - Arbitrary string compare /*{{{*/
489 // ---------------------------------------------------------------------
490 /* This safely compares two non-null terminated strings of arbitrary
491 length */
// Compares the ranges [A,AEnd) and [B,BEnd), neither of which has to be NUL
// terminated. Returns 0 when both are identical. If A runs out first the
// result is 1, if B runs out first it is -1; otherwise the first differing
// pair of characters decides (-1 when *A < *B, else 1).
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
508
509 #if __GNUC__ >= 3
// Same comparison as the pointer version, with A given as string iterators:
// 0 when equal, 1 if A ends first, -1 if B ends first, otherwise decided by
// the first differing characters (-1 when *A < *B, else 1).
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              const char *B,const char *BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
// Same comparison as the pointer version, with both ranges given as string
// iterators: 0 when equal, 1 if A ends first, -1 if B ends first, otherwise
// decided by the first differing characters (-1 when *A < *B, else 1).
int stringcmp(string::const_iterator A,string::const_iterator AEnd,
              string::const_iterator B,string::const_iterator BEnd)
{
   // Step over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
544 #endif
545 /*}}}*/
546 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
547 // ---------------------------------------------------------------------
548 /* */
// Case insensitive variant of stringcmp(): characters are compared after
// toupper(). Returns 0 when the ranges match, 1 if A ends first, -1 if B
// ends first, otherwise -1 when the first differing upper-cased character
// of A is smaller than that of B and 1 if not.
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the prefix that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
565 #if __GNUC__ >= 3
// Case insensitive comparison with A given as string iterators; characters
// are compared after toupper(). Returns 0 when equal, 1 if A ends first,
// -1 if B ends first, otherwise decided by the first differing pair.
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  const char *B,const char *BEnd)
{
   // Step over the prefix that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
// Case insensitive comparison with both ranges given as string iterators;
// characters are compared after toupper(). Returns 0 when equal, 1 if A
// ends first, -1 if B ends first, otherwise decided by the first differing
// pair.
int stringcasecmp(string::const_iterator A,string::const_iterator AEnd,
                  string::const_iterator B,string::const_iterator BEnd)
{
   // Step over the prefix that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      ++A;
      ++B;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
600 #endif
601 /*}}}*/
// LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
603 // ---------------------------------------------------------------------
604 /* The format is like those used in package files and the method
605 communication system */
606 string LookupTag(const string &Message,const char *Tag,const char *Default)
607 {
608 // Look for a matching tag.
609 int Length = strlen(Tag);
610 for (string::const_iterator I = Message.begin(); I + Length < Message.end(); I++)
611 {
612 // Found the tag
613 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
614 {
615 // Find the end of line and strip the leading/trailing spaces
616 string::const_iterator J;
617 I += Length + 1;
618 for (; isspace(*I) != 0 && I < Message.end(); I++);
619 for (J = I; *J != '\n' && J < Message.end(); J++);
620 for (; J > I && isspace(J[-1]) != 0; J--);
621
622 return string(I,J);
623 }
624
625 for (; *I != '\n' && I < Message.end(); I++);
626 }
627
628 // Failed to find a match
629 if (Default == 0)
630 return string();
631 return Default;
632 }
633 /*}}}*/
634 // StringToBool - Converts a string into a boolean /*{{{*/
635 // ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
then returns the result. Several variants of true/false are checked. */
// Interprets Text as a boolean.
//   Text    - "0"/"1" (any base strtol() accepts with base 0) or one of the
//             words below, compared ignoring case
//   Default - returned when Text is none of the recognised forms
// Returns 0 for no/false/without/off/disable, 1 for yes/true/with/on/enable.
// A number only counts if the whole string is that number, so text that
// merely starts with a digit (e.g. "0ad") is not taken as a boolean.
int StringToBool(const string &Text,int Default)
{
   const char * const Str = Text.c_str();

   char *ParseEnd;
   long const Num = strtol(Str,&ParseEnd,0);
   if (ParseEnd != Str && *ParseEnd == 0 && (Num == 0 || Num == 1))
      return (int)Num;

   // Check for negatives
   static const char * const Negative[] = {"no","false","without","off","disable"};
   for (unsigned int I = 0; I != sizeof(Negative)/sizeof(Negative[0]); I++)
      if (strcasecmp(Str,Negative[I]) == 0)
         return 0;

   // Check for positives
   static const char * const Positive[] = {"yes","true","with","on","enable"};
   for (unsigned int I = 0; I != sizeof(Positive)/sizeof(Positive[0]); I++)
      if (strcasecmp(Str,Positive[I]) == 0)
         return 1;

   return Default;
}
663 /*}}}*/
664 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
665 // ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
year 2000 compliant and timezone neutral */
// Formats Date as an RFC 1123 time stamp in GMT, for example
// "Thu, 01 Jan 1970 00:00:00 GMT". Day and month names are fixed English
// abbreviations. Returns an empty string if Date cannot be broken down
// into calendar fields.
string TimeRFC1123(time_t Date)
{
   // gmtime_r() writes into our own struct rather than a shared static one
   struct tm Conv;
   if (gmtime_r(&Date,&Conv) == NULL)
      return "";

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
682 /*}}}*/
683 // ReadMessages - Read messages from the FD /*{{{*/
684 // ---------------------------------------------------------------------
685 /* This pulls full messages from the input FD into the message buffer.
686 It assumes that messages will not pause during transit so no
687 fancy buffering is used.
688
689 In particular: this reads blocks from the input until it believes
690 that it's run out of input text. Each block is terminated by a
691 double newline ('\n' followed by '\n'). As noted below, there is a
692 bug in this code: it assumes that all the blocks have been read if
693 it doesn't see additional text in the buffer after the last one is
694 parsed, which will cause it to lose blocks if the last block
695 coincides with the end of the buffer.
696 */
// Reads from Fd and appends every complete message found to List. A message
// ends at two consecutive newlines; the newlines are not part of the stored
// message.
// Returns true once the buffer has been fully consumed or read() reports
// EAGAIN. Returns false when read() returns 0, fails with any other error,
// or WaitFd() returns false.
bool ReadMessages(int Fd, vector<string> &List)
{
   char Buffer[64000];
   // One past the last valid byte currently held in Buffer
   char *End = Buffer;
   // Represents any left-over from the previous iteration of the
   // parse loop. (i.e., if a message is split across the end
   // of the buffer, it goes here)
   string PartialMessage;

   while (1)
   {
      // Fill whatever room is left behind the data already buffered
      int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
      // Interrupted by a signal, simply try again
      if (Res < 0 && errno == EINTR)
         continue;

      // Process is dead, this is kind of bad..
      if (Res == 0)
         return false;

      // No data
      if (Res < 0 && errno == EAGAIN)
         return true;
      if (Res < 0)
         return false;

      End += Res;

      // Look for the end of the message
      for (char *I = Buffer; I + 1 < End; I++)
      {
         if (I[0] != '\n' || I[1] != '\n')
            continue;

         // Pull the message out
         string Message(Buffer,I-Buffer);
         PartialMessage += Message;

         // Fix up the buffer: skip the whole run of newlines, then move
         // the remaining bytes to the front
         for (; I < End && *I == '\n'; I++);
         End -= I-Buffer;
         memmove(Buffer,I,End-Buffer);
         // Restart the scan; the loop increment resumes it at Buffer + 1
         I = Buffer;

         List.push_back(PartialMessage);
         PartialMessage.clear();
      }
      if (End != Buffer)
      {
         // If there's text left in the buffer, store it
         // in PartialMessage and throw the rest of the buffer
         // away. This allows us to handle messages that
         // are longer than the static buffer size.
         PartialMessage += string(Buffer, End);
         End = Buffer;
      }
      else
      {
         // BUG ALERT: if a message block happens to end at a
         // multiple of 64000 characters, this will cause it to
         // terminate early, leading to a badly formed block and
         // probably crashing the method. However, this is the only
         // way we have to find the end of the message block. I have
         // an idea of how to fix this, but it will require changes
         // to the protocol (essentially to mark the beginning and
         // end of the block).
         //
         // -- dburrows 2008-04-02
         return true;
      }

      // NOTE(review): presumably blocks until Fd is readable again --
      // confirm against WaitFd() in fileutl
      if (WaitFd(Fd) == false)
         return false;
   }
}
771 /*}}}*/
772 // MonthConv - Converts a month string into a number /*{{{*/
773 // ---------------------------------------------------------------------
774 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
775 Made it a bit more robust with a few touppers though. */
// Maps an English month abbreviation to its index, 0 (January) to 11
// (December). Only as many leading characters as needed to tell the months
// apart are looked at, ignoring case. Anything that does not start with a
// known month letter yields 0.
static int MonthConv(char *Month)
{
   int const First = toupper(*Month);

   // Apr / Aug
   if (First == 'A')
      return toupper(Month[1]) == 'P' ? 3 : 7;
   if (First == 'D')
      return 11;
   if (First == 'F')
      return 1;
   // Jan / Jun / Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return toupper(Month[2]) == 'N' ? 5 : 6;
   }
   // Mar / May
   if (First == 'M')
      return toupper(Month[2]) == 'R' ? 2 : 4;
   if (First == 'N')
      return 10;
   if (First == 'O')
      return 9;
   if (First == 'S')
      return 8;

   // Pretend it is January..
   return 0;
}
804 /*}}}*/
805 // timegm - Internal timegm function if gnu is not available /*{{{*/
806 // ---------------------------------------------------------------------
807 /* Ripped this evil little function from wget - I prefer the use of
808 GNU timegm if possible as this technique will have interesting problems
809 with leap seconds, timezones and other.
810
811 Converts struct tm to time_t, assuming the data in tm is UTC rather
812 than local timezone (mktime assumes the latter).
813
814 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
815 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
816
817 /* Turned it into an autoconf check, because GNU is not the only thing which
818 can provide timegm. -- 2002-09-22, Joel Baker */
819
820 #ifndef HAVE_TIMEGM // Now with autoconf!
821 static time_t timegm(struct tm *t)
822 {
823 time_t tl, tb;
824
825 tl = mktime (t);
826 if (tl == -1)
827 return -1;
828 tb = mktime (gmtime (&tl));
829 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
830 }
831 #endif
832 /*}}}*/
833 // StrToTime - Converts a string into a time_t /*{{{*/
834 // ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
and the C library asctime format. It requires the GNU library function
'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
reason the C library does not provide any such function :< This also
handles the weird, but unambiguous FTP time format */
840 bool StrToTime(const string &Val,time_t &Result)
841 {
842 struct tm Tm;
843 char Month[10];
844 const char *I = Val.c_str();
845
846 // Skip the day of the week
847 for (;*I != 0 && *I != ' '; I++);
848
849 // Handle RFC 1123 time
850 Month[0] = 0;
851 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
852 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
853 {
854 // Handle RFC 1036 time
855 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
856 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
857 Tm.tm_year += 1900;
858 else
859 {
860 // asctime format
861 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
862 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
863 {
864 // 'ftp' time
865 if (sscanf(Val.c_str(),"%4d%2d%2d%2d%2d%2d",&Tm.tm_year,&Tm.tm_mon,
866 &Tm.tm_mday,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
867 return false;
868 Tm.tm_mon--;
869 }
870 }
871 }
872
873 Tm.tm_isdst = 0;
874 if (Month[0] != 0)
875 Tm.tm_mon = MonthConv(Month);
876 Tm.tm_year -= 1900;
877
878 // Convert to local time and then to GMT
879 Result = timegm(&Tm);
880 return true;
881 }
882 /*}}}*/
883 // StrToNum - Convert a fixed length string to a number /*{{{*/
884 // ---------------------------------------------------------------------
885 /* This is used in decoding the crazy fixed length string headers in
886 tar and ar files. */
// Converts the first Len characters of Str (which need not be NUL
// terminated) into a number.
//   Str  - start of the fixed width field
//   Res  - receives the value; set to 0 before parsing
//   Len  - field width, must be smaller than 30
//   Base - numeric base handed to strtoul()
// A field holding only spaces counts as 0. Returns false if the field is
// too wide or strtoul() could not convert anything.
bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
{
   char Field[30];
   if (Len >= sizeof(Field))
      return false;

   // Private, terminated copy of the field
   memcpy(Field,Str,Len);
   Field[Len] = 0;

   // All spaces is a zero
   Res = 0;
   if (Field[strspn(Field," ")] == 0)
      return true;

   char *Stop;
   Res = strtoul(Field,&Stop,Base);
   return Stop != Field;
}
909 /*}}}*/
910 // HexDigit - Convert a hex character into an integer /*{{{*/
911 // ---------------------------------------------------------------------
912 /* Helper for Hex2Num */
// Returns the value (0..15) of the hex digit c, accepting both cases.
// Any other character yields 0.
static int HexDigit(int c)
{
   if ('0' <= c && c <= '9')
      return c - '0';

   int Base;
   if ('a' <= c && c <= 'f')
      Base = 'a';
   else if ('A' <= c && c <= 'F')
      Base = 'A';
   else
      return 0;

   return c - Base + 10;
}
923 /*}}}*/
924 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
925 // ---------------------------------------------------------------------
926 /* The length of the buffer must be exactly 1/2 the length of the string. */
927 bool Hex2Num(const string &Str,unsigned char *Num,unsigned int Length)
928 {
929 if (Str.length() != Length*2)
930 return false;
931
932 // Convert each digit. We store it in the same order as the string
933 int J = 0;
934 for (string::const_iterator I = Str.begin(); I != Str.end();J++, I += 2)
935 {
936 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
937 return false;
938
939 Num[J] = HexDigit(I[0]) << 4;
940 Num[J] += HexDigit(I[1]);
941 }
942
943 return true;
944 }
945 /*}}}*/
946 // TokSplitString - Split a string up by a given token /*{{{*/
947 // ---------------------------------------------------------------------
948 /* This is intended to be a faster splitter, it does not use dynamic
949 memories. Input is changed to insert nulls at each token location. */
// Splits Input at every Tok character without allocating memory.
//   Tok     - separator character
//   Input   - NUL terminated text; modified in place, terminators are
//             written at the end of each field
//   List    - receives pointers to the fields followed by a null pointer
//   ListMax - number of slots available in List
// White space around fields is dropped. Returns false when the fields do
// not fit; the last slot used is then set to null.
bool TokSplitString(char Tok,char *Input,char **List,
                    unsigned long ListMax)
{
   char * const Limit = Input + strlen(Input);

   // Strip any leading spaces
   char *Field = Input;
   while (*Field != 0 && isspace(*Field) != 0)
      ++Field;

   unsigned long Used = 0;
   char *Cursor = Field;
   while (Cursor != Limit)
   {
      // Skip to the next Token
      while (Cursor != Limit && *Cursor != Tok)
         ++Cursor;

      // Back up over separators and spaces, then terminate the field
      char *Tail = Cursor;
      while (Tail > Field && (Tail[-1] == Tok || isspace(Tail[-1]) != 0))
         --Tail;
      *Tail = 0;

      List[Used++] = Field;
      if (Used >= ListMax)
      {
         List[Used-1] = 0;
         return false;
      }

      // Move to the start of the next field, also stepping over the
      // terminator that may just have been written
      while (Cursor != Limit &&
             (*Cursor == Tok || isspace(*Cursor) != 0 || *Cursor == 0))
         ++Cursor;
      Field = Cursor;
   }

   List[Used] = 0;
   return true;
}
985 /*}}}*/
986 // RegexChoice - Simple regex list/list matcher /*{{{*/
987 // ---------------------------------------------------------------------
988 /* */
989 unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin,
990 const char **ListEnd)
991 {
992 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
993 R->Hit = false;
994
995 unsigned long Hits = 0;
996 for (; ListBegin != ListEnd; ListBegin++)
997 {
998 // Check if the name is a regex
999 const char *I;
1000 bool Regex = true;
1001 for (I = *ListBegin; *I != 0; I++)
1002 if (*I == '.' || *I == '?' || *I == '*' || *I == '|')
1003 break;
1004 if (*I == 0)
1005 Regex = false;
1006
1007 // Compile the regex pattern
1008 regex_t Pattern;
1009 if (Regex == true)
1010 if (regcomp(&Pattern,*ListBegin,REG_EXTENDED | REG_ICASE |
1011 REG_NOSUB) != 0)
1012 Regex = false;
1013
1014 // Search the list
1015 bool Done = false;
1016 for (RxChoiceList *R = Rxs; R->Str != 0; R++)
1017 {
1018 if (R->Str[0] == 0)
1019 continue;
1020
1021 if (strcasecmp(R->Str,*ListBegin) != 0)
1022 {
1023 if (Regex == false)
1024 continue;
1025 if (regexec(&Pattern,R->Str,0,0,0) != 0)
1026 continue;
1027 }
1028 Done = true;
1029
1030 if (R->Hit == false)
1031 Hits++;
1032
1033 R->Hit = true;
1034 }
1035
1036 if (Regex == true)
1037 regfree(&Pattern);
1038
1039 if (Done == false)
1040 _error->Warning(_("Selection %s not found"),*ListBegin);
1041 }
1042
1043 return Hits;
1044 }
1045 /*}}}*/
1046 // ioprintf - C format string outputter to C++ iostreams /*{{{*/
1047 // ---------------------------------------------------------------------
1048 /* This is used to make the internationalization strings easier to translate
1049 and to allow reordering of parameters */
// Writes printf style formatted text to a C++ stream.
//   out    - stream that receives the text
//   format - printf format string, followed by its arguments
// Output of any length is written in full. Nothing is written if the
// formatting itself fails.
void ioprintf(ostream &out,const char *format,...)
{
   va_list args;
   va_start(args,format);

   // A va_list can only be walked once; keep a copy for a second pass
   va_list again;
   va_copy(again,args);

   // sprintf the description
   char S[4096];
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed >= 0 && (size_t)Needed < sizeof(S))
      out << S;
   else if (Needed >= 0)
   {
      // Did not fit: format again into a buffer of the reported size
      string Big((size_t)Needed + 1,'\0');
      vsnprintf(&Big[0],Big.size(),format,again);
      out << Big.c_str();
   }
   va_end(again);
}
1060 /*}}}*/
1061 // strprintf - C format string outputter to C++ strings /*{{{*/
1062 // ---------------------------------------------------------------------
1063 /* This is used to make the internationalization strings easier to translate
1064 and to allow reordering of parameters */
// Stores printf style formatted text in a C++ string.
//   out    - replaced by the formatted text
//   format - printf format string, followed by its arguments
// Output of any length is stored in full. out is emptied if the formatting
// itself fails.
void strprintf(string &out,const char *format,...)
{
   va_list args;
   va_start(args,format);

   // A va_list can only be walked once; keep a copy for a second pass
   va_list again;
   va_copy(again,args);

   // sprintf the description
   char S[4096];
   int const Needed = vsnprintf(S,sizeof(S),format,args);
   va_end(args);

   if (Needed < 0)
      out.clear();
   else if ((size_t)Needed < sizeof(S))
      out = S;
   else
   {
      /* Did not fit: format again into a buffer of the reported size.
         out is assigned only afterwards, as an argument may point into
         it. */
      string Big((size_t)Needed + 1,'\0');
      vsnprintf(&Big[0],Big.size(),format,again);
      out = Big.c_str();
   }
   va_end(again);
}
1075 /*}}}*/
1076 // safe_snprintf - Safer snprintf /*{{{*/
1077 // ---------------------------------------------------------------------
/* This is a snprintf that will never (ever) go past 'End' and returns a
pointer to the end of the new string. The returned string is always null
terminated unless Buffer == End. This is a better alternative to using
consecutive snprintfs. */
// Formats into the range [Buffer,End) and returns where the next piece of
// text should be written.
//   Buffer - start of the free space
//   End    - one past the last byte of the whole buffer
//   Format - printf format string, followed by its arguments
// Returns a pointer to the written terminator when everything fitted. If
// there is no room (End <= Buffer), the text was cut short, or formatting
// failed, End is returned, so a following call does nothing.
char *safe_snprintf(char *Buffer,char *End,const char *Format,...)
{
   if (End <= Buffer)
      return End;

   va_list args;
   va_start(args,Format);
   // Kept signed so that an error return (negative) can be recognised
   int const Did = vsnprintf(Buffer,End - Buffer,Format,args);
   va_end(args);

   if (Did < 0 || Did >= End - Buffer)
      return End;
   return Buffer + Did;
}
1097 /*}}}*/
1098
1099 // tolower_ascii - tolower() function that ignores the locale /*{{{*/
1100 // ---------------------------------------------------------------------
1101 /* */
// Lower-cases the letters 'A'..'Z' by adding 32, regardless of the current
// locale; every other value is returned unchanged.
int tolower_ascii(int c)
{
   bool const IsUpper = (c >= 'A' && c <= 'Z');
   return IsUpper ? c + 32 : c;
}
1108 /*}}}*/
1109
// CheckDomainList - See if Host is in a comma separated list /*{{{*/
1111 // ---------------------------------------------------------------------
/* The domain list is a comma separated list of domains that are suffix
matched against the argument */
1114 bool CheckDomainList(const string &Host,const string &List)
1115 {
1116 string::const_iterator Start = List.begin();
1117 for (string::const_iterator Cur = List.begin(); Cur <= List.end(); Cur++)
1118 {
1119 if (Cur < List.end() && *Cur != ',')
1120 continue;
1121
1122 // Match the end of the string..
1123 if ((Host.size() >= (unsigned)(Cur - Start)) &&
1124 Cur - Start != 0 &&
1125 stringcasecmp(Host.end() - (Cur - Start),Host.end(),Start,Cur) == 0)
1126 return true;
1127
1128 Start = Cur + 1;
1129 }
1130 return false;
1131 }
1132 /*}}}*/
1133
1134 // URI::CopyFrom - Copy from an object /*{{{*/
1135 // ---------------------------------------------------------------------
1136 /* This parses the URI into all of its components */
1137 void URI::CopyFrom(const string &U)
1138 {
1139 string::const_iterator I = U.begin();
1140
1141 // Locate the first colon, this separates the scheme
1142 for (; I < U.end() && *I != ':' ; I++);
1143 string::const_iterator FirstColon = I;
1144
1145 /* Determine if this is a host type URI with a leading double //
1146 and then search for the first single / */
1147 string::const_iterator SingleSlash = I;
1148 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
1149 SingleSlash += 3;
1150
1151 /* Find the / indicating the end of the hostname, ignoring /'s in the
1152 square brackets */
1153 bool InBracket = false;
1154 for (; SingleSlash < U.end() && (*SingleSlash != '/' || InBracket == true); SingleSlash++)
1155 {
1156 if (*SingleSlash == '[')
1157 InBracket = true;
1158 if (InBracket == true && *SingleSlash == ']')
1159 InBracket = false;
1160 }
1161
1162 if (SingleSlash > U.end())
1163 SingleSlash = U.end();
1164
1165 // We can now write the access and path specifiers
1166 Access.assign(U.begin(),FirstColon);
1167 if (SingleSlash != U.end())
1168 Path.assign(SingleSlash,U.end());
1169 if (Path.empty() == true)
1170 Path = "/";
1171
1172 // Now we attempt to locate a user:pass@host fragment
1173 if (FirstColon + 2 <= U.end() && FirstColon[1] == '/' && FirstColon[2] == '/')
1174 FirstColon += 3;
1175 else
1176 FirstColon += 1;
1177 if (FirstColon >= U.end())
1178 return;
1179
1180 if (FirstColon > SingleSlash)
1181 FirstColon = SingleSlash;
1182
1183 // Find the colon...
1184 I = FirstColon + 1;
1185 if (I > SingleSlash)
1186 I = SingleSlash;
1187 for (; I < SingleSlash && *I != ':'; I++);
1188 string::const_iterator SecondColon = I;
1189
1190 // Search for the @ after the colon
1191 for (; I < SingleSlash && *I != '@'; I++);
1192 string::const_iterator At = I;
1193
1194 // Now write the host and user/pass
1195 if (At == SingleSlash)
1196 {
1197 if (FirstColon < SingleSlash)
1198 Host.assign(FirstColon,SingleSlash);
1199 }
1200 else
1201 {
1202 Host.assign(At+1,SingleSlash);
1203 User.assign(FirstColon,SecondColon);
1204 if (SecondColon < At)
1205 Password.assign(SecondColon+1,At);
1206 }
1207
1208 // Now we parse the RFC 2732 [] hostnames.
1209 unsigned long PortEnd = 0;
1210 InBracket = false;
1211 for (unsigned I = 0; I != Host.length();)
1212 {
1213 if (Host[I] == '[')
1214 {
1215 InBracket = true;
1216 Host.erase(I,1);
1217 continue;
1218 }
1219
1220 if (InBracket == true && Host[I] == ']')
1221 {
1222 InBracket = false;
1223 Host.erase(I,1);
1224 PortEnd = I;
1225 continue;
1226 }
1227 I++;
1228 }
1229
1230 // Tsk, weird.
1231 if (InBracket == true)
1232 {
1233 Host.clear();
1234 return;
1235 }
1236
1237 // Now we parse off a port number from the hostname
1238 Port = 0;
1239 string::size_type Pos = Host.rfind(':');
1240 if (Pos == string::npos || Pos < PortEnd)
1241 return;
1242
1243 Port = atoi(string(Host,Pos+1).c_str());
1244 Host.assign(Host,0,Pos);
1245 }
1246 /*}}}*/
1247 // URI::operator string - Convert the URI to a string /*{{{*/
1248 // ---------------------------------------------------------------------
1249 /* */
1250 URI::operator string()
1251 {
1252 string Res;
1253
1254 if (Access.empty() == false)
1255 Res = Access + ':';
1256
1257 if (Host.empty() == false)
1258 {
1259 if (Access.empty() == false)
1260 Res += "//";
1261
1262 if (User.empty() == false)
1263 {
1264 Res += User;
1265 if (Password.empty() == false)
1266 Res += ":" + Password;
1267 Res += "@";
1268 }
1269
1270 // Add RFC 2732 escaping characters
1271 if (Access.empty() == false &&
1272 (Host.find('/') != string::npos || Host.find(':') != string::npos))
1273 Res += '[' + Host + ']';
1274 else
1275 Res += Host;
1276
1277 if (Port != 0)
1278 {
1279 char S[30];
1280 sprintf(S,":%u",Port);
1281 Res += S;
1282 }
1283 }
1284
1285 if (Path.empty() == false)
1286 {
1287 if (Path[0] != '/')
1288 Res += "/" + Path;
1289 else
1290 Res += Path;
1291 }
1292
1293 return Res;
1294 }
1295 /*}}}*/
1296 // URI::SiteOnly - Return the schema and site for the URI /*{{{*/
1297 // ---------------------------------------------------------------------
1298 /* */
1299 string URI::SiteOnly(const string &URI)
1300 {
1301 ::URI U(URI);
1302 U.User.clear();
1303 U.Password.clear();
1304 U.Path.clear();
1305 U.Port = 0;
1306 return U;
1307 }
1308 /*}}}*/