]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Hide hit for local uris
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.17 1999/01/18 06:20:08 jgg Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #ifdef __GNUG__
19 #pragma implementation "strutl.h"
20 #endif
21
22 #include <strutl.h>
23 #include <apt-pkg/fileutl.h>
24
25 #include <ctype.h>
26 #include <string.h>
27 #include <stdio.h>
28 /*}}}*/
29
30 // strstrip - Remove white space from the front and back of a string /*{{{*/
31 // ---------------------------------------------------------------------
32 /* This is handy to use when parsing a file. It also removes \n's left
33 over from fgets and company */
char *_strstrip(char *String)
{
   /* Trims the string in place and returns a pointer to its first kept
      character. Only blanks and tabs are skipped at the front; blanks,
      tabs, '\n' and '\r' are cut from the back. */
   while (*String == ' ' || *String == '\t')
      String++;

   // Nothing but leading white space was present
   if (*String == 0)
      return String;

   // Walk back from the terminator over trailing white space and line ends
   char *Stop = String + strlen(String);
   while (Stop != String)
   {
      char const Last = Stop[-1];
      if (Last != ' ' && Last != '\t' && Last != '\n' && Last != '\r')
         break;
      Stop--;
   }

   *Stop = 0;
   return String;
}
48 /*}}}*/
49 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
50 // ---------------------------------------------------------------------
51 /* */
char *_strtabexpand(char *String,size_t Len)
{
   /* Expands every tab in String, in place, to the blanks needed to reach
      the next multiple-of-8 column (column 0 is the start of String).
      Len is the total size of the buffer holding String, terminator
      included. If an expansion would not fit, the string is cut at that
      tab and returned as is. Returns String. */
   for (char *I = String; (size_t)(I - String) < Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      size_t const Column = I - String;
      size_t const Pad = 8 - (Column % 8);     // 1..8 blanks replace the tab
      size_t const Tail = strlen(I + 1) + 1;   // text after the tab + NUL

      // Expanded string would overrun the buffer, truncate instead
      if (Column + Pad + Tail > Len)
      {
         *I = 0;
         return String;
      }

      // Open a gap of Pad characters where the tab was and blank it
      memmove(I + Pad,I + 1,Tail);
      memset(I,' ',Pad);

      // Leave I on the last blank so the loop increment lands on the
      // first character after the expansion (it may itself be a tab)
      I += Pad - 1;
   }
   return String;
}
83 /*}}}*/
84 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
85 // ---------------------------------------------------------------------
86 /* This grabs a single word, converts any % escaped characters to their
87 proper values and advances the pointer. Double quotes are understood
88 and stripped out as well. This is for URI/URL parsing. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   /* Extracts the next blank delimited word from String into Res.
      %XX sequences are replaced by the byte they encode and double quotes
      are removed; blanks inside a quoted section do not end the word.
      On success String is advanced past the word and the blanks that
      follow it and true is returned. Returns false, leaving String and
      Res untouched, when no word is left or a quote is never closed. */

   // Skip leading whitespace
   const char *C = String;
   for (;*C != 0 && *C == ' '; C++);
   if (*C == 0)
      return false;

   // Jump to the end of the word
   for (;*C != 0 && *C != ' '; C++)
   {
      if (*C == '"')
      {
         for (C++;*C != 0 && *C != '"'; C++);
         if (*C == 0)
            return false;
      }
   }

   /* Now de-quote characters. The word is collected in a string so it is
      not limited by a fixed size buffer. Decoding starts at String, so any
      blanks skipped above are part of the result. */
   string Word;
   Word.reserve(C - String);
   for (const char *Start = String; Start != C;)
   {
      if (*Start == '%' && Start + 2 < C)
      {
         // Two hex digits follow, the array must be NUL terminated for strtol
         char const Hex[3] = {Start[1],Start[2],0};
         Word += (char)strtol(Hex,0,16);
         Start += 3;
         continue;
      }
      if (*Start != '"')
         Word += *Start;
      Start++;
   }

   // Going through c_str() ends the result at a decoded %00
   Res = Word.c_str();

   // Skip ending white space
   for (;*C != 0 && *C == ' '; C++);
   String = C;
   return true;
}
138 /*}}}*/
139 // ParseCWord - Parses a string like a C "" expression /*{{{*/
140 // ---------------------------------------------------------------------
141 /* This expects a series of space separated strings enclosed in ""'s.
142 It concatenates the ""'s into a single string. */
bool ParseCWord(const char *String,string &Res)
{
   /* Joins the "..." sections found in String into Res. Each run of white
      space between sections contributes a single blank. Returns false,
      leaving Res untouched, when the input is empty or all blanks, is 1024
      bytes or longer, has an unterminated quote or has any non white space
      character outside of quotes. */

   // Leading blanks are dropped
   const char *Cur = String;
   while (*Cur == ' ')
      Cur++;
   if (*Cur == 0)
      return false;

   // Over long inputs are refused outright
   if (strlen(String) >= 1024)
      return false;

   string Joined;
   while (*Cur != 0)
   {
      if (*Cur == '"')
      {
         // Copy the quoted body verbatim
         for (Cur++; *Cur != 0 && *Cur != '"'; Cur++)
            Joined += *Cur;

         // Ran off the end without a closing quote
         if (*Cur == 0)
            return false;

         Cur++;
         continue;
      }

      // Only white space may appear between quoted sections
      if (isspace(*Cur) == 0)
         return false;

      // The first white space character of a run becomes one blank
      if (Cur == String || isspace(Cur[-1]) == 0)
         Joined += ' ';
      Cur++;
   }

   Res = Joined;
   return true;
}
179 /*}}}*/
180 // QuoteString - Convert a string into quoted form /*{{{*/
181 // ---------------------------------------------------------------------
182 /* */
string QuoteString(string Str,const char *Bad)
{
   /* Returns a copy of Str in which every character that appears in Bad,
      is not printable, or lies outside 0x21..0x7E is replaced by a %xx
      escape (two lower case hex digits of the byte value). */
   string Res;
   for (string::iterator I = Str.begin(); I != Str.end(); I++)
   {
      /* Work on the unsigned byte value. A plain char may be negative,
         which must not be handed to isprint and would be sign extended
         into an eight digit escape by the format below. */
      unsigned char const Ch = (unsigned char)*I;
      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[10];
         snprintf(Buf,sizeof(Buf),"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
200 /*}}}*/
201 // SizeToStr - Convert a long into a human readable size /*{{{*/
202 // ---------------------------------------------------------------------
203 /* A max of 4 digits are shown before conversion to the next highest unit.
204 The max length of the string will be 5 chars unless the size is > 10
205 YottaBytes (E24) */
string SizeToStr(double Size)
{
   /* Formats the magnitude of Size (the sign is dropped) using decimal
      unit suffixes. A value is divided by 1000 until it is below 10000;
      scaled values below 100 get one decimal place. Sizes beyond the
      range of the last suffix are still printed, in units of Y. */
   char S[300];
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
   int const Last = sizeof(Ext)/sizeof(Ext[0]) - 1;

   for (int I = 0; ; I++)
   {
      if (ASize < 100 && I != 0)
      {
         snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
         break;
      }

      // The largest unit always formats so S is never returned unset
      if (ASize < 10000 || I == Last)
      {
         snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);
         break;
      }
      ASize /= 1000.0;
   }

   return S;
}
238 /*}}}*/
239 // TimeToStr - Convert the time into a string /*{{{*/
240 // ---------------------------------------------------------------------
241 /* Converts a number of seconds to a hms format */
string TimeToStr(unsigned long Sec)
{
   /* Renders Sec as "Nd NhNmNs", "NhNmNs", "NmNs" or "Ns", picking the
      largest unit that Sec strictly exceeds (exactly 60 seconds therefore
      prints as "60s"). The conversions are unsigned to match the type of
      Sec. */
   char S[300];

   unsigned long const Days = Sec/60/60/24;
   unsigned long const Hours = Sec/60/60;
   unsigned long const Mins = Sec/60;

   if (Sec > 60*60*24)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Days,Hours % 24,Mins % 60,Sec % 60);
   else if (Sec > 60*60)
      snprintf(S,sizeof(S),"%luh%lum%lus",Hours,Mins % 60,Sec % 60);
   else if (Sec > 60)
      snprintf(S,sizeof(S),"%lum%lus",Mins,Sec % 60);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
272 /*}}}*/
273 // SubstVar - Substitute a string for another string /*{{{*/
274 // ---------------------------------------------------------------------
275 /* This replaces all occurrences of Subst with Contents in Str. */
string SubstVar(string Str,string Subst,string Contents)
{
   /* Returns Str with every non overlapping occurrence of Subst, found
      left to right, replaced by Contents. Str is returned unchanged when
      Subst is empty or does not occur. */

   // An empty pattern matches at every position and would never advance
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      // Copy the text between the previous match and this one; the third
      // argument is a length, not an end position
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // No match was found
   if (OldPos == 0)
      return Str;

   return Temp + string(Str,OldPos);
}
294 /*}}}*/
295 // URItoFileName - Convert the uri into a unique file name /*{{{*/
296 // ---------------------------------------------------------------------
297 /* This converts a URI into a safe filename. It quotes all unsafe characters
298 and converts / to _ and removes the scheme identifier. The resulting
299 file name should be unique and never occur again for a different file */
300 string URItoFileName(string URI)
301 {
302 string::const_iterator I = URI.begin() + URI.find(':') + 1;
303 for (; I < URI.end() && *I == '/'; I++);
304
305 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
306 URI = QuoteString(string(I,URI.end() - I),"\\|{}[]<>\"^~_=!@#$%^&*");
307 string::iterator J = URI.begin();
308 for (; J != URI.end(); J++)
309 if (*J == '/')
310 *J = '_';
311 return URI;
312 }
313 /*}}}*/
314 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
315 // ---------------------------------------------------------------------
316 /* This routine performs a base64 transformation on a string. It was ripped
317 from wget and then patched and bug fixed.
318
319 This spec can be found in rfc2045 */
string Base64Encode(string S)
{
   /* Returns the base64 (RFC 2045) encoding of the bytes of S, padded with
      '=' to a multiple of four characters. No line breaks are inserted. */

   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space
   string Final;
   Final.reserve((4*S.length() + 2)/3 + 2);

   /* Transform the 3x8 bits to 4x6 bits, as required by
      base64. */
   for (string::const_iterator I = S.begin(); I < S.end(); I += 3)
   {
      /* The bytes must be unsigned: with a signed char any value of 0x80
         or above would shift to a negative table index. */
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = (unsigned char)I[0];
      if (I + 1 < S.end())
         Bits[1] = (unsigned char)I[1];
      if (I + 2 < S.end())
         Bits[2] = (unsigned char)I[2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      // Only one input byte was left in this group
      if (I + 1 >= S.end())
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      // Only two input bytes were left in this group
      if (I + 2 >= S.end())
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (S.length() % 3 == 2)
      Final += '=';
   if (S.length() % 3 == 1)
      Final += "==";

   return Final;
}
370 /*}}}*/
371 // stringcmp - Arbitrary string compare /*{{{*/
372 // ---------------------------------------------------------------------
373 /* This safely compares two non-null terminated strings of arbitrary
374 length */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   /* Compares the ranges [A,AEnd) and [B,BEnd). Returns 0 when they are
      equal, otherwise -1 or 1 from the first differing character. When one
      range is a proper prefix of the other the shorter range yields 1 if
      it is A and -1 if it is B. */

   // Advance over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      A++;
      B++;
   }

   bool const DoneA = (A == AEnd);
   bool const DoneB = (B == BEnd);
   if (DoneA == true)
      return (DoneB == true) ? 0 : 1;
   if (DoneB == true)
      return -1;

   // Both ranges still have a character and they differ
   return (*A < *B) ? -1 : 1;
}
391 /*}}}*/
392 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
393 // ---------------------------------------------------------------------
394 /* */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   /* Same contract as stringcmp but characters are passed through toupper
      before they are compared. A range that is a proper prefix of the
      other yields 1 if it is A and -1 if it is B. */
   for (;;)
   {
      bool const EndA = (A == AEnd);
      bool const EndB = (B == BEnd);

      // One or both ranges are exhausted
      if (EndA == true && EndB == true)
         return 0;
      if (EndA == true)
         return 1;
      if (EndB == true)
         return -1;

      // First difference decides the result
      if (toupper(*A) != toupper(*B))
         return (toupper(*A) < toupper(*B)) ? -1 : 1;

      A++;
      B++;
   }
}
411 /*}}}*/
412 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
413 // ---------------------------------------------------------------------
414 /* The format is like those used in package files and the method
415 communication system */
416 string LookupTag(string Message,const char *Tag,const char *Default)
417 {
418 // Look for a matching tag.
419 int Length = strlen(Tag);
420 for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
421 {
422 // Found the tag
423 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
424 {
425 // Find the end of line and strip the leading/trailing spaces
426 string::iterator J;
427 I += Length + 1;
428 for (; isspace(*I) != 0 && I < Message.end(); I++);
429 for (J = I; *J != '\n' && J < Message.end(); J++);
430 for (; J > I && isspace(J[-1]) != 0; J--);
431
432 return string(I,J-I);
433 }
434
435 for (; *I != '\n' && I < Message.end(); I++);
436 }
437
438 // Failed to find a match
439 if (Default == 0)
440 return string();
441 return Default;
442 }
443 /*}}}*/
444 // StringToBool - Converts a string into a boolean /*{{{*/
445 // ---------------------------------------------------------------------
446 /* This inspects the string to see if it is true or if it is false and
447 then returns the result. Several variants on true/false are checked. */
int StringToBool(string Text,int Default = -1)
{
   /* Returns 1 for a true value, 0 for a false value and Default when Text
      is neither. Text counts as a number when strtol (base 0) consumes at
      least one character and the result is 0 or 1; otherwise the whole of
      Text is compared, ignoring case, against the word lists below. */
   const char * const Str = Text.c_str();

   char *NumEnd;
   int const Num = strtol(Str,&NumEnd,0);
   if (NumEnd != Str && Num >= 0 && Num <= 1)
      return Num;

   // Words meaning false
   static const char * const Negative[] = {"no","false","without","off",
                                           "disable"};
   for (unsigned int I = 0; I != sizeof(Negative)/sizeof(Negative[0]); I++)
      if (strcasecmp(Str,Negative[I]) == 0)
         return 0;

   // Words meaning true
   static const char * const Positive[] = {"yes","true","with","on",
                                           "enable"};
   for (unsigned int I = 0; I != sizeof(Positive)/sizeof(Positive[0]); I++)
      if (strcasecmp(Str,Positive[I]) == 0)
         return 1;

   return Default;
}
473 /*}}}*/
474 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
475 // ---------------------------------------------------------------------
476 /* This converts a time_t into a string time representation that is
477 year 2000 compliant and timezone neutral */
string TimeRFC1123(time_t Date)
{
   /* Formats Date as "Www, DD Mmm YYYY HH:MM:SS GMT" using gmtime. An
      empty string is returned when gmtime cannot convert Date. */
   struct tm const * const Broken = gmtime(&Date);
   if (Broken == 0)
      return string();

   // Copy the result out of the buffer gmtime returned
   struct tm const Conv = *Broken;
   char Buf[300];

   const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                          "Aug","Sep","Oct","Nov","Dec"};

   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",
            Day[Conv.tm_wday],Conv.tm_mday,Month[Conv.tm_mon],
            Conv.tm_year+1900,Conv.tm_hour,Conv.tm_min,Conv.tm_sec);
   return Buf;
}
492 /*}}}*/
493 // ReadMessages - Read messages from the FD /*{{{*/
494 // ---------------------------------------------------------------------
495 /* This pulls full messages from the input FD into the message buffer.
496 It assumes that messages will not pause during transit so no
497 fancy buffering is used. */
498 bool ReadMessages(int Fd, vector<string> &List)
499 {
500 char Buffer[4000];
501 char *End = Buffer;
502
503 while (1)
504 {
505 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
506
507 // Process is dead, this is kind of bad..
508 if (Res == 0)
509 return false;
510
511 // No data
512 if (Res <= 0)
513 return true;
514
515 End += Res;
516
517 // Look for the end of the message
518 for (char *I = Buffer; I + 1 < End; I++)
519 {
520 if (I[0] != '\n' || I[1] != '\n')
521 continue;
522
523 // Pull the message out
524 string Message(Buffer,0,I-Buffer);
525
526 // Fix up the buffer
527 for (; I < End && *I == '\n'; I++);
528 End -= I-Buffer;
529 memmove(Buffer,I,End-Buffer);
530 I = Buffer;
531
532 List.push_back(Message);
533 }
534 if (End == Buffer)
535 return true;
536
537 if (WaitFd(Fd) == false)
538 return false;
539 }
540 }
541 /*}}}*/
542 // MonthConv - Converts a month string into a number /*{{{*/
543 // ---------------------------------------------------------------------
544 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
545 Made it a bit more robust with a few touppers though. */
static int MonthConv(char *Month)
{
   /* Maps an English month name to 0..11 by looking at as few of its
      first three letters as are needed, ignoring case. Names that start
      with any other letter are reported as January (0). */
   int const First = toupper(*Month);

   if (First == 'A')                       // Apr or Aug
      return (toupper(Month[1]) == 'P') ? 3 : 7;
   if (First == 'D')
      return 11;
   if (First == 'F')
      return 1;
   if (First == 'J')
   {
      // Jan, otherwise the third letter tells Jun from Jul
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }
   if (First == 'M')                       // Mar or May
      return (toupper(Month[2]) == 'R') ? 2 : 4;
   if (First == 'N')
      return 10;
   if (First == 'O')
      return 9;
   if (First == 'S')
      return 8;

   // Pretend it is January..
   return 0;
}
574 /*}}}*/
575 // StrToTime - Converts a string into a time_t /*{{{*/
576 // ---------------------------------------------------------------------
577 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
578 and the C library asctime format. It requires the GNU library function
579 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
580 reason the C library does not provide any such function :<*/
581 bool StrToTime(string Val,time_t &Result)
582 {
583 struct tm Tm;
584 char Month[10];
585 const char *I = Val.c_str();
586
587 // Skip the day of the week
588 for (;*I != 0 && *I != ' '; I++);
589
590 // Handle RFC 1123 time
591 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
592 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
593 {
594 // Handle RFC 1036 time
595 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
596 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
597 Tm.tm_year += 1900;
598 else
599 {
600 // asctime format
601 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
602 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
603 return false;
604 }
605 }
606
607 Tm.tm_isdst = 0;
608 Tm.tm_mon = MonthConv(Month);
609 Tm.tm_year -= 1900;
610
611 // Convert to local time and then to GMT
612 Result = timegm(&Tm);
613 return true;
614 }
615 /*}}}*/
616
617 // URI::CopyFrom - Copy from an object /*{{{*/
618 // ---------------------------------------------------------------------
619 /* This parses the URI into all of its components */
620 void URI::CopyFrom(string U)
621 {
622 string::const_iterator I = U.begin();
623
624 // Locate the first colon, this seperates the scheme
625 for (; I < U.end() && *I != ':' ; I++);
626 string::const_iterator FirstColon = I;
627
628 /* Determine if this is a host type URI with a leading double //
629 and then search for the first single / */
630 string::const_iterator SingleSlash = I;
631 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
632 SingleSlash += 3;
633 for (; SingleSlash < U.end() && *SingleSlash != '/'; SingleSlash++);
634 if (SingleSlash > U.end())
635 SingleSlash = U.end();
636
637 // We can now write the access and path specifiers
638 Access = string(U,0,FirstColon - U.begin());
639 if (SingleSlash != U.end())
640 Path = string(U,SingleSlash - U.begin());
641 if (Path.empty() == true)
642 Path = "/";
643
644 // Now we attempt to locate a user:pass@host fragment
645 if (FirstColon[1] == '/' && FirstColon[2] == '/')
646 FirstColon += 3;
647 else
648 FirstColon += 1;
649 if (FirstColon >= U.end())
650 return;
651
652 if (FirstColon > SingleSlash)
653 FirstColon = SingleSlash;
654
655 // Search for the @
656 I = FirstColon;
657 for (; I < SingleSlash && *I != '@'; I++);
658 string::const_iterator At = I;
659
660 // Colon in the @ section
661 I = FirstColon + 1;
662 for (; I < At && *I != ':'; I++);
663 string::const_iterator SecondColon = I;
664
665 // Now write the host and user/pass
666 if (At == SingleSlash)
667 {
668 if (FirstColon < SingleSlash)
669 Host = string(U,FirstColon - U.begin(),SingleSlash - FirstColon);
670 }
671 else
672 {
673 Host = string(U,At - U.begin() + 1,SingleSlash - At - 1);
674 User = string(U,FirstColon - U.begin(),SecondColon - FirstColon);
675 if (SecondColon < At)
676 Password = string(U,SecondColon - U.begin() + 1,At - SecondColon - 1);
677 }
678
679 // Now we parse off a pot number from the hostname
680 Port = 0;
681 string::size_type Pos = Host.rfind(':');
682 if (Pos == string::npos)
683 return;
684
685 Port = atoi(string(Host,Pos+1).c_str());
686 Host = string(Host,0,Pos);
687 }
688 /*}}}*/
689 // URI::operator string - Convert the URI to a string /*{{{*/
690 // ---------------------------------------------------------------------
691 /* */
692 URI::operator string()
693 {
694 string Res = Access + ':';
695 if (Host.empty() == false)
696 {
697 Res += "//";
698 if (User.empty() == false)
699 {
700 Res += "//" + User;
701 if (Password.empty() == false)
702 Res += ":" + Password;
703 Res += "@";
704 }
705 Res += Host;
706 if (Port != 0)
707 {
708 char S[30];
709 sprintf(S,":%u",Port);
710 Res += S;
711 }
712 }
713
714 if (Path.empty() == false)
715 {
716 if (Path[0] != '/')
717 Res += "/" + Path;
718 else
719 Res += Path;
720 }
721
722 return Res;
723 }
724 /*}}}*/