]> git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
Sync
[apt.git] / apt-pkg / contrib / strutl.cc
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: strutl.cc,v 1.8 1998/10/24 04:58:07 jgg Exp $
4 /* ######################################################################
5
6 String Util - Some useful string functions.
7
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
11
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
14
15 ##################################################################### */
16 /*}}}*/
17 // Includes /*{{{*/
18 #include <strutl.h>
19 #include <apt-pkg/fileutl.h>
20
21 #include <ctype.h>
22 #include <string.h>
23 #include <stdio.h>
24 #include <time.h>
25 /*}}}*/
26
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* Trims the string in place. Leading spaces and tabs are skipped by
   advancing the returned pointer; trailing spaces, tabs, \n and \r (such
   as the line ending left behind by fgets) are cut off by writing a
   terminator into the buffer. The return value points into String. */
char *_strstrip(char *String)
{
   while (*String == ' ' || *String == '\t')
      String++;

   // Nothing but blanks
   if (*String == 0)
      return String;

   /* Walk back from the terminator. End is compared against String itself
      so no pointer in front of the buffer is ever formed */
   char *End = String + strlen(String);
   while (End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'))
      End--;
   *End = 0;
   return String;
}
45 /*}}}*/
// strtabexpand - Converts tabs into 8 spaces				/*{{{*/
// ---------------------------------------------------------------------
/* Expands every tab in place into the spaces needed to reach the next
   multiple-of-8 column, counting columns from the start of String. Len
   is the capacity of the buffer String points to, terminator included.
   If an expansion would not fit in Len bytes the string is cut off at
   that tab. Returns String. */
char *_strtabexpand(char *String,size_t Len)
{
   for (size_t Col = 0; Col < Len && String[Col] != 0; Col++)
   {
      if (String[Col] != '\t')
         continue;

      // Between 1 and 8 spaces take us to the next tab stop
      size_t Pad = 8 - (Col % 8);

      // Bytes following the tab, terminator included
      size_t Tail = strlen(String + Col + 1) + 1;

      // No room for the expansion, truncate here
      if (Col + Pad + Tail > Len)
      {
         String[Col] = 0;
         return String;
      }

      memmove(String + Col + Pad,String + Col + 1,Tail);
      memset(String + Col,' ',Pad);

      /* Resume right after the padding; the loop increment supplies the
         final step so a tab directly following this one is still seen */
      Col += Pad - 1;
   }
   return String;
}
80 /*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single space delimited word, converts any %XX escaped
   characters to their proper values and advances String past the word
   and the blanks that follow it. Double quotes protect embedded spaces
   and are stripped from the result. This is for URI/URL parsing.

   Returns false, leaving String and Res untouched, if nothing but blanks
   remain or a quote is not closed. Words are decoded through a 1024 byte
   buffer, anything that does not fit is dropped. */
bool ParseQuoteWord(const char *&String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   // Jump to the end of the word, spaces inside quotes do not end it
   for (; *C != 0 && *C != ' '; C++)
   {
      if (*C != '"')
         continue;

      for (C++; *C != 0 && *C != '"'; C++);
      if (*C == 0)
         return false;
   }

   /* Now de-quote characters. NOTE(review): decoding starts at String,
      not at the first non-blank, so blanks skipped above are copied into
      the result - confirm whether callers rely on that. */
   char Buffer[1024];
   char Tmp[3];
   char *I = Buffer;
   char * const Last = Buffer + sizeof(Buffer) - 1;   // Keeps room for the 0
   const char *Start = String;
   while (Start != C && I != Last)
   {
      if (*Start == '%' && Start + 2 < C)
      {
         Tmp[0] = Start[1];
         Tmp[1] = Start[2];
         Tmp[2] = 0;
         *I++ = (char)strtol(Tmp,0,16);
         Start += 3;
         continue;
      }

      // Quotes are dropped, everything else is copied through
      if (*Start != '"')
         *I++ = *Start;
      Start++;
   }
   *I = 0;
   Res = Buffer;

   // Skip ending white space
   while (*C == ' ')
      C++;
   String = C;
   return true;
}
135 /*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of white space separated strings enclosed in
   ""'s. The contents of the quoted sections are concatenated into Res,
   each run of white space between them becoming a single space.

   Returns false if the input is empty or blank, is 1024 characters or
   longer, contains text outside of quotes or has an unclosed quote. */
bool ParseCWord(const char *String,string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   /* At most one byte is produced per input byte, so checking the input
      length up front is enough to bound every write below */
   char Buffer[1024];
   if (strlen(String) >= sizeof(Buffer))
      return false;

   char *Buf = Buffer;
   for (; *C != 0; C++)
   {
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++)
            *Buf++ = *C;

         if (*C == 0)
            return false;

         continue;
      }

      // Only white space may appear between the quoted sections
      // (ctype functions need an unsigned char value)
      if (isspace((unsigned char)*C) == 0)
         return false;

      // Collapse runs of white space
      if (C != String && isspace((unsigned char)C[-1]) != 0)
         continue;
      *Buf++ = ' ';
   }
   *Buf = 0;
   Res = Buffer;
   return true;
}
176 /*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that is listed in Bad,
   is not printable, or lies outside of 0x21-0x7E is replaced by %xx,
   where xx is the two digit lower case hex value of the byte. */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::size_type I = 0; I != Str.length(); I++)
   {
      /* Work on the unsigned value, a sign extended char would be
         printed as %ffffffxx and must not be handed to isprint */
      const unsigned char Ch = (unsigned char)Str[I];

      if (Ch <= 0x20 || Ch >= 0x7F || isprint(Ch) == 0 ||
          strchr(Bad,Ch) != 0)
      {
         char Buf[10];
         sprintf(Buf,"%%%02x",(unsigned int)Ch);
         Res += Buf;
      }
      else
         Res += (char)Ch;
   }
   return Res;
}
197 /*}}}*/
// SizeToStr - Convert a long into a human readable size		/*{{{*/
// ---------------------------------------------------------------------
/* The magnitude of Size (the sign is dropped) is divided by 1000 until
   it is below 10000, so a max of 4 digits are shown before conversion
   to the next highest unit. Values below 100 in any unit other than
   bytes get one decimal place. Sizes beyond the YottaByte (E24) range
   are printed in Y with as many digits as it takes. */
string SizeToStr(double Size)
{
   double ASize = (Size >= 0) ? Size : -1*Size;

   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'b','k','M','G','T','P','E','Z','Y'};
   const int Last = sizeof(Ext) - 1;

   /* Stop at the largest unit instead of running off the table, so the
      output buffer is always written */
   int I = 0;
   while (I < Last && !(ASize < 10000))
   {
      ASize /= 1000.0;
      I++;
   }

   char S[300];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);

   return S;
}
235 /*}}}*/
// TimeToStr - Convert the time into a string				/*{{{*/
// ---------------------------------------------------------------------
/* Converts a number of seconds to a d/h/m/s format. A larger unit is
   only used once the value strictly exceeds it, so exactly 60 seconds is
   printed as "60s" and exactly one hour as "60m0s". */
string TimeToStr(unsigned long Sec)
{
   const unsigned long Minute = 60;
   const unsigned long Hour = 60*Minute;
   const unsigned long Day = 24*Hour;

   // The arguments are unsigned long, so %lu is the matching conversion
   char S[300];
   if (Sec > Day)
      sprintf(S,"%lud %luh%lum%lus",Sec/Day,(Sec/Hour) % 24,
              (Sec/Minute) % 60,Sec % 60);
   else if (Sec > Hour)
      sprintf(S,"%luh%lum%lus",Sec/Hour,(Sec/Minute) % 60,Sec % 60);
   else if (Sec > Minute)
      sprintf(S,"%lum%lus",Sec/Minute,Sec % 60);
   else
      sprintf(S,"%lus",Sec);

   return S;
}
269 /*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and
   returns the result. The search resumes after each replaced match, so
   text introduced by Contents is never rescanned. An empty Subst leaves
   Str unchanged. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern matches at every position without ever advancing
   if (Subst.empty() == true)
      return Str;

   string::size_type OldPos = 0;
   string Temp;
   for (string::size_type Pos = Str.find(Subst,OldPos);
        Pos != string::npos; Pos = Str.find(Subst,OldPos))
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was replaced
   if (OldPos == 0)
      return Str;

   Temp.append(Str,OldPos,string::npos);
   return Temp;
}
291 /*}}}*/
292 // URItoFileName - Convert the uri into a unique file name /*{{{*/
293 // ---------------------------------------------------------------------
294 /* This converts a URI into a safe filename. It quotes all unsafe characters
295 and converts / to _ and removes the scheme identifier. The resulting
296 file name should be unique and never occur again for a different file */
297 string URItoFileName(string URI)
298 {
299 string::const_iterator I = URI.begin() + URI.find(':') + 1;
300 for (; I < URI.end() && *I == '/'; I++);
301
302 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
303 URI = QuoteString(string(I,URI.end() - I),"\\|{}[]<>\"^~_=!@#$%^&*");
304 string::iterator J = URI.begin();
305 for (; J != URI.end(); J++)
306 if (*J == '/')
307 *J = '_';
308 return URI;
309 }
310 /*}}}*/
// URIAccess - Return the access method for the URI			/*{{{*/
// ---------------------------------------------------------------------
/* The access method is whatever comes before the first ':'. A URI that
   has no ':' at all is returned as it is. */
string URIAccess(string URI)
{
   const string::size_type Colon = URI.find(':');
   if (Colon != string::npos)
      return URI.substr(0,Colon);
   return URI;
}
321 /*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string and returns
   the encoded text, padded with '=' to a multiple of 4 characters. It
   was ripped from wget and then patched and bug fixed.

   This spec can be found in rfc2045 */
string Base64Encode(string S)
{
   // Conversion table.
   static const char tbl[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

   const string::size_type Len = S.length();

   // Pre-allocate some space
   string Final;
   Final.reserve(4*((Len + 2)/3));

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The
      bytes are held as unsigned char so that values of 0x80 and above
      shift into valid table indexes */
   for (string::size_type I = 0; I < Len; I += 3)
   {
      const string::size_type Left = Len - I;
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = (unsigned char)S[I];
      if (Left > 1)
         Bits[1] = (unsigned char)S[I + 1];
      if (Left > 2)
         Bits[2] = (unsigned char)S[I + 2];

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) | (Bits[1] >> 4)];

      /* A short final group gets padding, this tells how many bytes the
         remote end should discard */
      if (Left == 1)
      {
         Final += "==";
         break;
      }

      Final += tbl[((Bits[1] & 0xf) << 2) | (Bits[2] >> 6)];

      if (Left == 2)
      {
         Final += '=';
         break;
      }

      Final += tbl[Bits[2] & 0x3f];
   }

   return Final;
}
378 /*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* This safely compares two non-null terminated strings of arbitrary
   length, each given as a begin/end pointer pair. The result is 0 when
   both ranges are equal. When one range is a proper prefix of the other
   the result is 1 if A is the shorter one and -1 if B is. Otherwise the
   first differing characters decide, -1 if A's is smaller and 1 if not. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Advance over the common prefix
   while (A != AEnd && B != BEnd && *A == *B)
   {
      A++;
      B++;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
399 /*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* Compares two begin/end delimited character ranges after passing every
   character through toupper. The result is 0 when both ranges are equal.
   When one range is a proper prefix of the other the result is 1 if A is
   the shorter one and -1 if B is. Otherwise the first differing pair
   decides, -1 if A's upper cased character is smaller and 1 if not. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Advance over the part that matches when case is ignored
   while (A != AEnd && B != BEnd && toupper(*A) == toupper(*B))
   {
      A++;
      B++;
   }

   if (A == AEnd)
      return (B == BEnd) ? 0 : 1;
   if (B == BEnd)
      return -1;
   return (toupper(*A) < toupper(*B)) ? -1 : 1;
}
419 /*}}}*/
420 // LookupTag - Lookup the value of a tag in a taged string /*{{{*/
421 // ---------------------------------------------------------------------
422 /* The format is like those used in package files and the method
423 communication system */
424 string LookupTag(string Message,const char *Tag,const char *Default)
425 {
426 // Look for a matching tag.
427 int Length = strlen(Tag);
428 for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
429 {
430 // Found the tag
431 if (I[Length] == ':' && stringcasecmp(I,I+Length,Tag) == 0)
432 {
433 // Find the end of line and strip the leading/trailing spaces
434 string::iterator J;
435 I += Length + 1;
436 for (; isspace(*I) != 0 && I < Message.end(); I++);
437 for (J = I; *J != '\n' && J < Message.end(); J++);
438 for (; J > I && isspace(J[-1]) != 0; J--);
439
440 return string(I,J-I);
441 }
442
443 for (; *I != '\n' && I < Message.end(); I++);
444 }
445
446 // Failed to find a match
447 if (Default == 0)
448 return string();
449 return Default;
450 }
451 /*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result, 1 or 0. Text that starts with a number is
   accepted if that number is 0 or 1. Otherwise the whole text is
   compared, ignoring case, against no/false/without/disable and
   yes/true/with/enable. Anything else yields Default. */
int StringToBool(string Text,int Default = -1)
{
   const char * const Str = Text.c_str();

   /* The value is kept as a long for the range check, narrowing it to
      int first could fold a large number onto 0 or 1 */
   char *End;
   const long Res = strtol(Str,&End,0);
   if (End != Str && (Res == 0 || Res == 1))
      return (int)Res;

   // Check for negatives
   static const char * const False[] = {"no","false","without","disable"};
   for (unsigned int I = 0; I != sizeof(False)/sizeof(False[0]); I++)
      if (strcasecmp(Str,False[I]) == 0)
         return 0;

   // Check for positives
   static const char * const True[] = {"yes","true","with","enable"};
   for (unsigned int I = 0; I != sizeof(True)/sizeof(True[0]); I++)
      if (strcasecmp(Str,True[I]) == 0)
         return 1;

   return Default;
}
479 /*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, for example
   "Sun, 06 Nov 1994 08:49:37 GMT". An empty string is returned if
   gmtime cannot break the value down. */
string TimeRFC1123(time_t Date)
{
   // gmtime reports failure by returning a null pointer
   const struct tm * const Conv = gmtime(&Date);
   if (Conv == 0)
      return string();

   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri",
                                      "Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun",
                                        "Jul","Aug","Sep","Oct","Nov","Dec"};

   char Buf[300];
   sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv->tm_wday],
           Conv->tm_mday,Month[Conv->tm_mon],Conv->tm_year+1900,
           Conv->tm_hour,Conv->tm_min,Conv->tm_sec);
   return Buf;
}
498 /*}}}*/
499 // ReadMessages - Read messages from the FD /*{{{*/
500 // ---------------------------------------------------------------------
501 /* This pulls full messages from the input FD into the message buffer.
502 It assumes that messages will not pause during transit so no
503 fancy buffering is used. */
504 bool ReadMessages(int Fd, vector<string> &List)
505 {
506 char Buffer[4000];
507 char *End = Buffer;
508
509 while (1)
510 {
511 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
512
513 // Process is dead, this is kind of bad..
514 if (Res == 0)
515 return false;
516
517 // No data
518 if (Res <= 0)
519 return true;
520
521 End += Res;
522
523 // Look for the end of the message
524 for (char *I = Buffer; I + 1 < End; I++)
525 {
526 if (I[0] != '\n' || I[1] != '\n')
527 continue;
528
529 // Pull the message out
530 string Message(Buffer,0,I-Buffer);
531
532 // Fix up the buffer
533 for (; I < End && *I == '\n'; I++);
534 End -= I-Buffer;
535 memmove(Buffer,I,End-Buffer);
536 I = Buffer;
537
538 List.push_back(Message);
539 }
540 if (End == Buffer)
541 return true;
542
543 if (WaitFd(Fd) == false)
544 return false;
545 }
546 }
547 /*}}}*/
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* Maps a month name to its number, 0 for January up to 11 for December.
   Only as many of the first three letters as are needed to tell the
   months apart are looked at, each one upper cased first. A name whose
   first letter starts no month is taken to be January. */
static int MonthConv(char *Month)
{
   const int First = toupper(*Month);

   // Apr / Aug
   if (First == 'A')
      return (toupper(Month[1]) == 'P') ? 3 : 7;

   // Jan / Jun / Jul
   if (First == 'J')
   {
      if (toupper(Month[1]) == 'A')
         return 0;
      return (toupper(Month[2]) == 'N') ? 5 : 6;
   }

   // Mar / May
   if (First == 'M')
      return (toupper(Month[2]) == 'R') ? 2 : 4;

   // The remaining months are identified by the first letter alone
   if (First == 'F')
      return 1;
   if (First == 'S')
      return 8;
   if (First == 'O')
      return 9;
   if (First == 'N')
      return 10;
   if (First == 'D')
      return 11;

   // Pretend it is January..
   return 0;
}
580 /*}}}*/
581 // StrToTime - Converts a string into a time_t /*{{{*/
582 // ---------------------------------------------------------------------
583 /* This handles all 3 populare time formats including RFC 1123, RFC 1036
584 and the C library asctime format. It requires the GNU library function
585 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar
586 reason the C library does not provide any such function :<*/
587 bool StrToTime(string Val,time_t &Result)
588 {
589 struct tm Tm;
590 char Month[10];
591 const char *I = Val.c_str();
592
593 // Skip the day of the week
594 for (;*I != 0 && *I != ' '; I++);
595
596 // Handle RFC 1123 time
597 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
598 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
599 {
600 // Handle RFC 1036 time
601 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
602 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
603 Tm.tm_year += 1900;
604 else
605 {
606 // asctime format
607 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
608 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
609 return false;
610 }
611 }
612
613 Tm.tm_isdst = 0;
614 Tm.tm_mon = MonthConv(Month);
615 Tm.tm_year -= 1900;
616
617 // Convert to local time and then to GMT
618 Result = timegm(&Tm);
619 return true;
620 }
621 /*}}}*/