]>
git.saurik.com Git - apt.git/blob - apt-pkg/contrib/strutl.cc
68421a2418a3adfc4903cd85a781faf3298f9f2d
// -*- mode: cpp; mode: fold -*-
// $Id: strutl.cc,v 1.8 1998/10/24 04:58:07 jgg Exp $
/* ######################################################################

   String Util - Some useful string functions.

   These have been collected from here and there to do all sorts of useful
   things to strings. They are useful in file parsers, URI handlers and
   especially in APT methods.

   This source is placed in the Public Domain, do with it what you will
   It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>

   ##################################################################### */
#include <apt-pkg/fileutl.h>

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>

#include <string>
#include <vector>
// strstrip - Remove white space from the front and back of a string	/*{{{*/
// ---------------------------------------------------------------------
/* This is handy to use when parsing a file. It also removes \n's left
   over from fgets and company.

   String is edited in place. Leading ' ' and '\t' are skipped by
   advancing the returned pointer; trailing ' ', '\t', '\n' and '\r' are
   cut off by writing a terminator over the first of them. The returned
   pointer points into String. */
char *_strstrip(char *String)
{
   // Step over the leading blanks
   while (*String == ' ' || *String == '\t')
      String++;

   /* Walk back from the terminator. End never moves in front of String,
      so no pointer before the start of the buffer is formed. */
   char *End = String + strlen(String);
   while (End != String && (End[-1] == ' ' || End[-1] == '\t' ||
                            End[-1] == '\n' || End[-1] == '\r'))
      End--;

   *End = 0;
   return String;
}
									/*}}}*/
// strtabexpand - Converts tabs into 8 spaces				/*{{{*/
// ---------------------------------------------------------------------
/* Every tab in String is replaced, in place, by the number of spaces
   needed to reach the next column that is a multiple of 8, counting the
   first character of String as column 0.

   Len is the size in bytes of the buffer String points to and String
   must be null terminated. If expanding a tab would make the string
   (terminator included) longer than Len the string is cut off at that
   tab. String is returned. */
char *_strtabexpand(char *String,size_t Len)
{
   for (char *I = String; I != String + Len && *I != 0; I++)
   {
      if (*I != '\t')
         continue;

      // Number of spaces that takes us to the next tab stop (1..8)
      const size_t Column = I - String;
      const size_t Width = 8 - (Column % 8);

      /* The tab turns into Width spaces, so the text behind it and the
         terminator move Width - 1 places to the right. Give up at the
         tab when that does not fit. */
      const size_t Tail = strlen(I + 1);
      if (Column + Width + Tail + 1 > Len)
      {
         *I = 0;
         return String;
      }

      memmove(I + Width,I + 1,Tail + 1);
      memset(I,' ',Width);

      // Stay on the last space written, the loop increment steps past it
      I += Width - 1;
   }

   return String;
}
									/*}}}*/
// ParseQuoteWord - Parse a single word out of a string			/*{{{*/
// ---------------------------------------------------------------------
/* This grabs a single word, converts any % escaped characters to their
   proper values and advances the pointer. Double quotes are understood
   and stripped out as well. This is for URI/URL parsing.

   A word ends at the first blank that is not between double quotes.
   %XX is only decoded when both X are hex digits and lie inside the
   word, any other % is copied as is.

   On success Res holds the word, String points at the start of the next
   word (or at the terminator) and true is returned. false is returned,
   with String and Res untouched, when only blanks are left or when a
   double quote is never closed. */
bool ParseQuoteWord(const char *&String,std::string &Res)
{
   // Skip leading whitespace
   const char *Begin = String;
   while (*Begin == ' ')
      Begin++;
   if (*Begin == 0)
      return false;

   // Jump to the end of the word, blanks inside quotes do not end it
   const char *C = Begin;
   for (; *C != 0 && *C != ' '; C++)
   {
      if (*C != '"')
         continue;

      for (C++; *C != 0 && *C != '"'; C++);
      if (*C == 0)
         return false;
   }

   /* Now de-quote characters. Decoding starts at the first character of
      the word, not at String, so the skipped blanks do not end up in the
      result. */
   std::string Word;
   for (const char *Start = Begin; Start != C; Start++)
   {
      if (*Start == '%' && Start + 2 < C &&
          isxdigit(static_cast<unsigned char>(Start[1])) != 0 &&
          isxdigit(static_cast<unsigned char>(Start[2])) != 0)
      {
         const char Tmp[3] = {Start[1],Start[2],0};
         Word += static_cast<char>(strtol(Tmp,0,16));
         Start += 2;
         continue;
      }

      if (*Start != '"')
         Word += *Start;
   }
   Res = Word;

   // Skip ending white space
   while (*C == ' ')
      C++;
   String = C;
   return true;
}
									/*}}}*/
// ParseCWord - Parses a string like a C "" expression			/*{{{*/
// ---------------------------------------------------------------------
/* This expects a series of space separated strings enclosed in ""'s.
   It concatenates the ""'s into a single string.

   The text of each quoted section is copied as is. A run of white space
   outside the quotes contributes a single blank to the result. Anything
   else outside the quotes, an unterminated quote, or a String that holds
   nothing but blanks makes the function return false and leaves Res
   alone. There is no limit on the length of String. */
bool ParseCWord(const char *String,std::string &Res)
{
   // Skip leading whitespace
   const char *C = String;
   while (*C == ' ')
      C++;
   if (*C == 0)
      return false;

   std::string Out;
   for (; *C != 0; C++)
   {
      // Copy everything up to the closing quote
      if (*C == '"')
      {
         for (C++; *C != 0 && *C != '"'; C++)
            Out += *C;

         if (*C == 0)
            return false;
         continue;
      }

      // Only white space may appear between the quoted sections
      if (isspace(static_cast<unsigned char>(*C)) == 0)
         return false;

      // Collapse a run of white space into one blank
      if (C != String && isspace(static_cast<unsigned char>(C[-1])) != 0)
         continue;
      Out += ' ';
   }

   Res = Out;
   return true;
}
									/*}}}*/
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that is listed in Bad,
   is not printable, or has a byte value <= 0x20 or >= 0x7F is replaced
   by %xx, xx being the byte value as two lower case hex digits. All
   other characters are copied unchanged. */
std::string QuoteString(std::string Str,const char *Bad)
{
   std::string Res;
   for (std::string::const_iterator I = Str.begin(); I != Str.end(); ++I)
   {
      /* Work on the byte value. A plain char may be negative, which is
         not a valid argument for isprint and would be printed as
         %ffffffxx. */
      const unsigned char Ch = static_cast<unsigned char>(*I);

      if (strchr(Bad,*I) != 0 || isprint(Ch) == 0 ||
          Ch <= 0x20 || Ch >= 0x7F)
      {
         char Buf[10];
         snprintf(Buf,sizeof(Buf),"%%%02x",static_cast<unsigned int>(Ch));
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
									/*}}}*/
// SizeToStr - Convert a long into a human readable size		/*{{{*/
// ---------------------------------------------------------------------
/* A max of 4 digits are shown before conversion to the next highest unit.
   The value is divided by 1000 for each unit step. Values below 100 in
   any unit other than bytes are shown with one decimal place, everything
   else is shown without decimals. Only the magnitude of Size is used.
   Sizes too large for the last unit are still printed in that unit, with
   as many digits as they need.

   NOTE(review): the 10000 threshold, the divisor of 1000 and the use of
   the magnitude are taken from the comment above and the visible format
   calls; confirm against the callers' expectations. */
std::string SizeToStr(double Size)
{
   /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
      ExaBytes, ZettaBytes, YottaBytes */
   static const char Ext[] = {'b','k','M','G','T','P','E','Z','Y'};
   const int Last = sizeof(Ext) - 1;

   double ASize = Size;
   if (ASize < 0)
      ASize = -ASize;

   // Move up a unit while more than 4 digits would be needed
   int I = 0;
   while (I < Last && ASize >= 10000)
   {
      ASize /= 1000.0;
      I++;
   }

   char S[350];
   if (ASize < 100 && I != 0)
      snprintf(S,sizeof(S),"%.1f%c",ASize,Ext[I]);
   else
      snprintf(S,sizeof(S),"%.0f%c",ASize,Ext[I]);
   return S;
}
									/*}}}*/
// TimeToStr - Convert the time into a string				/*{{{*/
// ---------------------------------------------------------------------
/* Converts a number of seconds to a hms format. Only the units that are
   needed are shown: days appear for more than 24 hours, hours for more
   than 60 minutes and minutes for more than 60 seconds.

   NOTE(review): the exact comparisons were not visible in the extracted
   source; strict 'greater than' is used here, so exactly 60 seconds
   prints as "60s". Confirm against callers. */
std::string TimeToStr(unsigned long Sec)
{
   char S[300];

   // Sec is unsigned, so %lu is the matching conversion
   if (Sec > 60*60*24)
      snprintf(S,sizeof(S),"%lud %luh%lum%lus",Sec/60/60/24,(Sec/60/60) % 24,
               (Sec/60) % 60,Sec % 60);
   else if (Sec > 60*60)
      snprintf(S,sizeof(S),"%luh%lum%lus",Sec/60/60,(Sec/60) % 60,Sec % 60);
   else if (Sec > 60)
      snprintf(S,sizeof(S),"%lum%lus",Sec/60,Sec % 60);
   else
      snprintf(S,sizeof(S),"%lus",Sec);

   return S;
}
									/*}}}*/
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* This replaces all occurrences of Subst with Contents in Str and returns
   the result. The search continues behind each replacement, so text
   inserted from Contents is never scanned again. Str is returned
   unchanged when Subst is empty or does not occur. */
std::string SubstVar(std::string Str,std::string Subst,std::string Contents)
{
   // An empty pattern matches everywhere and would never advance
   if (Subst.empty() == true)
      return Str;

   std::string::size_type Pos = 0;
   std::string::size_type OldPos = 0;
   std::string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != std::string::npos)
   {
      // Copy the text between the previous match and this one
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Nothing was replaced
   if (OldPos == 0)
      return Str;

   return Temp + std::string(Str,OldPos);
}
									/*}}}*/
292 // URItoFileName - Convert the uri into a unique file name /*{{{*/
293 // ---------------------------------------------------------------------
294 /* This converts a URI into a safe filename. It quotes all unsafe characters
295 and converts / to _ and removes the scheme identifier. The resulting
296 file name should be unique and never occur again for a different file */
297 string
URItoFileName(string URI
)
299 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
300 for (; I
< URI
.end() && *I
== '/'; I
++);
302 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
303 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
304 string::iterator J
= URI
.begin();
305 for (; J
!= URI
.end(); J
++)
// URIAccess - Return the access method for the URI			/*{{{*/
// ---------------------------------------------------------------------
/* The access method is everything in front of the first ':'. The whole
   URI is returned when it has no ':'. */
std::string URIAccess(std::string URI)
{
   // find gives npos without a ':' and substr(0,npos) is the full string
   return URI.substr(0,URI.find(':'));
}
									/*}}}*/
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* This routine performs a base64 transformation on a string. It was ripped
   from wget and then patched and bug fixed.

   This spec can be found in rfc2045. The output is not broken into
   lines. */
std::string Base64Encode(std::string S)
{
   // Conversion table.
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate some space, 4 output characters per 3 input bytes
   const std::string::size_type Len = S.length();
   std::string Final;
   Final.reserve(4*((Len + 2)/3));

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. The bytes
      are held unsigned so that the shifts never give a negative index
      for values above 0x7F. */
   for (std::string::size_type Pos = 0; Pos < Len; Pos += 3)
   {
      const std::string::size_type Left = Len - Pos;
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = static_cast<unsigned char>(S[Pos]);
      if (Left > 1)
         Bits[1] = static_cast<unsigned char>(S[Pos + 1]);
      if (Left > 2)
         Bits[2] = static_cast<unsigned char>(S[Pos + 2]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) + (Bits[1] >> 4)];

      if (Left == 1)
         break;

      Final += tbl[((Bits[1] & 0xf) << 2) + (Bits[2] >> 6)];

      if (Left == 2)
         break;

      Final += tbl[Bits[2] & 0x3f];
   }

   /* Apply the padding elements, this tells how many bytes the remote
      end should discard */
   if (Len % 3 == 2)
      Final += '=';
   if (Len % 3 == 1)
      Final += "==";

   return Final;
}
									/*}}}*/
// stringcmp - Arbitrary string compare					/*{{{*/
// ---------------------------------------------------------------------
/* This safely compares two non-null terminated strings of arbitrary
   length, given as the ranges [A,AEnd) and [B,BEnd). 0 is returned when
   both ranges hold the same characters. Otherwise the result is -1 or 1
   depending on the first differing character (-1 when the one in A is
   smaller).

   NOTE(review): when one range is a proper prefix of the other, 1 is
   returned if A is the shorter one and -1 if B is. Those return
   statements were not visible in the extracted source and follow the
   upstream APT implementation as remembered; confirm before relying on
   the ordering. */
int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   // Step over the part both ranges have in common
   while (A != AEnd && B != BEnd && *A == *B)
   {
      A++;
      B++;
   }

   if (A == AEnd && B == BEnd)
      return 0;
   if (A == AEnd)
      return 1;
   if (B == BEnd)
      return -1;
   return (*A < *B) ? -1 : 1;
}
									/*}}}*/
// stringcasecmp - Arbitrary case insensitive string compare		/*{{{*/
// ---------------------------------------------------------------------
/* Compares the ranges [A,AEnd) and [B,BEnd) after passing each character
   through toupper. 0 is returned when the ranges are equal that way.
   Otherwise the result is -1 or 1 depending on the first differing
   character (-1 when the upper cased one in A is smaller).

   NOTE(review): when one range is a proper prefix of the other, 1 is
   returned if A is the shorter one and -1 if B is. Those return
   statements were not visible in the extracted source and follow the
   upstream APT implementation as remembered; confirm before relying on
   the ordering. */
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
   /* toupper must be given the byte value, a negative plain char is not
      a valid argument */
   for (; A != AEnd && B != BEnd; A++, B++)
      if (toupper(static_cast<unsigned char>(*A)) !=
          toupper(static_cast<unsigned char>(*B)))
         break;

   if (A == AEnd && B == BEnd)
      return 0;
   if (A == AEnd)
      return 1;
   if (B == BEnd)
      return -1;
   if (toupper(static_cast<unsigned char>(*A)) <
       toupper(static_cast<unsigned char>(*B)))
      return -1;
   return 1;
}
									/*}}}*/
// LookupTag - Lookup the value of a tag in a tagged string		/*{{{*/
// ---------------------------------------------------------------------
/* The format is like those used in package files and the method
   communication system: one "Tag: value" pair per line.

   Tag is compared without regard to case and has to start at the
   beginning of a line. The value of the first matching line is returned
   with the white space around it removed; it never extends past the end
   of that line. If no line matches, Default is returned, or an empty
   string when Default is null. */
std::string LookupTag(std::string Message,const char *Tag,const char *Default)
{
   // Look for a matching tag.
   const std::string::size_type Length = strlen(Tag);
   const std::string::size_type Size = Message.length();
   for (std::string::size_type Pos = 0; Pos + Length < Size; Pos++)
   {
      // Found the tag
      if (Message[Pos + Length] == ':' &&
          strncasecmp(Message.c_str() + Pos,Tag,Length) == 0)
      {
         /* Find the end of line and strip the leading/trailing spaces.
            Every index is checked against Size before it is used and the
            leading skip stops at the newline so an empty value does not
            pick up the next line. */
         std::string::size_type Start = Pos + Length + 1;
         while (Start < Size && Message[Start] != '\n' &&
                isspace(static_cast<unsigned char>(Message[Start])) != 0)
            Start++;

         std::string::size_type Stop = Start;
         while (Stop < Size && Message[Stop] != '\n')
            Stop++;
         while (Stop > Start &&
                isspace(static_cast<unsigned char>(Message[Stop - 1])) != 0)
            Stop--;

         return std::string(Message,Start,Stop - Start);
      }

      // No match, move to the end of this line
      while (Pos < Size && Message[Pos] != '\n')
         Pos++;
   }

   // Failed to find a match
   if (Default == 0)
      return std::string();
   return Default;
}
									/*}}}*/
// StringToBool - Converts a string into a boolean			/*{{{*/
// ---------------------------------------------------------------------
/* This inspects the string to see if it is true or if it is false and
   then returns the result. Several variants on true/false are checked.

   1 is returned for the number 1 and for yes, true, with and enable;
   0 is returned for the number 0 and for no, false, without and disable.
   The words are matched without regard to case. A number only counts if
   the whole of Text is that number. Anything else gives Default. */
int StringToBool(std::string Text,int Default = -1)
{
   /* The entire string has to be consumed by strtol, otherwise text that
      merely starts with a digit ("0ad") would be taken as a boolean */
   char *End;
   const long Res = strtol(Text.c_str(),&End,0);
   if (End != Text.c_str() && *End == 0 && Res >= 0 && Res <= 1)
      return Res;

   // Check for negatives
   if (strcasecmp(Text.c_str(),"no") == 0 ||
       strcasecmp(Text.c_str(),"false") == 0 ||
       strcasecmp(Text.c_str(),"without") == 0 ||
       strcasecmp(Text.c_str(),"disable") == 0)
      return 0;

   // Check for positives
   if (strcasecmp(Text.c_str(),"yes") == 0 ||
       strcasecmp(Text.c_str(),"true") == 0 ||
       strcasecmp(Text.c_str(),"with") == 0 ||
       strcasecmp(Text.c_str(),"enable") == 0)
      return 1;

   return Default;
}
									/*}}}*/
// TimeRFC1123 - Convert a time_t into RFC1123 format			/*{{{*/
// ---------------------------------------------------------------------
/* This converts a time_t into a string time representation that is
   year 2000 compliant and timezone neutral, for example
   "Sun, 06 Nov 1994 08:49:37 GMT". The day and month names are always
   the English abbreviations. An empty string is returned if Date can not
   be broken down by gmtime. */
std::string TimeRFC1123(time_t Date)
{
   static const char * const Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
   static const char * const Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
                                        "Aug","Sep","Oct","Nov","Dec"};

   // gmtime returns null for a time it can not represent
   const struct tm *Broken = gmtime(&Date);
   if (Broken == 0)
      return std::string();
   const struct tm Conv = *Broken;

   char Buf[300];
   snprintf(Buf,sizeof(Buf),"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
            Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
            Conv.tm_min,Conv.tm_sec);
   return Buf;
}
									/*}}}*/
499 // ReadMessages - Read messages from the FD /*{{{*/
500 // ---------------------------------------------------------------------
501 /* This pulls full messages from the input FD into the message buffer.
502 It assumes that messages will not pause during transit so no
503 fancy buffering is used. */
504 bool ReadMessages(int Fd
, vector
<string
> &List
)
511 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
513 // Process is dead, this is kind of bad..
523 // Look for the end of the message
524 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
526 if (I
[0] != '\n' || I
[1] != '\n')
529 // Pull the message out
530 string
Message(Buffer
,0,I
-Buffer
);
533 for (; I
< End
&& *I
== '\n'; I
++);
535 memmove(Buffer
,I
,End
-Buffer
);
538 List
.push_back(Message
);
543 if (WaitFd(Fd
) == false)
// MonthConv - Converts a month string into a number			/*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
   Made it a bit more robust with a few touppers though.

   Month is the English three letter abbreviation in any case. The
   result is the month number counted from 0 (Jan) to 11 (Dec). Only as
   many letters as are needed to tell the names apart are looked at, and
   a string that matches nothing is taken to be January. */
static int MonthConv(char *Month)
{
   /* Upper case the first three letters, stopping at the terminator so
      that a short string is never read past its end */
   char Up[3] = {0,0,0};
   for (int I = 0; I != 3 && Month[I] != 0; I++)
      Up[I] = toupper(static_cast<unsigned char>(Month[I]));

   switch (Up[0])
   {
      case 'A':
      return Up[1] == 'P'?3:7;
      case 'D':
      return 11;
      case 'F':
      return 1;
      case 'J':
      if (Up[1] == 'A')
         return 0;
      return Up[2] == 'N'?5:6;
      case 'M':
      return Up[2] == 'R'?2:4;
      case 'N':
      return 10;
      case 'O':
      return 9;
      case 'S':
      return 8;

      // Pretend it is January..
      default:
      return 0;
   }
}
									/*}}}*/
// StrToTime - Converts a string into a time_t				/*{{{*/
// ---------------------------------------------------------------------
/* This handles all 3 popular time formats including RFC 1123, RFC 1036
   and the C library asctime format. It requires the GNU library function
   'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
   reason the C library does not provide any such function :<

   Everything up to the first blank of Val is taken to be the day of the
   week and is ignored. On success the time is stored in Result and true
   is returned. false is returned, and Result left alone, when Val fits
   none of the three formats.

   NOTE(review): the year handling (1900 is added to the two digit RFC
   1036 year) and tm_isdst = 0 were not visible in the extracted source
   and follow the upstream APT implementation as remembered; confirm. */
bool StrToTime(std::string Val,time_t &Result)
{
   struct tm Tm;
   memset(&Tm,0,sizeof(Tm));
   char Month[10] = "";
   const char *I = Val.c_str();

   // Skip the day of the week
   for (;*I != 0 && *I != ' '; I++);

   // Handle RFC 1123 time
   if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
              &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
   {
      // Handle RFC 1036 time
      if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
                 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
         Tm.tm_year += 1900;
      else
      {
         // asctime format
         if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
                    &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
            return false;
      }
   }

   Tm.tm_isdst = 0;
   Tm.tm_mon = MonthConv(Month);

   // struct tm counts years from 1900
   Tm.tm_year -= 1900;

   // The fields are UTC, timegm converts them without a timezone shift
   Result = timegm(&Tm);
   return true;
}
									/*}}}*/