1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.11 1998/11/01 08:07:12 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #include <apt-pkg/fileutl.h>
27 // strstrip - Remove white space from the front and back of a string /*{{{*/
28 // ---------------------------------------------------------------------
29 /* This is handy to use when parsing a file. It also removes \n's left
30 over from fgets and company */
// NOTE(review): the embedded line numbers skip (31, 33, 38, ...), so some
// original lines of this function are missing from this listing; only the
// statements that are visible here are described.
31 char *_strstrip(char *String
)
// Advance String past any leading spaces and tabs.
33 for (;*String
!= 0 && (*String
== ' ' || *String
== '\t'); String
++);
// Start at the last character and walk End backwards over trailing spaces,
// tabs, newlines and carriage returns. The End != String - 1 test stops the
// scan once End has moved one position before the start of the string.
// NOTE(review): if String is empty here End starts at String - 1; a guard for
// that case may sit on one of the missing lines - confirm.
38 char *End
= String
+ strlen(String
) - 1;
39 for (;End
!= String
- 1 && (*End
== ' ' || *End
== '\t' || *End
== '\n' ||
40 *End
== '\r'); End
--);
46 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
47 // ---------------------------------------------------------------------
// NOTE(review): several original lines are missing from this listing (the
// embedded numbers skip), including whatever tests for a tab character, so
// only the visible statements are annotated.
49 char *_strtabexpand(char *String
,size_t Len
)
// NOTE(review): the bound below compares I with I + Len, which can only be
// equal when Len is 0; it looks like String + Len was intended - confirm.
51 for (char *I
= String
; I
!= I
+ Len
&& *I
!= 0; I
++)
// Tests whether 8 more characters starting at I would run past String + Len.
55 if (I
+ 8 > String
+ Len
)
// NOTE(review): the assignment after the next comment line overwrites Len,
// the size parameter, so every later String + Len bound uses the new value.
// String - I is never positive because I starts at String and only advances,
// so the result is 8 or more; I - String may have been intended - confirm.
61 /* Assume the start of the string is 0 and find the next 8 char
67 Len
= 8 - ((String
- I
) % 8);
// Shift the text that follows I up to I + Len to open a gap.
// NOTE(review): strlen(I) + 1 bytes are copied starting at I + 1, which is one
// byte more than the remaining characters plus the terminator - confirm.
75 memmove(I
+ Len
,I
+ 1,strlen(I
) + 1);
// Write spaces at I, advancing I until it reaches J + Len (J is the position
// where the gap starts), i.e. Len spaces in total.
76 for (char *J
= I
; J
+ Len
!= I
; *I
= ' ', I
++);
81 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
82 // ---------------------------------------------------------------------
83 /* This grabs a single word, converts any % escaped characters to their
84 proper values and advances the pointer. Double quotes are understood
85 and stripped out as well. This is for URI/URL parsing. */
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the declarations of Buffer, I and Tmp, the code that fills Tmp
// and the statements that assign Res and String; only the visible statements
// are annotated.
86 bool ParseQuoteWord(const char *&String
,string
&Res
)
88 // Skip leading whitespace (only the space character is tested)
89 const char *C
= String
;
90 for (;*C
!= 0 && *C
== ' '; C
++);
94 // Jump to the next word: advance C to the next space or end of string
95 for (;*C
!= 0 && *C
!= ' '; C
++)
// Advance C to the next double quote or to the end of the string.
99 for (C
++;*C
!= 0 && *C
!= '"'; C
++);
105 // Now de-quote characters
// Walk Start from String up to C while filling Buffer through I; the loop
// also stops when I reaches the end of Buffer.
108 const char *Start
= String
;
110 for (I
= Buffer
; I
< Buffer
+ sizeof(Buffer
) && Start
!= C
; I
++)
// A percent sign counts as an escape only when the two characters after it
// both lie before C.
112 if (*Start
== '%' && Start
+ 2 < C
)
// Tmp is parsed as a base 16 number and stored as a single char.
117 *I
= (char)strtol(Tmp
,0,16);
130 // Skip ending white space
131 for (;*C
!= 0 && *C
== ' '; C
++);
136 // ParseCWord - Parses a string like a C "" expression /*{{{*/
137 // ---------------------------------------------------------------------
138 /* This expects a series of space separated strings enclosed in ""'s.
139 It concatenates the ""'s into a single string. */
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the declaration of Buffer and the statements controlled by the
// tests below; only the visible statements are annotated.
140 bool ParseCWord(const char *String
,string
&Res
)
142 // Skip leading whitespace
143 const char *C
= String
;
144 for (;*C
!= 0 && *C
== ' '; C
++);
// Tests whether the whole input, terminator included, fits into Buffer.
150 if (strlen(String
) >= sizeof(Buffer
))
// Advance C to the next double quote or to the end of the string.
157 for (C
++; *C
!= 0 && *C
!= '"'; C
++)
// True when both the current and the previous character are whitespace.
// NOTE(review): *C is a plain char; passing a negative value to isspace is
// undefined, so a cast to unsigned char may be wanted - confirm.
166 if (C
!= String
&& isspace(*C
) != 0 && isspace(C
[-1]) != 0)
168 if (isspace(*C
) == 0)
177 // QuoteString - Convert a string into quoted form /*{{{*/
178 // ---------------------------------------------------------------------
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the declaration of Buf and the code that builds the result;
// only the visible statements are annotated.
180 string
QuoteString(string Str
,const char *Bad
)
183 for (string::iterator I
= Str
.begin(); I
!= Str
.end(); I
++)
// A character is selected for quoting when it appears in Bad, is not
// printable, or lies outside the range 0x21..0x7E.
185 if (strchr(Bad
,*I
) != 0 || isprint(*I
) == 0 ||
186 *I
<= 0x20 || *I
>= 0x7F)
// Formats the character as a percent sign followed by its hex value.
// NOTE(review): where char is signed, (int)*I is negative for bytes above
// 0x7F and the x conversion then prints more than two digits; a cast through
// unsigned char may be wanted - confirm against the size of Buf.
189 sprintf(Buf
,"%%%02x",(int)*I
);
198 // SizeToStr - Convert a size into a human readable string /*{{{*/
199 // ---------------------------------------------------------------------
// NOTE(review): the size is passed as a double. The block comment below is
// cut off in this listing and other original lines are missing as well (the
// embedded numbers skip), e.g. the declarations of S, ASize and I and the
// loop that scales the value; only the visible statements are annotated.
200 /* A max of 4 digits are shown before conversion to the next highest unit.
201 The max length of the string will be 5 chars unless the size is > 10
203 string
SizeToStr(double Size
)
212 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
213 ExaBytes, ZettaBytes, YottaBytes */
214 char Ext
[] = {'b','k','M','G','T','P','E','Z','Y'};
// Tests for a scaled value below 100 with a unit index other than 0; the one
// decimal format on the next visible line appears to belong to this test,
// while the other visible call prints no decimals.
218 if (ASize
< 100 && I
!= 0)
220 sprintf(S
,"%.1f%c",ASize
,Ext
[I
]);
226 sprintf(S
,"%.0f%c",ASize
,Ext
[I
]);
236 // TimeToStr - Convert the time into a string /*{{{*/
237 // ---------------------------------------------------------------------
238 /* Converts a number of seconds to a hms format */
// NOTE(review): the tests that choose between the four formats below and the
// declaration of S are on lines missing from this listing (the embedded
// numbers skip).
// NOTE(review): every call uses the li conversion, which expects a signed
// long, while Sec is unsigned long; lu would match the argument type.
239 string
TimeToStr(unsigned long Sec
)
// Days, hours, minutes and seconds.
247 sprintf(S
,"%lid %lih%lim%lis",Sec
/60/60/24,(Sec
/60/60) % 24,(Sec
/60) % 60,Sec
% 60);
// Hours, minutes and seconds.
253 sprintf(S
,"%lih%lim%lis",Sec
/60/60,(Sec
/60) % 60,Sec
% 60);
// Minutes and seconds.
259 sprintf(S
,"%lim%lis",Sec
/60,Sec
% 60);
// Seconds only.
263 sprintf(S
,"%lis",Sec
);
270 // SubstVar - Substitute a string for another string /*{{{*/
271 // ---------------------------------------------------------------------
272 /* This replaces all occurrences of Subst with Contents in Str. */
// NOTE(review): the declaration of Temp and some other original lines are
// missing from this listing (the embedded numbers skip); only the visible
// statements are annotated.
273 string
SubstVar(string Str
,string Subst
,string Contents
)
275 string::size_type Pos
= 0;
276 string::size_type OldPos
= 0;
// Search for Subst starting at OldPos until the end of Str is reached or no
// further match is found.
// NOTE(review): with an empty Subst, find returns OldPos and the assignment
// below leaves OldPos unchanged, so the visible loop would not advance.
279 while (OldPos
< Str
.length() &&
280 (Pos
= Str
.find(Subst
,OldPos
)) != string::npos
)
// Append the text since the previous match, followed by Contents.
// NOTE(review): the third argument of this string constructor is a length,
// not an end position; Pos is passed, so after the first match more than the
// text between matches can be copied. Pos - OldPos looks intended - confirm.
282 Temp
+= string(Str
,OldPos
,Pos
) + Contents
;
// Continue searching just past the matched text.
283 OldPos
= Pos
+ Subst
.length();
// Append whatever follows the last match.
289 return Temp
+ string(Str
,OldPos
);
292 // URItoFileName - Convert the uri into a unique file name /*{{{*/
293 // ---------------------------------------------------------------------
294 /* This converts a URI into a safe filename. It quotes all unsafe characters
295 and converts / to _ and removes the scheme identifier. The resulting
296 file name should be unique and never occur again for a different file */
// NOTE(review): the body of the final loop and the return statement are on
// lines missing from this listing (the embedded numbers skip).
297 string
URItoFileName(string URI
)
// Position I just after the first colon.
// NOTE(review): when URI has no colon, find returns npos and the + 1 relies
// on wrap-around to land on begin() - confirm this is intended.
299 string::const_iterator I
= URI
.begin() + URI
.find(':') + 1;
// Skip the slashes that directly follow the scheme separator.
300 for (; I
< URI
.end() && *I
== '/'; I
++);
302 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
// Quote the remainder, treating the characters in the literal as bad.
// NOTE(review): string(I, count) passes an iterator and a length; this
// presumably relies on string::const_iterator being a plain const char
// pointer - confirm for the string class in use.
303 URI
= QuoteString(string(I
,URI
.end() - I
),"\\|{}[]<>\"^~_=!@#$%^&*");
304 string::iterator J
= URI
.begin();
305 for (; J
!= URI
.end(); J
++)
311 // Base64Encode - Base64 Encoding routine for short strings /*{{{*/
312 // ---------------------------------------------------------------------
313 /* This routine performs a base64 transformation on a string. It was ripped
314 from wget and then patched and bug fixed.
316 This spec can be found in rfc2045 */
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the declaration of Final, the code that fills Bits and the
// statements that append the padding; only the visible statements are
// annotated.
317 string
Base64Encode(string S
)
// The 64 output characters, indexed by 6 bit value.
320 static char tbl
[64] = {'A','B','C','D','E','F','G','H',
321 'I','J','K','L','M','N','O','P',
322 'Q','R','S','T','U','V','W','X',
323 'Y','Z','a','b','c','d','e','f',
324 'g','h','i','j','k','l','m','n',
325 'o','p','q','r','s','t','u','v',
326 'w','x','y','z','0','1','2','3',
327 '4','5','6','7','8','9','+','/'};
329 // Pre-allocate some space
331 Final
.reserve((4*S
.length() + 2)/3 + 2);
// The loop below consumes the input three bytes at a time.
// NOTE(review): Bits is an array of plain char; where char is signed, input
// bytes above 0x7F make the shifted values negative and the tbl index falls
// outside the table. unsigned char may be wanted - confirm.
333 /* Transform the 3x8 bits to 4x6 bits, as required by
335 for (string::const_iterator I
= S
.begin(); I
< S
.end(); I
+= 3)
337 char Bits
[3] = {0,0,0};
344 Final
+= tbl
[Bits
[0] >> 2];
345 Final
+= tbl
[((Bits
[0] & 3) << 4) + (Bits
[1] >> 4)];
347 if (I
+ 1 >= S
.end())
350 Final
+= tbl
[((Bits
[1] & 0xf) << 2) + (Bits
[2] >> 6)];
352 if (I
+ 2 >= S
.end())
355 Final
+= tbl
[Bits
[2] & 0x3f];
358 /* Apply the padding elements, this tells how many bytes the remote
359 end should discard */
360 if (S
.length() % 3 == 2)
362 if (S
.length() % 3 == 1)
368 // stringcmp - Arbitrary string compare /*{{{*/
369 // ---------------------------------------------------------------------
// NOTE(review): the block comment below is cut off in this listing, and the
// loop body and the return statements are on missing lines as well (the
// embedded numbers skip); only the visible statements are annotated.
// Each string is given as a begin pointer and an end pointer: A..AEnd and
// B..BEnd.
370 /* This safely compares two non-null terminated strings of arbitrary
372 int stringcmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
// Step through both ranges together until either one is exhausted.
374 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
// Both ranges were consumed completely.
378 if (A
== AEnd
&& B
== BEnd
)
389 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
390 // ---------------------------------------------------------------------
// NOTE(review): the return statements and some tests are on lines missing
// from this listing (the embedded numbers skip); only the visible statements
// are annotated. Each string is given as a begin pointer and an end pointer.
// NOTE(review): *A and *B are plain chars; passing a negative value to
// toupper is undefined, so a cast to unsigned char may be wanted - confirm.
392 int stringcasecmp(const char *A
,const char *AEnd
,const char *B
,const char *BEnd
)
// Step through both ranges together until either one is exhausted.
394 for (; A
!= AEnd
&& B
!= BEnd
; A
++, B
++)
// Characters are compared after converting both to upper case.
395 if (toupper(*A
) != toupper(*B
))
// Both ranges were consumed completely.
398 if (A
== AEnd
&& B
== BEnd
)
404 if (toupper(*A
) < toupper(*B
))
409 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
410 // ---------------------------------------------------------------------
411 /* The format is like those used in package files and the method
412 communication system */
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the declaration of J, the step past the colon and the
// fallback that uses Default; only the visible statements are annotated.
413 string
LookupTag(string Message
,const char *Tag
,const char *Default
)
415 // Look for a matching tag.
416 int Length
= strlen(Tag
);
417 for (string::iterator I
= Message
.begin(); I
+ Length
< Message
.end(); I
++)
// A match needs a colon right after Length characters that compare equal to
// Tag ignoring case.
// NOTE(review): this call passes three arguments; it presumably resolves to
// an overload of stringcasecmp declared elsewhere - confirm.
420 if (I
[Length
] == ':' && stringcasecmp(I
,I
+Length
,Tag
) == 0)
422 // Find the end of line and strip the leading/trailing spaces
// NOTE(review): the three loops below that scan forwards dereference the
// iterator before testing it against Message.end(), so the element at end()
// can be read; swapping the two operands would avoid that - confirm.
425 for (; isspace(*I
) != 0 && I
< Message
.end(); I
++);
426 for (J
= I
; *J
!= '\n' && J
< Message
.end(); J
++);
427 for (; J
> I
&& isspace(J
[-1]) != 0; J
--);
// The value is the text between I and J.
429 return string(I
,J
-I
);
// No match on this line: move I forward to the next newline.
432 for (; *I
!= '\n' && I
< Message
.end(); I
++);
435 // Failed to find a match
441 // StringToBool - Converts a string into a boolean /*{{{*/
442 // ---------------------------------------------------------------------
443 /* This inspects the string to see if it is true or if it is false and
444 then returns the result. Several variants on true/false are checked. */
// NOTE(review): the return statements and the declaration of End are on
// lines missing from this listing (the embedded numbers skip); only the
// visible tests are annotated.
445 int StringToBool(string Text
,int Default
= -1)
// Try a numeric value first; base 0 lets strtol detect the radix. Only the
// values 0 and 1 pass the range test, and End must have moved past the start
// of the text.
448 int Res
= strtol(Text
.c_str(),&End
,0);
449 if (End
!= Text
.c_str() && Res
>= 0 && Res
<= 1)
452 // Check for negatives (compared ignoring case)
453 if (strcasecmp(Text
.c_str(),"no") == 0 ||
454 strcasecmp(Text
.c_str(),"false") == 0 ||
455 strcasecmp(Text
.c_str(),"without") == 0 ||
456 strcasecmp(Text
.c_str(),"disable") == 0)
459 // Check for positives (compared ignoring case)
460 if (strcasecmp(Text
.c_str(),"yes") == 0 ||
461 strcasecmp(Text
.c_str(),"true") == 0 ||
462 strcasecmp(Text
.c_str(),"with") == 0 ||
463 strcasecmp(Text
.c_str(),"enable") == 0)
469 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
470 // ---------------------------------------------------------------------
471 /* This converts a time_t into a string time representation that is
472 year 2000 compliant and timezone neutral */
// NOTE(review): the declaration of Buf and the return statement are on lines
// missing from this listing (the embedded numbers skip).
473 string
TimeRFC1123(time_t Date
)
// Break the time down in UTC and copy the result into a local struct.
// NOTE(review): the pointer returned by gmtime is dereferenced without a
// null check - confirm that is acceptable for all Date values.
475 struct tm Conv
= *gmtime(&Date
);
// Fixed English names, indexed by tm_wday and tm_mon.
478 const char *Day
[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
479 const char *Month
[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
480 "Aug","Sep","Oct","Nov","Dec"};
// tm_year counts from 1900, hence the + 1900 for the printed year.
482 sprintf(Buf
,"%s, %02i %s %i %02i:%02i:%02i GMT",Day
[Conv
.tm_wday
],
483 Conv
.tm_mday
,Month
[Conv
.tm_mon
],Conv
.tm_year
+1900,Conv
.tm_hour
,
484 Conv
.tm_min
,Conv
.tm_sec
);
488 // ReadMessages - Read messages from the FD /*{{{*/
489 // ---------------------------------------------------------------------
490 /* This pulls full messages from the input FD into the message buffer.
491 It assumes that messages will not pause during transit so no
492 fancy buffering is used. */
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the declarations of Buffer and End, the handling of Res and
// the return statements; only the visible statements are annotated.
493 bool ReadMessages(int Fd
, vector
<string
> &List
)
// Read into the free space that remains in Buffer after End.
500 int Res
= read(Fd
,End
,sizeof(Buffer
) - (End
-Buffer
));
502 // Process is dead, this is kind of bad..
512 // Look for the end of the message
513 for (char *I
= Buffer
; I
+ 1 < End
; I
++)
// Two consecutive newlines mark the end of a message.
515 if (I
[0] != '\n' || I
[1] != '\n')
518 // Pull the message out
// NOTE(review): Buffer's declaration is not shown; if it is a char array this
// first builds a temporary string from it, which needs a terminating NUL
// somewhere in Buffer - confirm.
519 string
Message(Buffer
,0,I
-Buffer
);
// Skip the newlines that separate this message from the next one.
522 for (; I
< End
&& *I
== '\n'; I
++);
// Move the unprocessed data to the front of Buffer.
// NOTE(review): the length is End-Buffer although the source starts at I, so
// the copy reads past End whenever I is beyond Buffer; End-I looks intended -
// confirm.
524 memmove(Buffer
,I
,End
-Buffer
);
527 List
.push_back(Message
);
532 if (WaitFd(Fd
) == false)
537 // MonthConv - Converts a month string into a number /*{{{*/
538 // ---------------------------------------------------------------------
539 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
540 Made it a bit more robust with a few touppers though. */
// NOTE(review): the case labels and most return statements are on lines
// missing from this listing (the embedded numbers skip), so the month each
// visible return belongs to is not shown. The values look like zero based
// month numbers (presumably Apr = 3, Aug = 7, Jun = 5, Jul = 6, Mar = 2,
// May = 4) - confirm against the full source.
541 static int MonthConv(char *Month
)
// Dispatch on the first letter, ignoring case.
543 switch (toupper(*Month
))
// Decided by the second letter.
546 return toupper(Month
[1]) == 'P'?3:7;
552 if (toupper(Month
[1]) == 'A')
// Decided by the third letter.
554 return toupper(Month
[2]) == 'N'?5:6;
556 return toupper(Month
[2]) == 'R'?2:4;
564 // Pretend it is January..
570 // StrToTime - Converts a string into a time_t /*{{{*/
571 // ---------------------------------------------------------------------
572 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
573 and the C library asctime format. It requires the GNU library function
574 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
575 reason the C library does not provide any such function :<*/
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the declarations of Tm and Month, the statements controlled by
// the tests below and the return statements; only the visible statements are
// annotated.
// NOTE(review): each format reads the month with a 3 character string
// conversion, so Month needs room for at least 4 bytes - confirm.
// NOTE(review): the year is scanned straight into tm_year, which counts from
// 1900; any adjustment would be on the missing lines - confirm.
576 bool StrToTime(string Val
,time_t &Result
)
580 const char *I
= Val
.c_str();
582 // Skip the day of the week
583 for (;*I
!= 0 && *I
!= ' '; I
++);
585 // Handle RFC 1123 time
586 if (sscanf(I
," %d %3s %d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,&Tm
.tm_year
,
587 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) != 6)
589 // Handle RFC 1036 time
590 if (sscanf(I
," %d-%3s-%d %d:%d:%d GMT",&Tm
.tm_mday
,Month
,
591 &Tm
.tm_year
,&Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
) == 6)
// Month name first and year last, the field order used by asctime.
596 if (sscanf(I
," %3s %d %d:%d:%d %d",Month
,&Tm
.tm_mday
,
597 &Tm
.tm_hour
,&Tm
.tm_min
,&Tm
.tm_sec
,&Tm
.tm_year
) != 6)
// Translate the month name into a month number.
603 Tm
.tm_mon
= MonthConv(Month
);
606 // Convert the broken-down UTC time to a time_t
607 Result
= timegm(&Tm
);
612 // URI::CopyFrom - Copy from an object /*{{{*/
613 // ---------------------------------------------------------------------
614 /* This parses the URI into all of its components */
// NOTE(review): this listing omits some original lines (the embedded numbers
// skip), e.g. the statements controlled by several of the tests below and
// the places where I is reset; only the visible statements are annotated.
// The visible code assigns the members Access, Path, Host, User, Password
// and Port.
615 void URI::CopyFrom(string U
)
617 string::const_iterator I
= U
.begin();
619 // Locate the first colon, this separates the scheme
620 for (; I
< U
.end() && *I
!= ':' ; I
++);
621 string::const_iterator FirstColon
= I
;
623 // Determine if this is a host type URI with a leading double //
624 string::const_iterator SingleSlash
= I
;
625 if (I
+ 3 < U
.end() && I
[1] == '/' && I
[2] == '/')
627 // Locate the single / that starts the path
628 for (; I
< U
.end(); I
++)
// Tests for a slash that is directly followed by another slash.
630 if (*I
== '/' && I
+1 < U
.end() && I
[1] == '/')
641 // We can now write the access and path specifiers
// Access is the text before the first colon; Path runs from SingleSlash to
// the end of the string.
642 Access
= string(U
,0,FirstColon
- U
.begin());
643 if (SingleSlash
!= U
.end())
644 Path
= string(U
,SingleSlash
- U
.begin());
645 if (Path
.empty() == true)
648 // Now we attempt to locate a user:pass@host fragment
650 if (FirstColon
>= U
.end())
// Clamp FirstColon so that it never lies beyond SingleSlash.
653 if (FirstColon
> SingleSlash
)
654 FirstColon
= SingleSlash
;
// Search for an at sign before the start of the path.
658 for (; I
< SingleSlash
&& *I
!= '@'; I
++);
659 string::const_iterator At
= I
;
661 // Colon in the @ section
663 for (; I
< At
&& *I
!= ':'; I
++);
664 string::const_iterator SecondColon
= I
;
666 // Now write the host and user/pass
// At equal to SingleSlash means no at sign was found before the path.
667 if (At
== SingleSlash
)
669 if (FirstColon
< SingleSlash
)
670 Host
= string(U
,FirstColon
- U
.begin(),SingleSlash
- FirstColon
);
// Host is the text between the at sign and the path; User runs from
// FirstColon up to SecondColon.
674 Host
= string(U
,At
- U
.begin() + 1,SingleSlash
- At
- 1);
675 User
= string(U
,FirstColon
- U
.begin(),SecondColon
- FirstColon
);
// Password is the text between the second colon and the at sign.
676 if (SecondColon
< At
)
677 Password
= string(U
,SecondColon
- U
.begin() + 1,At
- SecondColon
- 1);
680 // Now we parse off a port number from the hostname
// The last colon in Host splits it into host name and port.
682 string::size_type Pos
= Host
.rfind(':');
683 if (Pos
== string::npos
)
// atoi gives 0 when the text after the colon is not numeric.
686 Port
= atoi(string(Host
,Pos
+1).c_str());
687 Host
= string(Host
,0,Pos
);
690 // URI::operator string - Convert the URI to a string /*{{{*/
691 // ---------------------------------------------------------------------
693 URI::operator string()
695 string Res
= Access
+ ':';
696 if (Host
.empty() == false)
698 if (User
.empty() == false)
701 if (Password
.empty() == false)
702 Res
+= ":" + Password
;
708 if (Path
.empty() == false)