char *I;
for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
{
- if (*Start == '%' && Start + 2 < C)
+ if (*Start == '%' && Start + 2 < C &&
+ isxdigit(Start[1]) && isxdigit(Start[2]))
{
Tmp[0] = Start[1];
Tmp[1] = Start[2];
for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
{
if (strchr(Bad,*I) != 0 || isprint(*I) == 0 ||
- *I <= 0x20 || *I >= 0x7F)
+ *I == 0x25 || // percent '%' char
+ *I <= 0x20 || *I >= 0x7F) // control chars
{
char Buf[10];
sprintf(Buf,"%%%02x",(int)*I);
// ---------------------------------------------------------------------
/* This undoes QuoteString */
string DeQuoteString(const string &Str)
+{
+ return DeQuoteString(Str.begin(),Str.end());
+}
+string DeQuoteString(string::const_iterator const &begin,
+ string::const_iterator const &end)
{
string Res;
- for (string::const_iterator I = Str.begin(); I != Str.end(); I++)
+ for (string::const_iterator I = begin; I != end; I++)
{
- if (*I == '%' && I + 2 < Str.end())
+ if (*I == '%' && I + 2 < end &&
+ isxdigit(I[1]) && isxdigit(I[2]))
{
char Tmp[3];
Tmp[0] = I[1];
int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
{
for (; A != AEnd && B != BEnd; A++, B++)
- if (toupper(*A) != toupper(*B))
+ if (tolower_ascii(*A) != tolower_ascii(*B))
break;
if (A == AEnd && B == BEnd)
return 1;
if (B == BEnd)
return -1;
- if (toupper(*A) < toupper(*B))
+ if (tolower_ascii(*A) < tolower_ascii(*B))
return -1;
return 1;
}
const char *B,const char *BEnd)
{
for (; A != AEnd && B != BEnd; A++, B++)
- if (toupper(*A) != toupper(*B))
+ if (tolower_ascii(*A) != tolower_ascii(*B))
break;
if (A == AEnd && B == BEnd)
return 1;
if (B == BEnd)
return -1;
- if (toupper(*A) < toupper(*B))
+ if (tolower_ascii(*A) < tolower_ascii(*B))
return -1;
return 1;
}
string::const_iterator B,string::const_iterator BEnd)
{
for (; A != AEnd && B != BEnd; A++, B++)
- if (toupper(*A) != toupper(*B))
+ if (tolower_ascii(*A) != tolower_ascii(*B))
break;
if (A == AEnd && B == BEnd)
return 1;
if (B == BEnd)
return -1;
- if (toupper(*A) < toupper(*B))
+ if (tolower_ascii(*A) < tolower_ascii(*B))
return -1;
return 1;
}
// MonthConv - Converts a month string into a number /*{{{*/
// ---------------------------------------------------------------------
/* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
- Made it a bit more robust with a few touppers though. */
+ Made it a bit more robust with a few tolower_ascii though. */
static int MonthConv(char *Month)
{
- switch (toupper(*Month))
+ switch (tolower_ascii(*Month))
{
- case 'A':
- return toupper(Month[1]) == 'P'?3:7;
- case 'D':
+ case 'a':
+ return tolower_ascii(Month[1]) == 'p'?3:7;
+ case 'd':
return 11;
- case 'F':
+ case 'f':
return 1;
- case 'J':
- if (toupper(Month[1]) == 'A')
+ case 'j':
+ if (tolower_ascii(Month[1]) == 'a')
return 0;
- return toupper(Month[2]) == 'N'?5:6;
- case 'M':
- return toupper(Month[2]) == 'R'?2:4;
- case 'N':
+ return tolower_ascii(Month[2]) == 'n'?5:6;
+ case 'm':
+ return tolower_ascii(Month[2]) == 'r'?2:4;
+ case 'n':
return 10;
- case 'O':
+ case 'o':
return 9;
- case 'S':
+ case 's':
return 8;
// Pretend it is January..
}
}
/*}}}*/
-// timegm - Internal timegm function if gnu is not available /*{{{*/
+// timegm - Internal timegm if the gnu version is not available /*{{{*/
// ---------------------------------------------------------------------
-/* Ripped this evil little function from wget - I prefer the use of
- GNU timegm if possible as this technique will have interesting problems
- with leap seconds, timezones and other.
-
- Converts struct tm to time_t, assuming the data in tm is UTC rather
+/* Converts struct tm to time_t, assuming the data in tm is UTC rather
than local timezone (mktime assumes the latter).
-
- Contributed by Roger Beeman <beeman@cisco.com>, with the help of
- Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
-/* Turned it into an autoconf check, because GNU is not the only thing which
- can provide timegm. -- 2002-09-22, Joel Baker */
-
-#ifndef HAVE_TIMEGM // Now with autoconf!
+ This function is a nonstandard GNU extension that is also present on
+ the BSDs and maybe other systems. For others we follow the advice of
+ the manpage of timegm and use his portable replacement. */
+#ifndef HAVE_TIMEGM
static time_t timegm(struct tm *t)
{
- time_t tl, tb;
-
- tl = mktime (t);
- if (tl == -1)
- return -1;
- tb = mktime (gmtime (&tl));
- return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
+ char *tz = getenv("TZ");
+ setenv("TZ", "", 1);
+ tzset();
+ time_t ret = mktime(t);
+ if (tz)
+ setenv("TZ", tz, 1);
+ else
+ unsetenv("TZ");
+ tzset();
+ return ret;
}
#endif
/*}}}*/
+// FullDateToTime - Converts a HTTP1.1 full date strings into a time_t /*{{{*/
+// ---------------------------------------------------------------------
+/* tries to parses a full date as specified in RFC2616 Section 3.3.1
+ with one exception: All timezones (%Z) are accepted but the protocol
+ says that it MUST be GMT, but this one is equal to UTC which we will
+ encounter from time to time (e.g. in Release files) so we accept all
+ here and just assume it is GMT (or UTC) later on */
+bool RFC1123StrToTime(const char* const str,time_t &time)
+{
+ struct tm Tm;
+ setlocale (LC_ALL,"C");
+ bool const invalid =
+ // Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
+ (strptime(str, "%a, %d %b %Y %H:%M:%S %Z", &Tm) == NULL &&
+ // Sunday, 06-Nov-94 08:49:37 GMT ; RFC 850, obsoleted by RFC 1036
+ strptime(str, "%A, %d-%b-%y %H:%M:%S %Z", &Tm) == NULL &&
+ // Sun Nov 6 08:49:37 1994 ; ANSI C's asctime() format
+ strptime(str, "%a %b %d %H:%M:%S %Y", &Tm) == NULL);
+ setlocale (LC_ALL,"");
+ if (invalid == true)
+ return false;
+
+ time = timegm(&Tm);
+ return true;
+}
+ /*}}}*/
+// FTPMDTMStrToTime - Converts a ftp modification date into a time_t /*{{{*/
+// ---------------------------------------------------------------------
+/* */
+bool FTPMDTMStrToTime(const char* const str,time_t &time)
+{
+ struct tm Tm;
+ // MDTM includes no whitespaces but recommend and ignored by strptime
+ if (strptime(str, "%Y %m %d %H %M %S", &Tm) == NULL)
+ return false;
+
+ time = timegm(&Tm);
+ return true;
+}
+ /*}}}*/
// StrToTime - Converts a string into a time_t /*{{{*/
// ---------------------------------------------------------------------
/* This handles all 3 populare time formats including RFC 1123, RFC 1036
return true;
}
/*}}}*/
-// ExplodeString - Split a string up into a vector /*{{{*/
+// VectorizeString - Split a string up into a vector of strings /*{{{*/
// ---------------------------------------------------------------------
/* This can be used to split a given string up into a vector, so the
propose is the same as in the method above and this one is a bit slower
- also, but the advantage is that we an iteratable vector */
-vector<string> ExplodeString(string const &haystack, char const &split) {
- string::const_iterator start = haystack.begin();
- string::const_iterator end = start;
- vector<string> exploded;
- do {
- for (; end != haystack.end() && *end != split; ++end);
- exploded.push_back(string(start, end));
- start = end;
- } while (end != haystack.end() && (++end) != haystack.end());
- return exploded;
+ also, but the advantage is that we have an iteratable vector */
+vector<string> VectorizeString(string const &haystack, char const &split)
+{
+ string::const_iterator start = haystack.begin();
+ string::const_iterator end = start;
+ vector<string> exploded;
+ do {
+ for (; end != haystack.end() && *end != split; ++end);
+ exploded.push_back(string(start, end));
+ start = end + 1;
+ } while (end != haystack.end() && (++end) != haystack.end());
+ return exploded;
}
/*}}}*/
// RegexChoice - Simple regex list/list matcher /*{{{*/
// tolower_ascii - tolower() function that ignores the locale /*{{{*/
// ---------------------------------------------------------------------
-/* */
-int tolower_ascii(int c)
+/* This little function is the most called method we have and tries
+ therefore to do the absolut minimum - and is noteable faster than
+ standard tolower/toupper and as a bonus avoids problems with different
+ locales - we only operate on ascii chars anyway. */
+int tolower_ascii(int const c)
{
- if (c >= 'A' and c <= 'Z')
+ if (c >= 'A' && c <= 'Z')
return c + 32;
return c;
}
else
{
Host.assign(At+1,SingleSlash);
- User.assign(FirstColon,SecondColon);
+ // username and password must be encoded (RFC 3986)
+ User.assign(DeQuoteString(FirstColon,SecondColon));
if (SecondColon < At)
- Password.assign(SecondColon+1,At);
+ Password.assign(DeQuoteString(SecondColon+1,At));
}
// Now we parse the RFC 2732 [] hostnames.
return U;
}
/*}}}*/
+// URI::NoUserPassword - Return the schema, site and path for the URI /*{{{*/
+// ---------------------------------------------------------------------
+/* */
+string URI::NoUserPassword(const string &URI)
+{
+ ::URI U(URI);
+ U.User.clear();
+ U.Password.clear();
+ U.Port = 0;
+ return U;
+}
+ /*}}}*/