X-Git-Url: https://git.saurik.com/apt.git/blobdiff_plain/4fb400a66f2436cba6c89cecdc9560c9b1c54337..f105aaba433f5a8b9c4326dd0d704501bf07d1e5:/apt-pkg/contrib/strutl.cc diff --git a/apt-pkg/contrib/strutl.cc b/apt-pkg/contrib/strutl.cc index 9726138a0..0ac587a9e 100644 --- a/apt-pkg/contrib/strutl.cc +++ b/apt-pkg/contrib/strutl.cc @@ -21,6 +21,11 @@ #include #include +#include +#include +#include +#include +#include #include #include #include @@ -33,10 +38,52 @@ #include #include - -using namespace std; /*}}}*/ +using namespace std; + +// Strip - Remove white space from the front and back of a string /*{{{*/ +// --------------------------------------------------------------------- +namespace APT { + namespace String { +std::string Strip(const std::string &str) +{ + // ensure we have at least one character + if (str.empty() == true) + return str; + + char const * const s = str.c_str(); + size_t start = 0; + for (; isspace(s[start]) != 0; ++start) + ; // find the first not-space + + // string contains only whitespaces + if (s[start] == '\0') + return ""; + + size_t end = str.length() - 1; + for (; isspace(s[end]) != 0; --end) + ; // find the last not-space + + return str.substr(start, end - start + 1); +} + +bool Endswith(const std::string &s, const std::string &end) +{ + if (end.size() > s.size()) + return false; + return (s.substr(s.size() - end.size(), s.size()) == end); +} +bool Startswith(const std::string &s, const std::string &start) +{ + if (start.size() > s.size()) + return false; + return (s.substr(0, start.size()) == start); +} + +} +} + /*}}}*/ // UTF8ToCodeset - Convert some UTF-8 string for some codeset /*{{{*/ // --------------------------------------------------------------------- /* This is handy to use before display some information for enduser */ @@ -130,7 +177,7 @@ char *_strrstrip(char *String) End++; *End = 0; return String; -}; +} /*}}}*/ // strtabexpand - Converts tabs into 8 spaces /*{{{*/ // --------------------------------------------------------------------- @@ -403,26 +450,33 @@ string TimeToStr(unsigned long Sec) /*}}}*/ // SubstVar - Substitute a string for another string /*{{{*/ // --------------------------------------------------------------------- -/* This replaces all occurances of Subst with Contents in Str. */ +/* This replaces all occurrences of Subst with Contents in Str. */ string SubstVar(const string &Str,const string &Subst,const string &Contents) { + if (Subst.empty() == true) + return Str; + string::size_type Pos = 0; string::size_type OldPos = 0; string Temp; - - while (OldPos < Str.length() && + + while (OldPos < Str.length() && (Pos = Str.find(Subst,OldPos)) != string::npos) { - Temp += string(Str,OldPos,Pos) + Contents; - OldPos = Pos + Subst.length(); + if (OldPos != Pos) + Temp.append(Str, OldPos, Pos - OldPos); + if (Contents.empty() == false) + Temp.append(Contents); + OldPos = Pos + Subst.length(); } - + if (OldPos == 0) return Str; - + + if (OldPos >= Str.length()) + return Temp; return Temp + string(Str,OldPos); } - string SubstVar(string Str,const struct SubstVar *Vars) { for (; Vars->Subst != 0; Vars++) @@ -669,9 +723,12 @@ string LookupTag(const string &Message,const char *Tag,const char *Default) then returns the result. Several varients on true/false are checked. */ int StringToBool(const string &Text,int Default) { - char *End; - int Res = strtol(Text.c_str(),&End,0); - if (End != Text.c_str() && Res >= 0 && Res <= 1) + char *ParseEnd; + int Res = strtol(Text.c_str(),&ParseEnd,0); + // ensure that the entire string was converted by strtol to avoid + // failures on "apt-cache show -a 0ad" where the "0" is converted + const char *TextEnd = Text.c_str()+Text.size(); + if (ParseEnd == TextEnd && Res >= 0 && Res <= 1) return Res; // Check for positives @@ -722,85 +779,94 @@ string TimeRFC1123(time_t Date) In particular: this reads blocks from the input until it believes that it's run out of input text. Each block is terminated by a - double newline ('\n' followed by '\n'). As noted below, there is a - bug in this code: it assumes that all the blocks have been read if - it doesn't see additional text in the buffer after the last one is - parsed, which will cause it to lose blocks if the last block - coincides with the end of the buffer. + double newline ('\n' followed by '\n'). */ bool ReadMessages(int Fd, vector &List) { char Buffer[64000]; - char *End = Buffer; // Represents any left-over from the previous iteration of the // parse loop. (i.e., if a message is split across the end // of the buffer, it goes here) string PartialMessage; - - while (1) - { - int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer)); + + do { + int const Res = read(Fd, Buffer, sizeof(Buffer)); if (Res < 0 && errno == EINTR) continue; - - // Process is dead, this is kind of bad.. + + // process we read from has died if (Res == 0) return false; - + // No data - if (Res < 0 && errno == EAGAIN) + if (Res < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) return true; if (Res < 0) return false; - - End += Res; - - // Look for the end of the message - for (char *I = Buffer; I + 1 < End; I++) + + // extract the message(s) from the buffer + char const *Start = Buffer; + char const * const End = Buffer + Res; + + char const * NL = (char const *) memchr(Start, '\n', End - Start); + if (NL == NULL) { - if (I[0] != '\n' || I[1] != '\n') - continue; - - // Pull the message out - string Message(Buffer,I-Buffer); - PartialMessage += Message; - - // Fix up the buffer - for (; I < End && *I == '\n'; I++); - End -= I-Buffer; - memmove(Buffer,I,End-Buffer); - I = Buffer; - - List.push_back(PartialMessage); - PartialMessage.clear(); + // end of buffer: store what we have so far and read new data in + PartialMessage.append(Start, End - Start); + Start = End; } - if (End != Buffer) - { - // If there's text left in the buffer, store it - // in PartialMessage and throw the rest of the buffer - // away. This allows us to handle messages that - // are longer than the static buffer size. - PartialMessage += string(Buffer, End); - End = Buffer; - } else - { - // BUG ALERT: if a message block happens to end at a - // multiple of 64000 characters, this will cause it to - // terminate early, leading to a badly formed block and - // probably crashing the method. However, this is the only - // way we have to find the end of the message block. I have - // an idea of how to fix this, but it will require changes - // to the protocol (essentially to mark the beginning and - // end of the block). - // - // -- dburrows 2008-04-02 - return true; - } + ++NL; + + if (PartialMessage.empty() == false && Start < End) + { + // if we start with a new line, see if the partial message we have ended with one + // so that we properly detect records ending between two read() runs + // cases are: \n|\n , \r\n|\r\n and \r\n\r|\n + // the case \r|\n\r\n is handled by the usual double-newline handling + if ((NL - Start) == 1 || ((NL - Start) == 2 && *Start == '\r')) + { + if (APT::String::Endswith(PartialMessage, "\n") || APT::String::Endswith(PartialMessage, "\r\n\r")) + { + PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1); + List.push_back(PartialMessage); + PartialMessage.clear(); + while (NL < End && (*NL == '\n' || *NL == '\r')) ++NL; + Start = NL; + } + } + } + + while (Start < End) { + char const * NL2 = (char const *) memchr(NL, '\n', End - NL); + if (NL2 == NULL) + { + // end of buffer: store what we have so far and read new data in + PartialMessage.append(Start, End - Start); + break; + } + ++NL2; + + // did we find a double newline? + if ((NL2 - NL) == 1 || ((NL2 - NL) == 2 && *NL == '\r')) + { + PartialMessage.append(Start, NL2 - Start); + PartialMessage.erase(PartialMessage.find_last_not_of("\r\n") + 1); + List.push_back(PartialMessage); + PartialMessage.clear(); + while (NL2 < End && (*NL2 == '\n' || *NL2 == '\r')) ++NL2; + Start = NL2; + } + NL = NL2; + } + + // we have read at least one complete message and nothing left + if (PartialMessage.empty() == true) + return true; if (WaitFd(Fd) == false) return false; - } + } while (true); } /*}}}*/ // MonthConv - Converts a month string into a number /*{{{*/ @@ -902,7 +968,7 @@ bool FTPMDTMStrToTime(const char* const str,time_t &time) /*}}}*/ // StrToTime - Converts a string into a time_t /*{{{*/ // --------------------------------------------------------------------- -/* This handles all 3 populare time formats including RFC 1123, RFC 1036 +/* This handles all 3 popular time formats including RFC 1123, RFC 1036 and the C library asctime format. It requires the GNU library function 'timegm' to convert a struct tm in UTC to a time_t. For some bizzar reason the C library does not provide any such function :< This also @@ -942,6 +1008,8 @@ bool StrToTime(const string &Val,time_t &Result) Tm.tm_isdst = 0; if (Month[0] != 0) Tm.tm_mon = MonthConv(Month); + else + Tm.tm_mon = 0; // we don't have a month, so pick something Tm.tm_year -= 1900; // Convert to local time and then to GMT @@ -1008,7 +1076,7 @@ bool StrToNum(const char *Str,unsigned long long &Res,unsigned Len,unsigned Base // --------------------------------------------------------------------- /* This is used in decoding the 256bit encoded fixed length fields in tar files */ -bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len) +bool Base256ToNum(const char *Str,unsigned long long &Res,unsigned int Len) { if ((Str[0] & 0x80) == 0) return false; @@ -1021,6 +1089,23 @@ bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len) } } /*}}}*/ +// Base256ToNum - Convert a fixed length binary to a number /*{{{*/ +// --------------------------------------------------------------------- +/* This is used in decoding the 256bit encoded fixed length fields in + tar files */ +bool Base256ToNum(const char *Str,unsigned long &Res,unsigned int Len) +{ + unsigned long long Num; + bool rc; + + rc = Base256ToNum(Str, Num, Len); + Res = Num; + if (Res != Num) + return false; + + return rc; +} + /*}}}*/ // HexDigit - Convert a hex character into an integer /*{{{*/ // --------------------------------------------------------------------- /* Helper for Hex2Num */ @@ -1104,9 +1189,11 @@ bool TokSplitString(char Tok,char *Input,char **List, also, but the advantage is that we have an iteratable vector */ vector VectorizeString(string const &haystack, char const &split) { + vector exploded; + if (haystack.empty() == true) + return exploded; string::const_iterator start = haystack.begin(); string::const_iterator end = start; - vector exploded; do { for (; end != haystack.end() && *end != split; ++end); exploded.push_back(string(start, end)); @@ -1115,6 +1202,37 @@ vector VectorizeString(string const &haystack, char const &split) return exploded; } /*}}}*/ +// StringSplit - split a string into a string vector by token /*{{{*/ +// --------------------------------------------------------------------- +/* See header for details. + */ +vector StringSplit(std::string const &s, std::string const &sep, + unsigned int maxsplit) +{ + vector split; + size_t start, pos; + + // no seperator given, this is bogus + if(sep.size() == 0) + return split; + + start = pos = 0; + while (pos != string::npos) + { + pos = s.find(sep, start); + split.push_back(s.substr(start, pos-start)); + + // if maxsplit is reached, the remaining string is the last item + if(split.size() >= maxsplit) + { + split[split.size()-1] = s.substr(start); + break; + } + start = pos+sep.size(); + } + return split; +} + /*}}}*/ // RegexChoice - Simple regex list/list matcher /*{{{*/ // --------------------------------------------------------------------- /* */ @@ -1125,7 +1243,7 @@ unsigned long RegexChoice(RxChoiceList *Rxs,const char **ListBegin, R->Hit = false; unsigned long Hits = 0; - for (; ListBegin != ListEnd; ListBegin++) + for (; ListBegin < ListEnd; ++ListBegin) { // Check if the name is a regex const char *I; @@ -1201,10 +1319,12 @@ void ioprintf(ostream &out,const char *format,...) va_list args; ssize_t size = 400; while (true) { + bool ret = false; va_start(args,format); - if (iovprintf(out, format, args, size) == true) - return; + ret = iovprintf(out, format, args, size); va_end(args); + if (ret == true) + return; } } void strprintf(string &out,const char *format,...) @@ -1213,10 +1333,12 @@ void strprintf(string &out,const char *format,...) ssize_t size = 400; std::ostringstream outstr; while (true) { + bool ret = false; va_start(args,format); - if (iovprintf(outstr, format, args, size) == true) - break; + ret = iovprintf(outstr, format, args, size); va_end(args); + if (ret == true) + break; } out = outstr.str(); } @@ -1232,12 +1354,12 @@ char *safe_snprintf(char *Buffer,char *End,const char *Format,...) va_list args; int Did; - va_start(args,Format); - if (End <= Buffer) return End; - + va_start(args,Format); Did = vsnprintf(Buffer,End - Buffer,Format,args); + va_end(args); + if (Did < 0 || Buffer + Did > End) return End; return Buffer + Did; @@ -1252,11 +1374,11 @@ string StripEpoch(const string &VerStr) return VerStr; return VerStr.substr(i+1); } - + /*}}}*/ // tolower_ascii - tolower() function that ignores the locale /*{{{*/ // --------------------------------------------------------------------- /* This little function is the most called method we have and tries - therefore to do the absolut minimum - and is noteable faster than + therefore to do the absolut minimum - and is notable faster than standard tolower/toupper and as a bonus avoids problems with different locales - we only operate on ascii chars anyway. */ int tolower_ascii(int const c) @@ -1267,9 +1389,9 @@ int tolower_ascii(int const c) } /*}}}*/ -// CheckDomainList - See if Host is in a , seperate list /*{{{*/ +// CheckDomainList - See if Host is in a , separate list /*{{{*/ // --------------------------------------------------------------------- -/* The domain list is a comma seperate list of domains that are suffix +/* The domain list is a comma separate list of domains that are suffix matched against the argument */ bool CheckDomainList(const string &Host,const string &List) { @@ -1290,14 +1412,26 @@ bool CheckDomainList(const string &Host,const string &List) return false; } /*}}}*/ -// DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/ +// strv_length - Return the length of a NULL-terminated string array /*{{{*/ +// --------------------------------------------------------------------- +/* */ +size_t strv_length(const char **str_array) +{ + size_t i; + for (i=0; str_array[i] != NULL; i++) + /* nothing */ + ; + return i; +} + +// DeEscapeString - unescape (\0XX and \xXX) from a string /*{{{*/ // --------------------------------------------------------------------- /* */ string DeEscapeString(const string &input) { char tmp[3]; - string::const_iterator it, escape_start; - string output, octal, hex; + string::const_iterator it; + string output; for (it = input.begin(); it != input.end(); ++it) { // just copy non-escape chars @@ -1483,9 +1617,12 @@ URI::operator string() if (User.empty() == false) { - Res += User; + // FIXME: Technically userinfo is permitted even less + // characters than these, but this is not conveniently + // expressed with a blacklist. + Res += QuoteString(User, ":/?#[]@"); if (Password.empty() == false) - Res += ":" + Password; + Res += ":" + QuoteString(Password, ":/?#[]@"); Res += "@"; } @@ -1524,7 +1661,6 @@ string URI::SiteOnly(const string &URI) U.User.clear(); U.Password.clear(); U.Path.clear(); - U.Port = 0; return U; } /*}}}*/ @@ -1536,7 +1672,6 @@ string URI::NoUserPassword(const string &URI) ::URI U(URI); U.User.clear(); U.Password.clear(); - U.Port = 0; return U; } /*}}}*/