]>
Commit | Line | Data |
---|---|---|
6c139d6e AL |
1 | // -*- mode: cpp; mode: fold -*- |
2 | // Description /*{{{*/ | |
1168596f | 3 | // $Id: strutl.h,v 1.22 2003/02/02 22:20:27 jgg Exp $ |
6c139d6e AL |
4 | /* ###################################################################### |
5 | ||
b2e465d6 | 6 | String Util - These are some useful string functions |
6c139d6e AL |
7 | |
8 | _strstrip is a function to remove whitespace from the front and end | |
9 | of a string. | |
10 | ||
11 | This source is placed in the Public Domain, do with it what you will | |
12 | It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca> | |
13 | ||
14 | ##################################################################### */ | |
15 | /*}}}*/ | |
6c139d6e AL |
16 | #ifndef STRUTL_H |
17 | #define STRUTL_H | |
18 | ||
13500573 | 19 | |
2ddab3fb | 20 | #include <limits> |
6c139d6e | 21 | #include <string> |
a7955daa | 22 | #include <cstring> |
0a8a80e5 | 23 | #include <vector> |
0db4a45b | 24 | #include <iostream> |
eff0c22e JAK |
25 | #ifdef APT_PKG_EXPOSE_STRING_VIEW |
26 | #include <apt-pkg/string_view.h> | |
27 | #endif | |
492f957a | 28 | #include <time.h> |
453b82a3 | 29 | #include <stddef.h> |
6c139d6e | 30 | |
6dc60370 DK |
31 | #include "macros.h" |
32 | ||
453b82a3 DK |
33 | #ifndef APT_10_CLEANER_HEADERS |
34 | #include <stdlib.h> | |
35 | #endif | |
a4f6bdc8 DK |
36 | #ifndef APT_8_CLEANER_HEADERS |
37 | using std::string; | |
38 | using std::vector; | |
39 | using std::ostream; | |
40 | #endif | |
41 | ||
65dbd5a1 MV |
// Helper declarations grouped in namespace APT::String. Only the
// prototypes live in this header; the definitions are elsewhere.
namespace APT
{
   namespace String
   {
      // NOTE(review): presumably returns a copy of s without surrounding
      // whitespace - confirm against the implementation.
      std::string Strip(std::string const &s);
      // NOTE(review): presumably suffix / prefix tests on s - confirm.
      bool Endswith(std::string const &s, std::string const &ending);
      bool Startswith(std::string const &s, std::string const &starting);
   }
}
65dbd5a1 MV |
49 | |
50 | ||
// Character set conversion, in-place C string helpers and (de)quoting.
// Only prototypes are given here; the bodies are defined elsewhere.
bool UTF8ToCodeset(char const *codeset, std::string const &orig, std::string *dest);
// Per the header comment: removes whitespace from the front and end of
// the string.
char *_strstrip(char *String);
// right strip only
char *_strrstrip(char *String);
char *_strtabexpand(char *String, size_t Len);
// NOTE(review): String is taken by reference to pointer, so these two
// presumably advance it past the parsed word - confirm.
bool ParseQuoteWord(char const *&String, std::string &Res);
bool ParseCWord(char const *&String, std::string &Res);
std::string QuoteString(std::string const &Str, char const *Bad);
std::string DeQuoteString(std::string const &Str);
std::string DeQuoteString(std::string::const_iterator const &begin,
			  std::string::const_iterator const &end);
a513ace2 | 60 | |
// unescape (\0XX and \xXX) from a string
std::string DeEscapeString(std::string const &input);

// Conversions to std::string; only the prototypes are visible here.
std::string SizeToStr(double Bytes);
std::string TimeToStr(unsigned long Sec);
std::string Base64Encode(std::string const &Str);
std::string OutputInDepth(unsigned long const Depth, char const *Separator = " ");
std::string URItoFileName(std::string const &URI);
0b45b6e5 DK |
69 | APT_DEPRECATED_MSG("Specify if GMT is required or a numeric timezone can be used") std::string TimeRFC1123(time_t Date); |
70 | /** returns a datetime string as needed by HTTP/1.1 and Debian files. | |
71 | * | |
72 | * Note: The date will always be represented in a UTC timezone | |
73 | * | |
74 | * @param Date to be represented as a string | |
75 | * @param NumericTimezone is preferred in general, but HTTP/1.1 requires the use | |
76 | * of GMT as timezone instead. \b true means that the timezone should be denoted | |
77 | * as "+0000" while \b false uses "GMT". | |
78 | */ | |
79 | std::string TimeRFC1123(time_t Date, bool const NumericTimezone); | |
9febc2b2 DK |
80 | /** parses time as needed by HTTP/1.1 and Debian files. |
81 | * | |
82 | * HTTP/1.1 prefers dates in RFC1123 format (but the other two obsolete date formats | |
83 | * are supported to) and e.g. Release files use the same format in Date & Valid-Until | |
84 | * fields. | |
85 | * | |
86 | * Note: datetime strings need to be in UTC timezones (GMT, UTC, Z, +/-0000) to be | |
87 | * parsed. Other timezones will be rejected as invalid. Previous implementations | |
88 | * accepted other timezones, but treated them as UTC. | |
89 | * | |
90 | * @param str is the datetime string to parse | |
91 | * @param[out] time will be the seconds since epoch of the given datetime if | |
92 | * parsing is successful, undefined otherwise. | |
93 | * @return \b true if parsing was successful, otherwise \b false. | |
94 | */ | |
453b82a3 DK |
95 | bool RFC1123StrToTime(const char* const str,time_t &time) APT_MUSTCHECK; |
96 | bool FTPMDTMStrToTime(const char* const str,time_t &time) APT_MUSTCHECK; | |
5dd00edb | 97 | APT_DEPRECATED_MSG("Use RFC1123StrToTime or FTPMDTMStrToTime as needed instead") bool StrToTime(const std::string &Val,time_t &Result); |
8f3ba4e8 DK |
// Message/tag lookup and number parsing helpers. Only prototypes are
// given in this header.
std::string LookupTag(std::string const &Message, char const *Tag, char const *Default = 0);
int StringToBool(std::string const &Text, int Default = -1);
bool ReadMessages(int Fd, std::vector<std::string> &List);
// Each numeric parser comes in an unsigned long and an unsigned long long
// flavour, differing only in the type of the Res output parameter.
bool StrToNum(char const *Str, unsigned long &Res, unsigned Len, unsigned Base = 0);
bool StrToNum(char const *Str, unsigned long long &Res, unsigned Len, unsigned Base = 0);
bool Base256ToNum(char const *Str, unsigned long &Res, unsigned int Len);
bool Base256ToNum(char const *Str, unsigned long long &Res, unsigned int Len);
bool Hex2Num(std::string const &Str, unsigned char *Num, unsigned int Length);
#ifdef APT_PKG_EXPOSE_STRING_VIEW
APT_HIDDEN bool Hex2Num(APT::StringView const Str, unsigned char *Num, unsigned int Length);
#endif
// input changing string split
bool TokSplitString(char Tok, char *Input, char **List,
		    unsigned long ListMax);
9572a54b MV |
112 | |
113 | // split a given string by a char | |
7e9b7ea8 | 114 | std::vector<std::string> VectorizeString(std::string const &haystack, char const &split) APT_PURE; |
9572a54b | 115 | |
41053d72 MV |
116 | /* \brief Return a vector of strings from string "input" where "sep" |
117 | * is used as the delimiter string. | |
118 | * | |
119 | * \param input The input string. | |
120 | * | |
8d89cda7 | 121 | * \param sep The separator to use. |
41053d72 MV |
122 | * |
123 | * \param maxsplit (optional) The maximum amount of splitting that | |
124 | * should be done . | |
125 | * | |
126 | * The optional "maxsplit" argument can be used to limit the splitting, | |
127 | * if used the string is only split on maxsplit places and the last | |
128 | * item in the vector contains the remainder string. | |
129 | */ | |
130 | std::vector<std::string> StringSplit(std::string const &input, | |
131 | std::string const &sep, | |
453b82a3 | 132 | unsigned int maxsplit=std::numeric_limits<unsigned int>::max()) APT_CONST; |
9572a54b | 133 | |
453b82a3 DK |
134 | void ioprintf(std::ostream &out,const char *format,...) APT_PRINTF(2); |
135 | void strprintf(std::string &out,const char *format,...) APT_PRINTF(2); | |
136 | char *safe_snprintf(char *Buffer,char *End,const char *Format,...) APT_PRINTF(3); | |
8f3ba4e8 | 137 | bool CheckDomainList(const std::string &Host, const std::string &List); |
98eb4e9e JAK |
138 | |
139 | /* Compatibility aliases: tolower_ascii and isspace_ascii expand to the inline implementations defined below */
140 | #define tolower_ascii tolower_ascii_inline | |
141 | #define isspace_ascii isspace_ascii_inline | |
142 | ||
7a3b00b1 JAK |
143 | APT_CONST APT_HOT |
144 | static inline int tolower_ascii_unsafe(int const c) | |
145 | { | |
146 | return c | 0x20; | |
147 | } | |
98eb4e9e JAK |
148 | APT_CONST APT_HOT |
149 | static inline int tolower_ascii_inline(int const c) | |
150 | { | |
151 | return (c >= 'A' && c <= 'Z') ? c + 32 : c; | |
152 | } | |
153 | APT_CONST APT_HOT | |
154 | static inline int isspace_ascii_inline(int const c) | |
155 | { | |
156 | // 9='\t',10='\n',11='\v',12='\f',13='\r',32=' ' | |
157 | return (c >= 9 && c <= 13) || c == ' '; | |
158 | } | |
98b06343 | 159 | |
9fa247dc JF |
160 | // StringViewCompareFast - awkward attempt to optimize cache generation /*{{{*/ |
161 | #ifdef APT_PKG_EXPOSE_STRING_VIEW | |
162 | /** | |
163 | * \brief Faster comparison for string views (compare size before data) | |
164 | * | |
165 | * Still stable, but faster than the normal ordering. | |
166 | * As this is used for package comparison this *MUST* be case insensitive, | |
167 | * as the alternative is to lower case all dependency fields which is slow. */ | |
168 | static inline int StringViewCompareFast(APT::StringView a, APT::StringView b) { | |
169 | if (a.size() != b.size()) | |
170 | return a.size() - b.size(); | |
171 | auto l(a.data()), r(b.data()); | |
172 | for (auto e(a.size()), i(decltype(e)(0)); i != e; ++i) | |
173 | if (tolower_ascii_inline(l[i]) != tolower_ascii_inline(r[i])) | |
174 | return tolower_ascii(l[i]) < tolower_ascii(r[i]) ? -1 : 1; | |
175 | return 0; | |
176 | } | |
177 | #endif | |
178 | /*}}}*/ | |
179 | ||
180 | ||
// NOTE(review): presumably returns VerStr without a leading "epoch:"
// part - confirm against the implementation.
std::string StripEpoch(std::string const &VerStr);
6c139d6e | 182 | |
// Generates five inline overloads called `name` which all forward to the
// four-pointer comparison `func(A, AEnd, B, BEnd)`. A missing end pointer
// is computed with strlen() for C strings, and from c_str() plus the
// string size for std::string arguments.
#define APT_MKSTRCMP(name,func) \
inline APT_PURE int name(char const *A, char const *B) \
   { return func(A, A + strlen(A), B, B + strlen(B)); } \
inline APT_PURE int name(char const *A, char const *AEnd, char const *B) \
   { return func(A, AEnd, B, B + strlen(B)); } \
inline APT_PURE int name(std::string const &A, char const *B) \
   { return func(A.c_str(), A.c_str() + A.size(), B, B + strlen(B)); } \
inline APT_PURE int name(std::string const &A, std::string const &B) \
   { return func(A.c_str(), A.c_str() + A.size(), B.c_str(), B.c_str() + B.size()); } \
inline APT_PURE int name(std::string const &A, char const *B, char const *BEnd) \
   { return func(A.c_str(), A.c_str() + A.size(), B, BEnd); }
0db4a45b | 189 | |
// Generates four inline overloads called `name` forwarding to `func`.
// Unlike APT_MKSTRCMP, std::string arguments are handed over as
// begin()/end() iterators, so `func` needs overloads that accept
// std::string::const_iterator ranges.
#define APT_MKSTRCMP2(name,func) \
inline APT_PURE int name(char const *A, char const *AEnd, char const *B) \
   { return func(A, AEnd, B, B + strlen(B)); } \
inline APT_PURE int name(std::string const &A, char const *B) \
   { return func(A.begin(), A.end(), B, B + strlen(B)); } \
inline APT_PURE int name(std::string const &A, std::string const &B) \
   { return func(A.begin(), A.end(), B.begin(), B.end()); } \
inline APT_PURE int name(std::string const &A, char const *B, char const *BEnd) \
   { return func(A.begin(), A.end(), B, BEnd); }
47db8997 | 195 | |
a02db58f DK |
196 | int APT_PURE stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd); |
197 | int APT_PURE stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd); | |
ae0b19f5 AL |
198 | |
199 | /* We assume that GCC 3 indicates that libstdc++3 is in use too. In that | |
200 | case the definition of string::const_iterator is not the same as | |
201 | const char * and we need these extra functions */ | |
202 | #if __GNUC__ >= 3 | |
a02db58f | 203 | int APT_PURE stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd, |
47db8997 | 204 | const char *B,const char *BEnd); |
a02db58f | 205 | int APT_PURE stringcmp(std::string::const_iterator A,std::string::const_iterator AEnd, |
8f3ba4e8 | 206 | std::string::const_iterator B,std::string::const_iterator BEnd); |
a02db58f | 207 | int APT_PURE stringcasecmp(std::string::const_iterator A,std::string::const_iterator AEnd, |
47db8997 | 208 | const char *B,const char *BEnd); |
a02db58f | 209 | int APT_PURE stringcasecmp(std::string::const_iterator A,std::string::const_iterator AEnd, |
8f3ba4e8 | 210 | std::string::const_iterator B,std::string::const_iterator BEnd); |
c24972cb | 211 | |
a02db58f DK |
212 | inline APT_PURE int stringcmp(std::string::const_iterator A,std::string::const_iterator Aend,const char *B) {return stringcmp(A,Aend,B,B+strlen(B));} |
213 | inline APT_PURE int stringcasecmp(std::string::const_iterator A,std::string::const_iterator Aend,const char *B) {return stringcasecmp(A,Aend,B,B+strlen(B));} | |
ae0b19f5 AL |
214 | #endif |
215 | ||
d3e8fbb3 DK |
216 | APT_MKSTRCMP2(stringcmp,stringcmp) |
217 | APT_MKSTRCMP2(stringcasecmp,stringcasecmp) | |
0db4a45b | 218 | |
b9179170 | 219 | // Return the length of a NULL-terminated string array |
a02db58f | 220 | size_t APT_PURE strv_length(const char **str_array); |
b9179170 MV |
221 | |
222 | ||
// Returns s unchanged unless it is a null pointer, in which case the
// string literal "(null)" is returned instead.
inline const char *DeNull(const char *s)
{
   if (s == 0)
      return "(null)";
   return s;
}
6c139d6e | 224 | |
93bf083d AL |
/** Holds the components of a URI in separate public fields.
 *
 * Construction from and assignment of a string are delegated to the
 * private CopyFrom(), which is defined outside of this header.
 */
class URI
{
   void CopyFrom(const std::string &From);

 public:

   std::string Access;
   std::string User;
   std::string Password;
   std::string Host;
   std::string Path;
   unsigned int Port;

   operator std::string();
   inline void operator =(const std::string &From) {CopyFrom(From);}
   // An object counts as empty when its Access field is empty. Const
   // qualified so it can be called on const objects as well.
   inline bool empty() const {return Access.empty();}
   static std::string SiteOnly(const std::string &URI);
   static std::string ArchiveOnly(const std::string &URI);
   static std::string NoUserPassword(const std::string &URI);

   // Port is zeroed before CopyFrom() runs so that it never holds an
   // indeterminate value, whatever CopyFrom() does with it.
   URI(std::string Path) : Port(0) {CopyFrom(Path);}
   URI() : Port(0) {}
};
248 | ||
b2e465d6 AL |
// One entry for the table based SubstVar() overload: a C string paired
// with a pointer to a std::string.
// NOTE(review): presumably Subst is the text to look for and Contents its
// replacement - confirm against the implementation.
struct SubstVar
{
   char const *Subst;
   std::string const *Contents;
};
// The struct and the functions share one name, hence the elaborated
// `struct SubstVar` in the parameter list.
std::string SubstVar(std::string Str, struct SubstVar const *Vars);
std::string SubstVar(std::string const &Str, std::string const &Subst, std::string const &Contents);
b2e465d6 AL |
256 | |
// Entry type for RegexChoice(): an opaque user pointer, a C string and a
// flag.
// NOTE(review): presumably Str is a regular expression and Hit records
// whether it matched - confirm against the implementation.
struct RxChoiceList
{
   void *UserData;
   char const *Str;
   bool Hit;
};
unsigned long RegexChoice(RxChoiceList *Rxs, char const **ListBegin,
			  char const **ListEnd);
265 | ||
6c139d6e | 266 | #endif |