]>
Commit | Line | Data |
---|---|---|
dd65d8c8 | 1 | ///////////////////////////////////////////////////////////////////////////// |
80fdcdb9 | 2 | // Name: src/common/uri.cpp |
2186321f VZ |
3 | // Purpose: Implementation of a URI parser |
4 | // Author: Ryan Norton, | |
5 | // Vadim Zeitlin (UTF-8 URI support, many other changes) | |
dd65d8c8 | 6 | // Created: 10/26/04 |
2186321f VZ |
7 | // Copyright: (c) 2004 Ryan Norton, |
8 | // 2008 Vadim Zeitlin | |
9 | // Licence: wxWindows licence | |
dd65d8c8 RN |
10 | ///////////////////////////////////////////////////////////////////////////// |
11 | ||
12 | // =========================================================================== | |
13 | // declarations | |
14 | // =========================================================================== | |
15 | ||
16 | // --------------------------------------------------------------------------- | |
17 | // headers | |
18 | // --------------------------------------------------------------------------- | |
19 | ||
dd65d8c8 RN |
20 | // For compilers that support precompilation, includes "wx.h". |
21 | #include "wx/wxprec.h" | |
22 | ||
23 | #ifdef __BORLANDC__ | |
24 | #pragma hdrstop | |
25 | #endif | |
26 | ||
0bf751e7 VS |
27 | #ifndef WX_PRECOMP |
28 | #include "wx/crt.h" | |
29 | #endif | |
30 | ||
dd65d8c8 RN |
31 | #include "wx/uri.h" |
32 | ||
33 | // --------------------------------------------------------------------------- | |
34 | // definitions | |
35 | // --------------------------------------------------------------------------- | |
36 | ||
4115960d | 37 | IMPLEMENT_CLASS(wxURI, wxObject) |
dd65d8c8 RN |
38 | |
39 | // =========================================================================== | |
2186321f | 40 | // wxURI implementation |
dd65d8c8 RN |
41 | // =========================================================================== |
42 | ||
43 | // --------------------------------------------------------------------------- | |
2186321f | 44 | // Constructors and cleanup |
dd65d8c8 RN |
45 | // --------------------------------------------------------------------------- |
46 | ||
2186321f VZ |
47 | wxURI::wxURI() |
48 | : m_hostType(wxURI_REGNAME), | |
49 | m_fields(0) | |
dd65d8c8 RN |
50 | { |
51 | } | |
846978d7 | 52 | |
2186321f VZ |
53 | wxURI::wxURI(const wxString& uri) |
54 | : m_hostType(wxURI_REGNAME), | |
55 | m_fields(0) | |
dd65d8c8 RN |
56 | { |
57 | Create(uri); | |
58 | } | |
59 | ||
2186321f | 60 | bool wxURI::Create(const wxString& uri) |
dd65d8c8 | 61 | { |
2186321f VZ |
62 | if (m_fields) |
63 | Clear(); | |
dd65d8c8 | 64 | |
2186321f | 65 | return Parse(uri.utf8_str()); |
dd65d8c8 RN |
66 | } |
67 | ||
68 | void wxURI::Clear() | |
69 | { | |
2186321f VZ |
70 | m_scheme = |
71 | m_userinfo = | |
72 | m_server = | |
73 | m_port = | |
74 | m_path = | |
75 | m_query = | |
76 | m_fragment = wxEmptyString; | |
dd65d8c8 RN |
77 | |
78 | m_hostType = wxURI_REGNAME; | |
79 | ||
80 | m_fields = 0; | |
81 | } | |
82 | ||
83 | // --------------------------------------------------------------------------- | |
2186321f | 84 | // Escaped characters handling |
dd65d8c8 RN |
85 | // --------------------------------------------------------------------------- |
86 | ||
2186321f VZ |
87 | // Converts a character into a numeric hexadecimal value, or -1 if the passed |
88 | // in character is not a valid hex character | |
dd65d8c8 | 89 | |
2186321f VZ |
90 | /* static */ |
91 | int wxURI::CharToHex(char c) | |
92 | { | |
93 | if ((c >= 'A') && (c <= 'Z')) | |
94 | return c - 'A' + 10; | |
95 | if ((c >= 'a') && (c <= 'z')) | |
96 | return c - 'a' + 10; | |
97 | if ((c >= '0') && (c <= '9')) | |
98 | return c - '0'; | |
99 | ||
100 | return -1; | |
846978d7 | 101 | } |
dd65d8c8 | 102 | |
2186321f | 103 | int wxURI::DecodeEscape(wxString::const_iterator& i) |
dd65d8c8 | 104 | { |
2186321f VZ |
105 | int hi = CharToHex(*++i); |
106 | if ( hi == -1 ) | |
107 | return -1; | |
8404931e | 108 | |
2186321f VZ |
109 | int lo = CharToHex(*++i); |
110 | if ( lo == -1 ) | |
111 | return -1; | |
c9f78968 | 112 | |
2186321f | 113 | return (hi << 4) | lo; |
dd65d8c8 RN |
114 | } |
115 | ||
2186321f | 116 | /* static */ |
86470d43 RN |
117 | wxString wxURI::Unescape(const wxString& uri) |
118 | { | |
2186321f VZ |
119 | // the unescaped version can't be longer than the original one |
120 | wxCharBuffer buf(uri.length()); | |
121 | char *p = buf.data(); | |
86470d43 | 122 | |
2186321f | 123 | for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p ) |
86470d43 | 124 | { |
2186321f VZ |
125 | char c = *i; |
126 | if ( c == '%' ) | |
86470d43 | 127 | { |
2186321f VZ |
128 | int n = wxURI::DecodeEscape(i); |
129 | if ( n == -1 ) | |
130 | return wxString(); | |
131 | ||
132 | wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" ); | |
133 | ||
5c33522f | 134 | c = static_cast<char>(n); |
86470d43 | 135 | } |
2186321f VZ |
136 | |
137 | *p = c; | |
86470d43 RN |
138 | } |
139 | ||
2186321f | 140 | *p = '\0'; |
86470d43 | 141 | |
2186321f VZ |
142 | // by default assume that the URI is in UTF-8, this is the most common |
143 | // practice | |
144 | wxString s = wxString::FromUTF8(buf); | |
145 | if ( s.empty() ) | |
146 | { | |
147 | // if it isn't, use latin-1 as a fallback -- at least this always | |
148 | // succeeds | |
149 | s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf); | |
150 | } | |
151 | ||
152 | return s; | |
dd65d8c8 RN |
153 | } |
154 | ||
2186321f | 155 | void wxURI::AppendNextEscaped(wxString& s, const char *& p) |
dd65d8c8 | 156 | { |
2186321f VZ |
157 | // check for an already encoded character: |
158 | // | |
ce321570 | 159 | // pct-encoded = "%" HEXDIG HEXDIG |
2186321f VZ |
160 | if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) ) |
161 | { | |
162 | s += *p++; | |
163 | s += *p++; | |
164 | s += *p++; | |
165 | } | |
166 | else // really needs escaping | |
167 | { | |
168 | static const char* hexDigits = "0123456789abcdef"; | |
169 | ||
170 | const char c = *p++; | |
171 | ||
172 | s += '%'; | |
173 | s += hexDigits[(c >> 4) & 15]; | |
174 | s += hexDigits[c & 15]; | |
175 | } | |
dd65d8c8 RN |
176 | } |
177 | ||
4860d40d RN |
178 | // --------------------------------------------------------------------------- |
179 | // GetUser | |
180 | // GetPassword | |
181 | // | |
182 | // Gets the username and password via the old URL method. | |
183 | // --------------------------------------------------------------------------- | |
184 | wxString wxURI::GetUser() const | |
185 | { | |
62e3e6c2 VZ |
186 | // if there is no colon at all, find() returns npos and this method returns |
187 | // the entire string which is correct as it means that password was omitted | |
188 | return m_userinfo(0, m_userinfo.find(':')); | |
4860d40d RN |
189 | } |
190 | ||
191 | wxString wxURI::GetPassword() const | |
192 | { | |
62e3e6c2 | 193 | size_t posColon = m_userinfo.find(':'); |
4860d40d | 194 | |
62e3e6c2 | 195 | if ( posColon == wxString::npos ) |
2186321f | 196 | return ""; |
62e3e6c2 VZ |
197 | |
198 | return m_userinfo(posColon + 1, wxString::npos); | |
dd65d8c8 RN |
199 | } |
200 | ||
2186321f VZ |
201 | // combine all URI fields in a single string, applying funcDecode to each |
202 | // component which it may make sense to decode (i.e. "unescape") | |
203 | wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const | |
86470d43 RN |
204 | { |
205 | wxString ret; | |
206 | ||
207 | if (HasScheme()) | |
2186321f | 208 | ret += m_scheme + ":"; |
86470d43 RN |
209 | |
210 | if (HasServer()) | |
211 | { | |
2186321f | 212 | ret += "//"; |
86470d43 | 213 | |
4860d40d | 214 | if (HasUserInfo()) |
2186321f | 215 | ret += funcDecode(m_userinfo) + "@"; |
86470d43 RN |
216 | |
217 | if (m_hostType == wxURI_REGNAME) | |
2186321f | 218 | ret += funcDecode(m_server); |
86470d43 RN |
219 | else |
220 | ret += m_server; | |
221 | ||
222 | if (HasPort()) | |
2186321f | 223 | ret += ":" + m_port; |
86470d43 RN |
224 | } |
225 | ||
2186321f | 226 | ret += funcDecode(m_path); |
86470d43 RN |
227 | |
228 | if (HasQuery()) | |
2186321f | 229 | ret += "?" + funcDecode(m_query); |
86470d43 RN |
230 | |
231 | if (HasFragment()) | |
2186321f | 232 | ret += "#" + funcDecode(m_fragment); |
86470d43 RN |
233 | |
234 | return ret; | |
235 | } | |
236 | ||
ce321570 RN |
237 | // --------------------------------------------------------------------------- |
238 | // Comparison | |
239 | // --------------------------------------------------------------------------- | |
240 | ||
2186321f | 241 | bool wxURI::operator==(const wxURI& uri) const |
846978d7 | 242 | { |
dd65d8c8 RN |
243 | if (HasScheme()) |
244 | { | |
245 | if(m_scheme != uri.m_scheme) | |
246 | return false; | |
247 | } | |
248 | else if (uri.HasScheme()) | |
249 | return false; | |
250 | ||
251 | ||
252 | if (HasServer()) | |
253 | { | |
4860d40d | 254 | if (HasUserInfo()) |
dd65d8c8 | 255 | { |
4860d40d | 256 | if (m_userinfo != uri.m_userinfo) |
dd65d8c8 RN |
257 | return false; |
258 | } | |
4860d40d | 259 | else if (uri.HasUserInfo()) |
dd65d8c8 RN |
260 | return false; |
261 | ||
262 | if (m_server != uri.m_server || | |
263 | m_hostType != uri.m_hostType) | |
264 | return false; | |
265 | ||
266 | if (HasPort()) | |
267 | { | |
268 | if(m_port != uri.m_port) | |
269 | return false; | |
270 | } | |
271 | else if (uri.HasPort()) | |
272 | return false; | |
273 | } | |
274 | else if (uri.HasServer()) | |
275 | return false; | |
276 | ||
277 | ||
278 | if (HasPath()) | |
279 | { | |
280 | if(m_path != uri.m_path) | |
281 | return false; | |
282 | } | |
283 | else if (uri.HasPath()) | |
284 | return false; | |
285 | ||
286 | if (HasQuery()) | |
287 | { | |
288 | if (m_query != uri.m_query) | |
289 | return false; | |
290 | } | |
291 | else if (uri.HasQuery()) | |
292 | return false; | |
293 | ||
294 | if (HasFragment()) | |
295 | { | |
296 | if (m_fragment != uri.m_fragment) | |
297 | return false; | |
298 | } | |
299 | else if (uri.HasFragment()) | |
300 | return false; | |
301 | ||
302 | return true; | |
303 | } | |
304 | ||
305 | // --------------------------------------------------------------------------- | |
306 | // IsReference | |
307 | // | |
308 | // if there is no authority or scheme, it is a reference | |
309 | // --------------------------------------------------------------------------- | |
310 | ||
311 | bool wxURI::IsReference() const | |
2186321f VZ |
312 | { |
313 | return !HasScheme() || !HasServer(); | |
314 | } | |
dd65d8c8 RN |
315 | |
316 | // --------------------------------------------------------------------------- | |
317 | // Parse | |
318 | // | |
319 | // Master URI parsing method. Just calls the individual parsing methods | |
320 | // | |
321 | // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] | |
4cc52142 | 322 | // URI-reference = URI / relative |
dd65d8c8 RN |
323 | // --------------------------------------------------------------------------- |
324 | ||
2186321f | 325 | bool wxURI::Parse(const char *uri) |
dd65d8c8 RN |
326 | { |
327 | uri = ParseScheme(uri); | |
2186321f VZ |
328 | if ( uri ) |
329 | uri = ParseAuthority(uri); | |
330 | if ( uri ) | |
331 | uri = ParsePath(uri); | |
332 | if ( uri ) | |
333 | uri = ParseQuery(uri); | |
334 | if ( uri ) | |
335 | uri = ParseFragment(uri); | |
336 | ||
337 | // we only succeed if we parsed the entire string | |
338 | return uri && *uri == '\0'; | |
dd65d8c8 RN |
339 | } |
340 | ||
2186321f | 341 | const char* wxURI::ParseScheme(const char *uri) |
dd65d8c8 | 342 | { |
2186321f | 343 | const char * const start = uri; |
dd65d8c8 | 344 | |
2186321f VZ |
345 | // assume that we have a scheme if we have the valid start of it |
346 | if ( IsAlpha(*uri) ) | |
dd65d8c8 RN |
347 | { |
348 | m_scheme += *uri++; | |
349 | ||
350 | //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
846978d7 | 351 | while (IsAlpha(*uri) || IsDigit(*uri) || |
2186321f VZ |
352 | *uri == '+' || |
353 | *uri == '-' || | |
354 | *uri == '.') | |
846978d7 WS |
355 | { |
356 | m_scheme += *uri++; | |
dd65d8c8 RN |
357 | } |
358 | ||
359 | //valid scheme? | |
2186321f | 360 | if (*uri == ':') |
846978d7 | 361 | { |
dd65d8c8 RN |
362 | //mark the scheme as valid |
363 | m_fields |= wxURI_SCHEME; | |
364 | ||
365 | //move reference point up to input buffer | |
2186321f VZ |
366 | ++uri; |
367 | } | |
368 | else // no valid scheme finally | |
369 | { | |
370 | uri = start; // rewind | |
371 | m_scheme.clear(); | |
dd65d8c8 | 372 | } |
dd65d8c8 | 373 | } |
2186321f | 374 | //else: can't have schema, possible a relative URI |
dd65d8c8 | 375 | |
2186321f | 376 | return uri; |
dd65d8c8 RN |
377 | } |
378 | ||
2186321f | 379 | const char* wxURI::ParseAuthority(const char* uri) |
dd65d8c8 RN |
380 | { |
381 | // authority = [ userinfo "@" ] host [ ":" port ] | |
2186321f | 382 | if ( uri[0] == '/' && uri[1] == '/' ) |
dd65d8c8 | 383 | { |
97ad053b | 384 | //skip past the two slashes |
dd65d8c8 RN |
385 | uri += 2; |
386 | ||
97ad053b VZ |
387 | // ############# DEVIATION FROM RFC ######################### |
388 | // Don't parse the server component for file URIs | |
2186321f | 389 | if(m_scheme != "file") |
97ad053b VZ |
390 | { |
391 | //normal way | |
2186321f VZ |
392 | uri = ParseUserInfo(uri); |
393 | uri = ParseServer(uri); | |
394 | return ParsePort(uri); | |
97ad053b | 395 | } |
dd65d8c8 RN |
396 | } |
397 | ||
398 | return uri; | |
399 | } | |
400 | ||
2186321f | 401 | const char* wxURI::ParseUserInfo(const char* uri) |
dd65d8c8 | 402 | { |
2186321f | 403 | const char * const start = uri; |
dd65d8c8 RN |
404 | |
405 | // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) | |
2186321f | 406 | while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' ) |
dd65d8c8 | 407 | { |
2186321f | 408 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' ) |
24ca04e7 | 409 | m_userinfo += *uri++; |
dd65d8c8 | 410 | else |
2186321f | 411 | AppendNextEscaped(m_userinfo, uri); |
dd65d8c8 RN |
412 | } |
413 | ||
2186321f | 414 | if ( *uri++ == '@' ) |
dd65d8c8 | 415 | { |
2186321f | 416 | // valid userinfo |
4860d40d | 417 | m_fields |= wxURI_USERINFO; |
dd65d8c8 RN |
418 | } |
419 | else | |
2186321f VZ |
420 | { |
421 | uri = start; // rewind | |
422 | m_userinfo.clear(); | |
423 | } | |
dd65d8c8 | 424 | |
2186321f | 425 | return uri; |
dd65d8c8 RN |
426 | } |
427 | ||
2186321f | 428 | const char* wxURI::ParseServer(const char* uri) |
dd65d8c8 | 429 | { |
2186321f | 430 | const char * const start = uri; |
dd65d8c8 RN |
431 | |
432 | // host = IP-literal / IPv4address / reg-name | |
433 | // IP-literal = "[" ( IPv6address / IPvFuture ) "]" | |
2186321f | 434 | if (*uri == '[') |
dd65d8c8 | 435 | { |
2186321f VZ |
436 | ++uri; |
437 | if (ParseIPv6address(uri) && *uri == ']') | |
dd65d8c8 | 438 | { |
dd65d8c8 | 439 | m_hostType = wxURI_IPV6ADDRESS; |
846978d7 | 440 | |
c4dbb953 | 441 | m_server.assign(start + 1, uri - start - 1); |
2186321f | 442 | ++uri; |
dd65d8c8 RN |
443 | } |
444 | else | |
445 | { | |
2186321f | 446 | uri = start + 1; // skip the leading '[' again |
dd65d8c8 | 447 | |
2186321f | 448 | if (ParseIPvFuture(uri) && *uri == ']') |
dd65d8c8 | 449 | { |
846978d7 WS |
450 | m_hostType = wxURI_IPVFUTURE; |
451 | ||
c4dbb953 | 452 | m_server.assign(start + 1, uri - start - 1); |
2186321f VZ |
453 | ++uri; |
454 | } | |
455 | else // unrecognized IP literal | |
456 | { | |
457 | uri = start; | |
dd65d8c8 | 458 | } |
dd65d8c8 RN |
459 | } |
460 | } | |
2186321f | 461 | else // IPv4 or a reg-name |
dd65d8c8 RN |
462 | { |
463 | if (ParseIPv4address(uri)) | |
464 | { | |
465 | m_hostType = wxURI_IPV4ADDRESS; | |
466 | ||
c4dbb953 | 467 | m_server.assign(start, uri - start); |
dd65d8c8 | 468 | } |
846978d7 | 469 | else |
2186321f VZ |
470 | { |
471 | uri = start; | |
472 | } | |
dd65d8c8 RN |
473 | } |
474 | ||
2186321f | 475 | if ( m_hostType == wxURI_REGNAME ) |
dd65d8c8 | 476 | { |
2186321f | 477 | uri = start; |
dd65d8c8 | 478 | // reg-name = *( unreserved / pct-encoded / sub-delims ) |
2186321f | 479 | while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' ) |
dd65d8c8 | 480 | { |
2186321f | 481 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) ) |
24ca04e7 | 482 | m_server += *uri++; |
dd65d8c8 | 483 | else |
2186321f | 484 | AppendNextEscaped(m_server, uri); |
846978d7 | 485 | } |
dd65d8c8 RN |
486 | } |
487 | ||
dd65d8c8 RN |
488 | m_fields |= wxURI_SERVER; |
489 | ||
490 | return uri; | |
491 | } | |
492 | ||
846978d7 | 493 | |
2186321f | 494 | const char* wxURI::ParsePort(const char* uri) |
dd65d8c8 | 495 | { |
dd65d8c8 | 496 | // port = *DIGIT |
2186321f | 497 | if( *uri == ':' ) |
dd65d8c8 RN |
498 | { |
499 | ++uri; | |
2186321f | 500 | while ( IsDigit(*uri) ) |
dd65d8c8 RN |
501 | { |
502 | m_port += *uri++; | |
846978d7 | 503 | } |
dd65d8c8 | 504 | |
dd65d8c8 RN |
505 | m_fields |= wxURI_PORT; |
506 | } | |
507 | ||
508 | return uri; | |
509 | } | |
510 | ||
2186321f | 511 | const char* wxURI::ParsePath(const char* uri) |
dd65d8c8 | 512 | { |
dd65d8c8 RN |
513 | /// hier-part = "//" authority path-abempty |
514 | /// / path-absolute | |
515 | /// / path-rootless | |
516 | /// / path-empty | |
517 | /// | |
518 | /// relative-part = "//" authority path-abempty | |
519 | /// / path-absolute | |
520 | /// / path-noscheme | |
521 | /// / path-empty | |
522 | /// | |
523 | /// path-abempty = *( "/" segment ) | |
524 | /// path-absolute = "/" [ segment-nz *( "/" segment ) ] | |
525 | /// path-noscheme = segment-nz-nc *( "/" segment ) | |
526 | /// path-rootless = segment-nz *( "/" segment ) | |
527 | /// path-empty = 0<pchar> | |
528 | /// | |
529 | /// segment = *pchar | |
530 | /// segment-nz = 1*pchar | |
531 | /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) | |
532 | /// ; non-zero-length segment without any colon ":" | |
533 | /// | |
534 | /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" | |
dd65d8c8 | 535 | |
2186321f VZ |
536 | if ( IsEndPath(*uri) ) |
537 | return uri; | |
dd65d8c8 | 538 | |
2186321f VZ |
539 | const bool isAbs = *uri == '/'; |
540 | if ( isAbs ) | |
541 | m_path += *uri++; | |
542 | ||
543 | wxArrayString segments; | |
544 | wxString segment; | |
545 | for ( ;; ) | |
dd65d8c8 | 546 | { |
2186321f VZ |
547 | const bool endPath = IsEndPath(*uri); |
548 | if ( endPath || *uri == '/' ) | |
dd65d8c8 | 549 | { |
2186321f VZ |
550 | // end of a segment, look at what we got |
551 | if ( segment == ".." ) | |
dd65d8c8 | 552 | { |
2186321f VZ |
553 | if ( !segments.empty() && *segments.rbegin() != ".." ) |
554 | segments.pop_back(); | |
555 | else if ( !isAbs ) | |
556 | segments.push_back(".."); | |
dd65d8c8 | 557 | } |
2186321f | 558 | else if ( segment == "." ) |
dd65d8c8 | 559 | { |
2186321f VZ |
560 | // normally we ignore "." but the last one should be taken into |
561 | // account as "path/." is the same as "path/" and not just "path" | |
562 | if ( endPath ) | |
563 | segments.push_back(""); | |
dd65d8c8 | 564 | } |
2186321f | 565 | else // normal segment |
dd65d8c8 | 566 | { |
2186321f | 567 | segments.push_back(segment); |
dd65d8c8 RN |
568 | } |
569 | ||
2186321f VZ |
570 | if ( endPath ) |
571 | break; | |
572 | ||
573 | segment.clear(); | |
574 | ++uri; | |
575 | continue; | |
dd65d8c8 | 576 | } |
2186321f VZ |
577 | |
578 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' ) | |
579 | segment += *uri++; | |
580 | else | |
581 | AppendNextEscaped(segment, uri); | |
dd65d8c8 RN |
582 | } |
583 | ||
2186321f VZ |
584 | m_path += wxJoin(segments, '/', '\0'); |
585 | m_fields |= wxURI_PATH; | |
586 | ||
dd65d8c8 RN |
587 | return uri; |
588 | } | |
589 | ||
590 | ||
2186321f | 591 | const char* wxURI::ParseQuery(const char* uri) |
dd65d8c8 | 592 | { |
dd65d8c8 | 593 | // query = *( pchar / "/" / "?" ) |
2186321f | 594 | if ( *uri == '?' ) |
dd65d8c8 RN |
595 | { |
596 | ++uri; | |
2186321f | 597 | while ( *uri && *uri != '#' ) |
dd65d8c8 | 598 | { |
2186321f VZ |
599 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || |
600 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' ) | |
601 | m_query += *uri++; | |
dd65d8c8 | 602 | else |
2186321f | 603 | AppendNextEscaped(m_query, uri); |
dd65d8c8 RN |
604 | } |
605 | ||
dd65d8c8 RN |
606 | m_fields |= wxURI_QUERY; |
607 | } | |
608 | ||
609 | return uri; | |
610 | } | |
611 | ||
612 | ||
2186321f | 613 | const char* wxURI::ParseFragment(const char* uri) |
dd65d8c8 | 614 | { |
dd65d8c8 | 615 | // fragment = *( pchar / "/" / "?" ) |
2186321f | 616 | if ( *uri == '#' ) |
dd65d8c8 RN |
617 | { |
618 | ++uri; | |
2186321f | 619 | while ( *uri ) |
dd65d8c8 | 620 | { |
2186321f VZ |
621 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || |
622 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') | |
623 | m_fragment += *uri++; | |
dd65d8c8 | 624 | else |
2186321f | 625 | AppendNextEscaped(m_fragment, uri); |
dd65d8c8 RN |
626 | } |
627 | ||
dd65d8c8 RN |
628 | m_fields |= wxURI_FRAGMENT; |
629 | } | |
630 | ||
631 | return uri; | |
632 | } | |
633 | ||
634 | // --------------------------------------------------------------------------- | |
ce321570 | 635 | // Resolve |
dd65d8c8 | 636 | // |
ce321570 | 637 | // Builds missing components of this uri from a base uri |
dd65d8c8 | 638 | // |
ce321570 RN |
639 | // A version of the algorithm outlined in the RFC is used here |
640 | // (it is shown in comments) | |
641 | // | |
846978d7 | 642 | // Note that an empty URI inherits all components |
dd65d8c8 RN |
643 | // --------------------------------------------------------------------------- |
644 | ||
2186321f VZ |
645 | /* static */ |
646 | wxArrayString wxURI::SplitInSegments(const wxString& path) | |
647 | { | |
648 | return wxSplit(path, '/', '\0' /* no escape character */); | |
649 | } | |
650 | ||
8404931e | 651 | void wxURI::Resolve(const wxURI& base, int flags) |
dd65d8c8 | 652 | { |
846978d7 | 653 | wxASSERT_MSG(!base.IsReference(), |
2186321f | 654 | "wxURI to inherit from must not be a reference!"); |
dd65d8c8 | 655 | |
2186321f VZ |
656 | // If we aren't being strict, enable the older (pre-RFC2396) loophole that |
657 | // allows this uri to inherit other properties from the base uri - even if | |
658 | // the scheme is defined | |
8404931e VZ |
659 | if ( !(flags & wxURI_STRICT) && |
660 | HasScheme() && base.HasScheme() && | |
661 | m_scheme == base.m_scheme ) | |
846978d7 | 662 | { |
dd65d8c8 RN |
663 | m_fields -= wxURI_SCHEME; |
664 | } | |
665 | ||
666 | ||
667 | // Do nothing if this is an absolute wxURI | |
668 | // if defined(R.scheme) then | |
669 | // T.scheme = R.scheme; | |
670 | // T.authority = R.authority; | |
671 | // T.path = remove_dot_segments(R.path); | |
672 | // T.query = R.query; | |
673 | if (HasScheme()) | |
dd65d8c8 | 674 | return; |
dd65d8c8 | 675 | |
ea4daac4 | 676 | //No scheme - inherit |
dd65d8c8 RN |
677 | m_scheme = base.m_scheme; |
678 | m_fields |= wxURI_SCHEME; | |
679 | ||
680 | // All we need to do for relative URIs with an | |
681 | // authority component is just inherit the scheme | |
682 | // if defined(R.authority) then | |
683 | // T.authority = R.authority; | |
684 | // T.path = remove_dot_segments(R.path); | |
685 | // T.query = R.query; | |
686 | if (HasServer()) | |
dd65d8c8 | 687 | return; |
dd65d8c8 RN |
688 | |
689 | //No authority - inherit | |
4860d40d | 690 | if (base.HasUserInfo()) |
dd65d8c8 | 691 | { |
4860d40d RN |
692 | m_userinfo = base.m_userinfo; |
693 | m_fields |= wxURI_USERINFO; | |
dd65d8c8 | 694 | } |
846978d7 | 695 | |
dd65d8c8 RN |
696 | m_server = base.m_server; |
697 | m_hostType = base.m_hostType; | |
698 | m_fields |= wxURI_SERVER; | |
846978d7 | 699 | |
dd65d8c8 RN |
700 | if (base.HasPort()) |
701 | { | |
702 | m_port = base.m_port; | |
703 | m_fields |= wxURI_PORT; | |
704 | } | |
846978d7 | 705 | |
dd65d8c8 RN |
706 | |
707 | // Simple path inheritance from base | |
708 | if (!HasPath()) | |
709 | { | |
710 | // T.path = Base.path; | |
711 | m_path = base.m_path; | |
712 | m_fields |= wxURI_PATH; | |
846978d7 | 713 | |
dd65d8c8 RN |
714 | |
715 | // if defined(R.query) then | |
716 | // T.query = R.query; | |
717 | // else | |
718 | // T.query = Base.query; | |
719 | // endif; | |
720 | if (!HasQuery()) | |
721 | { | |
722 | m_query = base.m_query; | |
723 | m_fields |= wxURI_QUERY; | |
724 | } | |
725 | } | |
2186321f | 726 | else if ( m_path.empty() || m_path[0u] != '/' ) |
dd65d8c8 RN |
727 | { |
728 | // if (R.path starts-with "/") then | |
729 | // T.path = remove_dot_segments(R.path); | |
730 | // else | |
731 | // T.path = merge(Base.path, R.path); | |
732 | // T.path = remove_dot_segments(T.path); | |
733 | // endif; | |
734 | // T.query = R.query; | |
2186321f VZ |
735 | // |
736 | // So we don't do anything for absolute paths and implement merge for | |
737 | // the relative ones | |
c9f78968 | 738 | |
2186321f VZ |
739 | wxArrayString our(SplitInSegments(m_path)), |
740 | result(SplitInSegments(base.m_path)); | |
c9f78968 | 741 | |
2186321f VZ |
742 | if ( !result.empty() ) |
743 | result.pop_back(); | |
846978d7 | 744 | |
2186321f | 745 | if ( our.empty() ) |
dd65d8c8 | 746 | { |
2186321f VZ |
747 | // if we have an empty path it means we were constructed from a "." |
748 | // string or something similar (e.g. "././././"), it should count | |
749 | // as (empty) segment | |
750 | our.push_back(""); | |
dd65d8c8 | 751 | } |
dd65d8c8 | 752 | |
2186321f VZ |
753 | const wxArrayString::const_iterator end = our.end(); |
754 | for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i ) | |
dd65d8c8 | 755 | { |
2186321f | 756 | if ( i->empty() || *i == "." ) |
dd65d8c8 | 757 | { |
2186321f VZ |
758 | // as in ParsePath(), while normally we ignore the empty |
759 | // segments, we need to take account of them at the end | |
760 | if ( i == end - 1 ) | |
761 | result.push_back(""); | |
762 | continue; | |
dd65d8c8 | 763 | } |
dd65d8c8 | 764 | |
2186321f | 765 | if ( *i == ".." ) |
dd65d8c8 | 766 | { |
2186321f VZ |
767 | if ( !result.empty() ) |
768 | { | |
769 | result.pop_back(); | |
dd65d8c8 | 770 | |
2186321f VZ |
771 | if ( i == end - 1 ) |
772 | result.push_back(""); | |
773 | } | |
774 | //else: just ignore, extra ".." don't accumulate | |
dd65d8c8 RN |
775 | } |
776 | else | |
777 | { | |
2186321f VZ |
778 | if ( result.empty() ) |
779 | { | |
780 | // ensure that the resulting path will always be absolute | |
781 | result.push_back(""); | |
782 | } | |
783 | ||
784 | result.push_back(*i); | |
dd65d8c8 RN |
785 | } |
786 | } | |
2186321f VZ |
787 | |
788 | m_path = wxJoin(result, '/', '\0'); | |
dd65d8c8 RN |
789 | } |
790 | ||
2186321f | 791 | //T.fragment = R.fragment; |
dd65d8c8 RN |
792 | } |
793 | ||
794 | // --------------------------------------------------------------------------- | |
ce321570 RN |
795 | // ParseH16 |
796 | // | |
797 | // Parses 1 to 4 hex values. Returns true if the first character of the input | |
2186321f | 798 | // string is a valid hex character. It is the caller's responsibility to move |
ce321570 RN |
799 | // the input string back to its original position on failure. |
800 | // --------------------------------------------------------------------------- | |
801 | ||
2186321f | 802 | bool wxURI::ParseH16(const char*& uri) |
ce321570 RN |
803 | { |
804 | // h16 = 1*4HEXDIG | |
805 | if(!IsHex(*++uri)) | |
806 | return false; | |
807 | ||
808 | if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri)) | |
809 | ++uri; | |
810 | ||
811 | return true; | |
812 | } | |
813 | ||
814 | // --------------------------------------------------------------------------- | |
815 | // ParseIPXXX | |
816 | // | |
846978d7 WS |
817 | // Parses a certain version of an IP address and moves the input string past |
818 | // it. Returns true if the input string contains the proper version of an ip | |
37424888 | 819 | // address. It is the caller's responsibility to move the input string back |
ce321570 | 820 | // to its original position on failure. |
dd65d8c8 RN |
821 | // --------------------------------------------------------------------------- |
822 | ||
2186321f | 823 | bool wxURI::ParseIPv4address(const char*& uri) |
dd65d8c8 RN |
824 | { |
825 | //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet | |
826 | // | |
827 | //dec-octet = DIGIT ; 0-9 | |
828 | // / %x31-39 DIGIT ; 10-99 | |
829 | // / "1" 2DIGIT ; 100-199 | |
830 | // / "2" %x30-34 DIGIT ; 200-249 | |
831 | // / "25" %x30-35 ; 250-255 | |
832 | size_t iIPv4 = 0; | |
833 | if (IsDigit(*uri)) | |
834 | { | |
835 | ++iIPv4; | |
836 | ||
846978d7 | 837 | |
dd65d8c8 RN |
838 | //each ip part must be between 0-255 (dupe of version in for loop) |
839 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
840 | //100 or less (note !) | |
2186321f | 841 | !( (*(uri-2) < '2') || |
846978d7 | 842 | //240 or less |
2186321f VZ |
843 | (*(uri-2) == '2' && |
844 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
dd65d8c8 RN |
845 | ) |
846 | ) | |
847 | ) | |
848 | { | |
849 | return false; | |
850 | } | |
851 | ||
852 | if(IsDigit(*uri))++uri; | |
853 | ||
854 | //compilers should unroll this loop | |
855 | for(; iIPv4 < 4; ++iIPv4) | |
856 | { | |
2186321f | 857 | if (*uri != '.' || !IsDigit(*++uri)) |
dd65d8c8 RN |
858 | break; |
859 | ||
860 | //each ip part must be between 0-255 | |
861 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
862 | //100 or less (note !) | |
2186321f | 863 | !( (*(uri-2) < '2') || |
846978d7 | 864 | //240 or less |
2186321f VZ |
865 | (*(uri-2) == '2' && |
866 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
dd65d8c8 RN |
867 | ) |
868 | ) | |
869 | ) | |
870 | { | |
871 | return false; | |
872 | } | |
873 | if(IsDigit(*uri))++uri; | |
874 | } | |
875 | } | |
876 | return iIPv4 == 4; | |
877 | } | |
878 | ||
2186321f | 879 | bool wxURI::ParseIPv6address(const char*& uri) |
dd65d8c8 RN |
880 | { |
881 | // IPv6address = 6( h16 ":" ) ls32 | |
882 | // / "::" 5( h16 ":" ) ls32 | |
883 | // / [ h16 ] "::" 4( h16 ":" ) ls32 | |
884 | // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 | |
885 | // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 | |
886 | // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 | |
887 | // / [ *4( h16 ":" ) h16 ] "::" ls32 | |
888 | // / [ *5( h16 ":" ) h16 ] "::" h16 | |
889 | // / [ *6( h16 ":" ) h16 ] "::" | |
890 | ||
891 | size_t numPrefix = 0, | |
892 | maxPostfix; | |
893 | ||
894 | bool bEndHex = false; | |
895 | ||
896 | for( ; numPrefix < 6; ++numPrefix) | |
897 | { | |
898 | if(!ParseH16(uri)) | |
899 | { | |
900 | --uri; | |
901 | bEndHex = true; | |
902 | break; | |
903 | } | |
846978d7 | 904 | |
2186321f | 905 | if(*uri != ':') |
dd65d8c8 RN |
906 | { |
907 | break; | |
908 | } | |
909 | } | |
910 | ||
911 | if(!bEndHex && !ParseH16(uri)) | |
912 | { | |
913 | --uri; | |
914 | ||
915 | if (numPrefix) | |
916 | return false; | |
917 | ||
2186321f | 918 | if (*uri == ':') |
dd65d8c8 | 919 | { |
2186321f | 920 | if (*++uri != ':') |
dd65d8c8 RN |
921 | return false; |
922 | ||
923 | maxPostfix = 5; | |
924 | } | |
925 | else | |
926 | maxPostfix = 6; | |
927 | } | |
928 | else | |
929 | { | |
2186321f | 930 | if (*uri != ':' || *(uri+1) != ':') |
dd65d8c8 RN |
931 | { |
932 | if (numPrefix != 6) | |
933 | return false; | |
934 | ||
2186321f | 935 | while (*--uri != ':') {} |
dd65d8c8 RN |
936 | ++uri; |
937 | ||
2186321f | 938 | const char * const start = uri; |
dd65d8c8 RN |
939 | //parse ls32 |
940 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
2186321f | 941 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) |
dd65d8c8 RN |
942 | return true; |
943 | ||
2186321f | 944 | uri = start; |
dd65d8c8 RN |
945 | |
946 | if (ParseIPv4address(uri)) | |
947 | return true; | |
948 | else | |
949 | return false; | |
950 | } | |
951 | else | |
952 | { | |
953 | uri += 2; | |
846978d7 | 954 | |
dd65d8c8 RN |
955 | if (numPrefix > 3) |
956 | maxPostfix = 0; | |
957 | else | |
958 | maxPostfix = 4 - numPrefix; | |
959 | } | |
960 | } | |
961 | ||
962 | bool bAllowAltEnding = maxPostfix == 0; | |
963 | ||
964 | for(; maxPostfix != 0; --maxPostfix) | |
965 | { | |
2186321f | 966 | if(!ParseH16(uri) || *uri != ':') |
dd65d8c8 RN |
967 | return false; |
968 | } | |
969 | ||
970 | if(numPrefix <= 4) | |
971 | { | |
2186321f | 972 | const char * const start = uri; |
dd65d8c8 RN |
973 | //parse ls32 |
974 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
2186321f | 975 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) |
dd65d8c8 RN |
976 | return true; |
977 | ||
2186321f | 978 | uri = start; |
dd65d8c8 RN |
979 | |
980 | if (ParseIPv4address(uri)) | |
981 | return true; | |
982 | ||
2186321f | 983 | uri = start; |
846978d7 | 984 | |
dd65d8c8 RN |
985 | if (!bAllowAltEnding) |
986 | return false; | |
987 | } | |
988 | ||
989 | if(numPrefix <= 5 && ParseH16(uri)) | |
990 | return true; | |
991 | ||
992 | return true; | |
993 | } | |
994 | ||
2186321f | 995 | bool wxURI::ParseIPvFuture(const char*& uri) |
dd65d8c8 RN |
996 | { |
997 | // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) | |
2186321f | 998 | if (*++uri != 'v' || !IsHex(*++uri)) |
dd65d8c8 RN |
999 | return false; |
1000 | ||
2186321f VZ |
1001 | while (IsHex(*++uri)) |
1002 | ; | |
dd65d8c8 | 1003 | |
2186321f | 1004 | if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':')) |
dd65d8c8 RN |
1005 | return false; |
1006 | ||
2186321f | 1007 | while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {} |
dd65d8c8 RN |
1008 | |
1009 | return true; | |
1010 | } | |
1011 | ||
1012 | ||
ce321570 RN |
1013 | // --------------------------------------------------------------------------- |
1014 | // IsXXX | |
1015 | // | |
1016 | // Returns true if the passed in character meets the criteria of the method | |
1017 | // --------------------------------------------------------------------------- | |
1018 | ||
2186321f VZ |
1019 | // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
1020 | bool wxURI::IsUnreserved(char c) | |
1021 | { | |
1022 | return IsAlpha(c) || | |
1023 | IsDigit(c) || | |
1024 | c == '-' || | |
1025 | c == '.' || | |
1026 | c == '_' || | |
1027 | c == '~' | |
846978d7 | 1028 | ; |
dd65d8c8 RN |
1029 | } |
1030 | ||
2186321f | 1031 | bool wxURI::IsReserved(char c) |
846978d7 | 1032 | { |
dd65d8c8 RN |
1033 | return IsGenDelim(c) || IsSubDelim(c); |
1034 | } | |
1035 | ||
2186321f VZ |
1036 | // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
1037 | bool wxURI::IsGenDelim(char c) | |
dd65d8c8 | 1038 | { |
2186321f VZ |
1039 | return c == ':' || |
1040 | c == '/' || | |
1041 | c == '?' || | |
1042 | c == '#' || | |
1043 | c == '[' || | |
1044 | c == ']' || | |
1045 | c == '@'; | |
dd65d8c8 RN |
1046 | } |
1047 | ||
2186321f VZ |
1048 | // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
1049 | // / "*" / "+" / "," / ";" / "=" | |
1050 | bool wxURI::IsSubDelim(char c) | |
dd65d8c8 | 1051 | { |
2186321f VZ |
1052 | return c == '!' || |
1053 | c == '$' || | |
1054 | c == '&' || | |
1055 | c == '\'' || | |
1056 | c == '(' || | |
1057 | c == ')' || | |
1058 | c == '*' || | |
1059 | c == '+' || | |
1060 | c == ',' || | |
1061 | c == ';' || | |
1062 | c == '=' | |
dd65d8c8 RN |
1063 | ; |
1064 | } | |
1065 | ||
2186321f VZ |
1066 | bool wxURI::IsHex(char c) |
1067 | { | |
1068 | return IsDigit(c) || | |
1069 | (c >= 'a' && c <= 'f') || | |
1070 | (c >= 'A' && c <= 'F'); | |
1071 | } | |
dd65d8c8 | 1072 | |
2186321f VZ |
1073 | bool wxURI::IsAlpha(char c) |
1074 | { | |
1075 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); | |
1076 | } | |
dd65d8c8 | 1077 | |
2186321f VZ |
1078 | bool wxURI::IsDigit(char c) |
1079 | { | |
1080 | return c >= '0' && c <= '9'; | |
1081 | } | |
dd65d8c8 | 1082 | |
2186321f VZ |
1083 | bool wxURI::IsEndPath(char c) |
1084 | { | |
1085 | return c == '\0' || c == '#' || c == '?'; | |
1086 | } | |
dd65d8c8 | 1087 |