]>
Commit | Line | Data |
---|---|---|
dd65d8c8 RN |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: uri.cpp | |
2186321f VZ |
3 | // Purpose: Implementation of a URI parser |
4 | // Author: Ryan Norton, | |
5 | // Vadim Zeitlin (UTF-8 URI support, many other changes) | |
dd65d8c8 RN |
6 | // Created: 10/26/04 |
7 | // RCS-ID: $Id$ | |
2186321f VZ |
8 | // Copyright: (c) 2004 Ryan Norton, |
9 | // 2008 Vadim Zeitlin | |
10 | // Licence: wxWindows licence | |
dd65d8c8 RN |
11 | ///////////////////////////////////////////////////////////////////////////// |
12 | ||
13 | // =========================================================================== | |
14 | // declarations | |
15 | // =========================================================================== | |
16 | ||
17 | // --------------------------------------------------------------------------- | |
18 | // headers | |
19 | // --------------------------------------------------------------------------- | |
20 | ||
dd65d8c8 RN |
21 | // For compilers that support precompilation, includes "wx.h". |
22 | #include "wx/wxprec.h" | |
23 | ||
24 | #ifdef __BORLANDC__ | |
25 | #pragma hdrstop | |
26 | #endif | |
27 | ||
0bf751e7 VS |
28 | #ifndef WX_PRECOMP |
29 | #include "wx/crt.h" | |
30 | #endif | |
31 | ||
dd65d8c8 RN |
32 | #include "wx/uri.h" |
33 | ||
34 | // --------------------------------------------------------------------------- | |
35 | // definitions | |
36 | // --------------------------------------------------------------------------- | |
37 | ||
4115960d | 38 | IMPLEMENT_CLASS(wxURI, wxObject) |
dd65d8c8 RN |
39 | |
40 | // =========================================================================== | |
2186321f | 41 | // wxURI implementation |
dd65d8c8 RN |
42 | // =========================================================================== |
43 | ||
44 | // --------------------------------------------------------------------------- | |
2186321f | 45 | // Constructors and cleanup |
dd65d8c8 RN |
46 | // --------------------------------------------------------------------------- |
47 | ||
2186321f VZ |
48 | wxURI::wxURI() |
49 | : m_hostType(wxURI_REGNAME), | |
50 | m_fields(0) | |
dd65d8c8 RN |
51 | { |
52 | } | |
846978d7 | 53 | |
2186321f VZ |
54 | wxURI::wxURI(const wxString& uri) |
55 | : m_hostType(wxURI_REGNAME), | |
56 | m_fields(0) | |
dd65d8c8 RN |
57 | { |
58 | Create(uri); | |
59 | } | |
60 | ||
2186321f | 61 | bool wxURI::Create(const wxString& uri) |
dd65d8c8 | 62 | { |
2186321f VZ |
63 | if (m_fields) |
64 | Clear(); | |
dd65d8c8 | 65 | |
2186321f | 66 | return Parse(uri.utf8_str()); |
dd65d8c8 RN |
67 | } |
68 | ||
69 | void wxURI::Clear() | |
70 | { | |
2186321f VZ |
71 | m_scheme = |
72 | m_userinfo = | |
73 | m_server = | |
74 | m_port = | |
75 | m_path = | |
76 | m_query = | |
77 | m_fragment = wxEmptyString; | |
dd65d8c8 RN |
78 | |
79 | m_hostType = wxURI_REGNAME; | |
80 | ||
81 | m_fields = 0; | |
82 | } | |
83 | ||
84 | // --------------------------------------------------------------------------- | |
2186321f | 85 | // Escaped characters handling |
dd65d8c8 RN |
86 | // --------------------------------------------------------------------------- |
87 | ||
2186321f VZ |
88 | // Converts a character into a numeric hexadecimal value, or -1 if the passed |
89 | // in character is not a valid hex character | |
dd65d8c8 | 90 | |
2186321f VZ |
91 | /* static */ |
92 | int wxURI::CharToHex(char c) | |
93 | { | |
94 | if ((c >= 'A') && (c <= 'Z')) | |
95 | return c - 'A' + 10; | |
96 | if ((c >= 'a') && (c <= 'z')) | |
97 | return c - 'a' + 10; | |
98 | if ((c >= '0') && (c <= '9')) | |
99 | return c - '0'; | |
100 | ||
101 | return -1; | |
846978d7 | 102 | } |
dd65d8c8 | 103 | |
2186321f | 104 | int wxURI::DecodeEscape(wxString::const_iterator& i) |
dd65d8c8 | 105 | { |
2186321f VZ |
106 | int hi = CharToHex(*++i); |
107 | if ( hi == -1 ) | |
108 | return -1; | |
8404931e | 109 | |
2186321f VZ |
110 | int lo = CharToHex(*++i); |
111 | if ( lo == -1 ) | |
112 | return -1; | |
c9f78968 | 113 | |
2186321f | 114 | return (hi << 4) | lo; |
dd65d8c8 RN |
115 | } |
116 | ||
2186321f | 117 | /* static */ |
86470d43 RN |
118 | wxString wxURI::Unescape(const wxString& uri) |
119 | { | |
2186321f VZ |
120 | // the unescaped version can't be longer than the original one |
121 | wxCharBuffer buf(uri.length()); | |
122 | char *p = buf.data(); | |
86470d43 | 123 | |
2186321f | 124 | for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p ) |
86470d43 | 125 | { |
2186321f VZ |
126 | char c = *i; |
127 | if ( c == '%' ) | |
86470d43 | 128 | { |
2186321f VZ |
129 | int n = wxURI::DecodeEscape(i); |
130 | if ( n == -1 ) | |
131 | return wxString(); | |
132 | ||
133 | wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" ); | |
134 | ||
135 | c = wx_static_cast(char, n); | |
86470d43 | 136 | } |
2186321f VZ |
137 | |
138 | *p = c; | |
86470d43 RN |
139 | } |
140 | ||
2186321f | 141 | *p = '\0'; |
86470d43 | 142 | |
2186321f VZ |
143 | // by default assume that the URI is in UTF-8, this is the most common |
144 | // practice | |
145 | wxString s = wxString::FromUTF8(buf); | |
146 | if ( s.empty() ) | |
147 | { | |
148 | // if it isn't, use latin-1 as a fallback -- at least this always | |
149 | // succeeds | |
150 | s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf); | |
151 | } | |
152 | ||
153 | return s; | |
dd65d8c8 RN |
154 | } |
155 | ||
2186321f | 156 | void wxURI::AppendNextEscaped(wxString& s, const char *& p) |
dd65d8c8 | 157 | { |
2186321f VZ |
158 | // check for an already encoded character: |
159 | // | |
ce321570 | 160 | // pct-encoded = "%" HEXDIG HEXDIG |
2186321f VZ |
161 | if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) ) |
162 | { | |
163 | s += *p++; | |
164 | s += *p++; | |
165 | s += *p++; | |
166 | } | |
167 | else // really needs escaping | |
168 | { | |
169 | static const char* hexDigits = "0123456789abcdef"; | |
170 | ||
171 | const char c = *p++; | |
172 | ||
173 | s += '%'; | |
174 | s += hexDigits[(c >> 4) & 15]; | |
175 | s += hexDigits[c & 15]; | |
176 | } | |
dd65d8c8 RN |
177 | } |
178 | ||
4860d40d RN |
179 | // --------------------------------------------------------------------------- |
180 | // GetUser | |
181 | // GetPassword | |
182 | // | |
183 | // Gets the username and password via the old URL method. | |
184 | // --------------------------------------------------------------------------- | |
185 | wxString wxURI::GetUser() const | |
186 | { | |
187 | size_t dwPasswordPos = m_userinfo.find(':'); | |
188 | ||
189 | if (dwPasswordPos == wxString::npos) | |
190 | dwPasswordPos = 0; | |
2186321f | 191 | |
4860d40d RN |
192 | return m_userinfo(0, dwPasswordPos); |
193 | } | |
194 | ||
195 | wxString wxURI::GetPassword() const | |
196 | { | |
197 | size_t dwPasswordPos = m_userinfo.find(':'); | |
198 | ||
199 | if (dwPasswordPos == wxString::npos) | |
2186321f | 200 | return ""; |
4860d40d | 201 | else |
2186321f | 202 | return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1); |
dd65d8c8 RN |
203 | } |
204 | ||
2186321f VZ |
205 | // combine all URI fields in a single string, applying funcDecode to each |
206 | // component which it may make sense to decode (i.e. "unescape") | |
207 | wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const | |
86470d43 RN |
208 | { |
209 | wxString ret; | |
210 | ||
211 | if (HasScheme()) | |
2186321f | 212 | ret += m_scheme + ":"; |
86470d43 RN |
213 | |
214 | if (HasServer()) | |
215 | { | |
2186321f | 216 | ret += "//"; |
86470d43 | 217 | |
4860d40d | 218 | if (HasUserInfo()) |
2186321f | 219 | ret += funcDecode(m_userinfo) + "@"; |
86470d43 RN |
220 | |
221 | if (m_hostType == wxURI_REGNAME) | |
2186321f | 222 | ret += funcDecode(m_server); |
86470d43 RN |
223 | else |
224 | ret += m_server; | |
225 | ||
226 | if (HasPort()) | |
2186321f | 227 | ret += ":" + m_port; |
86470d43 RN |
228 | } |
229 | ||
2186321f | 230 | ret += funcDecode(m_path); |
86470d43 RN |
231 | |
232 | if (HasQuery()) | |
2186321f | 233 | ret += "?" + funcDecode(m_query); |
86470d43 RN |
234 | |
235 | if (HasFragment()) | |
2186321f | 236 | ret += "#" + funcDecode(m_fragment); |
86470d43 RN |
237 | |
238 | return ret; | |
239 | } | |
240 | ||
ce321570 RN |
241 | // --------------------------------------------------------------------------- |
242 | // Comparison | |
243 | // --------------------------------------------------------------------------- | |
244 | ||
2186321f | 245 | bool wxURI::operator==(const wxURI& uri) const |
846978d7 | 246 | { |
dd65d8c8 RN |
247 | if (HasScheme()) |
248 | { | |
249 | if(m_scheme != uri.m_scheme) | |
250 | return false; | |
251 | } | |
252 | else if (uri.HasScheme()) | |
253 | return false; | |
254 | ||
255 | ||
256 | if (HasServer()) | |
257 | { | |
4860d40d | 258 | if (HasUserInfo()) |
dd65d8c8 | 259 | { |
4860d40d | 260 | if (m_userinfo != uri.m_userinfo) |
dd65d8c8 RN |
261 | return false; |
262 | } | |
4860d40d | 263 | else if (uri.HasUserInfo()) |
dd65d8c8 RN |
264 | return false; |
265 | ||
266 | if (m_server != uri.m_server || | |
267 | m_hostType != uri.m_hostType) | |
268 | return false; | |
269 | ||
270 | if (HasPort()) | |
271 | { | |
272 | if(m_port != uri.m_port) | |
273 | return false; | |
274 | } | |
275 | else if (uri.HasPort()) | |
276 | return false; | |
277 | } | |
278 | else if (uri.HasServer()) | |
279 | return false; | |
280 | ||
281 | ||
282 | if (HasPath()) | |
283 | { | |
284 | if(m_path != uri.m_path) | |
285 | return false; | |
286 | } | |
287 | else if (uri.HasPath()) | |
288 | return false; | |
289 | ||
290 | if (HasQuery()) | |
291 | { | |
292 | if (m_query != uri.m_query) | |
293 | return false; | |
294 | } | |
295 | else if (uri.HasQuery()) | |
296 | return false; | |
297 | ||
298 | if (HasFragment()) | |
299 | { | |
300 | if (m_fragment != uri.m_fragment) | |
301 | return false; | |
302 | } | |
303 | else if (uri.HasFragment()) | |
304 | return false; | |
305 | ||
306 | return true; | |
307 | } | |
308 | ||
309 | // --------------------------------------------------------------------------- | |
310 | // IsReference | |
311 | // | |
312 | // if there is no authority or scheme, it is a reference | |
313 | // --------------------------------------------------------------------------- | |
314 | ||
315 | bool wxURI::IsReference() const | |
2186321f VZ |
316 | { |
317 | return !HasScheme() || !HasServer(); | |
318 | } | |
dd65d8c8 RN |
319 | |
320 | // --------------------------------------------------------------------------- | |
321 | // Parse | |
322 | // | |
323 | // Master URI parsing method. Just calls the individual parsing methods | |
324 | // | |
325 | // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] | |
4cc52142 | 326 | // URI-reference = URI / relative |
dd65d8c8 RN |
327 | // --------------------------------------------------------------------------- |
328 | ||
2186321f | 329 | bool wxURI::Parse(const char *uri) |
dd65d8c8 RN |
330 | { |
331 | uri = ParseScheme(uri); | |
2186321f VZ |
332 | if ( uri ) |
333 | uri = ParseAuthority(uri); | |
334 | if ( uri ) | |
335 | uri = ParsePath(uri); | |
336 | if ( uri ) | |
337 | uri = ParseQuery(uri); | |
338 | if ( uri ) | |
339 | uri = ParseFragment(uri); | |
340 | ||
341 | // we only succeed if we parsed the entire string | |
342 | return uri && *uri == '\0'; | |
dd65d8c8 RN |
343 | } |
344 | ||
2186321f | 345 | const char* wxURI::ParseScheme(const char *uri) |
dd65d8c8 | 346 | { |
2186321f | 347 | const char * const start = uri; |
dd65d8c8 | 348 | |
2186321f VZ |
349 | // assume that we have a scheme if we have the valid start of it |
350 | if ( IsAlpha(*uri) ) | |
dd65d8c8 RN |
351 | { |
352 | m_scheme += *uri++; | |
353 | ||
354 | //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
846978d7 | 355 | while (IsAlpha(*uri) || IsDigit(*uri) || |
2186321f VZ |
356 | *uri == '+' || |
357 | *uri == '-' || | |
358 | *uri == '.') | |
846978d7 WS |
359 | { |
360 | m_scheme += *uri++; | |
dd65d8c8 RN |
361 | } |
362 | ||
363 | //valid scheme? | |
2186321f | 364 | if (*uri == ':') |
846978d7 | 365 | { |
dd65d8c8 RN |
366 | //mark the scheme as valid |
367 | m_fields |= wxURI_SCHEME; | |
368 | ||
369 | //move reference point up to input buffer | |
2186321f VZ |
370 | ++uri; |
371 | } | |
372 | else // no valid scheme finally | |
373 | { | |
374 | uri = start; // rewind | |
375 | m_scheme.clear(); | |
dd65d8c8 | 376 | } |
dd65d8c8 | 377 | } |
2186321f | 378 | //else: can't have schema, possible a relative URI |
dd65d8c8 | 379 | |
2186321f | 380 | return uri; |
dd65d8c8 RN |
381 | } |
382 | ||
2186321f | 383 | const char* wxURI::ParseAuthority(const char* uri) |
dd65d8c8 RN |
384 | { |
385 | // authority = [ userinfo "@" ] host [ ":" port ] | |
2186321f | 386 | if ( uri[0] == '/' && uri[1] == '/' ) |
dd65d8c8 | 387 | { |
97ad053b | 388 | //skip past the two slashes |
dd65d8c8 RN |
389 | uri += 2; |
390 | ||
97ad053b VZ |
391 | // ############# DEVIATION FROM RFC ######################### |
392 | // Don't parse the server component for file URIs | |
2186321f | 393 | if(m_scheme != "file") |
97ad053b VZ |
394 | { |
395 | //normal way | |
2186321f VZ |
396 | uri = ParseUserInfo(uri); |
397 | uri = ParseServer(uri); | |
398 | return ParsePort(uri); | |
97ad053b | 399 | } |
dd65d8c8 RN |
400 | } |
401 | ||
402 | return uri; | |
403 | } | |
404 | ||
2186321f | 405 | const char* wxURI::ParseUserInfo(const char* uri) |
dd65d8c8 | 406 | { |
2186321f | 407 | const char * const start = uri; |
dd65d8c8 RN |
408 | |
409 | // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) | |
2186321f | 410 | while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' ) |
dd65d8c8 | 411 | { |
2186321f | 412 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' ) |
24ca04e7 | 413 | m_userinfo += *uri++; |
dd65d8c8 | 414 | else |
2186321f | 415 | AppendNextEscaped(m_userinfo, uri); |
dd65d8c8 RN |
416 | } |
417 | ||
2186321f | 418 | if ( *uri++ == '@' ) |
dd65d8c8 | 419 | { |
2186321f | 420 | // valid userinfo |
4860d40d | 421 | m_fields |= wxURI_USERINFO; |
dd65d8c8 RN |
422 | } |
423 | else | |
2186321f VZ |
424 | { |
425 | uri = start; // rewind | |
426 | m_userinfo.clear(); | |
427 | } | |
dd65d8c8 | 428 | |
2186321f | 429 | return uri; |
dd65d8c8 RN |
430 | } |
431 | ||
2186321f | 432 | const char* wxURI::ParseServer(const char* uri) |
dd65d8c8 | 433 | { |
2186321f | 434 | const char * const start = uri; |
dd65d8c8 RN |
435 | |
436 | // host = IP-literal / IPv4address / reg-name | |
437 | // IP-literal = "[" ( IPv6address / IPvFuture ) "]" | |
2186321f | 438 | if (*uri == '[') |
dd65d8c8 | 439 | { |
2186321f VZ |
440 | ++uri; |
441 | if (ParseIPv6address(uri) && *uri == ']') | |
dd65d8c8 | 442 | { |
dd65d8c8 | 443 | m_hostType = wxURI_IPV6ADDRESS; |
846978d7 | 444 | |
2186321f VZ |
445 | m_server.assign(start, uri - start - 1); |
446 | ++uri; | |
dd65d8c8 RN |
447 | } |
448 | else | |
449 | { | |
2186321f | 450 | uri = start + 1; // skip the leading '[' again |
dd65d8c8 | 451 | |
2186321f | 452 | if (ParseIPvFuture(uri) && *uri == ']') |
dd65d8c8 | 453 | { |
846978d7 WS |
454 | m_hostType = wxURI_IPVFUTURE; |
455 | ||
2186321f VZ |
456 | m_server.assign(start, uri - start - 1); |
457 | ++uri; | |
458 | } | |
459 | else // unrecognized IP literal | |
460 | { | |
461 | uri = start; | |
dd65d8c8 | 462 | } |
dd65d8c8 RN |
463 | } |
464 | } | |
2186321f | 465 | else // IPv4 or a reg-name |
dd65d8c8 RN |
466 | { |
467 | if (ParseIPv4address(uri)) | |
468 | { | |
469 | m_hostType = wxURI_IPV4ADDRESS; | |
470 | ||
2186321f | 471 | m_server.assign(start, uri - start - 1); |
dd65d8c8 | 472 | } |
846978d7 | 473 | else |
2186321f VZ |
474 | { |
475 | uri = start; | |
476 | } | |
dd65d8c8 RN |
477 | } |
478 | ||
2186321f | 479 | if ( m_hostType == wxURI_REGNAME ) |
dd65d8c8 | 480 | { |
2186321f | 481 | uri = start; |
dd65d8c8 | 482 | // reg-name = *( unreserved / pct-encoded / sub-delims ) |
2186321f | 483 | while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' ) |
dd65d8c8 | 484 | { |
2186321f | 485 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) ) |
24ca04e7 | 486 | m_server += *uri++; |
dd65d8c8 | 487 | else |
2186321f | 488 | AppendNextEscaped(m_server, uri); |
846978d7 | 489 | } |
dd65d8c8 RN |
490 | } |
491 | ||
dd65d8c8 RN |
492 | m_fields |= wxURI_SERVER; |
493 | ||
494 | return uri; | |
495 | } | |
496 | ||
846978d7 | 497 | |
2186321f | 498 | const char* wxURI::ParsePort(const char* uri) |
dd65d8c8 | 499 | { |
dd65d8c8 | 500 | // port = *DIGIT |
2186321f | 501 | if( *uri == ':' ) |
dd65d8c8 RN |
502 | { |
503 | ++uri; | |
2186321f | 504 | while ( IsDigit(*uri) ) |
dd65d8c8 RN |
505 | { |
506 | m_port += *uri++; | |
846978d7 | 507 | } |
dd65d8c8 | 508 | |
dd65d8c8 RN |
509 | m_fields |= wxURI_PORT; |
510 | } | |
511 | ||
512 | return uri; | |
513 | } | |
514 | ||
2186321f | 515 | const char* wxURI::ParsePath(const char* uri) |
dd65d8c8 | 516 | { |
dd65d8c8 RN |
517 | /// hier-part = "//" authority path-abempty |
518 | /// / path-absolute | |
519 | /// / path-rootless | |
520 | /// / path-empty | |
521 | /// | |
522 | /// relative-part = "//" authority path-abempty | |
523 | /// / path-absolute | |
524 | /// / path-noscheme | |
525 | /// / path-empty | |
526 | /// | |
527 | /// path-abempty = *( "/" segment ) | |
528 | /// path-absolute = "/" [ segment-nz *( "/" segment ) ] | |
529 | /// path-noscheme = segment-nz-nc *( "/" segment ) | |
530 | /// path-rootless = segment-nz *( "/" segment ) | |
531 | /// path-empty = 0<pchar> | |
532 | /// | |
533 | /// segment = *pchar | |
534 | /// segment-nz = 1*pchar | |
535 | /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) | |
536 | /// ; non-zero-length segment without any colon ":" | |
537 | /// | |
538 | /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" | |
dd65d8c8 | 539 | |
2186321f VZ |
540 | if ( IsEndPath(*uri) ) |
541 | return uri; | |
dd65d8c8 | 542 | |
2186321f VZ |
543 | const bool isAbs = *uri == '/'; |
544 | if ( isAbs ) | |
545 | m_path += *uri++; | |
546 | ||
547 | wxArrayString segments; | |
548 | wxString segment; | |
549 | for ( ;; ) | |
dd65d8c8 | 550 | { |
2186321f VZ |
551 | const bool endPath = IsEndPath(*uri); |
552 | if ( endPath || *uri == '/' ) | |
dd65d8c8 | 553 | { |
2186321f VZ |
554 | // end of a segment, look at what we got |
555 | if ( segment == ".." ) | |
dd65d8c8 | 556 | { |
2186321f VZ |
557 | if ( !segments.empty() && *segments.rbegin() != ".." ) |
558 | segments.pop_back(); | |
559 | else if ( !isAbs ) | |
560 | segments.push_back(".."); | |
dd65d8c8 | 561 | } |
2186321f | 562 | else if ( segment == "." ) |
dd65d8c8 | 563 | { |
2186321f VZ |
564 | // normally we ignore "." but the last one should be taken into |
565 | // account as "path/." is the same as "path/" and not just "path" | |
566 | if ( endPath ) | |
567 | segments.push_back(""); | |
dd65d8c8 | 568 | } |
2186321f | 569 | else // normal segment |
dd65d8c8 | 570 | { |
2186321f | 571 | segments.push_back(segment); |
dd65d8c8 RN |
572 | } |
573 | ||
2186321f VZ |
574 | if ( endPath ) |
575 | break; | |
576 | ||
577 | segment.clear(); | |
578 | ++uri; | |
579 | continue; | |
dd65d8c8 | 580 | } |
2186321f VZ |
581 | |
582 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' ) | |
583 | segment += *uri++; | |
584 | else | |
585 | AppendNextEscaped(segment, uri); | |
dd65d8c8 RN |
586 | } |
587 | ||
2186321f VZ |
588 | m_path += wxJoin(segments, '/', '\0'); |
589 | m_fields |= wxURI_PATH; | |
590 | ||
dd65d8c8 RN |
591 | return uri; |
592 | } | |
593 | ||
594 | ||
2186321f | 595 | const char* wxURI::ParseQuery(const char* uri) |
dd65d8c8 | 596 | { |
dd65d8c8 | 597 | // query = *( pchar / "/" / "?" ) |
2186321f | 598 | if ( *uri == '?' ) |
dd65d8c8 RN |
599 | { |
600 | ++uri; | |
2186321f | 601 | while ( *uri && *uri != '#' ) |
dd65d8c8 | 602 | { |
2186321f VZ |
603 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || |
604 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' ) | |
605 | m_query += *uri++; | |
dd65d8c8 | 606 | else |
2186321f | 607 | AppendNextEscaped(m_query, uri); |
dd65d8c8 RN |
608 | } |
609 | ||
dd65d8c8 RN |
610 | m_fields |= wxURI_QUERY; |
611 | } | |
612 | ||
613 | return uri; | |
614 | } | |
615 | ||
616 | ||
2186321f | 617 | const char* wxURI::ParseFragment(const char* uri) |
dd65d8c8 | 618 | { |
dd65d8c8 | 619 | // fragment = *( pchar / "/" / "?" ) |
2186321f | 620 | if ( *uri == '#' ) |
dd65d8c8 RN |
621 | { |
622 | ++uri; | |
2186321f | 623 | while ( *uri ) |
dd65d8c8 | 624 | { |
2186321f VZ |
625 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || |
626 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') | |
627 | m_fragment += *uri++; | |
dd65d8c8 | 628 | else |
2186321f | 629 | AppendNextEscaped(m_fragment, uri); |
dd65d8c8 RN |
630 | } |
631 | ||
dd65d8c8 RN |
632 | m_fields |= wxURI_FRAGMENT; |
633 | } | |
634 | ||
635 | return uri; | |
636 | } | |
637 | ||
638 | // --------------------------------------------------------------------------- | |
ce321570 | 639 | // Resolve |
dd65d8c8 | 640 | // |
ce321570 | 641 | // Builds missing components of this uri from a base uri |
dd65d8c8 | 642 | // |
ce321570 RN |
643 | // A version of the algorithm outlined in the RFC is used here |
644 | // (it is shown in comments) | |
645 | // | |
846978d7 | 646 | // Note that an empty URI inherits all components |
dd65d8c8 RN |
647 | // --------------------------------------------------------------------------- |
648 | ||
2186321f VZ |
649 | /* static */ |
650 | wxArrayString wxURI::SplitInSegments(const wxString& path) | |
651 | { | |
652 | return wxSplit(path, '/', '\0' /* no escape character */); | |
653 | } | |
654 | ||
8404931e | 655 | void wxURI::Resolve(const wxURI& base, int flags) |
dd65d8c8 | 656 | { |
846978d7 | 657 | wxASSERT_MSG(!base.IsReference(), |
2186321f | 658 | "wxURI to inherit from must not be a reference!"); |
dd65d8c8 | 659 | |
2186321f VZ |
660 | // If we aren't being strict, enable the older (pre-RFC2396) loophole that |
661 | // allows this uri to inherit other properties from the base uri - even if | |
662 | // the scheme is defined | |
8404931e VZ |
663 | if ( !(flags & wxURI_STRICT) && |
664 | HasScheme() && base.HasScheme() && | |
665 | m_scheme == base.m_scheme ) | |
846978d7 | 666 | { |
dd65d8c8 RN |
667 | m_fields -= wxURI_SCHEME; |
668 | } | |
669 | ||
670 | ||
671 | // Do nothing if this is an absolute wxURI | |
672 | // if defined(R.scheme) then | |
673 | // T.scheme = R.scheme; | |
674 | // T.authority = R.authority; | |
675 | // T.path = remove_dot_segments(R.path); | |
676 | // T.query = R.query; | |
677 | if (HasScheme()) | |
dd65d8c8 | 678 | return; |
dd65d8c8 | 679 | |
ea4daac4 | 680 | //No scheme - inherit |
dd65d8c8 RN |
681 | m_scheme = base.m_scheme; |
682 | m_fields |= wxURI_SCHEME; | |
683 | ||
684 | // All we need to do for relative URIs with an | |
685 | // authority component is just inherit the scheme | |
686 | // if defined(R.authority) then | |
687 | // T.authority = R.authority; | |
688 | // T.path = remove_dot_segments(R.path); | |
689 | // T.query = R.query; | |
690 | if (HasServer()) | |
dd65d8c8 | 691 | return; |
dd65d8c8 RN |
692 | |
693 | //No authority - inherit | |
4860d40d | 694 | if (base.HasUserInfo()) |
dd65d8c8 | 695 | { |
4860d40d RN |
696 | m_userinfo = base.m_userinfo; |
697 | m_fields |= wxURI_USERINFO; | |
dd65d8c8 | 698 | } |
846978d7 | 699 | |
dd65d8c8 RN |
700 | m_server = base.m_server; |
701 | m_hostType = base.m_hostType; | |
702 | m_fields |= wxURI_SERVER; | |
846978d7 | 703 | |
dd65d8c8 RN |
704 | if (base.HasPort()) |
705 | { | |
706 | m_port = base.m_port; | |
707 | m_fields |= wxURI_PORT; | |
708 | } | |
846978d7 | 709 | |
dd65d8c8 RN |
710 | |
711 | // Simple path inheritance from base | |
712 | if (!HasPath()) | |
713 | { | |
714 | // T.path = Base.path; | |
715 | m_path = base.m_path; | |
716 | m_fields |= wxURI_PATH; | |
846978d7 | 717 | |
dd65d8c8 RN |
718 | |
719 | // if defined(R.query) then | |
720 | // T.query = R.query; | |
721 | // else | |
722 | // T.query = Base.query; | |
723 | // endif; | |
724 | if (!HasQuery()) | |
725 | { | |
726 | m_query = base.m_query; | |
727 | m_fields |= wxURI_QUERY; | |
728 | } | |
729 | } | |
2186321f | 730 | else if ( m_path.empty() || m_path[0u] != '/' ) |
dd65d8c8 RN |
731 | { |
732 | // if (R.path starts-with "/") then | |
733 | // T.path = remove_dot_segments(R.path); | |
734 | // else | |
735 | // T.path = merge(Base.path, R.path); | |
736 | // T.path = remove_dot_segments(T.path); | |
737 | // endif; | |
738 | // T.query = R.query; | |
2186321f VZ |
739 | // |
740 | // So we don't do anything for absolute paths and implement merge for | |
741 | // the relative ones | |
c9f78968 | 742 | |
2186321f VZ |
743 | wxArrayString our(SplitInSegments(m_path)), |
744 | result(SplitInSegments(base.m_path)); | |
c9f78968 | 745 | |
2186321f VZ |
746 | if ( !result.empty() ) |
747 | result.pop_back(); | |
846978d7 | 748 | |
2186321f | 749 | if ( our.empty() ) |
dd65d8c8 | 750 | { |
2186321f VZ |
751 | // if we have an empty path it means we were constructed from a "." |
752 | // string or something similar (e.g. "././././"), it should count | |
753 | // as (empty) segment | |
754 | our.push_back(""); | |
dd65d8c8 | 755 | } |
dd65d8c8 | 756 | |
2186321f VZ |
757 | const wxArrayString::const_iterator end = our.end(); |
758 | for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i ) | |
dd65d8c8 | 759 | { |
2186321f | 760 | if ( i->empty() || *i == "." ) |
dd65d8c8 | 761 | { |
2186321f VZ |
762 | // as in ParsePath(), while normally we ignore the empty |
763 | // segments, we need to take account of them at the end | |
764 | if ( i == end - 1 ) | |
765 | result.push_back(""); | |
766 | continue; | |
dd65d8c8 | 767 | } |
dd65d8c8 | 768 | |
2186321f | 769 | if ( *i == ".." ) |
dd65d8c8 | 770 | { |
2186321f VZ |
771 | if ( !result.empty() ) |
772 | { | |
773 | result.pop_back(); | |
dd65d8c8 | 774 | |
2186321f VZ |
775 | if ( i == end - 1 ) |
776 | result.push_back(""); | |
777 | } | |
778 | //else: just ignore, extra ".." don't accumulate | |
dd65d8c8 RN |
779 | } |
780 | else | |
781 | { | |
2186321f VZ |
782 | if ( result.empty() ) |
783 | { | |
784 | // ensure that the resulting path will always be absolute | |
785 | result.push_back(""); | |
786 | } | |
787 | ||
788 | result.push_back(*i); | |
dd65d8c8 RN |
789 | } |
790 | } | |
2186321f VZ |
791 | |
792 | m_path = wxJoin(result, '/', '\0'); | |
dd65d8c8 RN |
793 | } |
794 | ||
2186321f | 795 | //T.fragment = R.fragment; |
dd65d8c8 RN |
796 | } |
797 | ||
798 | // --------------------------------------------------------------------------- | |
ce321570 RN |
799 | // ParseH16 |
800 | // | |
801 | // Parses 1 to 4 hex values. Returns true if the first character of the input | |
2186321f | 802 | // string is a valid hex character. It is the caller's responsibility to move |
ce321570 RN |
803 | // the input string back to its original position on failure. |
804 | // --------------------------------------------------------------------------- | |
805 | ||
2186321f | 806 | bool wxURI::ParseH16(const char*& uri) |
ce321570 RN |
807 | { |
808 | // h16 = 1*4HEXDIG | |
809 | if(!IsHex(*++uri)) | |
810 | return false; | |
811 | ||
812 | if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri)) | |
813 | ++uri; | |
814 | ||
815 | return true; | |
816 | } | |
817 | ||
818 | // --------------------------------------------------------------------------- | |
819 | // ParseIPXXX | |
820 | // | |
846978d7 WS |
821 | // Parses a certain version of an IP address and moves the input string past |
822 | // it. Returns true if the input string contains the proper version of an ip | |
823 | // address. It is the caller's responsibility to move the input string back | |
ce321570 | 824 | // to its original position on failure. |
dd65d8c8 RN |
825 | // --------------------------------------------------------------------------- |
826 | ||
2186321f | 827 | bool wxURI::ParseIPv4address(const char*& uri) |
dd65d8c8 RN |
828 | { |
829 | //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet | |
830 | // | |
831 | //dec-octet = DIGIT ; 0-9 | |
832 | // / %x31-39 DIGIT ; 10-99 | |
833 | // / "1" 2DIGIT ; 100-199 | |
834 | // / "2" %x30-34 DIGIT ; 200-249 | |
835 | // / "25" %x30-35 ; 250-255 | |
836 | size_t iIPv4 = 0; | |
837 | if (IsDigit(*uri)) | |
838 | { | |
839 | ++iIPv4; | |
840 | ||
846978d7 | 841 | |
dd65d8c8 RN |
842 | //each ip part must be between 0-255 (dupe of version in for loop) |
843 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
844 | //100 or less (note !) | |
2186321f | 845 | !( (*(uri-2) < '2') || |
846978d7 | 846 | //240 or less |
2186321f VZ |
847 | (*(uri-2) == '2' && |
848 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
dd65d8c8 RN |
849 | ) |
850 | ) | |
851 | ) | |
852 | { | |
853 | return false; | |
854 | } | |
855 | ||
856 | if(IsDigit(*uri))++uri; | |
857 | ||
858 | //compilers should unroll this loop | |
859 | for(; iIPv4 < 4; ++iIPv4) | |
860 | { | |
2186321f | 861 | if (*uri != '.' || !IsDigit(*++uri)) |
dd65d8c8 RN |
862 | break; |
863 | ||
864 | //each ip part must be between 0-255 | |
865 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
866 | //100 or less (note !) | |
2186321f | 867 | !( (*(uri-2) < '2') || |
846978d7 | 868 | //240 or less |
2186321f VZ |
869 | (*(uri-2) == '2' && |
870 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
dd65d8c8 RN |
871 | ) |
872 | ) | |
873 | ) | |
874 | { | |
875 | return false; | |
876 | } | |
877 | if(IsDigit(*uri))++uri; | |
878 | } | |
879 | } | |
880 | return iIPv4 == 4; | |
881 | } | |
882 | ||
2186321f | 883 | bool wxURI::ParseIPv6address(const char*& uri) |
dd65d8c8 RN |
884 | { |
885 | // IPv6address = 6( h16 ":" ) ls32 | |
886 | // / "::" 5( h16 ":" ) ls32 | |
887 | // / [ h16 ] "::" 4( h16 ":" ) ls32 | |
888 | // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 | |
889 | // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 | |
890 | // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 | |
891 | // / [ *4( h16 ":" ) h16 ] "::" ls32 | |
892 | // / [ *5( h16 ":" ) h16 ] "::" h16 | |
893 | // / [ *6( h16 ":" ) h16 ] "::" | |
894 | ||
895 | size_t numPrefix = 0, | |
896 | maxPostfix; | |
897 | ||
898 | bool bEndHex = false; | |
899 | ||
900 | for( ; numPrefix < 6; ++numPrefix) | |
901 | { | |
902 | if(!ParseH16(uri)) | |
903 | { | |
904 | --uri; | |
905 | bEndHex = true; | |
906 | break; | |
907 | } | |
846978d7 | 908 | |
2186321f | 909 | if(*uri != ':') |
dd65d8c8 RN |
910 | { |
911 | break; | |
912 | } | |
913 | } | |
914 | ||
915 | if(!bEndHex && !ParseH16(uri)) | |
916 | { | |
917 | --uri; | |
918 | ||
919 | if (numPrefix) | |
920 | return false; | |
921 | ||
2186321f | 922 | if (*uri == ':') |
dd65d8c8 | 923 | { |
2186321f | 924 | if (*++uri != ':') |
dd65d8c8 RN |
925 | return false; |
926 | ||
927 | maxPostfix = 5; | |
928 | } | |
929 | else | |
930 | maxPostfix = 6; | |
931 | } | |
932 | else | |
933 | { | |
2186321f | 934 | if (*uri != ':' || *(uri+1) != ':') |
dd65d8c8 RN |
935 | { |
936 | if (numPrefix != 6) | |
937 | return false; | |
938 | ||
2186321f | 939 | while (*--uri != ':') {} |
dd65d8c8 RN |
940 | ++uri; |
941 | ||
2186321f | 942 | const char * const start = uri; |
dd65d8c8 RN |
943 | //parse ls32 |
944 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
2186321f | 945 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) |
dd65d8c8 RN |
946 | return true; |
947 | ||
2186321f | 948 | uri = start; |
dd65d8c8 RN |
949 | |
950 | if (ParseIPv4address(uri)) | |
951 | return true; | |
952 | else | |
953 | return false; | |
954 | } | |
955 | else | |
956 | { | |
957 | uri += 2; | |
846978d7 | 958 | |
dd65d8c8 RN |
959 | if (numPrefix > 3) |
960 | maxPostfix = 0; | |
961 | else | |
962 | maxPostfix = 4 - numPrefix; | |
963 | } | |
964 | } | |
965 | ||
966 | bool bAllowAltEnding = maxPostfix == 0; | |
967 | ||
968 | for(; maxPostfix != 0; --maxPostfix) | |
969 | { | |
2186321f | 970 | if(!ParseH16(uri) || *uri != ':') |
dd65d8c8 RN |
971 | return false; |
972 | } | |
973 | ||
974 | if(numPrefix <= 4) | |
975 | { | |
2186321f | 976 | const char * const start = uri; |
dd65d8c8 RN |
977 | //parse ls32 |
978 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
2186321f | 979 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) |
dd65d8c8 RN |
980 | return true; |
981 | ||
2186321f | 982 | uri = start; |
dd65d8c8 RN |
983 | |
984 | if (ParseIPv4address(uri)) | |
985 | return true; | |
986 | ||
2186321f | 987 | uri = start; |
846978d7 | 988 | |
dd65d8c8 RN |
989 | if (!bAllowAltEnding) |
990 | return false; | |
991 | } | |
992 | ||
993 | if(numPrefix <= 5 && ParseH16(uri)) | |
994 | return true; | |
995 | ||
996 | return true; | |
997 | } | |
998 | ||
2186321f | 999 | bool wxURI::ParseIPvFuture(const char*& uri) |
dd65d8c8 RN |
1000 | { |
1001 | // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) | |
2186321f | 1002 | if (*++uri != 'v' || !IsHex(*++uri)) |
dd65d8c8 RN |
1003 | return false; |
1004 | ||
2186321f VZ |
1005 | while (IsHex(*++uri)) |
1006 | ; | |
dd65d8c8 | 1007 | |
2186321f | 1008 | if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':')) |
dd65d8c8 RN |
1009 | return false; |
1010 | ||
2186321f | 1011 | while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {} |
dd65d8c8 RN |
1012 | |
1013 | return true; | |
1014 | } | |
1015 | ||
1016 | ||
ce321570 RN |
1017 | // --------------------------------------------------------------------------- |
1018 | // IsXXX | |
1019 | // | |
1020 | // Returns true if the passed in character meets the criteria of the method | |
1021 | // --------------------------------------------------------------------------- | |
1022 | ||
2186321f VZ |
1023 | // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" |
1024 | bool wxURI::IsUnreserved(char c) | |
1025 | { | |
1026 | return IsAlpha(c) || | |
1027 | IsDigit(c) || | |
1028 | c == '-' || | |
1029 | c == '.' || | |
1030 | c == '_' || | |
1031 | c == '~' | |
846978d7 | 1032 | ; |
dd65d8c8 RN |
1033 | } |
1034 | ||
2186321f | 1035 | bool wxURI::IsReserved(char c) |
846978d7 | 1036 | { |
dd65d8c8 RN |
1037 | return IsGenDelim(c) || IsSubDelim(c); |
1038 | } | |
1039 | ||
2186321f VZ |
1040 | // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" |
1041 | bool wxURI::IsGenDelim(char c) | |
dd65d8c8 | 1042 | { |
2186321f VZ |
1043 | return c == ':' || |
1044 | c == '/' || | |
1045 | c == '?' || | |
1046 | c == '#' || | |
1047 | c == '[' || | |
1048 | c == ']' || | |
1049 | c == '@'; | |
dd65d8c8 RN |
1050 | } |
1051 | ||
2186321f VZ |
1052 | // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" |
1053 | // / "*" / "+" / "," / ";" / "=" | |
1054 | bool wxURI::IsSubDelim(char c) | |
dd65d8c8 | 1055 | { |
2186321f VZ |
1056 | return c == '!' || |
1057 | c == '$' || | |
1058 | c == '&' || | |
1059 | c == '\'' || | |
1060 | c == '(' || | |
1061 | c == ')' || | |
1062 | c == '*' || | |
1063 | c == '+' || | |
1064 | c == ',' || | |
1065 | c == ';' || | |
1066 | c == '=' | |
dd65d8c8 RN |
1067 | ; |
1068 | } | |
1069 | ||
2186321f VZ |
1070 | bool wxURI::IsHex(char c) |
1071 | { | |
1072 | return IsDigit(c) || | |
1073 | (c >= 'a' && c <= 'f') || | |
1074 | (c >= 'A' && c <= 'F'); | |
1075 | } | |
dd65d8c8 | 1076 | |
2186321f VZ |
1077 | bool wxURI::IsAlpha(char c) |
1078 | { | |
1079 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); | |
1080 | } | |
dd65d8c8 | 1081 | |
2186321f VZ |
1082 | bool wxURI::IsDigit(char c) |
1083 | { | |
1084 | return c >= '0' && c <= '9'; | |
1085 | } | |
dd65d8c8 | 1086 | |
2186321f VZ |
1087 | bool wxURI::IsEndPath(char c) |
1088 | { | |
1089 | return c == '\0' || c == '#' || c == '?'; | |
1090 | } | |
dd65d8c8 | 1091 |