]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: src/common/uri.cpp | |
3 | // Purpose: Implementation of a URI parser | |
4 | // Author: Ryan Norton, | |
5 | // Vadim Zeitlin (UTF-8 URI support, many other changes) | |
6 | // Created: 10/26/04 | |
7 | // RCS-ID: $Id$ | |
8 | // Copyright: (c) 2004 Ryan Norton, | |
9 | // 2008 Vadim Zeitlin | |
10 | // Licence: wxWindows licence | |
11 | ///////////////////////////////////////////////////////////////////////////// | |
12 | ||
13 | // =========================================================================== | |
14 | // declarations | |
15 | // =========================================================================== | |
16 | ||
17 | // --------------------------------------------------------------------------- | |
18 | // headers | |
19 | // --------------------------------------------------------------------------- | |
20 | ||
21 | // For compilers that support precompilation, includes "wx.h". | |
22 | #include "wx/wxprec.h" | |
23 | ||
24 | #ifdef __BORLANDC__ | |
25 | #pragma hdrstop | |
26 | #endif | |
27 | ||
28 | #ifndef WX_PRECOMP | |
29 | #include "wx/crt.h" | |
30 | #endif | |
31 | ||
32 | #include "wx/uri.h" | |
33 | ||
34 | // --------------------------------------------------------------------------- | |
35 | // definitions | |
36 | // --------------------------------------------------------------------------- | |
37 | ||
// Class-information macro invoked for wxURI, naming wxObject as its base.
// NOTE(review): presumably this hooks wxURI into wxWidgets' run-time class
// information -- confirm against the macro definition, which is not visible here.
38 | IMPLEMENT_CLASS(wxURI, wxObject) | |
39 | ||
40 | // =========================================================================== | |
41 | // wxURI implementation | |
42 | // =========================================================================== | |
43 | ||
44 | // --------------------------------------------------------------------------- | |
45 | // Constructors and cleanup | |
46 | // --------------------------------------------------------------------------- | |
47 | ||
48 | wxURI::wxURI() | |
49 | : m_hostType(wxURI_REGNAME), | |
50 | m_fields(0) | |
51 | { | |
52 | } | |
53 | ||
54 | wxURI::wxURI(const wxString& uri) | |
55 | : m_hostType(wxURI_REGNAME), | |
56 | m_fields(0) | |
57 | { | |
58 | Create(uri); | |
59 | } | |
60 | ||
61 | bool wxURI::Create(const wxString& uri) | |
62 | { | |
63 | if (m_fields) | |
64 | Clear(); | |
65 | ||
66 | return Parse(uri.utf8_str()); | |
67 | } | |
68 | ||
69 | void wxURI::Clear() | |
70 | { | |
71 | m_scheme = | |
72 | m_userinfo = | |
73 | m_server = | |
74 | m_port = | |
75 | m_path = | |
76 | m_query = | |
77 | m_fragment = wxEmptyString; | |
78 | ||
79 | m_hostType = wxURI_REGNAME; | |
80 | ||
81 | m_fields = 0; | |
82 | } | |
83 | ||
84 | // --------------------------------------------------------------------------- | |
85 | // Escaped characters handling | |
86 | // --------------------------------------------------------------------------- | |
87 | ||
88 | // Converts a character into a numeric hexadecimal value, or -1 if the passed | |
89 | // in character is not a valid hex character | |
90 | ||
91 | /* static */ | |
92 | int wxURI::CharToHex(char c) | |
93 | { | |
94 | if ((c >= 'A') && (c <= 'Z')) | |
95 | return c - 'A' + 10; | |
96 | if ((c >= 'a') && (c <= 'z')) | |
97 | return c - 'a' + 10; | |
98 | if ((c >= '0') && (c <= '9')) | |
99 | return c - '0'; | |
100 | ||
101 | return -1; | |
102 | } | |
103 | ||
104 | int wxURI::DecodeEscape(wxString::const_iterator& i) | |
105 | { | |
106 | int hi = CharToHex(*++i); | |
107 | if ( hi == -1 ) | |
108 | return -1; | |
109 | ||
110 | int lo = CharToHex(*++i); | |
111 | if ( lo == -1 ) | |
112 | return -1; | |
113 | ||
114 | return (hi << 4) | lo; | |
115 | } | |
116 | ||
117 | /* static */ | |
118 | wxString wxURI::Unescape(const wxString& uri) | |
119 | { | |
120 | // the unescaped version can't be longer than the original one | |
121 | wxCharBuffer buf(uri.length()); | |
122 | char *p = buf.data(); | |
123 | ||
124 | for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p ) | |
125 | { | |
126 | char c = *i; | |
127 | if ( c == '%' ) | |
128 | { | |
129 | int n = wxURI::DecodeEscape(i); | |
130 | if ( n == -1 ) | |
131 | return wxString(); | |
132 | ||
133 | wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" ); | |
134 | ||
135 | c = static_cast<char>(n); | |
136 | } | |
137 | ||
138 | *p = c; | |
139 | } | |
140 | ||
141 | *p = '\0'; | |
142 | ||
143 | // by default assume that the URI is in UTF-8, this is the most common | |
144 | // practice | |
145 | wxString s = wxString::FromUTF8(buf); | |
146 | if ( s.empty() ) | |
147 | { | |
148 | // if it isn't, use latin-1 as a fallback -- at least this always | |
149 | // succeeds | |
150 | s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf); | |
151 | } | |
152 | ||
153 | return s; | |
154 | } | |
155 | ||
156 | void wxURI::AppendNextEscaped(wxString& s, const char *& p) | |
157 | { | |
158 | // check for an already encoded character: | |
159 | // | |
160 | // pct-encoded = "%" HEXDIG HEXDIG | |
161 | if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) ) | |
162 | { | |
163 | s += *p++; | |
164 | s += *p++; | |
165 | s += *p++; | |
166 | } | |
167 | else // really needs escaping | |
168 | { | |
169 | static const char* hexDigits = "0123456789abcdef"; | |
170 | ||
171 | const char c = *p++; | |
172 | ||
173 | s += '%'; | |
174 | s += hexDigits[(c >> 4) & 15]; | |
175 | s += hexDigits[c & 15]; | |
176 | } | |
177 | } | |
178 | ||
179 | // --------------------------------------------------------------------------- | |
180 | // GetUser | |
181 | // GetPassword | |
182 | // | |
183 | // Gets the username and password via the old URL method. | |
184 | // --------------------------------------------------------------------------- | |
185 | wxString wxURI::GetUser() const | |
186 | { | |
187 | // if there is no colon at all, find() returns npos and this method returns | |
188 | // the entire string which is correct as it means that password was omitted | |
189 | return m_userinfo(0, m_userinfo.find(':')); | |
190 | } | |
191 | ||
192 | wxString wxURI::GetPassword() const | |
193 | { | |
194 | size_t posColon = m_userinfo.find(':'); | |
195 | ||
196 | if ( posColon == wxString::npos ) | |
197 | return ""; | |
198 | ||
199 | return m_userinfo(posColon + 1, wxString::npos); | |
200 | } | |
201 | ||
202 | // combine all URI fields in a single string, applying funcDecode to each | |
203 | // component which it may make sense to decode (i.e. "unescape") | |
204 | wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const | |
205 | { | |
206 | wxString ret; | |
207 | ||
208 | if (HasScheme()) | |
209 | ret += m_scheme + ":"; | |
210 | ||
211 | if (HasServer()) | |
212 | { | |
213 | ret += "//"; | |
214 | ||
215 | if (HasUserInfo()) | |
216 | ret += funcDecode(m_userinfo) + "@"; | |
217 | ||
218 | if (m_hostType == wxURI_REGNAME) | |
219 | ret += funcDecode(m_server); | |
220 | else | |
221 | ret += m_server; | |
222 | ||
223 | if (HasPort()) | |
224 | ret += ":" + m_port; | |
225 | } | |
226 | ||
227 | ret += funcDecode(m_path); | |
228 | ||
229 | if (HasQuery()) | |
230 | ret += "?" + funcDecode(m_query); | |
231 | ||
232 | if (HasFragment()) | |
233 | ret += "#" + funcDecode(m_fragment); | |
234 | ||
235 | return ret; | |
236 | } | |
237 | ||
238 | // --------------------------------------------------------------------------- | |
239 | // Comparison | |
240 | // --------------------------------------------------------------------------- | |
241 | ||
242 | bool wxURI::operator==(const wxURI& uri) const | |
243 | { | |
244 | if (HasScheme()) | |
245 | { | |
246 | if(m_scheme != uri.m_scheme) | |
247 | return false; | |
248 | } | |
249 | else if (uri.HasScheme()) | |
250 | return false; | |
251 | ||
252 | ||
253 | if (HasServer()) | |
254 | { | |
255 | if (HasUserInfo()) | |
256 | { | |
257 | if (m_userinfo != uri.m_userinfo) | |
258 | return false; | |
259 | } | |
260 | else if (uri.HasUserInfo()) | |
261 | return false; | |
262 | ||
263 | if (m_server != uri.m_server || | |
264 | m_hostType != uri.m_hostType) | |
265 | return false; | |
266 | ||
267 | if (HasPort()) | |
268 | { | |
269 | if(m_port != uri.m_port) | |
270 | return false; | |
271 | } | |
272 | else if (uri.HasPort()) | |
273 | return false; | |
274 | } | |
275 | else if (uri.HasServer()) | |
276 | return false; | |
277 | ||
278 | ||
279 | if (HasPath()) | |
280 | { | |
281 | if(m_path != uri.m_path) | |
282 | return false; | |
283 | } | |
284 | else if (uri.HasPath()) | |
285 | return false; | |
286 | ||
287 | if (HasQuery()) | |
288 | { | |
289 | if (m_query != uri.m_query) | |
290 | return false; | |
291 | } | |
292 | else if (uri.HasQuery()) | |
293 | return false; | |
294 | ||
295 | if (HasFragment()) | |
296 | { | |
297 | if (m_fragment != uri.m_fragment) | |
298 | return false; | |
299 | } | |
300 | else if (uri.HasFragment()) | |
301 | return false; | |
302 | ||
303 | return true; | |
304 | } | |
305 | ||
306 | // --------------------------------------------------------------------------- | |
307 | // IsReference | |
308 | // | |
309 | // if there is no authority or scheme, it is a reference | |
310 | // --------------------------------------------------------------------------- | |
311 | ||
312 | bool wxURI::IsReference() const | |
313 | { | |
314 | return !HasScheme() || !HasServer(); | |
315 | } | |
316 | ||
317 | // --------------------------------------------------------------------------- | |
318 | // Parse | |
319 | // | |
320 | // Master URI parsing method. Just calls the individual parsing methods | |
321 | // | |
322 | // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] | |
323 | // URI-reference = URI / relative | |
324 | // --------------------------------------------------------------------------- | |
325 | ||
326 | bool wxURI::Parse(const char *uri) | |
327 | { | |
328 | uri = ParseScheme(uri); | |
329 | if ( uri ) | |
330 | uri = ParseAuthority(uri); | |
331 | if ( uri ) | |
332 | uri = ParsePath(uri); | |
333 | if ( uri ) | |
334 | uri = ParseQuery(uri); | |
335 | if ( uri ) | |
336 | uri = ParseFragment(uri); | |
337 | ||
338 | // we only succeed if we parsed the entire string | |
339 | return uri && *uri == '\0'; | |
340 | } | |
341 | ||
342 | const char* wxURI::ParseScheme(const char *uri) | |
343 | { | |
344 | const char * const start = uri; | |
345 | ||
346 | // assume that we have a scheme if we have the valid start of it | |
347 | if ( IsAlpha(*uri) ) | |
348 | { | |
349 | m_scheme += *uri++; | |
350 | ||
351 | //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
352 | while (IsAlpha(*uri) || IsDigit(*uri) || | |
353 | *uri == '+' || | |
354 | *uri == '-' || | |
355 | *uri == '.') | |
356 | { | |
357 | m_scheme += *uri++; | |
358 | } | |
359 | ||
360 | //valid scheme? | |
361 | if (*uri == ':') | |
362 | { | |
363 | //mark the scheme as valid | |
364 | m_fields |= wxURI_SCHEME; | |
365 | ||
366 | //move reference point up to input buffer | |
367 | ++uri; | |
368 | } | |
369 | else // no valid scheme finally | |
370 | { | |
371 | uri = start; // rewind | |
372 | m_scheme.clear(); | |
373 | } | |
374 | } | |
375 | //else: can't have schema, possible a relative URI | |
376 | ||
377 | return uri; | |
378 | } | |
379 | ||
380 | const char* wxURI::ParseAuthority(const char* uri) | |
381 | { | |
382 | // authority = [ userinfo "@" ] host [ ":" port ] | |
383 | if ( uri[0] == '/' && uri[1] == '/' ) | |
384 | { | |
385 | //skip past the two slashes | |
386 | uri += 2; | |
387 | ||
388 | // ############# DEVIATION FROM RFC ######################### | |
389 | // Don't parse the server component for file URIs | |
390 | if(m_scheme != "file") | |
391 | { | |
392 | //normal way | |
393 | uri = ParseUserInfo(uri); | |
394 | uri = ParseServer(uri); | |
395 | return ParsePort(uri); | |
396 | } | |
397 | } | |
398 | ||
399 | return uri; | |
400 | } | |
401 | ||
402 | const char* wxURI::ParseUserInfo(const char* uri) | |
403 | { | |
404 | const char * const start = uri; | |
405 | ||
406 | // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) | |
407 | while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' ) | |
408 | { | |
409 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' ) | |
410 | m_userinfo += *uri++; | |
411 | else | |
412 | AppendNextEscaped(m_userinfo, uri); | |
413 | } | |
414 | ||
415 | if ( *uri++ == '@' ) | |
416 | { | |
417 | // valid userinfo | |
418 | m_fields |= wxURI_USERINFO; | |
419 | } | |
420 | else | |
421 | { | |
422 | uri = start; // rewind | |
423 | m_userinfo.clear(); | |
424 | } | |
425 | ||
426 | return uri; | |
427 | } | |
428 | ||
429 | const char* wxURI::ParseServer(const char* uri) | |
430 | { | |
431 | const char * const start = uri; | |
432 | ||
433 | // host = IP-literal / IPv4address / reg-name | |
434 | // IP-literal = "[" ( IPv6address / IPvFuture ) "]" | |
435 | if (*uri == '[') | |
436 | { | |
437 | ++uri; | |
438 | if (ParseIPv6address(uri) && *uri == ']') | |
439 | { | |
440 | m_hostType = wxURI_IPV6ADDRESS; | |
441 | ||
442 | m_server.assign(start + 1, uri - start - 1); | |
443 | ++uri; | |
444 | } | |
445 | else | |
446 | { | |
447 | uri = start + 1; // skip the leading '[' again | |
448 | ||
449 | if (ParseIPvFuture(uri) && *uri == ']') | |
450 | { | |
451 | m_hostType = wxURI_IPVFUTURE; | |
452 | ||
453 | m_server.assign(start + 1, uri - start - 1); | |
454 | ++uri; | |
455 | } | |
456 | else // unrecognized IP literal | |
457 | { | |
458 | uri = start; | |
459 | } | |
460 | } | |
461 | } | |
462 | else // IPv4 or a reg-name | |
463 | { | |
464 | if (ParseIPv4address(uri)) | |
465 | { | |
466 | m_hostType = wxURI_IPV4ADDRESS; | |
467 | ||
468 | m_server.assign(start, uri - start); | |
469 | } | |
470 | else | |
471 | { | |
472 | uri = start; | |
473 | } | |
474 | } | |
475 | ||
476 | if ( m_hostType == wxURI_REGNAME ) | |
477 | { | |
478 | uri = start; | |
479 | // reg-name = *( unreserved / pct-encoded / sub-delims ) | |
480 | while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' ) | |
481 | { | |
482 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) ) | |
483 | m_server += *uri++; | |
484 | else | |
485 | AppendNextEscaped(m_server, uri); | |
486 | } | |
487 | } | |
488 | ||
489 | m_fields |= wxURI_SERVER; | |
490 | ||
491 | return uri; | |
492 | } | |
493 | ||
494 | ||
495 | const char* wxURI::ParsePort(const char* uri) | |
496 | { | |
497 | // port = *DIGIT | |
498 | if( *uri == ':' ) | |
499 | { | |
500 | ++uri; | |
501 | while ( IsDigit(*uri) ) | |
502 | { | |
503 | m_port += *uri++; | |
504 | } | |
505 | ||
506 | m_fields |= wxURI_PORT; | |
507 | } | |
508 | ||
509 | return uri; | |
510 | } | |
511 | ||
512 | const char* wxURI::ParsePath(const char* uri) | |
513 | { | |
514 | /// hier-part = "//" authority path-abempty | |
515 | /// / path-absolute | |
516 | /// / path-rootless | |
517 | /// / path-empty | |
518 | /// | |
519 | /// relative-part = "//" authority path-abempty | |
520 | /// / path-absolute | |
521 | /// / path-noscheme | |
522 | /// / path-empty | |
523 | /// | |
524 | /// path-abempty = *( "/" segment ) | |
525 | /// path-absolute = "/" [ segment-nz *( "/" segment ) ] | |
526 | /// path-noscheme = segment-nz-nc *( "/" segment ) | |
527 | /// path-rootless = segment-nz *( "/" segment ) | |
528 | /// path-empty = 0<pchar> | |
529 | /// | |
530 | /// segment = *pchar | |
531 | /// segment-nz = 1*pchar | |
532 | /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) | |
533 | /// ; non-zero-length segment without any colon ":" | |
534 | /// | |
535 | /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" | |
536 | ||
537 | if ( IsEndPath(*uri) ) | |
538 | return uri; | |
539 | ||
540 | const bool isAbs = *uri == '/'; | |
541 | if ( isAbs ) | |
542 | m_path += *uri++; | |
543 | ||
544 | wxArrayString segments; | |
545 | wxString segment; | |
546 | for ( ;; ) | |
547 | { | |
548 | const bool endPath = IsEndPath(*uri); | |
549 | if ( endPath || *uri == '/' ) | |
550 | { | |
551 | // end of a segment, look at what we got | |
552 | if ( segment == ".." ) | |
553 | { | |
554 | if ( !segments.empty() && *segments.rbegin() != ".." ) | |
555 | segments.pop_back(); | |
556 | else if ( !isAbs ) | |
557 | segments.push_back(".."); | |
558 | } | |
559 | else if ( segment == "." ) | |
560 | { | |
561 | // normally we ignore "." but the last one should be taken into | |
562 | // account as "path/." is the same as "path/" and not just "path" | |
563 | if ( endPath ) | |
564 | segments.push_back(""); | |
565 | } | |
566 | else // normal segment | |
567 | { | |
568 | segments.push_back(segment); | |
569 | } | |
570 | ||
571 | if ( endPath ) | |
572 | break; | |
573 | ||
574 | segment.clear(); | |
575 | ++uri; | |
576 | continue; | |
577 | } | |
578 | ||
579 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' ) | |
580 | segment += *uri++; | |
581 | else | |
582 | AppendNextEscaped(segment, uri); | |
583 | } | |
584 | ||
585 | m_path += wxJoin(segments, '/', '\0'); | |
586 | m_fields |= wxURI_PATH; | |
587 | ||
588 | return uri; | |
589 | } | |
590 | ||
591 | ||
592 | const char* wxURI::ParseQuery(const char* uri) | |
593 | { | |
594 | // query = *( pchar / "/" / "?" ) | |
595 | if ( *uri == '?' ) | |
596 | { | |
597 | ++uri; | |
598 | while ( *uri && *uri != '#' ) | |
599 | { | |
600 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || | |
601 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' ) | |
602 | m_query += *uri++; | |
603 | else | |
604 | AppendNextEscaped(m_query, uri); | |
605 | } | |
606 | ||
607 | m_fields |= wxURI_QUERY; | |
608 | } | |
609 | ||
610 | return uri; | |
611 | } | |
612 | ||
613 | ||
614 | const char* wxURI::ParseFragment(const char* uri) | |
615 | { | |
616 | // fragment = *( pchar / "/" / "?" ) | |
617 | if ( *uri == '#' ) | |
618 | { | |
619 | ++uri; | |
620 | while ( *uri ) | |
621 | { | |
622 | if ( IsUnreserved(*uri) || IsSubDelim(*uri) || | |
623 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') | |
624 | m_fragment += *uri++; | |
625 | else | |
626 | AppendNextEscaped(m_fragment, uri); | |
627 | } | |
628 | ||
629 | m_fields |= wxURI_FRAGMENT; | |
630 | } | |
631 | ||
632 | return uri; | |
633 | } | |
634 | ||
635 | // --------------------------------------------------------------------------- | |
636 | // Resolve | |
637 | // | |
638 | // Builds missing components of this uri from a base uri | |
639 | // | |
640 | // A version of the algorithm outlined in the RFC is used here | |
641 | // (it is shown in comments) | |
642 | // | |
643 | // Note that an empty URI inherits all components | |
644 | // --------------------------------------------------------------------------- | |
645 | ||
646 | /* static */ | |
647 | wxArrayString wxURI::SplitInSegments(const wxString& path) | |
648 | { | |
649 | return wxSplit(path, '/', '\0' /* no escape character */); | |
650 | } | |
651 | ||
652 | void wxURI::Resolve(const wxURI& base, int flags) | |
653 | { | |
654 | wxASSERT_MSG(!base.IsReference(), | |
655 | "wxURI to inherit from must not be a reference!"); | |
656 | ||
657 | // If we aren't being strict, enable the older (pre-RFC2396) loophole that | |
658 | // allows this uri to inherit other properties from the base uri - even if | |
659 | // the scheme is defined | |
660 | if ( !(flags & wxURI_STRICT) && | |
661 | HasScheme() && base.HasScheme() && | |
662 | m_scheme == base.m_scheme ) | |
663 | { | |
664 | m_fields -= wxURI_SCHEME; | |
665 | } | |
666 | ||
667 | ||
668 | // Do nothing if this is an absolute wxURI | |
669 | // if defined(R.scheme) then | |
670 | // T.scheme = R.scheme; | |
671 | // T.authority = R.authority; | |
672 | // T.path = remove_dot_segments(R.path); | |
673 | // T.query = R.query; | |
674 | if (HasScheme()) | |
675 | return; | |
676 | ||
677 | //No scheme - inherit | |
678 | m_scheme = base.m_scheme; | |
679 | m_fields |= wxURI_SCHEME; | |
680 | ||
681 | // All we need to do for relative URIs with an | |
682 | // authority component is just inherit the scheme | |
683 | // if defined(R.authority) then | |
684 | // T.authority = R.authority; | |
685 | // T.path = remove_dot_segments(R.path); | |
686 | // T.query = R.query; | |
687 | if (HasServer()) | |
688 | return; | |
689 | ||
690 | //No authority - inherit | |
691 | if (base.HasUserInfo()) | |
692 | { | |
693 | m_userinfo = base.m_userinfo; | |
694 | m_fields |= wxURI_USERINFO; | |
695 | } | |
696 | ||
697 | m_server = base.m_server; | |
698 | m_hostType = base.m_hostType; | |
699 | m_fields |= wxURI_SERVER; | |
700 | ||
701 | if (base.HasPort()) | |
702 | { | |
703 | m_port = base.m_port; | |
704 | m_fields |= wxURI_PORT; | |
705 | } | |
706 | ||
707 | ||
708 | // Simple path inheritance from base | |
709 | if (!HasPath()) | |
710 | { | |
711 | // T.path = Base.path; | |
712 | m_path = base.m_path; | |
713 | m_fields |= wxURI_PATH; | |
714 | ||
715 | ||
716 | // if defined(R.query) then | |
717 | // T.query = R.query; | |
718 | // else | |
719 | // T.query = Base.query; | |
720 | // endif; | |
721 | if (!HasQuery()) | |
722 | { | |
723 | m_query = base.m_query; | |
724 | m_fields |= wxURI_QUERY; | |
725 | } | |
726 | } | |
727 | else if ( m_path.empty() || m_path[0u] != '/' ) | |
728 | { | |
729 | // if (R.path starts-with "/") then | |
730 | // T.path = remove_dot_segments(R.path); | |
731 | // else | |
732 | // T.path = merge(Base.path, R.path); | |
733 | // T.path = remove_dot_segments(T.path); | |
734 | // endif; | |
735 | // T.query = R.query; | |
736 | // | |
737 | // So we don't do anything for absolute paths and implement merge for | |
738 | // the relative ones | |
739 | ||
740 | wxArrayString our(SplitInSegments(m_path)), | |
741 | result(SplitInSegments(base.m_path)); | |
742 | ||
743 | if ( !result.empty() ) | |
744 | result.pop_back(); | |
745 | ||
746 | if ( our.empty() ) | |
747 | { | |
748 | // if we have an empty path it means we were constructed from a "." | |
749 | // string or something similar (e.g. "././././"), it should count | |
750 | // as (empty) segment | |
751 | our.push_back(""); | |
752 | } | |
753 | ||
754 | const wxArrayString::const_iterator end = our.end(); | |
755 | for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i ) | |
756 | { | |
757 | if ( i->empty() || *i == "." ) | |
758 | { | |
759 | // as in ParsePath(), while normally we ignore the empty | |
760 | // segments, we need to take account of them at the end | |
761 | if ( i == end - 1 ) | |
762 | result.push_back(""); | |
763 | continue; | |
764 | } | |
765 | ||
766 | if ( *i == ".." ) | |
767 | { | |
768 | if ( !result.empty() ) | |
769 | { | |
770 | result.pop_back(); | |
771 | ||
772 | if ( i == end - 1 ) | |
773 | result.push_back(""); | |
774 | } | |
775 | //else: just ignore, extra ".." don't accumulate | |
776 | } | |
777 | else | |
778 | { | |
779 | if ( result.empty() ) | |
780 | { | |
781 | // ensure that the resulting path will always be absolute | |
782 | result.push_back(""); | |
783 | } | |
784 | ||
785 | result.push_back(*i); | |
786 | } | |
787 | } | |
788 | ||
789 | m_path = wxJoin(result, '/', '\0'); | |
790 | } | |
791 | ||
792 | //T.fragment = R.fragment; | |
793 | } | |
794 | ||
795 | // --------------------------------------------------------------------------- | |
796 | // ParseH16 | |
797 | // | |
798 | // Parses 1 to 4 hex digits. The pointer is advanced before each test, so this | |
799 | // returns true if the character after the given position is a valid hex digit. | |
800 | // It is the caller's responsibility to restore the pointer on failure. | |
801 | // --------------------------------------------------------------------------- | |
802 | ||
803 | bool wxURI::ParseH16(const char*& uri) | |
804 | { | |
805 | // h16 = 1*4HEXDIG | |
806 | if(!IsHex(*++uri)) | |
807 | return false; | |
808 | ||
809 | if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri)) | |
810 | ++uri; | |
811 | ||
812 | return true; | |
813 | } | |
814 | ||
815 | // --------------------------------------------------------------------------- | |
816 | // ParseIPXXX | |
817 | // | |
818 | // Parses a certain version of an IP address and moves the input string past | |
819 | // it. Returns true if the input string contains the proper version of an ip | |
820 | // address. It is the caller's responsibility to move the input string back | |
821 | // to its original position on failure. | |
822 | // --------------------------------------------------------------------------- | |
823 | ||
// Parses a dotted IPv4 address starting at uri, advancing uri as it goes.
//
// Returns true only if four groups of digits separated by dots were consumed,
// in which case uri is left just after the last digit consumed. Each group
// consists of 1 to 3 digits and a three digit group greater than 255 makes
// the function return false immediately.
//
// On failure uri may have been advanced, the caller must restore it.
//
// NOTE(review): leading zeros ("01") are accepted although the dec-octet rule
// quoted below doesn't seem to allow them, and nothing checks what follows
// the fourth group -- confirm whether the callers rely on this.
824 | bool wxURI::ParseIPv4address(const char*& uri) | |
825 | { | |
826 | //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet | |
827 | // | |
828 | //dec-octet = DIGIT ; 0-9 | |
829 | // / %x31-39 DIGIT ; 10-99 | |
830 | // / "1" 2DIGIT ; 100-199 | |
831 | // / "2" %x30-34 DIGIT ; 200-249 | |
832 | // / "25" %x30-35 ; 250-255 | |
// number of groups successfully parsed so far
833 | size_t iIPv4 = 0; | |
834 | if (IsDigit(*uri)) | |
835 | { | |
836 | ++iIPv4; | |
837 | ||
838 | ||
839 | //each ip part must be between 0-255 (same check as in the for loop below) | |
// the range check only applies if the group has three digits, in which case
// uri points to the third one when the negated condition is evaluated
840 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
841 | //first digit is 0 or 1: anything up to 199 is valid (note the ! above) | |
842 | !( (*(uri-2) < '2') || | |
843 | //first digit is 2: valid only up to 255 | |
844 | (*(uri-2) == '2' && | |
845 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
846 | ) | |
847 | ) | |
848 | ) | |
849 | { | |
850 | return false; | |
851 | } | |
852 | ||
// skip the third digit if there is one: for shorter groups uri already
// points to the first non-digit character
853 | if(IsDigit(*uri))++uri; | |
854 | ||
855 | //compilers should unroll this loop | |
856 | for(; iIPv4 < 4; ++iIPv4) | |
857 | { | |
// each of the remaining groups must be preceded by a dot
858 | if (*uri != '.' || !IsDigit(*++uri)) | |
859 | break; | |
860 | ||
861 | //each ip part must be between 0-255 | |
862 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
863 | //first digit is 0 or 1: anything up to 199 is valid (note the ! above) | |
864 | !( (*(uri-2) < '2') || | |
865 | //first digit is 2: valid only up to 255 | |
866 | (*(uri-2) == '2' && | |
867 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
868 | ) | |
869 | ) | |
870 | ) | |
871 | { | |
872 | return false; | |
873 | } | |
874 | if(IsDigit(*uri))++uri; | |
875 | } | |
876 | } | |
// we succeed only if all four groups were found
877 | return iIPv4 == 4; | |
878 | } | |
879 | ||
// Tries to parse an IPv6 address following the grammar quoted below,
// advancing uri as it goes. On failure uri may have been advanced, the
// caller must restore it.
//
// The groups of hex digits are parsed by ParseH16() which advances the
// pointer before examining the first digit. The "--uri" statements following
// a failed ParseH16() call undo this single increment.
//
// NOTE(review): because of this pre-increment, the character uri points to
// on entry is never examined by the first ParseH16() call, although
// ParseServer() already passes the position just after the opening '[' --
// this looks like a mismatch in the calling convention, confirm before
// relying on the results of this function.
//
// NOTE(review): the function ends with an unconditional "return true", the
// result of the last ParseH16() call only affects the final position of uri.
880 | bool wxURI::ParseIPv6address(const char*& uri) | |
881 | { | |
882 | // IPv6address = 6( h16 ":" ) ls32 | |
883 | // / "::" 5( h16 ":" ) ls32 | |
884 | // / [ h16 ] "::" 4( h16 ":" ) ls32 | |
885 | // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 | |
886 | // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 | |
887 | // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 | |
888 | // / [ *4( h16 ":" ) h16 ] "::" ls32 | |
889 | // / [ *5( h16 ":" ) h16 ] "::" h16 | |
890 | // / [ *6( h16 ":" ) h16 ] "::" | |
891 | ||
// numPrefix counts the leading "h16 :" groups found by the loop below,
// maxPostfix is the number of "h16 :" groups required after the "::"
892 | size_t numPrefix = 0, | |
893 | maxPostfix; | |
894 | ||
// set if the prefix loop stopped because ParseH16() failed
895 | bool bEndHex = false; | |
896 | ||
897 | for( ; numPrefix < 6; ++numPrefix) | |
898 | { | |
899 | if(!ParseH16(uri)) | |
900 | { | |
// undo the increment done by the failed ParseH16() call
901 | --uri; | |
902 | bEndHex = true; | |
903 | break; | |
904 | } | |
905 | ||
906 | if(*uri != ':') | |
907 | { | |
908 | break; | |
909 | } | |
910 | } | |
911 | ||
912 | if(!bEndHex && !ParseH16(uri)) | |
913 | { | |
// undo the increment done by the failed ParseH16() call
914 | --uri; | |
915 | ||
916 | if (numPrefix) | |
917 | return false; | |
918 | ||
919 | if (*uri == ':') | |
920 | { | |
// a single colon is not enough, "::" is required here
921 | if (*++uri != ':') | |
922 | return false; | |
923 | ||
924 | maxPostfix = 5; | |
925 | } | |
926 | else | |
927 | maxPostfix = 6; | |
928 | } | |
929 | else | |
930 | { | |
931 | if (*uri != ':' || *(uri+1) != ':') | |
932 | { | |
// no "::" here, so only the form with all 6 leading groups is possible
933 | if (numPrefix != 6) | |
934 | return false; | |
935 | ||
// go back to just after the previous colon to parse the ls32 part from there
936 | while (*--uri != ':') {} | |
937 | ++uri; | |
938 | ||
939 | const char * const start = uri; | |
940 | //parse ls32 | |
941 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
942 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) | |
943 | return true; | |
944 | ||
945 | uri = start; | |
946 | ||
947 | if (ParseIPv4address(uri)) | |
948 | return true; | |
949 | else | |
950 | return false; | |
951 | } | |
952 | else | |
953 | { | |
// skip the "::"
954 | uri += 2; | |
955 | ||
956 | if (numPrefix > 3) | |
957 | maxPostfix = 0; | |
958 | else | |
959 | maxPostfix = 4 - numPrefix; | |
960 | } | |
961 | } | |
962 | ||
// if no further "h16 :" groups are required, the address may also end
// without the ls32 part, see the last alternatives of the grammar
963 | bool bAllowAltEnding = maxPostfix == 0; | |
964 | ||
965 | for(; maxPostfix != 0; --maxPostfix) | |
966 | { | |
967 | if(!ParseH16(uri) || *uri != ':') | |
968 | return false; | |
969 | } | |
970 | ||
971 | if(numPrefix <= 4) | |
972 | { | |
973 | const char * const start = uri; | |
974 | //parse ls32 | |
975 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
976 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) | |
977 | return true; | |
978 | ||
979 | uri = start; | |
980 | ||
981 | if (ParseIPv4address(uri)) | |
982 | return true; | |
983 | ||
984 | uri = start; | |
985 | ||
986 | if (!bAllowAltEnding) | |
987 | return false; | |
988 | } | |
989 | ||
// both outcomes of this test return true, see the note at the top
990 | if(numPrefix <= 5 && ParseH16(uri)) | |
991 | return true; | |
992 | ||
993 | return true; | |
994 | } | |
995 | ||
996 | bool wxURI::ParseIPvFuture(const char*& uri) | |
997 | { | |
998 | // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) | |
999 | if (*++uri != 'v' || !IsHex(*++uri)) | |
1000 | return false; | |
1001 | ||
1002 | while (IsHex(*++uri)) | |
1003 | ; | |
1004 | ||
1005 | if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':')) | |
1006 | return false; | |
1007 | ||
1008 | while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {} | |
1009 | ||
1010 | return true; | |
1011 | } | |
1012 | ||
1013 | ||
1014 | // --------------------------------------------------------------------------- | |
1015 | // IsXXX | |
1016 | // | |
1017 | // Returns true if the passed in character meets the criteria of the method | |
1018 | // --------------------------------------------------------------------------- | |
1019 | ||
1020 | // unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | |
1021 | bool wxURI::IsUnreserved(char c) | |
1022 | { | |
1023 | return IsAlpha(c) || | |
1024 | IsDigit(c) || | |
1025 | c == '-' || | |
1026 | c == '.' || | |
1027 | c == '_' || | |
1028 | c == '~' | |
1029 | ; | |
1030 | } | |
1031 | ||
1032 | bool wxURI::IsReserved(char c) | |
1033 | { | |
1034 | return IsGenDelim(c) || IsSubDelim(c); | |
1035 | } | |
1036 | ||
1037 | // gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" | |
1038 | bool wxURI::IsGenDelim(char c) | |
1039 | { | |
1040 | return c == ':' || | |
1041 | c == '/' || | |
1042 | c == '?' || | |
1043 | c == '#' || | |
1044 | c == '[' || | |
1045 | c == ']' || | |
1046 | c == '@'; | |
1047 | } | |
1048 | ||
1049 | // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" | |
1050 | // / "*" / "+" / "," / ";" / "=" | |
1051 | bool wxURI::IsSubDelim(char c) | |
1052 | { | |
1053 | return c == '!' || | |
1054 | c == '$' || | |
1055 | c == '&' || | |
1056 | c == '\'' || | |
1057 | c == '(' || | |
1058 | c == ')' || | |
1059 | c == '*' || | |
1060 | c == '+' || | |
1061 | c == ',' || | |
1062 | c == ';' || | |
1063 | c == '=' | |
1064 | ; | |
1065 | } | |
1066 | ||
1067 | bool wxURI::IsHex(char c) | |
1068 | { | |
1069 | return IsDigit(c) || | |
1070 | (c >= 'a' && c <= 'f') || | |
1071 | (c >= 'A' && c <= 'F'); | |
1072 | } | |
1073 | ||
1074 | bool wxURI::IsAlpha(char c) | |
1075 | { | |
1076 | return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); | |
1077 | } | |
1078 | ||
1079 | bool wxURI::IsDigit(char c) | |
1080 | { | |
1081 | return c >= '0' && c <= '9'; | |
1082 | } | |
1083 | ||
1084 | bool wxURI::IsEndPath(char c) | |
1085 | { | |
1086 | return c == '\0' || c == '#' || c == '?'; | |
1087 | } | |
1088 |