]>
Commit | Line | Data |
---|---|---|
dd65d8c8 RN |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: uri.cpp | |
3 | // Purpose: Implementation of a uri parser | |
4 | // Author: Ryan Norton | |
5 | // Created: 10/26/04 | |
6 | // RCS-ID: $Id$ | |
7 | // Copyright: (c) 2004 Ryan Norton | |
8 | // Licence: wxWindows | |
9 | ///////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // =========================================================================== | |
12 | // declarations | |
13 | // =========================================================================== | |
14 | ||
15 | // --------------------------------------------------------------------------- | |
16 | // headers | |
17 | // --------------------------------------------------------------------------- | |
18 | ||
19 | #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) | |
20 | #pragma implementation "uri.h" | |
21 | #endif | |
22 | ||
23 | // For compilers that support precompilation, includes "wx.h". | |
24 | #include "wx/wxprec.h" | |
25 | ||
26 | #ifdef __BORLANDC__ | |
27 | #pragma hdrstop | |
28 | #endif | |
29 | ||
30 | #include "wx/uri.h" | |
31 | ||
32 | // --------------------------------------------------------------------------- | |
33 | // definitions | |
34 | // --------------------------------------------------------------------------- | |
35 | ||
36 | IMPLEMENT_CLASS(wxURI, wxObject); | |
37 | ||
38 | // =========================================================================== | |
39 | // implementation | |
40 | // =========================================================================== | |
41 | ||
42 | // --------------------------------------------------------------------------- | |
43 | // utilities | |
44 | // --------------------------------------------------------------------------- | |
45 | ||
46 | // --------------------------------------------------------------------------- | |
47 | // | |
48 | // wxURI | |
49 | // | |
50 | // --------------------------------------------------------------------------- | |
51 | ||
52 | // --------------------------------------------------------------------------- | |
53 | // Constructors | |
54 | // --------------------------------------------------------------------------- | |
55 | ||
56 | wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0) | |
57 | { | |
58 | } | |
59 | ||
60 | wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0) | |
61 | { | |
62 | Create(uri); | |
63 | } | |
64 | ||
65 | wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0) | |
66 | { | |
67 | *this = uri; | |
68 | } | |
69 | ||
70 | // --------------------------------------------------------------------------- | |
71 | // Destructor and cleanup | |
72 | // --------------------------------------------------------------------------- | |
73 | ||
74 | wxURI::~wxURI() | |
75 | { | |
76 | Clear(); | |
77 | } | |
78 | ||
79 | void wxURI::Clear() | |
80 | { | |
81 | m_scheme = m_user = m_server = m_port = m_path = | |
82 | m_query = m_fragment = wxT(""); | |
83 | ||
84 | m_hostType = wxURI_REGNAME; | |
85 | ||
86 | m_fields = 0; | |
87 | } | |
88 | ||
89 | // --------------------------------------------------------------------------- | |
90 | // Create | |
91 | // | |
92 | // This creates the URI - all we do here is call the main parsing method | |
93 | // --------------------------------------------------------------------------- | |
94 | ||
95 | void wxURI::Create(const wxString& uri) | |
96 | { | |
97 | if (m_fields) | |
98 | Clear(); | |
99 | ||
100 | Parse(uri); | |
101 | } | |
102 | ||
103 | // --------------------------------------------------------------------------- | |
104 | // Escape/Unescape/IsEscape | |
105 | // | |
106 | // Unescape unencodes a 3 character URL escape sequence | |
107 | // Escape encodes an invalid URI character into a 3 character sequence | |
108 | // IsEscape determines if the input string contains an escape sequence, | |
109 | // if it does, then it moves the input string past the escape sequence | |
110 | // --------------------------------------------------------------------------- | |
111 | ||
8404931e | 112 | wxChar wxURI::Unescape(const wxChar* s) |
dd65d8c8 RN |
113 | { |
114 | wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!")); | |
8404931e VZ |
115 | |
116 | return CharToHex(*s) * 0x10 + CharToHex(*++s); | |
dd65d8c8 RN |
117 | } |
118 | ||
119 | void wxURI::Escape(wxString& s, const wxChar& c) | |
120 | { | |
121 | const wxChar* hdig = wxT("0123456789abcdef"); | |
122 | s += '%'; | |
123 | s += hdig[(c >> 4) & 15]; | |
124 | s += hdig[c & 15]; | |
125 | } | |
126 | ||
127 | bool wxURI::IsEscape(const wxChar*& uri) | |
128 | { | |
129 | if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2))) | |
130 | { | |
131 | uri += 3; | |
132 | return true; | |
133 | } | |
134 | else | |
135 | return false; | |
136 | } | |
137 | ||
138 | // --------------------------------------------------------------------------- | |
139 | // HasXXX | |
140 | // --------------------------------------------------------------------------- | |
141 | ||
142 | bool wxURI::HasScheme() const | |
143 | { return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; } | |
144 | ||
145 | bool wxURI::HasUser() const | |
146 | { return (m_fields & wxURI_USER) == wxURI_USER; } | |
147 | ||
148 | bool wxURI::HasServer() const | |
149 | { return (m_fields & wxURI_SERVER) == wxURI_SERVER; } | |
150 | ||
151 | bool wxURI::HasPort() const | |
152 | { return (m_fields & wxURI_PORT) == wxURI_PORT; } | |
153 | ||
154 | bool wxURI::HasPath() const | |
155 | { return (m_fields & wxURI_PATH) == wxURI_PATH; } | |
156 | ||
157 | bool wxURI::HasQuery() const | |
158 | { return (m_fields & wxURI_QUERY) == wxURI_QUERY; } | |
159 | ||
160 | bool wxURI::HasFragment() const | |
161 | { return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; } | |
162 | ||
163 | // --------------------------------------------------------------------------- | |
164 | // GetXXX | |
165 | // | |
166 | // The normal Get() actually builds the entire URI into a useable | |
167 | // representation, including proper identification characters such as slashes | |
168 | // --------------------------------------------------------------------------- | |
169 | ||
170 | const wxString& wxURI::GetScheme() const | |
171 | { return m_scheme; } | |
172 | ||
173 | const wxString& wxURI::GetPath() const | |
174 | { return m_path; } | |
175 | ||
176 | const wxString& wxURI::GetQuery() const | |
177 | { return m_query; } | |
178 | ||
179 | const wxString& wxURI::GetFragment() const | |
180 | { return m_fragment; } | |
181 | ||
182 | const wxString& wxURI::GetPort() const | |
183 | { return m_port; } | |
184 | ||
185 | const wxString& wxURI::GetUser() const | |
186 | { return m_user; } | |
187 | ||
188 | const wxString& wxURI::GetServer() const | |
189 | { return m_server; } | |
190 | ||
191 | const wxURIHostType& wxURI::GetHostType() const | |
192 | { return m_hostType; } | |
193 | ||
194 | wxString wxURI::Get() const | |
195 | { | |
196 | wxString ret; | |
197 | ||
198 | if (HasScheme()) | |
199 | ret = ret + m_scheme + wxT(":"); | |
200 | ||
201 | if (HasServer()) | |
202 | { | |
203 | ret += wxT("//"); | |
204 | ||
205 | if (HasUser()) | |
206 | ret = ret + m_user + wxT("@"); | |
207 | ||
208 | ret += m_server; | |
209 | ||
210 | if (HasPort()) | |
211 | ret = ret + wxT(":") + m_port; | |
212 | } | |
213 | ||
214 | ret += m_path; | |
215 | ||
216 | if (HasQuery()) | |
217 | ret = ret + wxT("?") + m_query; | |
218 | ||
219 | if (HasFragment()) | |
220 | ret = ret + wxT("#") + m_fragment; | |
221 | ||
222 | return ret; | |
223 | } | |
224 | ||
225 | // --------------------------------------------------------------------------- | |
226 | // operator = and == | |
227 | // --------------------------------------------------------------------------- | |
228 | ||
229 | wxURI& wxURI::operator = (const wxURI& uri) | |
230 | { | |
231 | if (HasScheme()) | |
232 | m_scheme = uri.m_scheme; | |
233 | ||
234 | ||
235 | if (HasServer()) | |
236 | { | |
237 | if (HasUser()) | |
238 | m_user = uri.m_user; | |
239 | ||
240 | m_server = uri.m_server; | |
241 | m_hostType = uri.m_hostType; | |
242 | ||
243 | if (HasPort()) | |
244 | m_port = uri.m_port; | |
245 | } | |
246 | ||
247 | ||
248 | if (HasPath()) | |
249 | m_path = uri.m_path; | |
250 | ||
251 | if (HasQuery()) | |
252 | m_query = uri.m_query; | |
253 | ||
254 | if (HasFragment()) | |
255 | m_fragment = uri.m_fragment; | |
256 | ||
257 | return *this; | |
258 | } | |
259 | ||
260 | wxURI& wxURI::operator = (const wxChar* string) | |
261 | { | |
262 | Create(string); | |
263 | return *this; | |
264 | } | |
265 | ||
266 | bool wxURI::operator == (const wxURI& uri) const | |
267 | { | |
268 | if (HasScheme()) | |
269 | { | |
270 | if(m_scheme != uri.m_scheme) | |
271 | return false; | |
272 | } | |
273 | else if (uri.HasScheme()) | |
274 | return false; | |
275 | ||
276 | ||
277 | if (HasServer()) | |
278 | { | |
279 | if (HasUser()) | |
280 | { | |
281 | if (m_user != uri.m_user) | |
282 | return false; | |
283 | } | |
284 | else if (uri.HasUser()) | |
285 | return false; | |
286 | ||
287 | if (m_server != uri.m_server || | |
288 | m_hostType != uri.m_hostType) | |
289 | return false; | |
290 | ||
291 | if (HasPort()) | |
292 | { | |
293 | if(m_port != uri.m_port) | |
294 | return false; | |
295 | } | |
296 | else if (uri.HasPort()) | |
297 | return false; | |
298 | } | |
299 | else if (uri.HasServer()) | |
300 | return false; | |
301 | ||
302 | ||
303 | if (HasPath()) | |
304 | { | |
305 | if(m_path != uri.m_path) | |
306 | return false; | |
307 | } | |
308 | else if (uri.HasPath()) | |
309 | return false; | |
310 | ||
311 | if (HasQuery()) | |
312 | { | |
313 | if (m_query != uri.m_query) | |
314 | return false; | |
315 | } | |
316 | else if (uri.HasQuery()) | |
317 | return false; | |
318 | ||
319 | if (HasFragment()) | |
320 | { | |
321 | if (m_fragment != uri.m_fragment) | |
322 | return false; | |
323 | } | |
324 | else if (uri.HasFragment()) | |
325 | return false; | |
326 | ||
327 | return true; | |
328 | } | |
329 | ||
330 | // --------------------------------------------------------------------------- | |
331 | // IsReference | |
332 | // | |
333 | // if there is no authority or scheme, it is a reference | |
334 | // --------------------------------------------------------------------------- | |
335 | ||
336 | bool wxURI::IsReference() const | |
337 | { return !HasScheme() || !HasServer(); } | |
338 | ||
339 | // --------------------------------------------------------------------------- | |
340 | // Parse | |
341 | // | |
342 | // Master URI parsing method. Just calls the individual parsing methods | |
343 | // | |
344 | // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] | |
345 | // URI-reference = URI / relative-ref | |
346 | // --------------------------------------------------------------------------- | |
347 | ||
348 | const wxChar* wxURI::Parse(const wxChar* uri) | |
349 | { | |
350 | uri = ParseScheme(uri); | |
351 | uri = ParseAuthority(uri); | |
352 | uri = ParsePath(uri); | |
353 | uri = ParseQuery(uri); | |
354 | return ParseFragment(uri); | |
355 | } | |
356 | ||
357 | // --------------------------------------------------------------------------- | |
358 | // ParseXXX | |
359 | // | |
360 | // Individual parsers for each URI component | |
361 | // --------------------------------------------------------------------------- | |
362 | ||
363 | const wxChar* wxURI::ParseScheme(const wxChar* uri) | |
364 | { | |
365 | wxASSERT(uri != NULL); | |
366 | ||
367 | //copy of the uri - used for figuring out | |
368 | //length of each component | |
369 | const wxChar* uricopy = uri; | |
370 | ||
371 | //Does the uri have a scheme (first character alpha)? | |
372 | if (IsAlpha(*uri)) | |
373 | { | |
374 | m_scheme += *uri++; | |
375 | ||
376 | //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
377 | //RN: Scheme can not be escaped | |
378 | while (IsAlpha(*uri) || IsDigit(*uri) || | |
379 | *uri == '+' || | |
380 | *uri == '-' || | |
381 | *uri == '.') | |
382 | { | |
383 | m_scheme += *uri++; | |
384 | } | |
385 | ||
386 | //valid scheme? | |
387 | if (*uri == ':') | |
388 | { | |
389 | //mark the scheme as valid | |
390 | m_fields |= wxURI_SCHEME; | |
391 | ||
392 | //move reference point up to input buffer | |
393 | uricopy = ++uri; | |
394 | } | |
395 | else | |
396 | //relative uri with relative path reference | |
397 | m_scheme = wxT(""); | |
398 | } | |
399 | // else | |
400 | //relative uri with _possible_ relative path reference | |
401 | ||
402 | return uricopy; | |
403 | } | |
404 | ||
405 | const wxChar* wxURI::ParseAuthority(const wxChar* uri) | |
406 | { | |
407 | // authority = [ userinfo "@" ] host [ ":" port ] | |
408 | if (*uri == '/' && *(uri+1) == '/') | |
409 | { | |
410 | uri += 2; | |
411 | ||
412 | uri = ParseUser(uri); | |
413 | uri = ParseServer(uri); | |
414 | return ParsePort(uri); | |
415 | } | |
416 | ||
417 | return uri; | |
418 | } | |
419 | ||
420 | const wxChar* wxURI::ParseUser(const wxChar* uri) | |
421 | { | |
422 | wxASSERT(uri != NULL); | |
423 | ||
424 | //copy of the uri - used for figuring out | |
425 | //length of each component | |
426 | const wxChar* uricopy = uri; | |
427 | ||
428 | // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) | |
429 | while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?') | |
430 | { | |
431 | if(IsUnreserved(*uri) || IsEscape(uri) || | |
432 | IsSubDelim(*uri) || *uri == ':') | |
433 | m_user += *uri++; | |
434 | else | |
435 | Escape(m_user, *uri++); | |
436 | } | |
437 | ||
438 | if(*uri == '@') | |
439 | { | |
440 | //valid userinfo | |
441 | m_fields |= wxURI_USER; | |
442 | ||
443 | uricopy = ++uri; | |
444 | } | |
445 | else | |
446 | m_user = wxT(""); | |
447 | ||
448 | return uricopy; | |
449 | } | |
450 | ||
451 | const wxChar* wxURI::ParseServer(const wxChar* uri) | |
452 | { | |
453 | wxASSERT(uri != NULL); | |
454 | ||
455 | //copy of the uri - used for figuring out | |
456 | //length of each component | |
457 | const wxChar* uricopy = uri; | |
458 | ||
459 | // host = IP-literal / IPv4address / reg-name | |
460 | // IP-literal = "[" ( IPv6address / IPvFuture ) "]" | |
461 | if (*uri == '[') | |
462 | { | |
463 | if (ParseIPv6address(++uri) && *uri == ']') | |
464 | { | |
465 | ++uri; | |
466 | m_hostType = wxURI_IPV6ADDRESS; | |
467 | ||
468 | wxStringBufferLength theBuffer(m_server, uri - uricopy); | |
469 | wxMemcpy(theBuffer, uricopy, uri-uricopy); | |
470 | theBuffer.SetLength(uri-uricopy); | |
471 | } | |
472 | else | |
473 | { | |
474 | uri = uricopy; | |
475 | ||
476 | if (ParseIPvFuture(++uri) && *uri == ']') | |
477 | { | |
478 | ++uri; | |
479 | m_hostType = wxURI_IPVFUTURE; | |
480 | ||
481 | wxStringBufferLength theBuffer(m_server, uri - uricopy); | |
482 | wxMemcpy(theBuffer, uricopy, uri-uricopy); | |
483 | theBuffer.SetLength(uri-uricopy); | |
484 | } | |
485 | else | |
486 | uri = uricopy; | |
487 | } | |
488 | } | |
489 | else | |
490 | { | |
491 | if (ParseIPv4address(uri)) | |
492 | { | |
493 | m_hostType = wxURI_IPV4ADDRESS; | |
494 | ||
495 | wxStringBufferLength theBuffer(m_server, uri - uricopy); | |
496 | wxMemcpy(theBuffer, uricopy, uri-uricopy); | |
497 | theBuffer.SetLength(uri-uricopy); | |
498 | } | |
499 | else | |
500 | uri = uricopy; | |
501 | } | |
502 | ||
503 | if(m_hostType == wxURI_REGNAME) | |
504 | { | |
505 | uri = uricopy; | |
506 | // reg-name = *( unreserved / pct-encoded / sub-delims ) | |
507 | while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?') | |
508 | { | |
509 | if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri)) | |
510 | m_server += *uri++; | |
511 | else | |
512 | Escape(m_server, *uri++); | |
513 | } | |
514 | } | |
515 | ||
516 | //mark the server as valid | |
517 | m_fields |= wxURI_SERVER; | |
518 | ||
519 | return uri; | |
520 | } | |
521 | ||
522 | ||
523 | const wxChar* wxURI::ParsePort(const wxChar* uri) | |
524 | { | |
525 | wxASSERT(uri != NULL); | |
526 | ||
527 | // port = *DIGIT | |
528 | if(*uri == ':') | |
529 | { | |
530 | ++uri; | |
531 | while(IsDigit(*uri)) | |
532 | { | |
533 | m_port += *uri++; | |
534 | } | |
535 | ||
536 | //mark the port as valid | |
537 | m_fields |= wxURI_PORT; | |
538 | } | |
539 | ||
540 | return uri; | |
541 | } | |
542 | ||
8404931e | 543 | const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize) |
dd65d8c8 RN |
544 | { |
545 | wxASSERT(uri != NULL); | |
546 | ||
547 | //copy of the uri - used for figuring out | |
548 | //length of each component | |
549 | const wxChar* uricopy = uri; | |
550 | ||
551 | /// hier-part = "//" authority path-abempty | |
552 | /// / path-absolute | |
553 | /// / path-rootless | |
554 | /// / path-empty | |
555 | /// | |
556 | /// relative-part = "//" authority path-abempty | |
557 | /// / path-absolute | |
558 | /// / path-noscheme | |
559 | /// / path-empty | |
560 | /// | |
561 | /// path-abempty = *( "/" segment ) | |
562 | /// path-absolute = "/" [ segment-nz *( "/" segment ) ] | |
563 | /// path-noscheme = segment-nz-nc *( "/" segment ) | |
564 | /// path-rootless = segment-nz *( "/" segment ) | |
565 | /// path-empty = 0<pchar> | |
566 | /// | |
567 | /// segment = *pchar | |
568 | /// segment-nz = 1*pchar | |
569 | /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) | |
570 | /// ; non-zero-length segment without any colon ":" | |
571 | /// | |
572 | /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" | |
573 | if (*uri == '/') | |
574 | { | |
575 | m_path += *uri++; | |
576 | ||
577 | while(*uri && *uri != '#' && *uri != '?') | |
578 | { | |
579 | if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
580 | *uri == ':' || *uri == '@' || *uri == '/') | |
581 | m_path += *uri++; | |
582 | else | |
583 | Escape(m_path, *uri++); | |
584 | } | |
585 | ||
586 | if (bNormalize) | |
587 | { | |
588 | wxStringBufferLength theBuffer(m_path, m_path.length() + 1); | |
589 | Normalize(theBuffer, true); | |
590 | theBuffer.SetLength(wxStrlen(theBuffer)); | |
591 | } | |
592 | //mark the path as valid | |
593 | m_fields |= wxURI_PATH; | |
594 | } | |
595 | else if(*uri) //Relative path | |
596 | { | |
597 | if (bReference) | |
598 | { | |
599 | //no colon allowed | |
600 | while(*uri && *uri != '#' && *uri != '?') | |
601 | { | |
602 | if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
603 | *uri == '@' || *uri == '/') | |
604 | m_path += *uri++; | |
605 | else | |
606 | Escape(m_path, *uri++); | |
607 | } | |
608 | } | |
609 | else | |
610 | { | |
611 | while(*uri && *uri != '#' && *uri != '?') | |
612 | { | |
613 | if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
614 | *uri == ':' || *uri == '@' || *uri == '/') | |
615 | m_path += *uri++; | |
616 | else | |
617 | Escape(m_path, *uri++); | |
618 | } | |
619 | } | |
620 | ||
621 | if (uri != uricopy) | |
622 | { | |
623 | if (bNormalize) | |
624 | { | |
625 | wxStringBufferLength theBuffer(m_path, m_path.length() + 1); | |
626 | Normalize(theBuffer); | |
627 | theBuffer.SetLength(wxStrlen(theBuffer)); | |
628 | } | |
629 | ||
630 | //mark the path as valid | |
631 | m_fields |= wxURI_PATH; | |
632 | } | |
633 | } | |
634 | ||
635 | return uri; | |
636 | } | |
637 | ||
638 | ||
639 | const wxChar* wxURI::ParseQuery(const wxChar* uri) | |
640 | { | |
641 | wxASSERT(uri != NULL); | |
642 | ||
643 | // query = *( pchar / "/" / "?" ) | |
644 | if (*uri == '?') | |
645 | { | |
646 | ++uri; | |
647 | while(*uri && *uri != '#') | |
648 | { | |
649 | if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
650 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') | |
651 | m_query += *uri++; | |
652 | else | |
653 | Escape(m_query, *uri++); | |
654 | } | |
655 | ||
656 | //mark the server as valid | |
657 | m_fields |= wxURI_QUERY; | |
658 | } | |
659 | ||
660 | return uri; | |
661 | } | |
662 | ||
663 | ||
664 | const wxChar* wxURI::ParseFragment(const wxChar* uri) | |
665 | { | |
666 | wxASSERT(uri != NULL); | |
667 | ||
668 | // fragment = *( pchar / "/" / "?" ) | |
669 | if (*uri == '#') | |
670 | { | |
671 | ++uri; | |
672 | while(*uri) | |
673 | { | |
674 | if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
675 | *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') | |
676 | m_fragment += *uri++; | |
677 | else | |
678 | Escape(m_fragment, *uri++); | |
679 | } | |
680 | ||
681 | //mark the server as valid | |
682 | m_fields |= wxURI_FRAGMENT; | |
683 | } | |
684 | ||
685 | return uri; | |
686 | } | |
687 | ||
688 | // --------------------------------------------------------------------------- | |
689 | // Resolve URI | |
690 | // | |
691 | // Builds missing components of this uri from a base uri | |
692 | // | |
693 | // A version of the algorithm outlined in the RFC is used here | |
694 | // (it is shown in comments) | |
695 | // --------------------------------------------------------------------------- | |
696 | ||
8404931e | 697 | void wxURI::Resolve(const wxURI& base, int flags) |
dd65d8c8 RN |
698 | { |
699 | wxASSERT_MSG(!base.IsReference(), | |
700 | wxT("wxURI to inherit from must not be a reference!")); | |
701 | ||
702 | // If we arn't being strict, enable the older | |
703 | // loophole that allows this uri to inherit other | |
704 | // properties from the base uri - even if the scheme | |
705 | // is defined | |
8404931e VZ |
706 | if ( !(flags & wxURI_STRICT) && |
707 | HasScheme() && base.HasScheme() && | |
708 | m_scheme == base.m_scheme ) | |
dd65d8c8 RN |
709 | { |
710 | m_fields -= wxURI_SCHEME; | |
711 | } | |
712 | ||
713 | ||
714 | // Do nothing if this is an absolute wxURI | |
715 | // if defined(R.scheme) then | |
716 | // T.scheme = R.scheme; | |
717 | // T.authority = R.authority; | |
718 | // T.path = remove_dot_segments(R.path); | |
719 | // T.query = R.query; | |
720 | if (HasScheme()) | |
721 | { | |
722 | return; | |
723 | } | |
724 | ||
725 | //No sheme - inherit | |
726 | m_scheme = base.m_scheme; | |
727 | m_fields |= wxURI_SCHEME; | |
728 | ||
729 | // All we need to do for relative URIs with an | |
730 | // authority component is just inherit the scheme | |
731 | // if defined(R.authority) then | |
732 | // T.authority = R.authority; | |
733 | // T.path = remove_dot_segments(R.path); | |
734 | // T.query = R.query; | |
735 | if (HasServer()) | |
736 | { | |
737 | return; | |
738 | } | |
739 | ||
740 | //No authority - inherit | |
741 | if (base.HasUser()) | |
742 | { | |
743 | m_user = base.m_user; | |
744 | m_fields |= wxURI_USER; | |
745 | } | |
746 | ||
747 | m_server = base.m_server; | |
748 | m_hostType = base.m_hostType; | |
749 | m_fields |= wxURI_SERVER; | |
750 | ||
751 | if (base.HasPort()) | |
752 | { | |
753 | m_port = base.m_port; | |
754 | m_fields |= wxURI_PORT; | |
755 | } | |
756 | ||
757 | ||
758 | // Simple path inheritance from base | |
759 | if (!HasPath()) | |
760 | { | |
761 | // T.path = Base.path; | |
762 | m_path = base.m_path; | |
763 | m_fields |= wxURI_PATH; | |
764 | ||
765 | ||
766 | // if defined(R.query) then | |
767 | // T.query = R.query; | |
768 | // else | |
769 | // T.query = Base.query; | |
770 | // endif; | |
771 | if (!HasQuery()) | |
772 | { | |
773 | m_query = base.m_query; | |
774 | m_fields |= wxURI_QUERY; | |
775 | } | |
776 | } | |
777 | else | |
778 | { | |
779 | // if (R.path starts-with "/") then | |
780 | // T.path = remove_dot_segments(R.path); | |
781 | // else | |
782 | // T.path = merge(Base.path, R.path); | |
783 | // T.path = remove_dot_segments(T.path); | |
784 | // endif; | |
785 | // T.query = R.query; | |
786 | if (m_path[(const size_t&)0] != '/') | |
787 | { | |
788 | //Marge paths | |
789 | const wxChar* op = m_path.c_str(); | |
790 | const wxChar* bp = base.m_path.c_str() + base.m_path.Length(); | |
791 | ||
792 | //not a ending directory? move up | |
793 | if (base.m_path[0] && *(bp-1) != '/') | |
794 | UpTree(base.m_path, bp); | |
795 | ||
796 | //normalize directories | |
797 | while(*op == '.' && *(op+1) == '.' && | |
798 | (*(op+2) == '\0' || *(op+2) == '/') ) | |
799 | { | |
800 | UpTree(base.m_path, bp); | |
801 | ||
802 | if (*(op+2) == '\0') | |
803 | op += 2; | |
804 | else | |
805 | op += 3; | |
806 | } | |
807 | ||
808 | m_path = base.m_path.substr(0, bp - base.m_path.c_str()) + | |
809 | m_path.Mid((op - m_path.c_str()), m_path.Length()); | |
810 | } | |
811 | } | |
812 | } | |
813 | ||
814 | // --------------------------------------------------------------------------- | |
815 | // Directory Normalization (static) | |
816 | // | |
817 | // UpTree goes up a directory in a string and moves the pointer as such, | |
818 | // while Normalize gets rid of duplicate/erroneous directories in a URI | |
819 | // according to RFC 2396 and modified quite a bit to meet the unit tests | |
820 | // in it. | |
821 | // --------------------------------------------------------------------------- | |
822 | ||
823 | void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri) | |
824 | { | |
825 | if (uri != uristart && *(uri-1) == '/') | |
826 | { | |
827 | uri -= 2; | |
828 | } | |
829 | ||
830 | for(;uri != uristart; --uri) | |
831 | { | |
832 | if (*uri == '/') | |
833 | { | |
834 | ++uri; | |
835 | break; | |
836 | } | |
837 | } | |
838 | ||
839 | //!!!TODO:HACK!!!// | |
840 | if (uri == uristart && *uri == '/') | |
841 | ++uri; | |
842 | //!!!// | |
843 | } | |
844 | ||
// Normalizes the NUL-terminated path in s in place.
//
// cp is the read cursor and s is reused as the write cursor; bp marks the
// lowest position the write cursor is compared against (one past a leading
// '/', otherwise the start of the buffer). "." segments are skipped. For a
// ".." segment: if something has been written past bp, the write cursor is
// moved back with UpTree(); otherwise the segment is either copied through
// and bp moved past it (bIgnoreLeads false) or skipped (bIgnoreLeads
// true). All other characters are copied. The result is NUL-terminated at
// the final write position.
8404931e | 845 | void wxURI::Normalize(wxChar* s, bool bIgnoreLeads) |
dd65d8c8 RN |
846 | { |
847 | wxChar* cp = s; | |
848 | wxChar* bp = s; | |
849 | ||
850 | if(s[0] == '/') | |
851 | ++bp; | |
852 | ||
853 | while(*cp) | |
854 | { | |
// a "." that forms a whole segment
855 | if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0') | |
856 | && (bp == cp || *(cp-1) == '/')) | |
857 | { | |
858 | //. _or_ ./ - ignore | |
859 | if (*(cp+1) == '\0') | |
860 | cp += 1; | |
861 | else | |
862 | cp += 2; | |
863 | } | |
// a ".." that forms a whole segment
864 | else if (*cp == '.' && *(cp+1) == '.' && | |
865 | (*(cp+2) == '/' || *(cp+2) == '\0') | |
866 | && (bp == cp || *(cp-1) == '/')) | |
867 | { | |
868 | //.. _or_ ../ - go up the tree | |
869 | if (s != bp) | |
870 | { | |
// the cast lets UpTree() move the (non-const) write cursor back
871 | UpTree((const wxChar*)bp, (const wxChar*&)s); | |
872 | ||
873 | if (*(cp+2) == '\0') | |
874 | cp += 2; | |
875 | else | |
876 | cp += 3; | |
877 | } | |
878 | else if (!bIgnoreLeads) | |
879 | ||
880 | { | |
// nothing to go up from: keep the ".." (and the character after it, if
// any) and move both bp and the write cursor past it
881 | *bp++ = *cp++; | |
882 | *bp++ = *cp++; | |
883 | if (*cp) | |
884 | *bp++ = *cp++; | |
885 | ||
886 | s = bp; | |
887 | } | |
888 | else | |
889 | { | |
// leading ".." is dropped
890 | if (*(cp+2) == '\0') | |
891 | cp += 2; | |
892 | else | |
893 | cp += 3; | |
894 | } | |
895 | } | |
896 | else | |
897 | *s++ = *cp++; | |
898 | } | |
899 | ||
900 | *s = '\0'; | |
901 | } | |
902 | ||
903 | // --------------------------------------------------------------------------- | |
904 | // Misc. Parsing Methods | |
905 | // --------------------------------------------------------------------------- | |
906 | ||
// Scans a dotted IPv4 address starting at uri.
//
// uri is taken by reference and is advanced while scanning; it is not
// restored here when the match fails. iIPv4 counts the numeric parts seen;
// the function returns true only when that count reaches four. It returns
// false early when a part has three digits and fails the range test below
// (first digit below '2', or '2' followed by a value up to "55").
// NOTE(review): nothing here checks what follows the fourth part - callers
// are expected to verify the next character themselves; confirm.
907 | bool wxURI::ParseIPv4address(const wxChar*& uri) | |
908 | { | |
909 | //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet | |
910 | // | |
911 | //dec-octet = DIGIT ; 0-9 | |
912 | // / %x31-39 DIGIT ; 10-99 | |
913 | // / "1" 2DIGIT ; 100-199 | |
914 | // / "2" %x30-34 DIGIT ; 200-249 | |
915 | // / "25" %x30-35 ; 250-255 | |
916 | size_t iIPv4 = 0; | |
917 | if (IsDigit(*uri)) | |
918 | { | |
919 | ++iIPv4; | |
920 | ||
921 | ||
922 | //each ip part must be between 0-255 (dupe of version in for loop) | |
// the two IsDigit(*++uri) tests advance uri as a side effect; the range
// test only runs when the part has three digits
923 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
924 | //100 or less (note !) | |
925 | !( (*(uri-2) < '2') || | |
926 | //240 or less | |
927 | (*(uri-2) == '2' && | |
928 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
929 | ) | |
930 | ) | |
931 | ) | |
932 | { | |
933 | return false; | |
934 | } | |
935 | ||
// step past the last digit of the part, if uri still rests on one
936 | if(IsDigit(*uri))++uri; | |
937 | ||
938 | //compilers should unroll this loop | |
939 | for(; iIPv4 < 4; ++iIPv4) | |
940 | { | |
// each further part must be introduced by '.' followed by a digit
941 | if (*uri != '.' || !IsDigit(*++uri)) | |
942 | break; | |
943 | ||
944 | //each ip part must be between 0-255 | |
945 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
946 | //100 or less (note !) | |
947 | !( (*(uri-2) < '2') || | |
948 | //240 or less | |
949 | (*(uri-2) == '2' && | |
950 | (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) | |
951 | ) | |
952 | ) | |
953 | ) | |
954 | { | |
955 | return false; | |
956 | } | |
957 | if(IsDigit(*uri))++uri; | |
958 | } | |
959 | } | |
960 | return iIPv4 == 4; | |
961 | } | |
962 | ||
// Scans a group of one to four hex digits.
//
// uri is pre-incremented before the first test, so the character it points
// at on entry is not examined: the function returns false when the
// character after it is not a hex digit. Up to three more characters are
// then tested, advancing uri each time; uri is advanced once more only
// when all three are hex digits, otherwise it is left on the first non-hex
// character examined.
// NOTE(review): skipping the entry character looks surprising for callers
// that pass a pointer to the first digit - confirm the intended calling
// convention.
963 | bool wxURI::ParseH16(const wxChar*& uri) | |
964 | { | |
965 | // h16 = 1*4HEXDIG | |
966 | if(!IsHex(*++uri)) | |
967 | return false; | |
968 | ||
969 | if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri)) | |
970 | ++uri; | |
971 | ||
972 | return true; | |
973 | } | |
974 | ||
// Scans an IPv6 address starting at uri, following the grammar quoted
// below.
//
// uri is taken by reference and is advanced while scanning; it is not
// restored on every failure path. numPrefix counts the h16 groups read
// before a "::" (or before the scan stops), and maxPostfix is the number
// of "h16 :" groups required after it.
// NOTE(review): the last two statements both return true, so the result of
// the final ParseH16() call only affects where uri is left - confirm that
// this is intended.
975 | bool wxURI::ParseIPv6address(const wxChar*& uri) | |
976 | { | |
977 | // IPv6address = 6( h16 ":" ) ls32 | |
978 | // / "::" 5( h16 ":" ) ls32 | |
979 | // / [ h16 ] "::" 4( h16 ":" ) ls32 | |
980 | // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 | |
981 | // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 | |
982 | // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 | |
983 | // / [ *4( h16 ":" ) h16 ] "::" ls32 | |
984 | // / [ *5( h16 ":" ) h16 ] "::" h16 | |
985 | // / [ *6( h16 ":" ) h16 ] "::" | |
986 | ||
987 | size_t numPrefix = 0, | |
988 | maxPostfix; | |
989 | ||
990 | bool bEndHex = false; | |
991 | ||
// read up to six leading groups; stop when a group fails to parse
// (bEndHex set, uri stepped back by one) or is not followed by ':'
992 | for( ; numPrefix < 6; ++numPrefix) | |
993 | { | |
994 | if(!ParseH16(uri)) | |
995 | { | |
996 | --uri; | |
997 | bEndHex = true; | |
998 | break; | |
999 | } | |
1000 | ||
1001 | if(*uri != ':') | |
1002 | { | |
1003 | break; | |
1004 | } | |
1005 | } | |
1006 | ||
1007 | if(!bEndHex && !ParseH16(uri)) | |
1008 | { | |
1009 | --uri; | |
1010 | ||
1011 | if (numPrefix) | |
1012 | return false; | |
1013 | ||
1014 | if (*uri == ':') | |
1015 | { | |
1016 | if (*++uri != ':') | |
1017 | return false; | |
1018 | ||
1019 | maxPostfix = 5; | |
1020 | } | |
1021 | else | |
1022 | maxPostfix = 6; | |
1023 | } | |
1024 | else | |
1025 | { | |
// no "::" at this position: only the full six-group form followed by ls32
// is accepted
1026 | if (*uri != ':' || *(uri+1) != ':') | |
1027 | { | |
1028 | if (numPrefix != 6) | |
1029 | return false; | |
1030 | ||
// back up to just after the previous ':' before trying ls32
1031 | while (*--uri != ':') {} | |
1032 | ++uri; | |
1033 | ||
1034 | const wxChar* uristart = uri; | |
1035 | //parse ls32 | |
1036 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
1037 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) | |
1038 | return true; | |
1039 | ||
1040 | uri = uristart; | |
1041 | ||
1042 | if (ParseIPv4address(uri)) | |
1043 | return true; | |
1044 | else | |
1045 | return false; | |
1046 | } | |
1047 | else | |
1048 | { | |
// skip the "::" and work out how many groups must follow it
1049 | uri += 2; | |
1050 | ||
1051 | if (numPrefix > 3) | |
1052 | maxPostfix = 0; | |
1053 | else | |
1054 | maxPostfix = 4 - numPrefix; | |
1055 | } | |
1056 | } | |
1057 | ||
1058 | bool bAllowAltEnding = maxPostfix == 0; | |
1059 | ||
// every required trailing group must parse and be followed by ':'
1060 | for(; maxPostfix != 0; --maxPostfix) | |
1061 | { | |
1062 | if(!ParseH16(uri) || *uri != ':') | |
1063 | return false; | |
1064 | } | |
1065 | ||
1066 | if(numPrefix <= 4) | |
1067 | { | |
1068 | const wxChar* uristart = uri; | |
1069 | //parse ls32 | |
1070 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
1071 | if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) | |
1072 | return true; | |
1073 | ||
1074 | uri = uristart; | |
1075 | ||
1076 | if (ParseIPv4address(uri)) | |
1077 | return true; | |
1078 | ||
1079 | uri = uristart; | |
1080 | ||
1081 | if (!bAllowAltEnding) | |
1082 | return false; | |
1083 | } | |
1084 | ||
1085 | if(numPrefix <= 5 && ParseH16(uri)) | |
1086 | return true; | |
1087 | ||
1088 | return true; | |
1089 | } | |
1090 | ||
// Scans an IPvFuture address, following the grammar quoted below.
//
// uri is taken by reference and advanced while scanning. It is
// pre-incremented before the first test, so the 'v' is looked for in the
// character after the one uri points at on entry. On success uri is left
// on the first character that is not unreserved, a sub-delimiter or ':'.
// NOTE(review): skipping the entry character looks surprising for callers
// that pass a pointer to the 'v' itself - confirm the intended calling
// convention.
1091 | bool wxURI::ParseIPvFuture(const wxChar*& uri) | |
1092 | { | |
1093 | // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) | |
1094 | if (*++uri != 'v' || !IsHex(*++uri)) | |
1095 | return false; | |
1096 | ||
// consume the remaining hex digits of the version
1097 | while (IsHex(*++uri)) {} | |
1098 | ||
// a '.' followed by at least one address character is required
1099 | if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':')) | |
1100 | return false; | |
1101 | ||
1102 | while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {} | |
1103 | ||
1104 | return true; | |
1105 | } | |
1106 | ||
1107 | ||
1108 | // --------------------------------------------------------------------------- | |
1109 | // Misc methods - IsXXX and CharToHex | |
1110 | // --------------------------------------------------------------------------- | |
1111 | ||
1112 | int wxURI::CharToHex(const wxChar& c) | |
1113 | { | |
1114 | if ((c >= 'A') && (c <= 'Z')) return c - 'A' + 0x0A; | |
1115 | if ((c >= 'a') && (c <= 'z')) return c - 'a' + 0x0a; | |
1116 | if ((c >= '0') && (c <= '9')) return c - '0' + 0x00; | |
1117 | ||
1118 | return 0; | |
1119 | } | |
1120 | ||
1121 | //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | |
1122 | bool wxURI::IsUnreserved (const wxChar& c) | |
1123 | { return IsAlpha(c) || IsDigit(c) || | |
1124 | c == '-' || | |
1125 | c == '.' || | |
1126 | c == '_' || | |
1127 | c == '~' //tilde | |
1128 | ; | |
1129 | } | |
1130 | ||
1131 | bool wxURI::IsReserved (const wxChar& c) | |
1132 | { | |
1133 | return IsGenDelim(c) || IsSubDelim(c); | |
1134 | } | |
1135 | ||
1136 | //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" | |
1137 | bool wxURI::IsGenDelim (const wxChar& c) | |
1138 | { | |
1139 | return c == ':' || | |
1140 | c == '/' || | |
1141 | c == '?' || | |
1142 | c == '#' || | |
1143 | c == '[' || | |
1144 | c == ']' || | |
1145 | c == '@'; | |
1146 | } | |
1147 | ||
1148 | //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")" | |
1149 | //! / "*" / "+" / "," / ";" / "=" | |
1150 | bool wxURI::IsSubDelim (const wxChar& c) | |
1151 | { | |
1152 | return c == '!' || | |
1153 | c == '$' || | |
1154 | c == '&' || | |
1155 | c == '\'' || | |
1156 | c == '(' || | |
1157 | c == ')' || | |
1158 | c == '*' || | |
1159 | c == '+' || | |
1160 | c == ',' || | |
1161 | c == ';' || | |
1162 | c == '=' | |
1163 | ; | |
1164 | } | |
1165 | ||
1166 | bool wxURI::IsHex(const wxChar& c) | |
1167 | { return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } | |
1168 | ||
1169 | bool wxURI::IsAlpha(const wxChar& c) | |
1170 | { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } | |
1171 | ||
1172 | bool wxURI::IsDigit(const wxChar& c) | |
1173 | { return c >= '0' && c <= '9'; } | |
1174 | ||
1175 | ||
1176 | // --------------------------------------------------------------------------- | |
1177 | // | |
1178 | // wxURL Compatibility | |
1179 | // | |
1180 | // TODO: Use wxURI instead here... | |
1181 | // --------------------------------------------------------------------------- | |
1182 | ||
1183 | #if wxUSE_URL | |
1184 | ||
1185 | #include "wx/url.h" | |
1186 | ||
1187 | wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims) | |
1188 | { | |
1189 | wxString out_str; | |
1190 | wxString hexa_code; | |
1191 | size_t i; | |
1192 | ||
1193 | for (i = 0; i < uri.Len(); i++) | |
1194 | { | |
1195 | wxChar c = uri.GetChar(i); | |
1196 | ||
1197 | if (c == wxT(' ')) | |
1198 | { | |
1199 | // GRG, Apr/2000: changed to "%20" instead of '+' | |
1200 | ||
1201 | out_str += wxT("%20"); | |
1202 | } | |
1203 | else | |
1204 | { | |
1205 | // GRG, Apr/2000: modified according to the URI definition (RFC 2396) | |
1206 | // | |
1207 | // - Alphanumeric characters are never escaped | |
1208 | // - Unreserved marks are never escaped | |
1209 | // - Delimiters must be escaped if they appear within a component | |
1210 | // but not if they are used to separate components. Here we have | |
1211 | // no clear way to distinguish between these two cases, so they | |
1212 | // are escaped unless they are passed in the 'delims' parameter | |
1213 | // (allowed delimiters). | |
1214 | ||
1215 | static const wxChar marks[] = wxT("-_.!~*()'"); | |
1216 | ||
1217 | if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) ) | |
1218 | { | |
1219 | hexa_code.Printf(wxT("%%%02X"), c); | |
1220 | out_str += hexa_code; | |
1221 | } | |
1222 | else | |
1223 | { | |
1224 | out_str += c; | |
1225 | } | |
1226 | } | |
1227 | } | |
1228 | ||
1229 | return out_str; | |
1230 | } | |
1231 | ||
1232 | wxString wxURL::ConvertFromURI(const wxString& uri) | |
1233 | { | |
1234 | wxString new_uri; | |
1235 | ||
1236 | size_t i = 0; | |
1237 | while (i < uri.Len()) | |
1238 | { | |
1239 | int code; | |
1240 | if (uri[i] == wxT('%')) | |
1241 | { | |
1242 | i++; | |
1243 | if (uri[i] >= wxT('A') && uri[i] <= wxT('F')) | |
1244 | code = (uri[i] - wxT('A') + 10) * 16; | |
1245 | else if (uri[i] >= wxT('a') && uri[i] <= wxT('f')) | |
1246 | code = (uri[i] - wxT('a') + 10) * 16; | |
1247 | else | |
1248 | code = (uri[i] - wxT('0')) * 16; | |
1249 | ||
1250 | i++; | |
1251 | if (uri[i] >= wxT('A') && uri[i] <= wxT('F')) | |
1252 | code += (uri[i] - wxT('A')) + 10; | |
1253 | else if (uri[i] >= wxT('a') && uri[i] <= wxT('f')) | |
1254 | code += (uri[i] - wxT('a')) + 10; | |
1255 | else | |
1256 | code += (uri[i] - wxT('0')); | |
1257 | ||
1258 | i++; | |
1259 | new_uri += (wxChar)code; | |
1260 | continue; | |
1261 | } | |
1262 | new_uri += uri[i]; | |
1263 | i++; | |
1264 | } | |
1265 | return new_uri; | |
1266 | } | |
1267 | ||
1268 | #endif //wxUSE_URL | |
1269 | ||
1270 | //end of uri.cpp | |
1271 | ||
1272 | ||
1273 |