]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: uri.cpp | |
3 | // Purpose: Implementation of a uri parser | |
4 | // Author: Ryan Norton | |
5 | // Created: 10/26/04 | |
6 | // RCS-ID: $Id$ | |
7 | // Copyright: (c) 2004 Ryan Norton | |
8 | // Licence: wxWindows | |
9 | ///////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // =========================================================================== | |
12 | // declarations | |
13 | // =========================================================================== | |
14 | ||
15 | // --------------------------------------------------------------------------- | |
16 | // headers | |
17 | // --------------------------------------------------------------------------- | |
18 | ||
19 | #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) | |
20 | #pragma implementation "uri.h" | |
21 | #endif | |
22 | ||
23 | // For compilers that support precompilation, includes "wx.h". | |
24 | #include "wx/wxprec.h" | |
25 | ||
26 | #ifdef __BORLANDC__ | |
27 | #pragma hdrstop | |
28 | #endif | |
29 | ||
30 | #include "wx/uri.h" | |
31 | ||
32 | // --------------------------------------------------------------------------- | |
33 | // definitions | |
34 | // --------------------------------------------------------------------------- | |
35 | ||
36 | IMPLEMENT_CLASS(wxURI, wxObject); | |
37 | ||
38 | // =========================================================================== | |
39 | // implementation | |
40 | // =========================================================================== | |
41 | ||
42 | // --------------------------------------------------------------------------- | |
43 | // utilities | |
44 | // --------------------------------------------------------------------------- | |
45 | ||
46 | // --------------------------------------------------------------------------- | |
47 | // | |
48 | // wxURI | |
49 | // | |
50 | // --------------------------------------------------------------------------- | |
51 | ||
52 | // --------------------------------------------------------------------------- | |
53 | // Constructors | |
54 | // --------------------------------------------------------------------------- | |
55 | ||
56 | wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0) | |
57 | { | |
58 | } | |
59 | ||
60 | wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0) | |
61 | { | |
62 | Create(uri); | |
63 | } | |
64 | ||
65 | wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0) | |
66 | { | |
67 | Assign(uri); | |
68 | } | |
69 | ||
70 | // --------------------------------------------------------------------------- | |
71 | // Destructor and cleanup | |
72 | // --------------------------------------------------------------------------- | |
73 | ||
74 | wxURI::~wxURI() | |
75 | { | |
76 | Clear(); | |
77 | } | |
78 | ||
79 | void wxURI::Clear() | |
80 | { | |
81 | m_scheme = m_user = m_server = m_port = m_path = | |
82 | m_query = m_fragment = wxT(""); | |
83 | ||
84 | m_hostType = wxURI_REGNAME; | |
85 | ||
86 | m_fields = 0; | |
87 | } | |
88 | ||
89 | // --------------------------------------------------------------------------- | |
90 | // Create | |
91 | // | |
92 | // This creates the URI - all we do here is call the main parsing method | |
93 | // --------------------------------------------------------------------------- | |
94 | ||
95 | const wxChar* wxURI::Create(const wxString& uri) | |
96 | { | |
97 | if (m_fields) | |
98 | Clear(); | |
99 | ||
100 | return Parse(uri); | |
101 | } | |
102 | ||
103 | // --------------------------------------------------------------------------- | |
104 | // Escape Methods | |
105 | // | |
106 | // TranslateEscape unencodes a 3 character URL escape sequence | |
107 | // | |
108 | // Escape encodes an invalid URI character into a 3 character sequence | |
109 | // | |
110 | // IsEscape determines if the input string contains an escape sequence, | |
111 | // if it does, then it moves the input string past the escape sequence | |
112 | // | |
113 | // Unescape unencodes all 3 character URL escape sequences in a wxString | |
114 | // --------------------------------------------------------------------------- | |
115 | ||
116 | wxChar wxURI::TranslateEscape(const wxChar* s) | |
117 | { | |
118 | wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!")); | |
119 | ||
120 | //<<4 == 16 | |
121 | return (wxChar)( CharToHex(*s) << 4 ) | CharToHex(*++s); | |
122 | } | |
123 | ||
124 | wxString wxURI::Unescape(const wxString& uri) | |
125 | { | |
126 | wxString new_uri; | |
127 | ||
128 | for(size_t i = 0; i < uri.length(); ++i) | |
129 | { | |
130 | if (uri[i] == wxT('%')) | |
131 | { | |
132 | new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) ); | |
133 | i += 2; | |
134 | } | |
135 | else | |
136 | new_uri += uri[i]; | |
137 | } | |
138 | ||
139 | return new_uri; | |
140 | } | |
141 | ||
142 | void wxURI::Escape(wxString& s, const wxChar& c) | |
143 | { | |
144 | const wxChar* hdig = wxT("0123456789abcdef"); | |
145 | s += wxT('%'); | |
146 | s += hdig[(c >> 4) & 15]; | |
147 | s += hdig[c & 15]; | |
148 | } | |
149 | ||
150 | bool wxURI::IsEscape(const wxChar*& uri) | |
151 | { | |
152 | // pct-encoded = "%" HEXDIG HEXDIG | |
153 | if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2))) | |
154 | { | |
155 | uri += 3; | |
156 | return true; | |
157 | } | |
158 | else | |
159 | return false; | |
160 | } | |
161 | ||
162 | // --------------------------------------------------------------------------- | |
163 | // BuildURI | |
164 | // | |
165 | // BuildURI() builds the entire URI into a useable | |
166 | // representation, including proper identification characters such as slashes | |
167 | // | |
168 | // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes | |
169 | // the components that accept escape sequences | |
170 | // --------------------------------------------------------------------------- | |
171 | ||
172 | wxString wxURI::BuildURI() const | |
173 | { | |
174 | wxString ret; | |
175 | ||
176 | if (HasScheme()) | |
177 | ret = ret + m_scheme + wxT(":"); | |
178 | ||
179 | if (HasServer()) | |
180 | { | |
181 | ret += wxT("//"); | |
182 | ||
183 | if (HasUser()) | |
184 | ret = ret + m_user + wxT("@"); | |
185 | ||
186 | ret += m_server; | |
187 | ||
188 | if (HasPort()) | |
189 | ret = ret + wxT(":") + m_port; | |
190 | } | |
191 | ||
192 | ret += m_path; | |
193 | ||
194 | if (HasQuery()) | |
195 | ret = ret + wxT("?") + m_query; | |
196 | ||
197 | if (HasFragment()) | |
198 | ret = ret + wxT("#") + m_fragment; | |
199 | ||
200 | return ret; | |
201 | } | |
202 | ||
203 | wxString wxURI::BuildUnescapedURI() const | |
204 | { | |
205 | wxString ret; | |
206 | ||
207 | if (HasScheme()) | |
208 | ret = ret + m_scheme + wxT(":"); | |
209 | ||
210 | if (HasServer()) | |
211 | { | |
212 | ret += wxT("//"); | |
213 | ||
214 | if (HasUser()) | |
215 | ret = ret + wxURI::Unescape(m_user) + wxT("@"); | |
216 | ||
217 | if (m_hostType == wxURI_REGNAME) | |
218 | ret += wxURI::Unescape(m_server); | |
219 | else | |
220 | ret += m_server; | |
221 | ||
222 | if (HasPort()) | |
223 | ret = ret + wxT(":") + m_port; | |
224 | } | |
225 | ||
226 | ret += wxURI::Unescape(m_path); | |
227 | ||
228 | if (HasQuery()) | |
229 | ret = ret + wxT("?") + wxURI::Unescape(m_query); | |
230 | ||
231 | if (HasFragment()) | |
232 | ret = ret + wxT("#") + wxURI::Unescape(m_fragment); | |
233 | ||
234 | return ret; | |
235 | } | |
236 | ||
237 | // --------------------------------------------------------------------------- | |
238 | // Assignment | |
239 | // --------------------------------------------------------------------------- | |
240 | ||
241 | wxURI& wxURI::Assign(const wxURI& uri) | |
242 | { | |
243 | //assign fields | |
244 | m_fields = uri.m_fields; | |
245 | ||
246 | //ref over components | |
247 | m_scheme = uri.m_scheme; | |
248 | m_user = uri.m_user; | |
249 | m_server = uri.m_server; | |
250 | m_hostType = uri.m_hostType; | |
251 | m_port = uri.m_port; | |
252 | m_path = uri.m_path; | |
253 | m_query = uri.m_query; | |
254 | m_fragment = uri.m_fragment; | |
255 | ||
256 | return *this; | |
257 | } | |
258 | ||
259 | wxURI& wxURI::operator = (const wxURI& uri) | |
260 | { | |
261 | return Assign(uri); | |
262 | } | |
263 | ||
264 | wxURI& wxURI::operator = (const wxString& string) | |
265 | { | |
266 | Create(string); | |
267 | return *this; | |
268 | } | |
269 | ||
270 | // --------------------------------------------------------------------------- | |
271 | // Comparison | |
272 | // --------------------------------------------------------------------------- | |
273 | ||
274 | bool wxURI::operator == (const wxURI& uri) const | |
275 | { | |
276 | if (HasScheme()) | |
277 | { | |
278 | if(m_scheme != uri.m_scheme) | |
279 | return false; | |
280 | } | |
281 | else if (uri.HasScheme()) | |
282 | return false; | |
283 | ||
284 | ||
285 | if (HasServer()) | |
286 | { | |
287 | if (HasUser()) | |
288 | { | |
289 | if (m_user != uri.m_user) | |
290 | return false; | |
291 | } | |
292 | else if (uri.HasUser()) | |
293 | return false; | |
294 | ||
295 | if (m_server != uri.m_server || | |
296 | m_hostType != uri.m_hostType) | |
297 | return false; | |
298 | ||
299 | if (HasPort()) | |
300 | { | |
301 | if(m_port != uri.m_port) | |
302 | return false; | |
303 | } | |
304 | else if (uri.HasPort()) | |
305 | return false; | |
306 | } | |
307 | else if (uri.HasServer()) | |
308 | return false; | |
309 | ||
310 | ||
311 | if (HasPath()) | |
312 | { | |
313 | if(m_path != uri.m_path) | |
314 | return false; | |
315 | } | |
316 | else if (uri.HasPath()) | |
317 | return false; | |
318 | ||
319 | if (HasQuery()) | |
320 | { | |
321 | if (m_query != uri.m_query) | |
322 | return false; | |
323 | } | |
324 | else if (uri.HasQuery()) | |
325 | return false; | |
326 | ||
327 | if (HasFragment()) | |
328 | { | |
329 | if (m_fragment != uri.m_fragment) | |
330 | return false; | |
331 | } | |
332 | else if (uri.HasFragment()) | |
333 | return false; | |
334 | ||
335 | return true; | |
336 | } | |
337 | ||
338 | // --------------------------------------------------------------------------- | |
339 | // IsReference | |
340 | // | |
341 | // if there is no authority or scheme, it is a reference | |
342 | // --------------------------------------------------------------------------- | |
343 | ||
344 | bool wxURI::IsReference() const | |
345 | { return !HasScheme() || !HasServer(); } | |
346 | ||
347 | // --------------------------------------------------------------------------- | |
348 | // Parse | |
349 | // | |
350 | // Master URI parsing method. Just calls the individual parsing methods | |
351 | // | |
352 | // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] | |
353 | // URI-reference = URI / relative-ref | |
354 | // --------------------------------------------------------------------------- | |
355 | ||
356 | const wxChar* wxURI::Parse(const wxChar* uri) | |
357 | { | |
358 | uri = ParseScheme(uri); | |
359 | uri = ParseAuthority(uri); | |
360 | uri = ParsePath(uri); | |
361 | uri = ParseQuery(uri); | |
362 | return ParseFragment(uri); | |
363 | } | |
364 | ||
365 | // --------------------------------------------------------------------------- | |
366 | // ParseXXX | |
367 | // | |
368 | // Individual parsers for each URI component | |
369 | // --------------------------------------------------------------------------- | |
370 | ||
371 | const wxChar* wxURI::ParseScheme(const wxChar* uri) | |
372 | { | |
373 | wxASSERT(uri != NULL); | |
374 | ||
375 | //copy of the uri - used for figuring out | |
376 | //length of each component | |
377 | const wxChar* uricopy = uri; | |
378 | ||
379 | //Does the uri have a scheme (first character alpha)? | |
380 | if (IsAlpha(*uri)) | |
381 | { | |
382 | m_scheme += *uri++; | |
383 | ||
384 | //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) | |
385 | while (IsAlpha(*uri) || IsDigit(*uri) || | |
386 | *uri == wxT('+') || | |
387 | *uri == wxT('-') || | |
388 | *uri == wxT('.')) | |
389 | { | |
390 | m_scheme += *uri++; | |
391 | } | |
392 | ||
393 | //valid scheme? | |
394 | if (*uri == wxT(':')) | |
395 | { | |
396 | //mark the scheme as valid | |
397 | m_fields |= wxURI_SCHEME; | |
398 | ||
399 | //move reference point up to input buffer | |
400 | uricopy = ++uri; | |
401 | } | |
402 | else | |
403 | //relative uri with relative path reference | |
404 | m_scheme = wxT(""); | |
405 | } | |
406 | // else | |
407 | //relative uri with _possible_ relative path reference | |
408 | ||
409 | return uricopy; | |
410 | } | |
411 | ||
412 | const wxChar* wxURI::ParseAuthority(const wxChar* uri) | |
413 | { | |
414 | // authority = [ userinfo "@" ] host [ ":" port ] | |
415 | if (*uri == wxT('/') && *(uri+1) == wxT('/')) | |
416 | { | |
417 | uri += 2; | |
418 | ||
419 | uri = ParseUser(uri); | |
420 | uri = ParseServer(uri); | |
421 | return ParsePort(uri); | |
422 | } | |
423 | ||
424 | return uri; | |
425 | } | |
426 | ||
427 | const wxChar* wxURI::ParseUser(const wxChar* uri) | |
428 | { | |
429 | wxASSERT(uri != NULL); | |
430 | ||
431 | //copy of the uri - used for figuring out | |
432 | //length of each component | |
433 | const wxChar* uricopy = uri; | |
434 | ||
435 | // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) | |
436 | while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?')) | |
437 | { | |
438 | if(IsUnreserved(*uri) || IsEscape(uri) || | |
439 | IsSubDelim(*uri) || *uri == wxT(':')) | |
440 | m_user += *uri++; | |
441 | else | |
442 | Escape(m_user, *uri++); | |
443 | } | |
444 | ||
445 | if(*uri == wxT('@')) | |
446 | { | |
447 | //valid userinfo | |
448 | m_fields |= wxURI_USER; | |
449 | ||
450 | uricopy = ++uri; | |
451 | } | |
452 | else | |
453 | m_user = wxT(""); | |
454 | ||
455 | return uricopy; | |
456 | } | |
457 | ||
458 | const wxChar* wxURI::ParseServer(const wxChar* uri) | |
459 | { | |
460 | wxASSERT(uri != NULL); | |
461 | ||
462 | //copy of the uri - used for figuring out | |
463 | //length of each component | |
464 | const wxChar* uricopy = uri; | |
465 | ||
466 | // host = IP-literal / IPv4address / reg-name | |
467 | // IP-literal = "[" ( IPv6address / IPvFuture ) "]" | |
468 | if (*uri == wxT('[')) | |
469 | { | |
470 | ++uri; //some compilers don't support *&ing a ++* | |
471 | if (ParseIPv6address(uri) && *uri == wxT(']')) | |
472 | { | |
473 | ++uri; | |
474 | m_hostType = wxURI_IPV6ADDRESS; | |
475 | ||
476 | wxStringBufferLength theBuffer(m_server, uri - uricopy); | |
477 | wxTmemcpy(theBuffer, uricopy, uri-uricopy); | |
478 | theBuffer.SetLength(uri-uricopy); | |
479 | } | |
480 | else | |
481 | { | |
482 | uri = uricopy; | |
483 | ||
484 | ++uri; //some compilers don't support *&ing a ++* | |
485 | if (ParseIPvFuture(uri) && *uri == wxT(']')) | |
486 | { | |
487 | ++uri; | |
488 | m_hostType = wxURI_IPVFUTURE; | |
489 | ||
490 | wxStringBufferLength theBuffer(m_server, uri - uricopy); | |
491 | wxTmemcpy(theBuffer, uricopy, uri-uricopy); | |
492 | theBuffer.SetLength(uri-uricopy); | |
493 | } | |
494 | else | |
495 | uri = uricopy; | |
496 | } | |
497 | } | |
498 | else | |
499 | { | |
500 | if (ParseIPv4address(uri)) | |
501 | { | |
502 | m_hostType = wxURI_IPV4ADDRESS; | |
503 | ||
504 | wxStringBufferLength theBuffer(m_server, uri - uricopy); | |
505 | wxTmemcpy(theBuffer, uricopy, uri-uricopy); | |
506 | theBuffer.SetLength(uri-uricopy); | |
507 | } | |
508 | else | |
509 | uri = uricopy; | |
510 | } | |
511 | ||
512 | if(m_hostType == wxURI_REGNAME) | |
513 | { | |
514 | uri = uricopy; | |
515 | // reg-name = *( unreserved / pct-encoded / sub-delims ) | |
516 | while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?')) | |
517 | { | |
518 | if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri)) | |
519 | m_server += *uri++; | |
520 | else | |
521 | Escape(m_server, *uri++); | |
522 | } | |
523 | } | |
524 | ||
525 | //mark the server as valid | |
526 | m_fields |= wxURI_SERVER; | |
527 | ||
528 | return uri; | |
529 | } | |
530 | ||
531 | ||
532 | const wxChar* wxURI::ParsePort(const wxChar* uri) | |
533 | { | |
534 | wxASSERT(uri != NULL); | |
535 | ||
536 | // port = *DIGIT | |
537 | if(*uri == wxT(':')) | |
538 | { | |
539 | ++uri; | |
540 | while(IsDigit(*uri)) | |
541 | { | |
542 | m_port += *uri++; | |
543 | } | |
544 | ||
545 | //mark the port as valid | |
546 | m_fields |= wxURI_PORT; | |
547 | } | |
548 | ||
549 | return uri; | |
550 | } | |
551 | ||
552 | const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize) | |
553 | { | |
554 | wxASSERT(uri != NULL); | |
555 | ||
556 | //copy of the uri - used for figuring out | |
557 | //length of each component | |
558 | const wxChar* uricopy = uri; | |
559 | ||
560 | /// hier-part = "//" authority path-abempty | |
561 | /// / path-absolute | |
562 | /// / path-rootless | |
563 | /// / path-empty | |
564 | /// | |
565 | /// relative-part = "//" authority path-abempty | |
566 | /// / path-absolute | |
567 | /// / path-noscheme | |
568 | /// / path-empty | |
569 | /// | |
570 | /// path-abempty = *( "/" segment ) | |
571 | /// path-absolute = "/" [ segment-nz *( "/" segment ) ] | |
572 | /// path-noscheme = segment-nz-nc *( "/" segment ) | |
573 | /// path-rootless = segment-nz *( "/" segment ) | |
574 | /// path-empty = 0<pchar> | |
575 | /// | |
576 | /// segment = *pchar | |
577 | /// segment-nz = 1*pchar | |
578 | /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) | |
579 | /// ; non-zero-length segment without any colon ":" | |
580 | /// | |
581 | /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" | |
582 | if (*uri == wxT('/')) | |
583 | { | |
584 | m_path += *uri++; | |
585 | ||
586 | while(*uri && *uri != wxT('#') && *uri != wxT('?')) | |
587 | { | |
588 | if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
589 | *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/')) | |
590 | m_path += *uri++; | |
591 | else | |
592 | Escape(m_path, *uri++); | |
593 | } | |
594 | ||
595 | if (bNormalize) | |
596 | { | |
597 | wxStringBufferLength theBuffer(m_path, m_path.length() + 1); | |
598 | #if wxUSE_STL | |
599 | wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1); | |
600 | #endif | |
601 | Normalize(theBuffer, true); | |
602 | theBuffer.SetLength(wxStrlen(theBuffer)); | |
603 | } | |
604 | //mark the path as valid | |
605 | m_fields |= wxURI_PATH; | |
606 | } | |
607 | else if(*uri) //Relative path | |
608 | { | |
609 | if (bReference) | |
610 | { | |
611 | //no colon allowed | |
612 | while(*uri && *uri != wxT('#') && *uri != wxT('?')) | |
613 | { | |
614 | if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
615 | *uri == wxT('@') || *uri == wxT('/')) | |
616 | m_path += *uri++; | |
617 | else | |
618 | Escape(m_path, *uri++); | |
619 | } | |
620 | } | |
621 | else | |
622 | { | |
623 | while(*uri && *uri != wxT('#') && *uri != wxT('?')) | |
624 | { | |
625 | if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
626 | *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/')) | |
627 | m_path += *uri++; | |
628 | else | |
629 | Escape(m_path, *uri++); | |
630 | } | |
631 | } | |
632 | ||
633 | if (uri != uricopy) | |
634 | { | |
635 | if (bNormalize) | |
636 | { | |
637 | wxStringBufferLength theBuffer(m_path, m_path.length() + 1); | |
638 | #if wxUSE_STL | |
639 | wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1); | |
640 | #endif | |
641 | Normalize(theBuffer); | |
642 | theBuffer.SetLength(wxStrlen(theBuffer)); | |
643 | } | |
644 | ||
645 | //mark the path as valid | |
646 | m_fields |= wxURI_PATH; | |
647 | } | |
648 | } | |
649 | ||
650 | return uri; | |
651 | } | |
652 | ||
653 | ||
654 | const wxChar* wxURI::ParseQuery(const wxChar* uri) | |
655 | { | |
656 | wxASSERT(uri != NULL); | |
657 | ||
658 | // query = *( pchar / "/" / "?" ) | |
659 | if (*uri == wxT('?')) | |
660 | { | |
661 | ++uri; | |
662 | while(*uri && *uri != wxT('#')) | |
663 | { | |
664 | if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
665 | *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?')) | |
666 | m_query += *uri++; | |
667 | else | |
668 | Escape(m_query, *uri++); | |
669 | } | |
670 | ||
671 | //mark the server as valid | |
672 | m_fields |= wxURI_QUERY; | |
673 | } | |
674 | ||
675 | return uri; | |
676 | } | |
677 | ||
678 | ||
679 | const wxChar* wxURI::ParseFragment(const wxChar* uri) | |
680 | { | |
681 | wxASSERT(uri != NULL); | |
682 | ||
683 | // fragment = *( pchar / "/" / "?" ) | |
684 | if (*uri == wxT('#')) | |
685 | { | |
686 | ++uri; | |
687 | while(*uri) | |
688 | { | |
689 | if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || | |
690 | *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?')) | |
691 | m_fragment += *uri++; | |
692 | else | |
693 | Escape(m_fragment, *uri++); | |
694 | } | |
695 | ||
696 | //mark the server as valid | |
697 | m_fields |= wxURI_FRAGMENT; | |
698 | } | |
699 | ||
700 | return uri; | |
701 | } | |
702 | ||
703 | // --------------------------------------------------------------------------- | |
704 | // Resolve | |
705 | // | |
706 | // Builds missing components of this uri from a base uri | |
707 | // | |
708 | // A version of the algorithm outlined in the RFC is used here | |
709 | // (it is shown in comments) | |
710 | // | |
711 | // Note that an empty URI inherits all components | |
712 | // --------------------------------------------------------------------------- | |
713 | ||
714 | void wxURI::Resolve(const wxURI& base, int flags) | |
715 | { | |
716 | wxASSERT_MSG(!base.IsReference(), | |
717 | wxT("wxURI to inherit from must not be a reference!")); | |
718 | ||
719 | // If we arn't being strict, enable the older (pre-RFC2396) | |
720 | // loophole that allows this uri to inherit other | |
721 | // properties from the base uri - even if the scheme | |
722 | // is defined | |
723 | if ( !(flags & wxURI_STRICT) && | |
724 | HasScheme() && base.HasScheme() && | |
725 | m_scheme == base.m_scheme ) | |
726 | { | |
727 | m_fields -= wxURI_SCHEME; | |
728 | } | |
729 | ||
730 | ||
731 | // Do nothing if this is an absolute wxURI | |
732 | // if defined(R.scheme) then | |
733 | // T.scheme = R.scheme; | |
734 | // T.authority = R.authority; | |
735 | // T.path = remove_dot_segments(R.path); | |
736 | // T.query = R.query; | |
737 | if (HasScheme()) | |
738 | { | |
739 | return; | |
740 | } | |
741 | ||
742 | //No scheme - inherit | |
743 | m_scheme = base.m_scheme; | |
744 | m_fields |= wxURI_SCHEME; | |
745 | ||
746 | // All we need to do for relative URIs with an | |
747 | // authority component is just inherit the scheme | |
748 | // if defined(R.authority) then | |
749 | // T.authority = R.authority; | |
750 | // T.path = remove_dot_segments(R.path); | |
751 | // T.query = R.query; | |
752 | if (HasServer()) | |
753 | { | |
754 | return; | |
755 | } | |
756 | ||
757 | //No authority - inherit | |
758 | if (base.HasUser()) | |
759 | { | |
760 | m_user = base.m_user; | |
761 | m_fields |= wxURI_USER; | |
762 | } | |
763 | ||
764 | m_server = base.m_server; | |
765 | m_hostType = base.m_hostType; | |
766 | m_fields |= wxURI_SERVER; | |
767 | ||
768 | if (base.HasPort()) | |
769 | { | |
770 | m_port = base.m_port; | |
771 | m_fields |= wxURI_PORT; | |
772 | } | |
773 | ||
774 | ||
775 | // Simple path inheritance from base | |
776 | if (!HasPath()) | |
777 | { | |
778 | // T.path = Base.path; | |
779 | m_path = base.m_path; | |
780 | m_fields |= wxURI_PATH; | |
781 | ||
782 | ||
783 | // if defined(R.query) then | |
784 | // T.query = R.query; | |
785 | // else | |
786 | // T.query = Base.query; | |
787 | // endif; | |
788 | if (!HasQuery()) | |
789 | { | |
790 | m_query = base.m_query; | |
791 | m_fields |= wxURI_QUERY; | |
792 | } | |
793 | } | |
794 | else | |
795 | { | |
796 | // if (R.path starts-with "/") then | |
797 | // T.path = remove_dot_segments(R.path); | |
798 | // else | |
799 | // T.path = merge(Base.path, R.path); | |
800 | // T.path = remove_dot_segments(T.path); | |
801 | // endif; | |
802 | // T.query = R.query; | |
803 | if (m_path[0u] != wxT('/')) | |
804 | { | |
805 | //Merge paths | |
806 | const wxChar* op = m_path.c_str(); | |
807 | const wxChar* bp = base.m_path.c_str() + base.m_path.Length(); | |
808 | ||
809 | //not a ending directory? move up | |
810 | if (base.m_path[0] && *(bp-1) != wxT('/')) | |
811 | UpTree(base.m_path, bp); | |
812 | ||
813 | //normalize directories | |
814 | while(*op == wxT('.') && *(op+1) == wxT('.') && | |
815 | (*(op+2) == '\0' || *(op+2) == wxT('/')) ) | |
816 | { | |
817 | UpTree(base.m_path, bp); | |
818 | ||
819 | if (*(op+2) == '\0') | |
820 | op += 2; | |
821 | else | |
822 | op += 3; | |
823 | } | |
824 | ||
825 | m_path = base.m_path.substr(0, bp - base.m_path.c_str()) + | |
826 | m_path.substr((op - m_path.c_str()), m_path.Length()); | |
827 | } | |
828 | } | |
829 | ||
830 | //T.fragment = R.fragment; | |
831 | } | |
832 | ||
833 | // --------------------------------------------------------------------------- | |
834 | // UpTree | |
835 | // | |
836 | // Moves a URI path up a directory | |
837 | // --------------------------------------------------------------------------- | |
838 | ||
839 | //static | |
840 | void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri) | |
841 | { | |
842 | if (uri != uristart && *(uri-1) == wxT('/')) | |
843 | { | |
844 | uri -= 2; | |
845 | } | |
846 | ||
847 | for(;uri != uristart; --uri) | |
848 | { | |
849 | if (*uri == wxT('/')) | |
850 | { | |
851 | ++uri; | |
852 | break; | |
853 | } | |
854 | } | |
855 | ||
856 | //!!!TODO:HACK!!!// | |
857 | if (uri == uristart && *uri == wxT('/')) | |
858 | ++uri; | |
859 | //!!!// | |
860 | } | |
861 | ||
862 | // --------------------------------------------------------------------------- | |
863 | // Normalize | |
864 | // | |
865 | // Normalizes directories in-place | |
866 | // | |
867 | // I.E. ./ and . are ignored | |
868 | // | |
869 | // ../ and .. are removed if a directory is before it, along | |
870 | // with that directory (leading .. and ../ are kept) | |
871 | // --------------------------------------------------------------------------- | |
872 | ||
873 | //static | |
874 | void wxURI::Normalize(wxChar* s, bool bIgnoreLeads) | |
875 | { | |
876 | wxChar* cp = s; | |
877 | wxChar* bp = s; | |
878 | ||
879 | if(s[0] == wxT('/')) | |
880 | ++bp; | |
881 | ||
882 | while(*cp) | |
883 | { | |
884 | if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0') | |
885 | && (bp == cp || *(cp-1) == wxT('/'))) | |
886 | { | |
887 | //. _or_ ./ - ignore | |
888 | if (*(cp+1) == '\0') | |
889 | cp += 1; | |
890 | else | |
891 | cp += 2; | |
892 | } | |
893 | else if (*cp == wxT('.') && *(cp+1) == wxT('.') && | |
894 | (*(cp+2) == wxT('/') || *(cp+2) == '\0') | |
895 | && (bp == cp || *(cp-1) == wxT('/'))) | |
896 | { | |
897 | //.. _or_ ../ - go up the tree | |
898 | if (s != bp) | |
899 | { | |
900 | UpTree((const wxChar*)bp, (const wxChar*&)s); | |
901 | ||
902 | if (*(cp+2) == '\0') | |
903 | cp += 2; | |
904 | else | |
905 | cp += 3; | |
906 | } | |
907 | else if (!bIgnoreLeads) | |
908 | ||
909 | { | |
910 | *bp++ = *cp++; | |
911 | *bp++ = *cp++; | |
912 | if (*cp) | |
913 | *bp++ = *cp++; | |
914 | ||
915 | s = bp; | |
916 | } | |
917 | else | |
918 | { | |
919 | if (*(cp+2) == '\0') | |
920 | cp += 2; | |
921 | else | |
922 | cp += 3; | |
923 | } | |
924 | } | |
925 | else | |
926 | *s++ = *cp++; | |
927 | } | |
928 | ||
929 | *s = '\0'; | |
930 | } | |
931 | ||
932 | // --------------------------------------------------------------------------- | |
933 | // ParseH16 | |
934 | // | |
935 | // Parses 1 to 4 hex values. Returns true if the first character of the input | |
936 | // string is a valid hex character. It is the caller's responsibility to move | |
937 | // the input string back to its original position on failure. | |
938 | // --------------------------------------------------------------------------- | |
939 | ||
940 | bool wxURI::ParseH16(const wxChar*& uri) | |
941 | { | |
942 | // h16 = 1*4HEXDIG | |
943 | if(!IsHex(*++uri)) | |
944 | return false; | |
945 | ||
946 | if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri)) | |
947 | ++uri; | |
948 | ||
949 | return true; | |
950 | } | |
951 | ||
952 | // --------------------------------------------------------------------------- | |
953 | // ParseIPXXX | |
954 | // | |
955 | // Parses a certain version of an IP address and moves the input string past | |
956 | // it. Returns true if the input string contains the proper version of an ip | |
957 | // address. It is the caller's responsibility to move the input string back | |
958 | // to its original position on failure. | |
959 | // --------------------------------------------------------------------------- | |
960 | ||
961 | bool wxURI::ParseIPv4address(const wxChar*& uri) | |
962 | { | |
963 | //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet | |
964 | // | |
965 | //dec-octet = DIGIT ; 0-9 | |
966 | // / %x31-39 DIGIT ; 10-99 | |
967 | // / "1" 2DIGIT ; 100-199 | |
968 | // / "2" %x30-34 DIGIT ; 200-249 | |
969 | // / "25" %x30-35 ; 250-255 | |
970 | size_t iIPv4 = 0; | |
971 | if (IsDigit(*uri)) | |
972 | { | |
973 | ++iIPv4; | |
974 | ||
975 | ||
976 | //each ip part must be between 0-255 (dupe of version in for loop) | |
977 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
978 | //100 or less (note !) | |
979 | !( (*(uri-2) < wxT('2')) || | |
980 | //240 or less | |
981 | (*(uri-2) == wxT('2') && | |
982 | (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5'))) | |
983 | ) | |
984 | ) | |
985 | ) | |
986 | { | |
987 | return false; | |
988 | } | |
989 | ||
990 | if(IsDigit(*uri))++uri; | |
991 | ||
992 | //compilers should unroll this loop | |
993 | for(; iIPv4 < 4; ++iIPv4) | |
994 | { | |
995 | if (*uri != wxT('.') || !IsDigit(*++uri)) | |
996 | break; | |
997 | ||
998 | //each ip part must be between 0-255 | |
999 | if( IsDigit(*++uri) && IsDigit(*++uri) && | |
1000 | //100 or less (note !) | |
1001 | !( (*(uri-2) < wxT('2')) || | |
1002 | //240 or less | |
1003 | (*(uri-2) == wxT('2') && | |
1004 | (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5'))) | |
1005 | ) | |
1006 | ) | |
1007 | ) | |
1008 | { | |
1009 | return false; | |
1010 | } | |
1011 | if(IsDigit(*uri))++uri; | |
1012 | } | |
1013 | } | |
1014 | return iIPv4 == 4; | |
1015 | } | |
1016 | ||
1017 | bool wxURI::ParseIPv6address(const wxChar*& uri) | |
1018 | { | |
1019 | // IPv6address = 6( h16 ":" ) ls32 | |
1020 | // / "::" 5( h16 ":" ) ls32 | |
1021 | // / [ h16 ] "::" 4( h16 ":" ) ls32 | |
1022 | // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 | |
1023 | // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 | |
1024 | // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 | |
1025 | // / [ *4( h16 ":" ) h16 ] "::" ls32 | |
1026 | // / [ *5( h16 ":" ) h16 ] "::" h16 | |
1027 | // / [ *6( h16 ":" ) h16 ] "::" | |
1028 | ||
1029 | size_t numPrefix = 0, | |
1030 | maxPostfix; | |
1031 | ||
1032 | bool bEndHex = false; | |
1033 | ||
1034 | for( ; numPrefix < 6; ++numPrefix) | |
1035 | { | |
1036 | if(!ParseH16(uri)) | |
1037 | { | |
1038 | --uri; | |
1039 | bEndHex = true; | |
1040 | break; | |
1041 | } | |
1042 | ||
1043 | if(*uri != wxT(':')) | |
1044 | { | |
1045 | break; | |
1046 | } | |
1047 | } | |
1048 | ||
1049 | if(!bEndHex && !ParseH16(uri)) | |
1050 | { | |
1051 | --uri; | |
1052 | ||
1053 | if (numPrefix) | |
1054 | return false; | |
1055 | ||
1056 | if (*uri == wxT(':')) | |
1057 | { | |
1058 | if (*++uri != wxT(':')) | |
1059 | return false; | |
1060 | ||
1061 | maxPostfix = 5; | |
1062 | } | |
1063 | else | |
1064 | maxPostfix = 6; | |
1065 | } | |
1066 | else | |
1067 | { | |
1068 | if (*uri != wxT(':') || *(uri+1) != wxT(':')) | |
1069 | { | |
1070 | if (numPrefix != 6) | |
1071 | return false; | |
1072 | ||
1073 | while (*--uri != wxT(':')) {} | |
1074 | ++uri; | |
1075 | ||
1076 | const wxChar* uristart = uri; | |
1077 | //parse ls32 | |
1078 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
1079 | if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri)) | |
1080 | return true; | |
1081 | ||
1082 | uri = uristart; | |
1083 | ||
1084 | if (ParseIPv4address(uri)) | |
1085 | return true; | |
1086 | else | |
1087 | return false; | |
1088 | } | |
1089 | else | |
1090 | { | |
1091 | uri += 2; | |
1092 | ||
1093 | if (numPrefix > 3) | |
1094 | maxPostfix = 0; | |
1095 | else | |
1096 | maxPostfix = 4 - numPrefix; | |
1097 | } | |
1098 | } | |
1099 | ||
1100 | bool bAllowAltEnding = maxPostfix == 0; | |
1101 | ||
1102 | for(; maxPostfix != 0; --maxPostfix) | |
1103 | { | |
1104 | if(!ParseH16(uri) || *uri != wxT(':')) | |
1105 | return false; | |
1106 | } | |
1107 | ||
1108 | if(numPrefix <= 4) | |
1109 | { | |
1110 | const wxChar* uristart = uri; | |
1111 | //parse ls32 | |
1112 | // ls32 = ( h16 ":" h16 ) / IPv4address | |
1113 | if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri)) | |
1114 | return true; | |
1115 | ||
1116 | uri = uristart; | |
1117 | ||
1118 | if (ParseIPv4address(uri)) | |
1119 | return true; | |
1120 | ||
1121 | uri = uristart; | |
1122 | ||
1123 | if (!bAllowAltEnding) | |
1124 | return false; | |
1125 | } | |
1126 | ||
1127 | if(numPrefix <= 5 && ParseH16(uri)) | |
1128 | return true; | |
1129 | ||
1130 | return true; | |
1131 | } | |
1132 | ||
1133 | bool wxURI::ParseIPvFuture(const wxChar*& uri) | |
1134 | { | |
1135 | // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) | |
1136 | if (*++uri != wxT('v') || !IsHex(*++uri)) | |
1137 | return false; | |
1138 | ||
1139 | while (IsHex(*++uri)) {} | |
1140 | ||
1141 | if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':'))) | |
1142 | return false; | |
1143 | ||
1144 | while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {} | |
1145 | ||
1146 | return true; | |
1147 | } | |
1148 | ||
1149 | ||
1150 | // --------------------------------------------------------------------------- | |
1151 | // CharToHex | |
1152 | // | |
1153 | // Converts a character into a numeric hexidecimal value, or 0 if the | |
1154 | // passed in character is not a valid hex character | |
1155 | // --------------------------------------------------------------------------- | |
1156 | ||
1157 | //static | |
1158 | wxChar wxURI::CharToHex(const wxChar& c) | |
1159 | { | |
1160 | if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A); | |
1161 | if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a); | |
1162 | if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00); | |
1163 | ||
1164 | return 0; | |
1165 | } | |
1166 | ||
1167 | // --------------------------------------------------------------------------- | |
1168 | // IsXXX | |
1169 | // | |
1170 | // Returns true if the passed in character meets the criteria of the method | |
1171 | // --------------------------------------------------------------------------- | |
1172 | ||
1173 | //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" | |
1174 | bool wxURI::IsUnreserved (const wxChar& c) | |
1175 | { return IsAlpha(c) || IsDigit(c) || | |
1176 | c == wxT('-') || | |
1177 | c == wxT('.') || | |
1178 | c == wxT('_') || | |
1179 | c == wxT('~') //tilde | |
1180 | ; | |
1181 | } | |
1182 | ||
1183 | bool wxURI::IsReserved (const wxChar& c) | |
1184 | { | |
1185 | return IsGenDelim(c) || IsSubDelim(c); | |
1186 | } | |
1187 | ||
1188 | //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" | |
1189 | bool wxURI::IsGenDelim (const wxChar& c) | |
1190 | { | |
1191 | return c == wxT(':') || | |
1192 | c == wxT('/') || | |
1193 | c == wxT('?') || | |
1194 | c == wxT('#') || | |
1195 | c == wxT('[') || | |
1196 | c == wxT(']') || | |
1197 | c == wxT('@'); | |
1198 | } | |
1199 | ||
1200 | //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")" | |
1201 | //! / "*" / "+" / "," / ";" / "=" | |
1202 | bool wxURI::IsSubDelim (const wxChar& c) | |
1203 | { | |
1204 | return c == wxT('!') || | |
1205 | c == wxT('$') || | |
1206 | c == wxT('&') || | |
1207 | c == wxT('\'') || | |
1208 | c == wxT('(') || | |
1209 | c == wxT(')') || | |
1210 | c == wxT('*') || | |
1211 | c == wxT('+') || | |
1212 | c == wxT(',') || | |
1213 | c == wxT(';') || | |
1214 | c == wxT('=') | |
1215 | ; | |
1216 | } | |
1217 | ||
1218 | bool wxURI::IsHex(const wxChar& c) | |
1219 | { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); } | |
1220 | ||
1221 | bool wxURI::IsAlpha(const wxChar& c) | |
1222 | { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); } | |
1223 | ||
1224 | bool wxURI::IsDigit(const wxChar& c) | |
1225 | { return c >= wxT('0') && c <= wxT('9'); } | |
1226 | ||
1227 | ||
1228 | // --------------------------------------------------------------------------- | |
1229 | // | |
1230 | // wxURL Compatibility | |
1231 | // | |
1232 | // --------------------------------------------------------------------------- | |
1233 | ||
1234 | #if wxUSE_URL | |
1235 | ||
1236 | #if WXWIN_COMPATIBILITY_2_4 | |
1237 | ||
1238 | #include "wx/url.h" | |
1239 | ||
1240 | //Note that this old code really doesn't convert to a URI that well and looks | |
1241 | //more like a dirty hack than anything else... | |
1242 | ||
1243 | wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims) | |
1244 | { | |
1245 | wxString out_str; | |
1246 | wxString hexa_code; | |
1247 | size_t i; | |
1248 | ||
1249 | for (i = 0; i < uri.Len(); i++) | |
1250 | { | |
1251 | wxChar c = uri.GetChar(i); | |
1252 | ||
1253 | if (c == wxT(' ')) | |
1254 | { | |
1255 | // GRG, Apr/2000: changed to "%20" instead of '+' | |
1256 | ||
1257 | out_str += wxT("%20"); | |
1258 | } | |
1259 | else | |
1260 | { | |
1261 | // GRG, Apr/2000: modified according to the URI definition (RFC 2396) | |
1262 | // | |
1263 | // - Alphanumeric characters are never escaped | |
1264 | // - Unreserved marks are never escaped | |
1265 | // - Delimiters must be escaped if they appear within a component | |
1266 | // but not if they are used to separate components. Here we have | |
1267 | // no clear way to distinguish between these two cases, so they | |
1268 | // are escaped unless they are passed in the 'delims' parameter | |
1269 | // (allowed delimiters). | |
1270 | ||
1271 | static const wxChar marks[] = wxT("-_.!~*()'"); | |
1272 | ||
1273 | if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) ) | |
1274 | { | |
1275 | hexa_code.Printf(wxT("%%%02X"), c); | |
1276 | out_str += hexa_code; | |
1277 | } | |
1278 | else | |
1279 | { | |
1280 | out_str += c; | |
1281 | } | |
1282 | } | |
1283 | } | |
1284 | ||
1285 | return out_str; | |
1286 | } | |
1287 | ||
1288 | wxString wxURL::ConvertFromURI(const wxString& uri) | |
1289 | { | |
1290 | return wxURI::Unescape(uri); | |
1291 | } | |
1292 | ||
1293 | #endif //WXWIN_COMPATIBILITY_2_4 | |
1294 | ||
1295 | #endif //wxUSE_URL | |
1296 | ||
1297 | //end of uri.cpp | |
1298 | ||
1299 | ||
1300 |