restore ConvertToValidURI to prev. version for bw compat. Note changes
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21 #endif
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #include "wx/uri.h"
31
32 // ---------------------------------------------------------------------------
33 // definitions
34 // ---------------------------------------------------------------------------
35
// Macro invocation naming wxURI and its base wxObject; presumably this hooks
// wxURI into the wxWidgets run-time class information -- TODO confirm against
// the macro definition, which is not visible in this file.
36 IMPLEMENT_CLASS(wxURI, wxObject);
37
38 // ===========================================================================
39 // implementation
40 // ===========================================================================
41
42 // ---------------------------------------------------------------------------
43 // utilities
44 // ---------------------------------------------------------------------------
45
46 // ---------------------------------------------------------------------------
47 //
48 // wxURI
49 //
50 // ---------------------------------------------------------------------------
51
52 // ---------------------------------------------------------------------------
53 // Constructors
54 // ---------------------------------------------------------------------------
55
56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 }
59
60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61 {
62 Create(uri);
63 }
64
65 wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 Assign(uri);
68 }
69
70 // ---------------------------------------------------------------------------
71 // Destructor and cleanup
72 // ---------------------------------------------------------------------------
73
74 wxURI::~wxURI()
75 {
76 Clear();
77 }
78
79 void wxURI::Clear()
80 {
81 m_scheme = m_user = m_server = m_port = m_path =
82 m_query = m_fragment = wxT("");
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87 }
88
89 // ---------------------------------------------------------------------------
90 // Create
91 //
92 // This creates the URI - all we do here is call the main parsing method
93 // ---------------------------------------------------------------------------
94
95 const wxChar* wxURI::Create(const wxString& uri)
96 {
97 if (m_fields)
98 Clear();
99
100 return Parse(uri);
101 }
102
103 // ---------------------------------------------------------------------------
104 // Escape Methods
105 //
106 // TranslateEscape unencodes a 3 character URL escape sequence
107 //
108 // Escape encodes an invalid URI character into a 3 character sequence
109 //
110 // IsEscape determines if the input string contains an escape sequence,
111 // if it does, then it moves the input string past the escape sequence
112 //
113 // Unescape unencodes all 3 character URL escape sequences in a wxString
114 // ---------------------------------------------------------------------------
115
116 wxChar wxURI::TranslateEscape(const wxChar* s)
117 {
118 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
119
120 return CharToHex(*s) * 0x10 + CharToHex(*++s);
121 }
122
123 wxString wxURI::Unescape(const wxString& uri)
124 {
125 wxString new_uri;
126
127 for(size_t i = 0; i < uri.length(); ++i)
128 {
129 if (uri[i] == wxT('%'))
130 {
131 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
132 i += 2;
133 }
134 }
135
136 return new_uri;
137 }
138
139 void wxURI::Escape(wxString& s, const wxChar& c)
140 {
141 const wxChar* hdig = wxT("0123456789abcdef");
142 s += wxT('%');
143 s += hdig[(c >> 4) & 15];
144 s += hdig[c & 15];
145 }
146
147 bool wxURI::IsEscape(const wxChar*& uri)
148 {
149 // pct-encoded = "%" HEXDIG HEXDIG
150 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
151 {
152 uri += 3;
153 return true;
154 }
155 else
156 return false;
157 }
158
159 // ---------------------------------------------------------------------------
160 // BuildURI
161 //
162 // BuildURI() builds the entire URI into a useable
163 // representation, including proper identification characters such as slashes
164 //
165 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
166 // the components that accept escape sequences
167 // ---------------------------------------------------------------------------
168
169 wxString wxURI::BuildURI() const
170 {
171 wxString ret;
172
173 if (HasScheme())
174 ret = ret + m_scheme + wxT(":");
175
176 if (HasServer())
177 {
178 ret += wxT("//");
179
180 if (HasUser())
181 ret = ret + m_user + wxT("@");
182
183 ret += m_server;
184
185 if (HasPort())
186 ret = ret + wxT(":") + m_port;
187 }
188
189 ret += m_path;
190
191 if (HasQuery())
192 ret = ret + wxT("?") + m_query;
193
194 if (HasFragment())
195 ret = ret + wxT("#") + m_fragment;
196
197 return ret;
198 }
199
200 wxString wxURI::BuildUnescapedURI() const
201 {
202 wxString ret;
203
204 if (HasScheme())
205 ret = ret + m_scheme + wxT(":");
206
207 if (HasServer())
208 {
209 ret += wxT("//");
210
211 if (HasUser())
212 ret = ret + wxURI::Unescape(m_user) + wxT("@");
213
214 if (m_hostType == wxURI_REGNAME)
215 ret += wxURI::Unescape(m_server);
216 else
217 ret += m_server;
218
219 if (HasPort())
220 ret = ret + wxT(":") + m_port;
221 }
222
223 ret += wxURI::Unescape(m_path);
224
225 if (HasQuery())
226 ret = ret + wxT("?") + wxURI::Unescape(m_query);
227
228 if (HasFragment())
229 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
230
231 return ret;
232 }
233
234 // ---------------------------------------------------------------------------
235 // Assignment
236 // ---------------------------------------------------------------------------
237
238 wxURI& wxURI::Assign(const wxURI& uri)
239 {
240 //assign fields
241 m_fields = uri.m_fields;
242
243 //ref over components
244 m_scheme = uri.m_scheme;
245 m_user = uri.m_user;
246 m_server = uri.m_server;
247 m_hostType = uri.m_hostType;
248 m_port = uri.m_port;
249 m_path = uri.m_path;
250 m_query = uri.m_query;
251 m_fragment = uri.m_fragment;
252
253 return *this;
254 }
255
256 wxURI& wxURI::operator = (const wxURI& uri)
257 {
258 return Assign(uri);
259 }
260
261 wxURI& wxURI::operator = (const wxString& string)
262 {
263 Create(string);
264 return *this;
265 }
266
267 // ---------------------------------------------------------------------------
268 // Comparison
269 // ---------------------------------------------------------------------------
270
271 bool wxURI::operator == (const wxURI& uri) const
272 {
273 if (HasScheme())
274 {
275 if(m_scheme != uri.m_scheme)
276 return false;
277 }
278 else if (uri.HasScheme())
279 return false;
280
281
282 if (HasServer())
283 {
284 if (HasUser())
285 {
286 if (m_user != uri.m_user)
287 return false;
288 }
289 else if (uri.HasUser())
290 return false;
291
292 if (m_server != uri.m_server ||
293 m_hostType != uri.m_hostType)
294 return false;
295
296 if (HasPort())
297 {
298 if(m_port != uri.m_port)
299 return false;
300 }
301 else if (uri.HasPort())
302 return false;
303 }
304 else if (uri.HasServer())
305 return false;
306
307
308 if (HasPath())
309 {
310 if(m_path != uri.m_path)
311 return false;
312 }
313 else if (uri.HasPath())
314 return false;
315
316 if (HasQuery())
317 {
318 if (m_query != uri.m_query)
319 return false;
320 }
321 else if (uri.HasQuery())
322 return false;
323
324 if (HasFragment())
325 {
326 if (m_fragment != uri.m_fragment)
327 return false;
328 }
329 else if (uri.HasFragment())
330 return false;
331
332 return true;
333 }
334
335 // ---------------------------------------------------------------------------
336 // IsReference
337 //
338 // if there is no authority or scheme, it is a reference
339 // ---------------------------------------------------------------------------
340
341 bool wxURI::IsReference() const
342 { return !HasScheme() || !HasServer(); }
343
344 // ---------------------------------------------------------------------------
345 // Parse
346 //
347 // Master URI parsing method. Just calls the individual parsing methods
348 //
349 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
350 // URI-reference = URI / relative-ref
351 // ---------------------------------------------------------------------------
352
353 const wxChar* wxURI::Parse(const wxChar* uri)
354 {
355 uri = ParseScheme(uri);
356 uri = ParseAuthority(uri);
357 uri = ParsePath(uri);
358 uri = ParseQuery(uri);
359 return ParseFragment(uri);
360 }
361
362 // ---------------------------------------------------------------------------
363 // ParseXXX
364 //
365 // Individual parsers for each URI component
366 // ---------------------------------------------------------------------------
367
368 const wxChar* wxURI::ParseScheme(const wxChar* uri)
369 {
370 wxASSERT(uri != NULL);
371
372 //copy of the uri - used for figuring out
373 //length of each component
374 const wxChar* uricopy = uri;
375
376 //Does the uri have a scheme (first character alpha)?
377 if (IsAlpha(*uri))
378 {
379 m_scheme += *uri++;
380
381 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
382 while (IsAlpha(*uri) || IsDigit(*uri) ||
383 *uri == wxT('+') ||
384 *uri == wxT('-') ||
385 *uri == wxT('.'))
386 {
387 m_scheme += *uri++;
388 }
389
390 //valid scheme?
391 if (*uri == wxT(':'))
392 {
393 //mark the scheme as valid
394 m_fields |= wxURI_SCHEME;
395
396 //move reference point up to input buffer
397 uricopy = ++uri;
398 }
399 else
400 //relative uri with relative path reference
401 m_scheme = wxT("");
402 }
403 // else
404 //relative uri with _possible_ relative path reference
405
406 return uricopy;
407 }
408
409 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
410 {
411 // authority = [ userinfo "@" ] host [ ":" port ]
412 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
413 {
414 uri += 2;
415
416 uri = ParseUser(uri);
417 uri = ParseServer(uri);
418 return ParsePort(uri);
419 }
420
421 return uri;
422 }
423
424 const wxChar* wxURI::ParseUser(const wxChar* uri)
425 {
426 wxASSERT(uri != NULL);
427
428 //copy of the uri - used for figuring out
429 //length of each component
430 const wxChar* uricopy = uri;
431
432 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
433 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
434 {
435 if(IsUnreserved(*uri) || IsEscape(uri) ||
436 IsSubDelim(*uri) || *uri == wxT(':'))
437 m_user += *uri++;
438 else
439 Escape(m_user, *uri++);
440 }
441
442 if(*uri == wxT('@'))
443 {
444 //valid userinfo
445 m_fields |= wxURI_USER;
446
447 uricopy = ++uri;
448 }
449 else
450 m_user = wxT("");
451
452 return uricopy;
453 }
454
455 const wxChar* wxURI::ParseServer(const wxChar* uri)
456 {
457 wxASSERT(uri != NULL);
458
459 //copy of the uri - used for figuring out
460 //length of each component
461 const wxChar* uricopy = uri;
462
463 // host = IP-literal / IPv4address / reg-name
464 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
465 if (*uri == wxT('['))
466 {
467 if (ParseIPv6address(++uri) && *uri == wxT(']'))
468 {
469 ++uri;
470 m_hostType = wxURI_IPV6ADDRESS;
471
472 wxStringBufferLength theBuffer(m_server, uri - uricopy);
473 wxMemcpy(theBuffer, uricopy, uri-uricopy);
474 theBuffer.SetLength(uri-uricopy);
475 }
476 else
477 {
478 uri = uricopy;
479
480 if (ParseIPvFuture(++uri) && *uri == wxT(']'))
481 {
482 ++uri;
483 m_hostType = wxURI_IPVFUTURE;
484
485 wxStringBufferLength theBuffer(m_server, uri - uricopy);
486 wxMemcpy(theBuffer, uricopy, uri-uricopy);
487 theBuffer.SetLength(uri-uricopy);
488 }
489 else
490 uri = uricopy;
491 }
492 }
493 else
494 {
495 if (ParseIPv4address(uri))
496 {
497 m_hostType = wxURI_IPV4ADDRESS;
498
499 wxStringBufferLength theBuffer(m_server, uri - uricopy);
500 wxMemcpy(theBuffer, uricopy, uri-uricopy);
501 theBuffer.SetLength(uri-uricopy);
502 }
503 else
504 uri = uricopy;
505 }
506
507 if(m_hostType == wxURI_REGNAME)
508 {
509 uri = uricopy;
510 // reg-name = *( unreserved / pct-encoded / sub-delims )
511 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
512 {
513 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
514 m_server += *uri++;
515 else
516 Escape(m_server, *uri++);
517 }
518 }
519
520 //mark the server as valid
521 m_fields |= wxURI_SERVER;
522
523 return uri;
524 }
525
526
527 const wxChar* wxURI::ParsePort(const wxChar* uri)
528 {
529 wxASSERT(uri != NULL);
530
531 // port = *DIGIT
532 if(*uri == wxT(':'))
533 {
534 ++uri;
535 while(IsDigit(*uri))
536 {
537 m_port += *uri++;
538 }
539
540 //mark the port as valid
541 m_fields |= wxURI_PORT;
542 }
543
544 return uri;
545 }
546
547 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
548 {
549 wxASSERT(uri != NULL);
550
551 //copy of the uri - used for figuring out
552 //length of each component
553 const wxChar* uricopy = uri;
554
555 /// hier-part = "//" authority path-abempty
556 /// / path-absolute
557 /// / path-rootless
558 /// / path-empty
559 ///
560 /// relative-part = "//" authority path-abempty
561 /// / path-absolute
562 /// / path-noscheme
563 /// / path-empty
564 ///
565 /// path-abempty = *( "/" segment )
566 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
567 /// path-noscheme = segment-nz-nc *( "/" segment )
568 /// path-rootless = segment-nz *( "/" segment )
569 /// path-empty = 0<pchar>
570 ///
571 /// segment = *pchar
572 /// segment-nz = 1*pchar
573 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
574 /// ; non-zero-length segment without any colon ":"
575 ///
576 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
577 if (*uri == wxT('/'))
578 {
579 m_path += *uri++;
580
581 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
582 {
583 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
584 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
585 m_path += *uri++;
586 else
587 Escape(m_path, *uri++);
588 }
589
590 if (bNormalize)
591 {
592 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
593 Normalize(theBuffer, true);
594 theBuffer.SetLength(wxStrlen(theBuffer));
595 }
596 //mark the path as valid
597 m_fields |= wxURI_PATH;
598 }
599 else if(*uri) //Relative path
600 {
601 if (bReference)
602 {
603 //no colon allowed
604 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
605 {
606 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
607 *uri == wxT('@') || *uri == wxT('/'))
608 m_path += *uri++;
609 else
610 Escape(m_path, *uri++);
611 }
612 }
613 else
614 {
615 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
616 {
617 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
618 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
619 m_path += *uri++;
620 else
621 Escape(m_path, *uri++);
622 }
623 }
624
625 if (uri != uricopy)
626 {
627 if (bNormalize)
628 {
629 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
630 Normalize(theBuffer);
631 theBuffer.SetLength(wxStrlen(theBuffer));
632 }
633
634 //mark the path as valid
635 m_fields |= wxURI_PATH;
636 }
637 }
638
639 return uri;
640 }
641
642
643 const wxChar* wxURI::ParseQuery(const wxChar* uri)
644 {
645 wxASSERT(uri != NULL);
646
647 // query = *( pchar / "/" / "?" )
648 if (*uri == wxT('?'))
649 {
650 ++uri;
651 while(*uri && *uri != wxT('#'))
652 {
653 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
654 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
655 m_query += *uri++;
656 else
657 Escape(m_query, *uri++);
658 }
659
660 //mark the server as valid
661 m_fields |= wxURI_QUERY;
662 }
663
664 return uri;
665 }
666
667
668 const wxChar* wxURI::ParseFragment(const wxChar* uri)
669 {
670 wxASSERT(uri != NULL);
671
672 // fragment = *( pchar / "/" / "?" )
673 if (*uri == wxT('#'))
674 {
675 ++uri;
676 while(*uri)
677 {
678 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
679 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
680 m_fragment += *uri++;
681 else
682 Escape(m_fragment, *uri++);
683 }
684
685 //mark the server as valid
686 m_fields |= wxURI_FRAGMENT;
687 }
688
689 return uri;
690 }
691
692 // ---------------------------------------------------------------------------
693 // Resolve
694 //
695 // Builds missing components of this uri from a base uri
696 //
697 // A version of the algorithm outlined in the RFC is used here
698 // (it is shown in comments)
699 //
700 // Note that an empty URI inherits all components
701 // ---------------------------------------------------------------------------
702
703 void wxURI::Resolve(const wxURI& base, int flags)
704 {
705 wxASSERT_MSG(!base.IsReference(),
706 wxT("wxURI to inherit from must not be a reference!"));
707
708 // If we arn't being strict, enable the older (pre-RFC2396)
709 // loophole that allows this uri to inherit other
710 // properties from the base uri - even if the scheme
711 // is defined
712 if ( !(flags & wxURI_STRICT) &&
713 HasScheme() && base.HasScheme() &&
714 m_scheme == base.m_scheme )
715 {
716 m_fields -= wxURI_SCHEME;
717 }
718
719
720 // Do nothing if this is an absolute wxURI
721 // if defined(R.scheme) then
722 // T.scheme = R.scheme;
723 // T.authority = R.authority;
724 // T.path = remove_dot_segments(R.path);
725 // T.query = R.query;
726 if (HasScheme())
727 {
728 return;
729 }
730
731 //No sheme - inherit
732 m_scheme = base.m_scheme;
733 m_fields |= wxURI_SCHEME;
734
735 // All we need to do for relative URIs with an
736 // authority component is just inherit the scheme
737 // if defined(R.authority) then
738 // T.authority = R.authority;
739 // T.path = remove_dot_segments(R.path);
740 // T.query = R.query;
741 if (HasServer())
742 {
743 return;
744 }
745
746 //No authority - inherit
747 if (base.HasUser())
748 {
749 m_user = base.m_user;
750 m_fields |= wxURI_USER;
751 }
752
753 m_server = base.m_server;
754 m_hostType = base.m_hostType;
755 m_fields |= wxURI_SERVER;
756
757 if (base.HasPort())
758 {
759 m_port = base.m_port;
760 m_fields |= wxURI_PORT;
761 }
762
763
764 // Simple path inheritance from base
765 if (!HasPath())
766 {
767 // T.path = Base.path;
768 m_path = base.m_path;
769 m_fields |= wxURI_PATH;
770
771
772 // if defined(R.query) then
773 // T.query = R.query;
774 // else
775 // T.query = Base.query;
776 // endif;
777 if (!HasQuery())
778 {
779 m_query = base.m_query;
780 m_fields |= wxURI_QUERY;
781 }
782 }
783 else
784 {
785 // if (R.path starts-with "/") then
786 // T.path = remove_dot_segments(R.path);
787 // else
788 // T.path = merge(Base.path, R.path);
789 // T.path = remove_dot_segments(T.path);
790 // endif;
791 // T.query = R.query;
792 if (m_path[0u] != wxT('/'))
793 {
794 //Marge paths
795 const wxChar* op = m_path.c_str();
796 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
797
798 //not a ending directory? move up
799 if (base.m_path[0] && *(bp-1) != wxT('/'))
800 UpTree(base.m_path, bp);
801
802 //normalize directories
803 while(*op == wxT('.') && *(op+1) == wxT('.') &&
804 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
805 {
806 UpTree(base.m_path, bp);
807
808 if (*(op+2) == '\0')
809 op += 2;
810 else
811 op += 3;
812 }
813
814 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
815 m_path.Mid((op - m_path.c_str()), m_path.Length());
816 }
817 }
818
819 //T.fragment = R.fragment;
820 }
821
822 // ---------------------------------------------------------------------------
823 // UpTree
824 //
825 // Moves a URI path up a directory
826 // ---------------------------------------------------------------------------
827
828 //static
829 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
830 {
831 if (uri != uristart && *(uri-1) == wxT('/'))
832 {
833 uri -= 2;
834 }
835
836 for(;uri != uristart; --uri)
837 {
838 if (*uri == wxT('/'))
839 {
840 ++uri;
841 break;
842 }
843 }
844
845 //!!!TODO:HACK!!!//
846 if (uri == uristart && *uri == wxT('/'))
847 ++uri;
848 //!!!//
849 }
850
851 // ---------------------------------------------------------------------------
852 // Normalize
853 //
854 // Normalizes directories in-place
855 //
856 // I.E. ./ and . are ignored
857 //
858 // ../ and .. are removed if a directory is before it, along
859 // with that directory (leading .. and ../ are kept)
860 // ---------------------------------------------------------------------------
861
862 //static
// Removes "." and ".." segments from the NUL-terminated path in s, rewriting
// the buffer in place and NUL-terminating the result.
//
// s            - path buffer; also used as the write cursor below
// bIgnoreLeads - when true, ".." segments that have no directory before them
//                are dropped; when false they are copied to the output
//
// cp is the read cursor. bp marks the lowest position the write cursor may
// be moved back to: the start of the buffer, or just past a leading '/'.
863 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
864 {
865 wxChar* cp = s;
866 wxChar* bp = s;
867
// a leading slash is never removed by going up the tree
868 if(s[0] == wxT('/'))
869 ++bp;
870
871 while(*cp)
872 {
// a "." segment: a dot followed by '/' or the end of the string, located at
// bp or directly after a '/'
873 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
874 && (bp == cp || *(cp-1) == wxT('/')))
875 {
876 //. _or_ ./ - ignore
877 if (*(cp+1) == '\0')
878 cp += 1;
879 else
880 cp += 2;
881 }
// a ".." segment, recognised under the same positional conditions
882 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
883 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
884 && (bp == cp || *(cp-1) == wxT('/')))
885 {
886 //.. _or_ ../ - go up the tree
// something has been written after bp: move the write cursor back over it
887 if (s != bp)
888 {
// the cast lets UpTree(), which takes a const wxChar*&, update s directly
889 UpTree((const wxChar*)bp, (const wxChar*&)s);
890
891 if (*(cp+2) == '\0')
892 cp += 2;
893 else
894 cp += 3;
895 }
// nothing to go up into and leading ".." are kept: copy "..", plus the
// following character if there is one, and advance bp past it so that later
// ".." segments cannot remove it
896 else if (!bIgnoreLeads)
897
898 {
899 *bp++ = *cp++;
900 *bp++ = *cp++;
901 if (*cp)
902 *bp++ = *cp++;
903
904 s = bp;
905 }
// nothing to go up into and leading ".." are ignored: just skip the segment
906 else
907 {
908 if (*(cp+2) == '\0')
909 cp += 2;
910 else
911 cp += 3;
912 }
913 }
// any other character is copied from the read to the write position
914 else
915 *s++ = *cp++;
916 }
917
918 *s = '\0';
919 }
920
921 // ---------------------------------------------------------------------------
922 // ParseH16
923 //
924 // Parses 1 to 4 hex values. Returns true if the first character of the input
925 // string is a valid hex character. It is the caller's responsibility to move
926 // the input string back to its original position on failure.
927 // ---------------------------------------------------------------------------
928
929 bool wxURI::ParseH16(const wxChar*& uri)
930 {
931 // h16 = 1*4HEXDIG
932 if(!IsHex(*++uri))
933 return false;
934
935 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
936 ++uri;
937
938 return true;
939 }
940
941 // ---------------------------------------------------------------------------
942 // ParseIPXXX
943 //
944 // Parses a certain version of an IP address and moves the input string past
945 // it. Returns true if the input string contains the proper version of an ip
946 // address. It is the caller's responsibility to move the input string back
947 // to its original position on failure.
948 // ---------------------------------------------------------------------------
949
950 bool wxURI::ParseIPv4address(const wxChar*& uri)
951 {
952 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
953 //
954 //dec-octet = DIGIT ; 0-9
955 // / %x31-39 DIGIT ; 10-99
956 // / "1" 2DIGIT ; 100-199
957 // / "2" %x30-34 DIGIT ; 200-249
958 // / "25" %x30-35 ; 250-255
959 size_t iIPv4 = 0;
960 if (IsDigit(*uri))
961 {
962 ++iIPv4;
963
964
965 //each ip part must be between 0-255 (dupe of version in for loop)
966 if( IsDigit(*++uri) && IsDigit(*++uri) &&
967 //100 or less (note !)
968 !( (*(uri-2) < wxT('2')) ||
969 //240 or less
970 (*(uri-2) == wxT('2') &&
971 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
972 )
973 )
974 )
975 {
976 return false;
977 }
978
979 if(IsDigit(*uri))++uri;
980
981 //compilers should unroll this loop
982 for(; iIPv4 < 4; ++iIPv4)
983 {
984 if (*uri != wxT('.') || !IsDigit(*++uri))
985 break;
986
987 //each ip part must be between 0-255
988 if( IsDigit(*++uri) && IsDigit(*++uri) &&
989 //100 or less (note !)
990 !( (*(uri-2) < wxT('2')) ||
991 //240 or less
992 (*(uri-2) == wxT('2') &&
993 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
994 )
995 )
996 )
997 {
998 return false;
999 }
1000 if(IsDigit(*uri))++uri;
1001 }
1002 }
1003 return iIPv4 == 4;
1004 }
1005
// Tries to match an IPv6 address (grammar quoted below), advancing uri as
// characters are consumed. Returns true on a match; on failure the pointer
// may have been moved and the caller is expected to restore it.
//
// NOTE(review): ParseH16() advances the pointer before testing a character,
// so every h16 group is read starting one character after the current
// position -- confirm the position callers are meant to pass in.
1006 bool wxURI::ParseIPv6address(const wxChar*& uri)
1007 {
1008 // IPv6address = 6( h16 ":" ) ls32
1009 // / "::" 5( h16 ":" ) ls32
1010 // / [ h16 ] "::" 4( h16 ":" ) ls32
1011 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1012 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1013 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1014 // / [ *4( h16 ":" ) h16 ] "::" ls32
1015 // / [ *5( h16 ":" ) h16 ] "::" h16
1016 // / [ *6( h16 ":" ) h16 ] "::"
1017
// numPrefix counts the h16 groups read by the loop below; maxPostfix is the
// number of "h16 ':'" groups still required after the "::"
1018 size_t numPrefix = 0,
1019 maxPostfix;
1020
// set when the loop stopped because no h16 could be read
1021 bool bEndHex = false;
1022
// read up to six h16 groups, stopping early at a group that is not followed
// by ':'
1023 for( ; numPrefix < 6; ++numPrefix)
1024 {
1025 if(!ParseH16(uri))
1026 {
// undo the single increment ParseH16() performed before failing
1027 --uri;
1028 bEndHex = true;
1029 break;
1030 }
1031
1032 if(*uri != wxT(':'))
1033 {
1034 break;
1035 }
1036 }
1037
// the loop ended on a group: try to read one more; this branch is taken
// when that attempt fails
1038 if(!bEndHex && !ParseH16(uri))
1039 {
1040 --uri;
1041
1042 if (numPrefix)
1043 return false;
1044
1045 if (*uri == wxT(':'))
1046 {
1047 if (*++uri != wxT(':'))
1048 return false;
1049
1050 maxPostfix = 5;
1051 }
1052 else
1053 maxPostfix = 6;
1054 }
1055 else
1056 {
// no "::" at the current position: only the full form with six leading
// groups followed by an ls32 can match
1057 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1058 {
1059 if (numPrefix != 6)
1060 return false;
1061
// step back to just after the preceding ':' so that the tail can be parsed
// again as an ls32
1062 while (*--uri != wxT(':')) {}
1063 ++uri;
1064
1065 const wxChar* uristart = uri;
1066 //parse ls32
1067 // ls32 = ( h16 ":" h16 ) / IPv4address
1068 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1069 return true;
1070
1071 uri = uristart;
1072
1073 if (ParseIPv4address(uri))
1074 return true;
1075 else
1076 return false;
1077 }
1078 else
1079 {
// skip the "::"
1080 uri += 2;
1081
1082 if (numPrefix > 3)
1083 maxPostfix = 0;
1084 else
1085 maxPostfix = 4 - numPrefix;
1086 }
1087 }
1088
// when no further "h16 ':'" group is required, the address may also end
// without an ls32
1089 bool bAllowAltEnding = maxPostfix == 0;
1090
// every remaining required group must be an h16 followed by ':'
1091 for(; maxPostfix != 0; --maxPostfix)
1092 {
1093 if(!ParseH16(uri) || *uri != wxT(':'))
1094 return false;
1095 }
1096
1097 if(numPrefix <= 4)
1098 {
1099 const wxChar* uristart = uri;
1100 //parse ls32
1101 // ls32 = ( h16 ":" h16 ) / IPv4address
1102 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1103 return true;
1104
1105 uri = uristart;
1106
1107 if (ParseIPv4address(uri))
1108 return true;
1109
1110 uri = uristart;
1111
1112 if (!bAllowAltEnding)
1113 return false;
1114 }
1115
// NOTE(review): both outcomes of this test return true, so its only effect is
// that ParseH16() may advance uri -- confirm whether a failure here was meant
// to be reported.
1116 if(numPrefix <= 5 && ParseH16(uri))
1117 return true;
1118
1119 return true;
1120 }
1121
1122 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1123 {
1124 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1125 if (*++uri != wxT('v') || !IsHex(*++uri))
1126 return false;
1127
1128 while (IsHex(*++uri)) {}
1129
1130 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1131 return false;
1132
1133 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1134
1135 return true;
1136 }
1137
1138
1139 // ---------------------------------------------------------------------------
1140 // CharToHex
1141 //
1142 // Converts a character into a numeric hexadecimal value, or 0 if the
1143 // passed in character is not a valid hex character
1144 // ---------------------------------------------------------------------------
1145
1146 //static
1147 wxInt32 wxURI::CharToHex(const wxChar& c)
1148 {
1149 if ((c >= wxT('A')) && (c <= wxT('Z'))) return c - wxT('A') + 0x0A;
1150 if ((c >= wxT('a')) && (c <= wxT('z'))) return c - wxT('a') + 0x0a;
1151 if ((c >= wxT('0')) && (c <= wxT('9'))) return c - wxT('0') + 0x00;
1152
1153 return 0;
1154 }
1155
1156 // ---------------------------------------------------------------------------
1157 // IsXXX
1158 //
1159 // Returns true if the passed in character meets the criteria of the method
1160 // ---------------------------------------------------------------------------
1161
1162 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1163 bool wxURI::IsUnreserved (const wxChar& c)
1164 { return IsAlpha(c) || IsDigit(c) ||
1165 c == wxT('-') ||
1166 c == wxT('.') ||
1167 c == wxT('_') ||
1168 c == wxT('~') //tilde
1169 ;
1170 }
1171
1172 bool wxURI::IsReserved (const wxChar& c)
1173 {
1174 return IsGenDelim(c) || IsSubDelim(c);
1175 }
1176
1177 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1178 bool wxURI::IsGenDelim (const wxChar& c)
1179 {
1180 return c == wxT(':') ||
1181 c == wxT('/') ||
1182 c == wxT('?') ||
1183 c == wxT('#') ||
1184 c == wxT('[') ||
1185 c == wxT(']') ||
1186 c == wxT('@');
1187 }
1188
1189 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1190 //! / "*" / "+" / "," / ";" / "="
1191 bool wxURI::IsSubDelim (const wxChar& c)
1192 {
1193 return c == wxT('!') ||
1194 c == wxT('$') ||
1195 c == wxT('&') ||
1196 c == wxT('\'') ||
1197 c == wxT('(') ||
1198 c == wxT(')') ||
1199 c == wxT('*') ||
1200 c == wxT('+') ||
1201 c == wxT(',') ||
1202 c == wxT(';') ||
1203 c == wxT('=')
1204 ;
1205 }
1206
1207 bool wxURI::IsHex(const wxChar& c)
1208 { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1209
1210 bool wxURI::IsAlpha(const wxChar& c)
1211 { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
1212
1213 bool wxURI::IsDigit(const wxChar& c)
1214 { return c >= wxT('0') && c <= wxT('9'); }
1215
1216
1217 // ---------------------------------------------------------------------------
1218 //
1219 // wxURL Compatibility
1220 //
1221 // ---------------------------------------------------------------------------
1222
1223 #if wxUSE_URL
1224
1225 #if WXWIN_COMPATIBILITY_2_4
1226
1227 #include "wx/url.h"
1228
1229 //Note that this old code really doesn't convert to a URI that well and looks
1230 //more like a dirty hack than anything else...
1231
1232 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1233 {
1234 wxString out_str;
1235 wxString hexa_code;
1236 size_t i;
1237
1238 for (i = 0; i < uri.Len(); i++)
1239 {
1240 wxChar c = uri.GetChar(i);
1241
1242 if (c == wxT(' '))
1243 {
1244 // GRG, Apr/2000: changed to "%20" instead of '+'
1245
1246 out_str += wxT("%20");
1247 }
1248 else
1249 {
1250 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1251 //
1252 // - Alphanumeric characters are never escaped
1253 // - Unreserved marks are never escaped
1254 // - Delimiters must be escaped if they appear within a component
1255 // but not if they are used to separate components. Here we have
1256 // no clear way to distinguish between these two cases, so they
1257 // are escaped unless they are passed in the 'delims' parameter
1258 // (allowed delimiters).
1259
1260 static const wxChar marks[] = wxT("-_.!~*()'");
1261
1262 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1263 {
1264 hexa_code.Printf(wxT("%%%02X"), c);
1265 out_str += hexa_code;
1266 }
1267 else
1268 {
1269 out_str += c;
1270 }
1271 }
1272 }
1273
1274 return out_str;
1275 }
1276
1277 wxString wxURL::ConvertFromURI(const wxString& uri)
1278 {
1279 return wxURI::Unescape(uri);
1280 }
1281
1282 #endif //WXWIN_COMPATIBILITY_2_4
1283
1284 #endif //wxUSE_URL
1285
1286 //end of uri.cpp
1287
1288
1289