in unescape add normal characters in addition to unescaped characters :)
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21 #endif
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #include "wx/uri.h"
31
32 // ---------------------------------------------------------------------------
33 // definitions
34 // ---------------------------------------------------------------------------
35
36 IMPLEMENT_CLASS(wxURI, wxObject);
37
38 // ===========================================================================
39 // implementation
40 // ===========================================================================
41
42 // ---------------------------------------------------------------------------
43 // utilities
44 // ---------------------------------------------------------------------------
45
46 // ---------------------------------------------------------------------------
47 //
48 // wxURI
49 //
50 // ---------------------------------------------------------------------------
51
52 // ---------------------------------------------------------------------------
53 // Constructors
54 // ---------------------------------------------------------------------------
55
56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 }
59
60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61 {
62 Create(uri);
63 }
64
65 wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 Assign(uri);
68 }
69
70 // ---------------------------------------------------------------------------
71 // Destructor and cleanup
72 // ---------------------------------------------------------------------------
73
74 wxURI::~wxURI()
75 {
76 Clear();
77 }
78
79 void wxURI::Clear()
80 {
81 m_scheme = m_user = m_server = m_port = m_path =
82 m_query = m_fragment = wxT("");
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87 }
88
89 // ---------------------------------------------------------------------------
90 // Create
91 //
92 // This creates the URI - all we do here is call the main parsing method
93 // ---------------------------------------------------------------------------
94
95 const wxChar* wxURI::Create(const wxString& uri)
96 {
97 if (m_fields)
98 Clear();
99
100 return Parse(uri);
101 }
102
103 // ---------------------------------------------------------------------------
104 // Escape Methods
105 //
106 // TranslateEscape unencodes a 3 character URL escape sequence
107 //
108 // Escape encodes an invalid URI character into a 3 character sequence
109 //
110 // IsEscape determines if the input string contains an escape sequence,
111 // if it does, then it moves the input string past the escape sequence
112 //
113 // Unescape unencodes all 3 character URL escape sequences in a wxString
114 // ---------------------------------------------------------------------------
115
116 wxChar wxURI::TranslateEscape(const wxChar* s)
117 {
118 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
119
120 //<<4 == 16
121 return ( CharToHex(*s) << 4 ) | CharToHex(*++s);
122 }
123
124 wxString wxURI::Unescape(const wxString& uri)
125 {
126 wxString new_uri;
127
128 for(size_t i = 0; i < uri.length(); ++i)
129 {
130 if (uri[i] == wxT('%'))
131 {
132 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
133 i += 2;
134 }
135 else
136 new_uri += uri[i];
137 }
138
139 return new_uri;
140 }
141
142 void wxURI::Escape(wxString& s, const wxChar& c)
143 {
144 const wxChar* hdig = wxT("0123456789abcdef");
145 s += wxT('%');
146 s += hdig[(c >> 4) & 15];
147 s += hdig[c & 15];
148 }
149
150 bool wxURI::IsEscape(const wxChar*& uri)
151 {
152 // pct-encoded = "%" HEXDIG HEXDIG
153 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
154 {
155 uri += 3;
156 return true;
157 }
158 else
159 return false;
160 }
161
162 // ---------------------------------------------------------------------------
163 // BuildURI
164 //
165 // BuildURI() builds the entire URI into a useable
166 // representation, including proper identification characters such as slashes
167 //
168 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
169 // the components that accept escape sequences
170 // ---------------------------------------------------------------------------
171
172 wxString wxURI::BuildURI() const
173 {
174 wxString ret;
175
176 if (HasScheme())
177 ret = ret + m_scheme + wxT(":");
178
179 if (HasServer())
180 {
181 ret += wxT("//");
182
183 if (HasUser())
184 ret = ret + m_user + wxT("@");
185
186 ret += m_server;
187
188 if (HasPort())
189 ret = ret + wxT(":") + m_port;
190 }
191
192 ret += m_path;
193
194 if (HasQuery())
195 ret = ret + wxT("?") + m_query;
196
197 if (HasFragment())
198 ret = ret + wxT("#") + m_fragment;
199
200 return ret;
201 }
202
203 wxString wxURI::BuildUnescapedURI() const
204 {
205 wxString ret;
206
207 if (HasScheme())
208 ret = ret + m_scheme + wxT(":");
209
210 if (HasServer())
211 {
212 ret += wxT("//");
213
214 if (HasUser())
215 ret = ret + wxURI::Unescape(m_user) + wxT("@");
216
217 if (m_hostType == wxURI_REGNAME)
218 ret += wxURI::Unescape(m_server);
219 else
220 ret += m_server;
221
222 if (HasPort())
223 ret = ret + wxT(":") + m_port;
224 }
225
226 ret += wxURI::Unescape(m_path);
227
228 if (HasQuery())
229 ret = ret + wxT("?") + wxURI::Unescape(m_query);
230
231 if (HasFragment())
232 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
233
234 return ret;
235 }
236
237 // ---------------------------------------------------------------------------
238 // Assignment
239 // ---------------------------------------------------------------------------
240
241 wxURI& wxURI::Assign(const wxURI& uri)
242 {
243 //assign fields
244 m_fields = uri.m_fields;
245
246 //ref over components
247 m_scheme = uri.m_scheme;
248 m_user = uri.m_user;
249 m_server = uri.m_server;
250 m_hostType = uri.m_hostType;
251 m_port = uri.m_port;
252 m_path = uri.m_path;
253 m_query = uri.m_query;
254 m_fragment = uri.m_fragment;
255
256 return *this;
257 }
258
259 wxURI& wxURI::operator = (const wxURI& uri)
260 {
261 return Assign(uri);
262 }
263
264 wxURI& wxURI::operator = (const wxString& string)
265 {
266 Create(string);
267 return *this;
268 }
269
270 // ---------------------------------------------------------------------------
271 // Comparison
272 // ---------------------------------------------------------------------------
273
274 bool wxURI::operator == (const wxURI& uri) const
275 {
276 if (HasScheme())
277 {
278 if(m_scheme != uri.m_scheme)
279 return false;
280 }
281 else if (uri.HasScheme())
282 return false;
283
284
285 if (HasServer())
286 {
287 if (HasUser())
288 {
289 if (m_user != uri.m_user)
290 return false;
291 }
292 else if (uri.HasUser())
293 return false;
294
295 if (m_server != uri.m_server ||
296 m_hostType != uri.m_hostType)
297 return false;
298
299 if (HasPort())
300 {
301 if(m_port != uri.m_port)
302 return false;
303 }
304 else if (uri.HasPort())
305 return false;
306 }
307 else if (uri.HasServer())
308 return false;
309
310
311 if (HasPath())
312 {
313 if(m_path != uri.m_path)
314 return false;
315 }
316 else if (uri.HasPath())
317 return false;
318
319 if (HasQuery())
320 {
321 if (m_query != uri.m_query)
322 return false;
323 }
324 else if (uri.HasQuery())
325 return false;
326
327 if (HasFragment())
328 {
329 if (m_fragment != uri.m_fragment)
330 return false;
331 }
332 else if (uri.HasFragment())
333 return false;
334
335 return true;
336 }
337
338 // ---------------------------------------------------------------------------
339 // IsReference
340 //
341 // if there is no authority or scheme, it is a reference
342 // ---------------------------------------------------------------------------
343
344 bool wxURI::IsReference() const
345 { return !HasScheme() || !HasServer(); }
346
347 // ---------------------------------------------------------------------------
348 // Parse
349 //
350 // Master URI parsing method. Just calls the individual parsing methods
351 //
352 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
353 // URI-reference = URI / relative-URI
354 // ---------------------------------------------------------------------------
355
356 const wxChar* wxURI::Parse(const wxChar* uri)
357 {
358 uri = ParseScheme(uri);
359 uri = ParseAuthority(uri);
360 uri = ParsePath(uri);
361 uri = ParseQuery(uri);
362 return ParseFragment(uri);
363 }
364
365 // ---------------------------------------------------------------------------
366 // ParseXXX
367 //
368 // Individual parsers for each URI component
369 // ---------------------------------------------------------------------------
370
371 const wxChar* wxURI::ParseScheme(const wxChar* uri)
372 {
373 wxASSERT(uri != NULL);
374
375 //copy of the uri - used for figuring out
376 //length of each component
377 const wxChar* uricopy = uri;
378
379 //Does the uri have a scheme (first character alpha)?
380 if (IsAlpha(*uri))
381 {
382 m_scheme += *uri++;
383
384 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
385 while (IsAlpha(*uri) || IsDigit(*uri) ||
386 *uri == wxT('+') ||
387 *uri == wxT('-') ||
388 *uri == wxT('.'))
389 {
390 m_scheme += *uri++;
391 }
392
393 //valid scheme?
394 if (*uri == wxT(':'))
395 {
396 //mark the scheme as valid
397 m_fields |= wxURI_SCHEME;
398
399 //move reference point up to input buffer
400 uricopy = ++uri;
401 }
402 else
403 //relative uri with relative path reference
404 m_scheme = wxT("");
405 }
406 // else
407 //relative uri with _possible_ relative path reference
408
409 return uricopy;
410 }
411
412 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
413 {
414 // authority = [ userinfo "@" ] host [ ":" port ]
415 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
416 {
417 uri += 2;
418
419 uri = ParseUser(uri);
420 uri = ParseServer(uri);
421 return ParsePort(uri);
422 }
423
424 return uri;
425 }
426
427 const wxChar* wxURI::ParseUser(const wxChar* uri)
428 {
429 wxASSERT(uri != NULL);
430
431 //copy of the uri - used for figuring out
432 //length of each component
433 const wxChar* uricopy = uri;
434
435 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
436 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
437 {
438 if(IsUnreserved(*uri) || IsEscape(uri) ||
439 IsSubDelim(*uri) || *uri == wxT(':'))
440 m_user += *uri++;
441 else
442 Escape(m_user, *uri++);
443 }
444
445 if(*uri == wxT('@'))
446 {
447 //valid userinfo
448 m_fields |= wxURI_USER;
449
450 uricopy = ++uri;
451 }
452 else
453 m_user = wxT("");
454
455 return uricopy;
456 }
457
458 const wxChar* wxURI::ParseServer(const wxChar* uri)
459 {
460 wxASSERT(uri != NULL);
461
462 //copy of the uri - used for figuring out
463 //length of each component
464 const wxChar* uricopy = uri;
465
466 // host = IP-literal / IPv4address / reg-name
467 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
468 if (*uri == wxT('['))
469 {
470 if (ParseIPv6address(++uri) && *uri == wxT(']'))
471 {
472 ++uri;
473 m_hostType = wxURI_IPV6ADDRESS;
474
475 wxStringBufferLength theBuffer(m_server, uri - uricopy);
476 wxMemcpy(theBuffer, uricopy, uri-uricopy);
477 theBuffer.SetLength(uri-uricopy);
478 }
479 else
480 {
481 uri = uricopy;
482
483 if (ParseIPvFuture(++uri) && *uri == wxT(']'))
484 {
485 ++uri;
486 m_hostType = wxURI_IPVFUTURE;
487
488 wxStringBufferLength theBuffer(m_server, uri - uricopy);
489 wxMemcpy(theBuffer, uricopy, uri-uricopy);
490 theBuffer.SetLength(uri-uricopy);
491 }
492 else
493 uri = uricopy;
494 }
495 }
496 else
497 {
498 if (ParseIPv4address(uri))
499 {
500 m_hostType = wxURI_IPV4ADDRESS;
501
502 wxStringBufferLength theBuffer(m_server, uri - uricopy);
503 wxMemcpy(theBuffer, uricopy, uri-uricopy);
504 theBuffer.SetLength(uri-uricopy);
505 }
506 else
507 uri = uricopy;
508 }
509
510 if(m_hostType == wxURI_REGNAME)
511 {
512 uri = uricopy;
513 // reg-name = *( unreserved / pct-encoded / sub-delims )
514 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
515 {
516 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
517 m_server += *uri++;
518 else
519 Escape(m_server, *uri++);
520 }
521 }
522
523 //mark the server as valid
524 m_fields |= wxURI_SERVER;
525
526 return uri;
527 }
528
529
530 const wxChar* wxURI::ParsePort(const wxChar* uri)
531 {
532 wxASSERT(uri != NULL);
533
534 // port = *DIGIT
535 if(*uri == wxT(':'))
536 {
537 ++uri;
538 while(IsDigit(*uri))
539 {
540 m_port += *uri++;
541 }
542
543 //mark the port as valid
544 m_fields |= wxURI_PORT;
545 }
546
547 return uri;
548 }
549
550 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
551 {
552 wxASSERT(uri != NULL);
553
554 //copy of the uri - used for figuring out
555 //length of each component
556 const wxChar* uricopy = uri;
557
558 /// hier-part = "//" authority path-abempty
559 /// / path-absolute
560 /// / path-rootless
561 /// / path-empty
562 ///
563 /// relative-part = "//" authority path-abempty
564 /// / path-absolute
565 /// / path-noscheme
566 /// / path-empty
567 ///
568 /// path-abempty = *( "/" segment )
569 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
570 /// path-noscheme = segment-nz-nc *( "/" segment )
571 /// path-rootless = segment-nz *( "/" segment )
572 /// path-empty = 0<pchar>
573 ///
574 /// segment = *pchar
575 /// segment-nz = 1*pchar
576 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
577 /// ; non-zero-length segment without any colon ":"
578 ///
579 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
580 if (*uri == wxT('/'))
581 {
582 m_path += *uri++;
583
584 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
585 {
586 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
587 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
588 m_path += *uri++;
589 else
590 Escape(m_path, *uri++);
591 }
592
593 if (bNormalize)
594 {
595 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
596 #if wxUSE_STL
597 wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
598 #endif
599 Normalize(theBuffer, true);
600 theBuffer.SetLength(wxStrlen(theBuffer));
601 }
602 //mark the path as valid
603 m_fields |= wxURI_PATH;
604 }
605 else if(*uri) //Relative path
606 {
607 if (bReference)
608 {
609 //no colon allowed
610 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
611 {
612 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
613 *uri == wxT('@') || *uri == wxT('/'))
614 m_path += *uri++;
615 else
616 Escape(m_path, *uri++);
617 }
618 }
619 else
620 {
621 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
622 {
623 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
624 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
625 m_path += *uri++;
626 else
627 Escape(m_path, *uri++);
628 }
629 }
630
631 if (uri != uricopy)
632 {
633 if (bNormalize)
634 {
635 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
636 #if wxUSE_STL
637 wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
638 #endif
639 Normalize(theBuffer);
640 theBuffer.SetLength(wxStrlen(theBuffer));
641 }
642
643 //mark the path as valid
644 m_fields |= wxURI_PATH;
645 }
646 }
647
648 return uri;
649 }
650
651
652 const wxChar* wxURI::ParseQuery(const wxChar* uri)
653 {
654 wxASSERT(uri != NULL);
655
656 // query = *( pchar / "/" / "?" )
657 if (*uri == wxT('?'))
658 {
659 ++uri;
660 while(*uri && *uri != wxT('#'))
661 {
662 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
663 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
664 m_query += *uri++;
665 else
666 Escape(m_query, *uri++);
667 }
668
669 //mark the server as valid
670 m_fields |= wxURI_QUERY;
671 }
672
673 return uri;
674 }
675
676
677 const wxChar* wxURI::ParseFragment(const wxChar* uri)
678 {
679 wxASSERT(uri != NULL);
680
681 // fragment = *( pchar / "/" / "?" )
682 if (*uri == wxT('#'))
683 {
684 ++uri;
685 while(*uri)
686 {
687 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
688 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
689 m_fragment += *uri++;
690 else
691 Escape(m_fragment, *uri++);
692 }
693
694 //mark the server as valid
695 m_fields |= wxURI_FRAGMENT;
696 }
697
698 return uri;
699 }
700
701 // ---------------------------------------------------------------------------
702 // Resolve
703 //
704 // Builds missing components of this uri from a base uri
705 //
706 // A version of the algorithm outlined in the RFC is used here
707 // (it is shown in comments)
708 //
709 // Note that an empty URI inherits all components
710 // ---------------------------------------------------------------------------
711
712 void wxURI::Resolve(const wxURI& base, int flags)
713 {
714 wxASSERT_MSG(!base.IsReference(),
715 wxT("wxURI to inherit from must not be a reference!"));
716
717 // If we arn't being strict, enable the older (pre-RFC2396)
718 // loophole that allows this uri to inherit other
719 // properties from the base uri - even if the scheme
720 // is defined
721 if ( !(flags & wxURI_STRICT) &&
722 HasScheme() && base.HasScheme() &&
723 m_scheme == base.m_scheme )
724 {
725 m_fields -= wxURI_SCHEME;
726 }
727
728
729 // Do nothing if this is an absolute wxURI
730 // if defined(R.scheme) then
731 // T.scheme = R.scheme;
732 // T.authority = R.authority;
733 // T.path = remove_dot_segments(R.path);
734 // T.query = R.query;
735 if (HasScheme())
736 {
737 return;
738 }
739
740 //No sheme - inherit
741 m_scheme = base.m_scheme;
742 m_fields |= wxURI_SCHEME;
743
744 // All we need to do for relative URIs with an
745 // authority component is just inherit the scheme
746 // if defined(R.authority) then
747 // T.authority = R.authority;
748 // T.path = remove_dot_segments(R.path);
749 // T.query = R.query;
750 if (HasServer())
751 {
752 return;
753 }
754
755 //No authority - inherit
756 if (base.HasUser())
757 {
758 m_user = base.m_user;
759 m_fields |= wxURI_USER;
760 }
761
762 m_server = base.m_server;
763 m_hostType = base.m_hostType;
764 m_fields |= wxURI_SERVER;
765
766 if (base.HasPort())
767 {
768 m_port = base.m_port;
769 m_fields |= wxURI_PORT;
770 }
771
772
773 // Simple path inheritance from base
774 if (!HasPath())
775 {
776 // T.path = Base.path;
777 m_path = base.m_path;
778 m_fields |= wxURI_PATH;
779
780
781 // if defined(R.query) then
782 // T.query = R.query;
783 // else
784 // T.query = Base.query;
785 // endif;
786 if (!HasQuery())
787 {
788 m_query = base.m_query;
789 m_fields |= wxURI_QUERY;
790 }
791 }
792 else
793 {
794 // if (R.path starts-with "/") then
795 // T.path = remove_dot_segments(R.path);
796 // else
797 // T.path = merge(Base.path, R.path);
798 // T.path = remove_dot_segments(T.path);
799 // endif;
800 // T.query = R.query;
801 if (m_path[0u] != wxT('/'))
802 {
803 //Marge paths
804 const wxChar* op = m_path.c_str();
805 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
806
807 //not a ending directory? move up
808 if (base.m_path[0] && *(bp-1) != wxT('/'))
809 UpTree(base.m_path, bp);
810
811 //normalize directories
812 while(*op == wxT('.') && *(op+1) == wxT('.') &&
813 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
814 {
815 UpTree(base.m_path, bp);
816
817 if (*(op+2) == '\0')
818 op += 2;
819 else
820 op += 3;
821 }
822
823 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
824 m_path.substr((op - m_path.c_str()), m_path.Length());
825 }
826 }
827
828 //T.fragment = R.fragment;
829 }
830
831 // ---------------------------------------------------------------------------
832 // UpTree
833 //
834 // Moves a URI path up a directory
835 // ---------------------------------------------------------------------------
836
837 //static
838 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
839 {
840 if (uri != uristart && *(uri-1) == wxT('/'))
841 {
842 uri -= 2;
843 }
844
845 for(;uri != uristart; --uri)
846 {
847 if (*uri == wxT('/'))
848 {
849 ++uri;
850 break;
851 }
852 }
853
854 //!!!TODO:HACK!!!//
855 if (uri == uristart && *uri == wxT('/'))
856 ++uri;
857 //!!!//
858 }
859
860 // ---------------------------------------------------------------------------
861 // Normalize
862 //
863 // Normalizes directories in-place
864 //
865 // I.E. ./ and . are ignored
866 //
867 // ../ and .. are removed if a directory is before it, along
868 // with that directory (leading .. and ../ are kept)
869 // ---------------------------------------------------------------------------
870
871 //static
872 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
873 {
874 wxChar* cp = s;
875 wxChar* bp = s;
876
877 if(s[0] == wxT('/'))
878 ++bp;
879
880 while(*cp)
881 {
882 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
883 && (bp == cp || *(cp-1) == wxT('/')))
884 {
885 //. _or_ ./ - ignore
886 if (*(cp+1) == '\0')
887 cp += 1;
888 else
889 cp += 2;
890 }
891 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
892 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
893 && (bp == cp || *(cp-1) == wxT('/')))
894 {
895 //.. _or_ ../ - go up the tree
896 if (s != bp)
897 {
898 UpTree((const wxChar*)bp, (const wxChar*&)s);
899
900 if (*(cp+2) == '\0')
901 cp += 2;
902 else
903 cp += 3;
904 }
905 else if (!bIgnoreLeads)
906
907 {
908 *bp++ = *cp++;
909 *bp++ = *cp++;
910 if (*cp)
911 *bp++ = *cp++;
912
913 s = bp;
914 }
915 else
916 {
917 if (*(cp+2) == '\0')
918 cp += 2;
919 else
920 cp += 3;
921 }
922 }
923 else
924 *s++ = *cp++;
925 }
926
927 *s = '\0';
928 }
929
930 // ---------------------------------------------------------------------------
931 // ParseH16
932 //
933 // Parses 1 to 4 hex values. Returns true if the first character of the input
934 // string is a valid hex character. It is the caller's responsibility to move
935 // the input string back to its original position on failure.
936 // ---------------------------------------------------------------------------
937
938 bool wxURI::ParseH16(const wxChar*& uri)
939 {
940 // h16 = 1*4HEXDIG
941 if(!IsHex(*++uri))
942 return false;
943
944 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
945 ++uri;
946
947 return true;
948 }
949
950 // ---------------------------------------------------------------------------
951 // ParseIPXXX
952 //
953 // Parses a certain version of an IP address and moves the input string past
954 // it. Returns true if the input string contains the proper version of an ip
955 // address. It is the caller's responsibility to move the input string back
956 // to its original position on failure.
957 // ---------------------------------------------------------------------------
958
959 bool wxURI::ParseIPv4address(const wxChar*& uri)
960 {
961 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
962 //
963 //dec-octet = DIGIT ; 0-9
964 // / %x31-39 DIGIT ; 10-99
965 // / "1" 2DIGIT ; 100-199
966 // / "2" %x30-34 DIGIT ; 200-249
967 // / "25" %x30-35 ; 250-255
968 size_t iIPv4 = 0;
969 if (IsDigit(*uri))
970 {
971 ++iIPv4;
972
973
974 //each ip part must be between 0-255 (dupe of version in for loop)
975 if( IsDigit(*++uri) && IsDigit(*++uri) &&
976 //100 or less (note !)
977 !( (*(uri-2) < wxT('2')) ||
978 //240 or less
979 (*(uri-2) == wxT('2') &&
980 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
981 )
982 )
983 )
984 {
985 return false;
986 }
987
988 if(IsDigit(*uri))++uri;
989
990 //compilers should unroll this loop
991 for(; iIPv4 < 4; ++iIPv4)
992 {
993 if (*uri != wxT('.') || !IsDigit(*++uri))
994 break;
995
996 //each ip part must be between 0-255
997 if( IsDigit(*++uri) && IsDigit(*++uri) &&
998 //100 or less (note !)
999 !( (*(uri-2) < wxT('2')) ||
1000 //240 or less
1001 (*(uri-2) == wxT('2') &&
1002 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1003 )
1004 )
1005 )
1006 {
1007 return false;
1008 }
1009 if(IsDigit(*uri))++uri;
1010 }
1011 }
1012 return iIPv4 == 4;
1013 }
1014
1015 bool wxURI::ParseIPv6address(const wxChar*& uri)
1016 {
1017 // IPv6address = 6( h16 ":" ) ls32
1018 // / "::" 5( h16 ":" ) ls32
1019 // / [ h16 ] "::" 4( h16 ":" ) ls32
1020 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1021 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1022 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1023 // / [ *4( h16 ":" ) h16 ] "::" ls32
1024 // / [ *5( h16 ":" ) h16 ] "::" h16
1025 // / [ *6( h16 ":" ) h16 ] "::"
1026
1027 size_t numPrefix = 0,
1028 maxPostfix;
1029
1030 bool bEndHex = false;
1031
1032 for( ; numPrefix < 6; ++numPrefix)
1033 {
1034 if(!ParseH16(uri))
1035 {
1036 --uri;
1037 bEndHex = true;
1038 break;
1039 }
1040
1041 if(*uri != wxT(':'))
1042 {
1043 break;
1044 }
1045 }
1046
1047 if(!bEndHex && !ParseH16(uri))
1048 {
1049 --uri;
1050
1051 if (numPrefix)
1052 return false;
1053
1054 if (*uri == wxT(':'))
1055 {
1056 if (*++uri != wxT(':'))
1057 return false;
1058
1059 maxPostfix = 5;
1060 }
1061 else
1062 maxPostfix = 6;
1063 }
1064 else
1065 {
1066 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1067 {
1068 if (numPrefix != 6)
1069 return false;
1070
1071 while (*--uri != wxT(':')) {}
1072 ++uri;
1073
1074 const wxChar* uristart = uri;
1075 //parse ls32
1076 // ls32 = ( h16 ":" h16 ) / IPv4address
1077 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1078 return true;
1079
1080 uri = uristart;
1081
1082 if (ParseIPv4address(uri))
1083 return true;
1084 else
1085 return false;
1086 }
1087 else
1088 {
1089 uri += 2;
1090
1091 if (numPrefix > 3)
1092 maxPostfix = 0;
1093 else
1094 maxPostfix = 4 - numPrefix;
1095 }
1096 }
1097
1098 bool bAllowAltEnding = maxPostfix == 0;
1099
1100 for(; maxPostfix != 0; --maxPostfix)
1101 {
1102 if(!ParseH16(uri) || *uri != wxT(':'))
1103 return false;
1104 }
1105
1106 if(numPrefix <= 4)
1107 {
1108 const wxChar* uristart = uri;
1109 //parse ls32
1110 // ls32 = ( h16 ":" h16 ) / IPv4address
1111 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1112 return true;
1113
1114 uri = uristart;
1115
1116 if (ParseIPv4address(uri))
1117 return true;
1118
1119 uri = uristart;
1120
1121 if (!bAllowAltEnding)
1122 return false;
1123 }
1124
1125 if(numPrefix <= 5 && ParseH16(uri))
1126 return true;
1127
1128 return true;
1129 }
1130
1131 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1132 {
1133 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1134 if (*++uri != wxT('v') || !IsHex(*++uri))
1135 return false;
1136
1137 while (IsHex(*++uri)) {}
1138
1139 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1140 return false;
1141
1142 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1143
1144 return true;
1145 }
1146
1147
1148 // ---------------------------------------------------------------------------
1149 // CharToHex
1150 //
1151 // Converts a character into a numeric hexadecimal value, or 0 if the
1152 // passed in character is not a valid hex character
1153 // ---------------------------------------------------------------------------
1154
1155 //static
1156 wxChar wxURI::CharToHex(const wxChar& c)
1157 {
1158 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1159 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1160 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1161
1162 return 0;
1163 }
1164
1165 // ---------------------------------------------------------------------------
1166 // IsXXX
1167 //
1168 // Returns true if the passed in character meets the criteria of the method
1169 // ---------------------------------------------------------------------------
1170
1171 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1172 bool wxURI::IsUnreserved (const wxChar& c)
1173 { return IsAlpha(c) || IsDigit(c) ||
1174 c == wxT('-') ||
1175 c == wxT('.') ||
1176 c == wxT('_') ||
1177 c == wxT('~') //tilde
1178 ;
1179 }
1180
1181 bool wxURI::IsReserved (const wxChar& c)
1182 {
1183 return IsGenDelim(c) || IsSubDelim(c);
1184 }
1185
1186 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1187 bool wxURI::IsGenDelim (const wxChar& c)
1188 {
1189 return c == wxT(':') ||
1190 c == wxT('/') ||
1191 c == wxT('?') ||
1192 c == wxT('#') ||
1193 c == wxT('[') ||
1194 c == wxT(']') ||
1195 c == wxT('@');
1196 }
1197
1198 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1199 //! / "*" / "+" / "," / ";" / "="
1200 bool wxURI::IsSubDelim (const wxChar& c)
1201 {
1202 return c == wxT('!') ||
1203 c == wxT('$') ||
1204 c == wxT('&') ||
1205 c == wxT('\'') ||
1206 c == wxT('(') ||
1207 c == wxT(')') ||
1208 c == wxT('*') ||
1209 c == wxT('+') ||
1210 c == wxT(',') ||
1211 c == wxT(';') ||
1212 c == wxT('=')
1213 ;
1214 }
1215
1216 bool wxURI::IsHex(const wxChar& c)
1217 { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1218
1219 bool wxURI::IsAlpha(const wxChar& c)
1220 { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
1221
1222 bool wxURI::IsDigit(const wxChar& c)
1223 { return c >= wxT('0') && c <= wxT('9'); }
1224
1225
1226 // ---------------------------------------------------------------------------
1227 //
1228 // wxURL Compatibility
1229 //
1230 // ---------------------------------------------------------------------------
1231
1232 #if wxUSE_URL
1233
1234 #if WXWIN_COMPATIBILITY_2_4
1235
1236 #include "wx/url.h"
1237
1238 //Note that this old code really doesn't convert to a URI that well and looks
1239 //more like a dirty hack than anything else...
1240
1241 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1242 {
1243 wxString out_str;
1244 wxString hexa_code;
1245 size_t i;
1246
1247 for (i = 0; i < uri.Len(); i++)
1248 {
1249 wxChar c = uri.GetChar(i);
1250
1251 if (c == wxT(' '))
1252 {
1253 // GRG, Apr/2000: changed to "%20" instead of '+'
1254
1255 out_str += wxT("%20");
1256 }
1257 else
1258 {
1259 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1260 //
1261 // - Alphanumeric characters are never escaped
1262 // - Unreserved marks are never escaped
1263 // - Delimiters must be escaped if they appear within a component
1264 // but not if they are used to separate components. Here we have
1265 // no clear way to distinguish between these two cases, so they
1266 // are escaped unless they are passed in the 'delims' parameter
1267 // (allowed delimiters).
1268
1269 static const wxChar marks[] = wxT("-_.!~*()'");
1270
1271 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1272 {
1273 hexa_code.Printf(wxT("%%%02X"), c);
1274 out_str += hexa_code;
1275 }
1276 else
1277 {
1278 out_str += c;
1279 }
1280 }
1281 }
1282
1283 return out_str;
1284 }
1285
1286 wxString wxURL::ConvertFromURI(const wxString& uri)
1287 {
1288 return wxURI::Unescape(uri);
1289 }
1290
1291 #endif //WXWIN_COMPATIBILITY_2_4
1292
1293 #endif //wxUSE_URL
1294
1295 //end of uri.cpp
1296
1297
1298