]> git.saurik.com Git - wxWidgets.git/blob - src/common/uri.cpp
0f03cf5aa5b4cf664dfc0c3b239e8406b4259338
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21 #endif
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #include "wx/uri.h"
31
32 // ---------------------------------------------------------------------------
33 // definitions
34 // ---------------------------------------------------------------------------
35
// NOTE(review): presumably registers wxURI with the wxWidgets run-time class
// information, with wxObject as its base class - confirm against wx/object.h.
36 IMPLEMENT_CLASS(wxURI, wxObject);
37
38 // ===========================================================================
39 // implementation
40 // ===========================================================================
41
42 // ---------------------------------------------------------------------------
43 // utilities
44 // ---------------------------------------------------------------------------
45
46 // ---------------------------------------------------------------------------
47 //
48 // wxURI
49 //
50 // ---------------------------------------------------------------------------
51
52 // ---------------------------------------------------------------------------
53 // Constructors
54 // ---------------------------------------------------------------------------
55
56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 }
59
60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61 {
62 Create(uri);
63 }
64
65 wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 Assign(uri);
68 }
69
70 // ---------------------------------------------------------------------------
71 // Destructor and cleanup
72 // ---------------------------------------------------------------------------
73
74 wxURI::~wxURI()
75 {
76 Clear();
77 }
78
79 void wxURI::Clear()
80 {
81 m_scheme = m_userinfo = m_server = m_port = m_path =
82 m_query = m_fragment = wxEmptyString;
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87 }
88
89 // ---------------------------------------------------------------------------
90 // Create
91 //
92 // This creates the URI - all we do here is call the main parsing method
93 // ---------------------------------------------------------------------------
94
95 const wxChar* wxURI::Create(const wxString& uri)
96 {
97 if (m_fields)
98 Clear();
99
100 return Parse(uri);
101 }
102
103 // ---------------------------------------------------------------------------
104 // Escape Methods
105 //
106 // TranslateEscape unencodes a 3 character URL escape sequence
107 //
108 // Escape encodes an invalid URI character into a 3 character sequence
109 //
110 // IsEscape determines if the input string contains an escape sequence,
111 // if it does, then it moves the input string past the escape sequence
112 //
113 // Unescape unencodes all 3 character URL escape sequences in a wxString
114 // ---------------------------------------------------------------------------
115
116 wxChar wxURI::TranslateEscape(const wxChar* s)
117 {
118 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
119
120 //<<4 == 16
121 return (wxChar)( CharToHex(*s) << 4 ) | CharToHex(*++s);
122 }
123
124 wxString wxURI::Unescape(const wxString& uri)
125 {
126 wxString new_uri;
127
128 for(size_t i = 0; i < uri.length(); ++i)
129 {
130 if (uri[i] == wxT('%'))
131 {
132 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
133 i += 2;
134 }
135 else
136 new_uri += uri[i];
137 }
138
139 return new_uri;
140 }
141
142 void wxURI::Escape(wxString& s, const wxChar& c)
143 {
144 const wxChar* hdig = wxT("0123456789abcdef");
145 s += wxT('%');
146 s += hdig[(c >> 4) & 15];
147 s += hdig[c & 15];
148 }
149
150 bool wxURI::IsEscape(const wxChar*& uri)
151 {
152 // pct-encoded = "%" HEXDIG HEXDIG
153 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
154 {
155 uri += 3;
156 return true;
157 }
158 else
159 return false;
160 }
161
162 // ---------------------------------------------------------------------------
163 // GetUser
164 // GetPassword
165 //
166 // Gets the username and password via the old URL method.
167 // ---------------------------------------------------------------------------
168 wxString wxURI::GetUser() const
169 {
170 size_t dwPasswordPos = m_userinfo.find(':');
171
172 if (dwPasswordPos == wxString::npos)
173 dwPasswordPos = 0;
174
175 return m_userinfo(0, dwPasswordPos);
176 }
177
178 wxString wxURI::GetPassword() const
179 {
180 size_t dwPasswordPos = m_userinfo.find(':');
181
182 if (dwPasswordPos == wxString::npos)
183 return wxT("");
184 else
185 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
186 }
187
188 // ---------------------------------------------------------------------------
189 // BuildURI
190 //
191 // BuildURI() builds the entire URI into a useable
192 // representation, including proper identification characters such as slashes
193 //
194 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
195 // the components that accept escape sequences
196 // ---------------------------------------------------------------------------
197
198 wxString wxURI::BuildURI() const
199 {
200 wxString ret;
201
202 if (HasScheme())
203 ret = ret + m_scheme + wxT(":");
204
205 if (HasServer())
206 {
207 ret += wxT("//");
208
209 if (HasUserInfo())
210 ret = ret + m_userinfo + wxT("@");
211
212 ret += m_server;
213
214 if (HasPort())
215 ret = ret + wxT(":") + m_port;
216 }
217
218 ret += m_path;
219
220 if (HasQuery())
221 ret = ret + wxT("?") + m_query;
222
223 if (HasFragment())
224 ret = ret + wxT("#") + m_fragment;
225
226 return ret;
227 }
228
229 wxString wxURI::BuildUnescapedURI() const
230 {
231 wxString ret;
232
233 if (HasScheme())
234 ret = ret + m_scheme + wxT(":");
235
236 if (HasServer())
237 {
238 ret += wxT("//");
239
240 if (HasUserInfo())
241 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
242
243 if (m_hostType == wxURI_REGNAME)
244 ret += wxURI::Unescape(m_server);
245 else
246 ret += m_server;
247
248 if (HasPort())
249 ret = ret + wxT(":") + m_port;
250 }
251
252 ret += wxURI::Unescape(m_path);
253
254 if (HasQuery())
255 ret = ret + wxT("?") + wxURI::Unescape(m_query);
256
257 if (HasFragment())
258 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
259
260 return ret;
261 }
262
263 // ---------------------------------------------------------------------------
264 // Assignment
265 // ---------------------------------------------------------------------------
266
267 wxURI& wxURI::Assign(const wxURI& uri)
268 {
269 //assign fields
270 m_fields = uri.m_fields;
271
272 //ref over components
273 m_scheme = uri.m_scheme;
274 m_userinfo = uri.m_userinfo;
275 m_server = uri.m_server;
276 m_hostType = uri.m_hostType;
277 m_port = uri.m_port;
278 m_path = uri.m_path;
279 m_query = uri.m_query;
280 m_fragment = uri.m_fragment;
281
282 return *this;
283 }
284
285 wxURI& wxURI::operator = (const wxURI& uri)
286 {
287 return Assign(uri);
288 }
289
290 wxURI& wxURI::operator = (const wxString& string)
291 {
292 Create(string);
293 return *this;
294 }
295
296 // ---------------------------------------------------------------------------
297 // Comparison
298 // ---------------------------------------------------------------------------
299
300 bool wxURI::operator == (const wxURI& uri) const
301 {
302 if (HasScheme())
303 {
304 if(m_scheme != uri.m_scheme)
305 return false;
306 }
307 else if (uri.HasScheme())
308 return false;
309
310
311 if (HasServer())
312 {
313 if (HasUserInfo())
314 {
315 if (m_userinfo != uri.m_userinfo)
316 return false;
317 }
318 else if (uri.HasUserInfo())
319 return false;
320
321 if (m_server != uri.m_server ||
322 m_hostType != uri.m_hostType)
323 return false;
324
325 if (HasPort())
326 {
327 if(m_port != uri.m_port)
328 return false;
329 }
330 else if (uri.HasPort())
331 return false;
332 }
333 else if (uri.HasServer())
334 return false;
335
336
337 if (HasPath())
338 {
339 if(m_path != uri.m_path)
340 return false;
341 }
342 else if (uri.HasPath())
343 return false;
344
345 if (HasQuery())
346 {
347 if (m_query != uri.m_query)
348 return false;
349 }
350 else if (uri.HasQuery())
351 return false;
352
353 if (HasFragment())
354 {
355 if (m_fragment != uri.m_fragment)
356 return false;
357 }
358 else if (uri.HasFragment())
359 return false;
360
361 return true;
362 }
363
364 // ---------------------------------------------------------------------------
365 // IsReference
366 //
367 // if there is no authority or scheme, it is a reference
368 // ---------------------------------------------------------------------------
369
370 bool wxURI::IsReference() const
371 { return !HasScheme() || !HasServer(); }
372
373 // ---------------------------------------------------------------------------
374 // Parse
375 //
376 // Master URI parsing method. Just calls the individual parsing methods
377 //
378 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
379 // URI-reference = URI / relative-ref
380 // ---------------------------------------------------------------------------
381
382 const wxChar* wxURI::Parse(const wxChar* uri)
383 {
384 uri = ParseScheme(uri);
385 uri = ParseAuthority(uri);
386 uri = ParsePath(uri);
387 uri = ParseQuery(uri);
388 return ParseFragment(uri);
389 }
390
391 // ---------------------------------------------------------------------------
392 // ParseXXX
393 //
394 // Individual parsers for each URI component
395 // ---------------------------------------------------------------------------
396
397 const wxChar* wxURI::ParseScheme(const wxChar* uri)
398 {
399 wxASSERT(uri != NULL);
400
401 //copy of the uri - used for figuring out
402 //length of each component
403 const wxChar* uricopy = uri;
404
405 //Does the uri have a scheme (first character alpha)?
406 if (IsAlpha(*uri))
407 {
408 m_scheme += *uri++;
409
410 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
411 while (IsAlpha(*uri) || IsDigit(*uri) ||
412 *uri == wxT('+') ||
413 *uri == wxT('-') ||
414 *uri == wxT('.'))
415 {
416 m_scheme += *uri++;
417 }
418
419 //valid scheme?
420 if (*uri == wxT(':'))
421 {
422 //mark the scheme as valid
423 m_fields |= wxURI_SCHEME;
424
425 //move reference point up to input buffer
426 uricopy = ++uri;
427 }
428 else
429 //relative uri with relative path reference
430 m_scheme = wxEmptyString;
431 }
432 // else
433 //relative uri with _possible_ relative path reference
434
435 return uricopy;
436 }
437
438 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
439 {
440 // authority = [ userinfo "@" ] host [ ":" port ]
441 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
442 {
443 uri += 2;
444
445 uri = ParseUserInfo(uri);
446 uri = ParseServer(uri);
447 return ParsePort(uri);
448 }
449
450 return uri;
451 }
452
453 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
454 {
455 wxASSERT(uri != NULL);
456
457 //copy of the uri - used for figuring out
458 //length of each component
459 const wxChar* uricopy = uri;
460
461 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
462 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
463 {
464 if(IsUnreserved(*uri) || IsEscape(uri) ||
465 IsSubDelim(*uri) || *uri == wxT(':'))
466 m_userinfo += *uri++;
467 else
468 Escape(m_userinfo, *uri++);
469 }
470
471 if(*uri == wxT('@'))
472 {
473 //valid userinfo
474 m_fields |= wxURI_USERINFO;
475
476 uricopy = ++uri;
477 }
478 else
479 m_userinfo = wxEmptyString;
480
481 return uricopy;
482 }
483
484 const wxChar* wxURI::ParseServer(const wxChar* uri)
485 {
486 wxASSERT(uri != NULL);
487
488 //copy of the uri - used for figuring out
489 //length of each component
490 const wxChar* uricopy = uri;
491
492 // host = IP-literal / IPv4address / reg-name
493 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
494 if (*uri == wxT('['))
495 {
496 ++uri; //some compilers don't support *&ing a ++*
497 if (ParseIPv6address(uri) && *uri == wxT(']'))
498 {
499 ++uri;
500 m_hostType = wxURI_IPV6ADDRESS;
501
502 wxStringBufferLength theBuffer(m_server, uri - uricopy);
503 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
504 theBuffer.SetLength(uri-uricopy);
505 }
506 else
507 {
508 uri = uricopy;
509
510 ++uri; //some compilers don't support *&ing a ++*
511 if (ParseIPvFuture(uri) && *uri == wxT(']'))
512 {
513 ++uri;
514 m_hostType = wxURI_IPVFUTURE;
515
516 wxStringBufferLength theBuffer(m_server, uri - uricopy);
517 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
518 theBuffer.SetLength(uri-uricopy);
519 }
520 else
521 uri = uricopy;
522 }
523 }
524 else
525 {
526 if (ParseIPv4address(uri))
527 {
528 m_hostType = wxURI_IPV4ADDRESS;
529
530 wxStringBufferLength theBuffer(m_server, uri - uricopy);
531 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
532 theBuffer.SetLength(uri-uricopy);
533 }
534 else
535 uri = uricopy;
536 }
537
538 if(m_hostType == wxURI_REGNAME)
539 {
540 uri = uricopy;
541 // reg-name = *( unreserved / pct-encoded / sub-delims )
542 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
543 {
544 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
545 m_server += *uri++;
546 else
547 Escape(m_server, *uri++);
548 }
549 }
550
551 //mark the server as valid
552 m_fields |= wxURI_SERVER;
553
554 return uri;
555 }
556
557
558 const wxChar* wxURI::ParsePort(const wxChar* uri)
559 {
560 wxASSERT(uri != NULL);
561
562 // port = *DIGIT
563 if(*uri == wxT(':'))
564 {
565 ++uri;
566 while(IsDigit(*uri))
567 {
568 m_port += *uri++;
569 }
570
571 //mark the port as valid
572 m_fields |= wxURI_PORT;
573 }
574
575 return uri;
576 }
577
578 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
579 {
580 wxASSERT(uri != NULL);
581
582 //copy of the uri - used for figuring out
583 //length of each component
584 const wxChar* uricopy = uri;
585
586 /// hier-part = "//" authority path-abempty
587 /// / path-absolute
588 /// / path-rootless
589 /// / path-empty
590 ///
591 /// relative-part = "//" authority path-abempty
592 /// / path-absolute
593 /// / path-noscheme
594 /// / path-empty
595 ///
596 /// path-abempty = *( "/" segment )
597 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
598 /// path-noscheme = segment-nz-nc *( "/" segment )
599 /// path-rootless = segment-nz *( "/" segment )
600 /// path-empty = 0<pchar>
601 ///
602 /// segment = *pchar
603 /// segment-nz = 1*pchar
604 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
605 /// ; non-zero-length segment without any colon ":"
606 ///
607 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
608 if (*uri == wxT('/'))
609 {
610 m_path += *uri++;
611
612 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
613 {
614 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
615 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
616 m_path += *uri++;
617 else
618 Escape(m_path, *uri++);
619 }
620
621 if (bNormalize)
622 {
623 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
624 #if wxUSE_STL
625 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
626 #endif
627 Normalize(theBuffer, true);
628 theBuffer.SetLength(wxStrlen(theBuffer));
629 }
630 //mark the path as valid
631 m_fields |= wxURI_PATH;
632 }
633 else if(*uri) //Relative path
634 {
635 if (bReference)
636 {
637 //no colon allowed
638 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
639 {
640 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
641 *uri == wxT('@') || *uri == wxT('/'))
642 m_path += *uri++;
643 else
644 Escape(m_path, *uri++);
645 }
646 }
647 else
648 {
649 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
650 {
651 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
652 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
653 m_path += *uri++;
654 else
655 Escape(m_path, *uri++);
656 }
657 }
658
659 if (uri != uricopy)
660 {
661 if (bNormalize)
662 {
663 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
664 #if wxUSE_STL
665 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
666 #endif
667 Normalize(theBuffer);
668 theBuffer.SetLength(wxStrlen(theBuffer));
669 }
670
671 //mark the path as valid
672 m_fields |= wxURI_PATH;
673 }
674 }
675
676 return uri;
677 }
678
679
680 const wxChar* wxURI::ParseQuery(const wxChar* uri)
681 {
682 wxASSERT(uri != NULL);
683
684 // query = *( pchar / "/" / "?" )
685 if (*uri == wxT('?'))
686 {
687 ++uri;
688 while(*uri && *uri != wxT('#'))
689 {
690 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
691 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
692 m_query += *uri++;
693 else
694 Escape(m_query, *uri++);
695 }
696
697 //mark the server as valid
698 m_fields |= wxURI_QUERY;
699 }
700
701 return uri;
702 }
703
704
705 const wxChar* wxURI::ParseFragment(const wxChar* uri)
706 {
707 wxASSERT(uri != NULL);
708
709 // fragment = *( pchar / "/" / "?" )
710 if (*uri == wxT('#'))
711 {
712 ++uri;
713 while(*uri)
714 {
715 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
716 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
717 m_fragment += *uri++;
718 else
719 Escape(m_fragment, *uri++);
720 }
721
722 //mark the server as valid
723 m_fields |= wxURI_FRAGMENT;
724 }
725
726 return uri;
727 }
728
729 // ---------------------------------------------------------------------------
730 // Resolve
731 //
732 // Builds missing components of this uri from a base uri
733 //
734 // A version of the algorithm outlined in the RFC is used here
735 // (it is shown in comments)
736 //
737 // Note that an empty URI inherits all components
738 // ---------------------------------------------------------------------------
739
740 void wxURI::Resolve(const wxURI& base, int flags)
741 {
742 wxASSERT_MSG(!base.IsReference(),
743 wxT("wxURI to inherit from must not be a reference!"));
744
745 // If we arn't being strict, enable the older (pre-RFC2396)
746 // loophole that allows this uri to inherit other
747 // properties from the base uri - even if the scheme
748 // is defined
749 if ( !(flags & wxURI_STRICT) &&
750 HasScheme() && base.HasScheme() &&
751 m_scheme == base.m_scheme )
752 {
753 m_fields -= wxURI_SCHEME;
754 }
755
756
757 // Do nothing if this is an absolute wxURI
758 // if defined(R.scheme) then
759 // T.scheme = R.scheme;
760 // T.authority = R.authority;
761 // T.path = remove_dot_segments(R.path);
762 // T.query = R.query;
763 if (HasScheme())
764 {
765 return;
766 }
767
768 //No scheme - inherit
769 m_scheme = base.m_scheme;
770 m_fields |= wxURI_SCHEME;
771
772 // All we need to do for relative URIs with an
773 // authority component is just inherit the scheme
774 // if defined(R.authority) then
775 // T.authority = R.authority;
776 // T.path = remove_dot_segments(R.path);
777 // T.query = R.query;
778 if (HasServer())
779 {
780 return;
781 }
782
783 //No authority - inherit
784 if (base.HasUserInfo())
785 {
786 m_userinfo = base.m_userinfo;
787 m_fields |= wxURI_USERINFO;
788 }
789
790 m_server = base.m_server;
791 m_hostType = base.m_hostType;
792 m_fields |= wxURI_SERVER;
793
794 if (base.HasPort())
795 {
796 m_port = base.m_port;
797 m_fields |= wxURI_PORT;
798 }
799
800
801 // Simple path inheritance from base
802 if (!HasPath())
803 {
804 // T.path = Base.path;
805 m_path = base.m_path;
806 m_fields |= wxURI_PATH;
807
808
809 // if defined(R.query) then
810 // T.query = R.query;
811 // else
812 // T.query = Base.query;
813 // endif;
814 if (!HasQuery())
815 {
816 m_query = base.m_query;
817 m_fields |= wxURI_QUERY;
818 }
819 }
820 else
821 {
822 // if (R.path starts-with "/") then
823 // T.path = remove_dot_segments(R.path);
824 // else
825 // T.path = merge(Base.path, R.path);
826 // T.path = remove_dot_segments(T.path);
827 // endif;
828 // T.query = R.query;
829 if (m_path[0u] != wxT('/'))
830 {
831 //Merge paths
832 const wxChar* op = m_path.c_str();
833 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
834
835 //not a ending directory? move up
836 if (base.m_path[0] && *(bp-1) != wxT('/'))
837 UpTree(base.m_path, bp);
838
839 //normalize directories
840 while(*op == wxT('.') && *(op+1) == wxT('.') &&
841 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
842 {
843 UpTree(base.m_path, bp);
844
845 if (*(op+2) == '\0')
846 op += 2;
847 else
848 op += 3;
849 }
850
851 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
852 m_path.substr((op - m_path.c_str()), m_path.Length());
853 }
854 }
855
856 //T.fragment = R.fragment;
857 }
858
859 // ---------------------------------------------------------------------------
860 // UpTree
861 //
862 // Moves a URI path up a directory
863 // ---------------------------------------------------------------------------
864
865 //static
866 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
867 {
868 if (uri != uristart && *(uri-1) == wxT('/'))
869 {
870 uri -= 2;
871 }
872
873 for(;uri != uristart; --uri)
874 {
875 if (*uri == wxT('/'))
876 {
877 ++uri;
878 break;
879 }
880 }
881
882 //!!!TODO:HACK!!!//
883 if (uri == uristart && *uri == wxT('/'))
884 ++uri;
885 //!!!//
886 }
887
888 // ---------------------------------------------------------------------------
889 // Normalize
890 //
891 // Normalizes directories in-place
892 //
893 // I.E. ./ and . are ignored
894 //
895 // ../ and .. are removed if a directory is before it, along
896 // with that directory (leading .. and ../ are kept)
897 // ---------------------------------------------------------------------------
898
899 //static
// Rewrites the NUL-terminated path in s in place.
//
// s            - buffer holding the path; it is also used as the write
//                cursor while the path is rewritten
// bIgnoreLeads - what to do with a ".." that has nothing before it to
//                remove: dropped when true, copied to the output when false
//
// cp is the read cursor and bp marks the position the write cursor is never
// rewound past (just after a leading '/', and after every ".." that was
// kept).
900 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
901 {
902 wxChar* cp = s;
903 wxChar* bp = s;
904
// a leading '/' is never removed
905 if(s[0] == wxT('/'))
906 ++bp;
907
908 while(*cp)
909 {
// "." segment: must be followed by '/' or the end of the path and must
// start a segment
910 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
911 && (bp == cp || *(cp-1) == wxT('/')))
912 {
913 //. _or_ ./ - ignore
914 if (*(cp+1) == '\0')
915 cp += 1;
916 else
917 cp += 2;
918 }
// ".." segment: same conditions as above
919 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
920 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
921 && (bp == cp || *(cp-1) == wxT('/')))
922 {
923 //.. _or_ ../ - go up the tree
// something has been written after bp: rewind the write cursor by one
// directory and skip the ".." in the input
924 if (s != bp)
925 {
926 UpTree((const wxChar*)bp, (const wxChar*&)s);
927
928 if (*(cp+2) == '\0')
929 cp += 2;
930 else
931 cp += 3;
932 }
// nothing to remove and leading ".." are kept: copy ".." (and the '/'
// after it, if any) and move bp past it so it is never removed later
933 else if (!bIgnoreLeads)
934
935 {
936 *bp++ = *cp++;
937 *bp++ = *cp++;
938 if (*cp)
939 *bp++ = *cp++;
940
941 s = bp;
942 }
// nothing to remove and leading ".." are ignored: just skip it
943 else
944 {
945 if (*(cp+2) == '\0')
946 cp += 2;
947 else
948 cp += 3;
949 }
950 }
// any other character is copied through
951 else
952 *s++ = *cp++;
953 }
954
// terminate the rewritten path
955 *s = '\0';
956 }
957
958 // ---------------------------------------------------------------------------
959 // ParseH16
960 //
961 // Parses 1 to 4 hex values. Returns true if the first character of the input
962 // string is a valid hex character. It is the caller's responsibility to move
963 // the input string back to its original position on failure.
964 // ---------------------------------------------------------------------------
965
966 bool wxURI::ParseH16(const wxChar*& uri)
967 {
968 // h16 = 1*4HEXDIG
969 if(!IsHex(*++uri))
970 return false;
971
972 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
973 ++uri;
974
975 return true;
976 }
977
978 // ---------------------------------------------------------------------------
979 // ParseIPXXX
980 //
981 // Parses a certain version of an IP address and moves the input string past
982 // it. Returns true if the input string contains the proper version of an ip
983 // address. It is the caller's responsibility to move the input string back
984 // to its original position on failure.
985 // ---------------------------------------------------------------------------
986
// Parses a dotted-decimal IPv4 address starting at uri.
//
// Returns true only if four decimal groups separated by '.' were read, each
// being one to three digits long and, when three digits long, not greater
// than 255. uri is advanced while parsing and is not restored on failure.
// What follows the fourth group is not examined.
987 bool wxURI::ParseIPv4address(const wxChar*& uri)
988 {
989 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
990 //
991 //dec-octet = DIGIT ; 0-9
992 // / %x31-39 DIGIT ; 10-99
993 // / "1" 2DIGIT ; 100-199
994 // / "2" %x30-34 DIGIT ; 200-249
995 // / "25" %x30-35 ; 250-255
// number of groups read so far
996 size_t iIPv4 = 0;
997 if (IsDigit(*uri))
998 {
999 ++iIPv4;
1000
1001
1002 //each ip part must be between 0-255 (dupe of version in for loop)
// The two IsDigit(*++uri) tests advance uri as a side effect; the range
// test is only reached when the group has three digits, in which case uri
// is on the third digit and uri-2 / uri-1 are the first two.
1003 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1004 //100 or less (note !)
1005 !( (*(uri-2) < wxT('2')) ||
1006 //240 or less
1007 (*(uri-2) == wxT('2') &&
1008 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1009 )
1010 )
1011 )
1012 {
1013 return false;
1014 }
1015
// step past the third digit, if there was one
1016 if(IsDigit(*uri))++uri;
1017
1018 //compilers should unroll this loop
1019 for(; iIPv4 < 4; ++iIPv4)
1020 {
// every further group must be introduced by '.' and start with a digit
1021 if (*uri != wxT('.') || !IsDigit(*++uri))
1022 break;
1023
1024 //each ip part must be between 0-255
1025 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1026 //100 or less (note !)
1027 !( (*(uri-2) < wxT('2')) ||
1028 //240 or less
1029 (*(uri-2) == wxT('2') &&
1030 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1031 )
1032 )
1033 )
1034 {
1035 return false;
1036 }
1037 if(IsDigit(*uri))++uri;
1038 }
1039 }
1040 return iIPv4 == 4;
1041 }
1042
// Tries to parse an IPv6 address according to the grammar quoted below.
//
// uri is advanced while parsing and is not restored on failure. All hex
// groups are read with ParseH16(), which advances the pointer before testing
// the first digit.
// NOTE(review): because of that pre-increment, the character uri points at
// on entry does not appear to be tested by the first ParseH16() call -
// confirm against the caller, which already skips the '[' before calling.
1043 bool wxURI::ParseIPv6address(const wxChar*& uri)
1044 {
1045 // IPv6address = 6( h16 ":" ) ls32
1046 // / "::" 5( h16 ":" ) ls32
1047 // / [ h16 ] "::" 4( h16 ":" ) ls32
1048 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1049 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1050 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1051 // / [ *4( h16 ":" ) h16 ] "::" ls32
1052 // / [ *5( h16 ":" ) h16 ] "::" h16
1053 // / [ *6( h16 ":" ) h16 ] "::"
1054
// numPrefix:  number of leading groups that were followed by ':'
// maxPostfix: number of "h16 :" groups that must still follow
1055 size_t numPrefix = 0,
1056 maxPostfix;
1057
// set when the prefix loop stopped because a group failed to parse
1058 bool bEndHex = false;
1059
// read up to six leading groups, each followed by ':'
1060 for( ; numPrefix < 6; ++numPrefix)
1061 {
1062 if(!ParseH16(uri))
1063 {
// undo the advance made by the failed ParseH16()
1064 --uri;
1065 bEndHex = true;
1066 break;
1067 }
1068
1069 if(*uri != wxT(':'))
1070 {
1071 break;
1072 }
1073 }
1074
// the prefix loop did not end on a failed group and no further group
// follows either
1075 if(!bEndHex && !ParseH16(uri))
1076 {
1077 --uri;
1078
1079 if (numPrefix)
1080 return false;
1081
// a ':' here must be the first half of a "::"
1082 if (*uri == wxT(':'))
1083 {
1084 if (*++uri != wxT(':'))
1085 return false;
1086
1087 maxPostfix = 5;
1088 }
1089 else
1090 maxPostfix = 6;
1091 }
1092 else
1093 {
// not at a "::": only the first form of the grammar (six groups followed
// by ls32) remains possible
1094 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1095 {
1096 if (numPrefix != 6)
1097 return false;
1098
// go back to just after the previous ':'
1099 while (*--uri != wxT(':')) {}
1100 ++uri;
1101
1102 const wxChar* uristart = uri;
1103 //parse ls32
1104 // ls32 = ( h16 ":" h16 ) / IPv4address
1105 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1106 return true;
1107
1108 uri = uristart;
1109
1110 if (ParseIPv4address(uri))
1111 return true;
1112 else
1113 return false;
1114 }
1115 else
1116 {
// skip the "::"
1117 uri += 2;
1118
1119 if (numPrefix > 3)
1120 maxPostfix = 0;
1121 else
1122 maxPostfix = 4 - numPrefix;
1123 }
1124 }
1125
// when no further "h16 :" group is required, the forms that end in a
// single h16 or in "::" are acceptable as well
1126 bool bAllowAltEnding = maxPostfix == 0;
1127
// the required groups after the "::", each followed by ':'
1128 for(; maxPostfix != 0; --maxPostfix)
1129 {
1130 if(!ParseH16(uri) || *uri != wxT(':'))
1131 return false;
1132 }
1133
1134 if(numPrefix <= 4)
1135 {
1136 const wxChar* uristart = uri;
1137 //parse ls32
1138 // ls32 = ( h16 ":" h16 ) / IPv4address
1139 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1140 return true;
1141
1142 uri = uristart;
1143
1144 if (ParseIPv4address(uri))
1145 return true;
1146
1147 uri = uristart;
1148
1149 if (!bAllowAltEnding)
1150 return false;
1151 }
1152
// Both paths below return true; the ParseH16() call only matters for its
// effect on uri (consuming an optional final group).
1153 if(numPrefix <= 5 && ParseH16(uri))
1154 return true;
1155
1156 return true;
1157 }
1158
1159 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1160 {
1161 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1162 if (*++uri != wxT('v') || !IsHex(*++uri))
1163 return false;
1164
1165 while (IsHex(*++uri)) {}
1166
1167 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1168 return false;
1169
1170 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1171
1172 return true;
1173 }
1174
1175
1176 // ---------------------------------------------------------------------------
1177 // CharToHex
1178 //
1179 // Converts a character into a numeric hexadecimal value, or 0 if the
1180 // passed in character is not a valid hex character
1181 // ---------------------------------------------------------------------------
1182
1183 //static
1184 wxChar wxURI::CharToHex(const wxChar& c)
1185 {
1186 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1187 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1188 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1189
1190 return 0;
1191 }
1192
1193 // ---------------------------------------------------------------------------
1194 // IsXXX
1195 //
1196 // Returns true if the passed in character meets the criteria of the method
1197 // ---------------------------------------------------------------------------
1198
1199 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1200 bool wxURI::IsUnreserved (const wxChar& c)
1201 { return IsAlpha(c) || IsDigit(c) ||
1202 c == wxT('-') ||
1203 c == wxT('.') ||
1204 c == wxT('_') ||
1205 c == wxT('~') //tilde
1206 ;
1207 }
1208
1209 bool wxURI::IsReserved (const wxChar& c)
1210 {
1211 return IsGenDelim(c) || IsSubDelim(c);
1212 }
1213
1214 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1215 bool wxURI::IsGenDelim (const wxChar& c)
1216 {
1217 return c == wxT(':') ||
1218 c == wxT('/') ||
1219 c == wxT('?') ||
1220 c == wxT('#') ||
1221 c == wxT('[') ||
1222 c == wxT(']') ||
1223 c == wxT('@');
1224 }
1225
1226 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1227 //! / "*" / "+" / "," / ";" / "="
1228 bool wxURI::IsSubDelim (const wxChar& c)
1229 {
1230 return c == wxT('!') ||
1231 c == wxT('$') ||
1232 c == wxT('&') ||
1233 c == wxT('\'') ||
1234 c == wxT('(') ||
1235 c == wxT(')') ||
1236 c == wxT('*') ||
1237 c == wxT('+') ||
1238 c == wxT(',') ||
1239 c == wxT(';') ||
1240 c == wxT('=')
1241 ;
1242 }
1243
1244 bool wxURI::IsHex(const wxChar& c)
1245 { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1246
1247 bool wxURI::IsAlpha(const wxChar& c)
1248 { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
1249
1250 bool wxURI::IsDigit(const wxChar& c)
1251 { return c >= wxT('0') && c <= wxT('9'); }
1252
1253
1254 // ---------------------------------------------------------------------------
1255 //
1256 // wxURL Compatibility
1257 //
1258 // ---------------------------------------------------------------------------
1259
1260 #if wxUSE_URL
1261
1262 #if WXWIN_COMPATIBILITY_2_4
1263
1264 #include "wx/url.h"
1265
1266 wxString wxURL::GetProtocolName() const
1267 {
1268 return m_scheme;
1269 }
1270
1271 wxString wxURL::GetHostName() const
1272 {
1273 return m_server;
1274 }
1275
1276 wxString wxURL::GetPath() const
1277 {
1278 return m_path;
1279 }
1280
1281 //Note that this old code really doesn't convert to a URI that well and looks
1282 //more like a dirty hack than anything else...
1283
1284 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1285 {
1286 wxString out_str;
1287 wxString hexa_code;
1288 size_t i;
1289
1290 for (i = 0; i < uri.Len(); i++)
1291 {
1292 wxChar c = uri.GetChar(i);
1293
1294 if (c == wxT(' '))
1295 {
1296 // GRG, Apr/2000: changed to "%20" instead of '+'
1297
1298 out_str += wxT("%20");
1299 }
1300 else
1301 {
1302 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1303 //
1304 // - Alphanumeric characters are never escaped
1305 // - Unreserved marks are never escaped
1306 // - Delimiters must be escaped if they appear within a component
1307 // but not if they are used to separate components. Here we have
1308 // no clear way to distinguish between these two cases, so they
1309 // are escaped unless they are passed in the 'delims' parameter
1310 // (allowed delimiters).
1311
1312 static const wxChar marks[] = wxT("-_.!~*()'");
1313
1314 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1315 {
1316 hexa_code.Printf(wxT("%%%02X"), c);
1317 out_str += hexa_code;
1318 }
1319 else
1320 {
1321 out_str += c;
1322 }
1323 }
1324 }
1325
1326 return out_str;
1327 }
1328
1329 wxString wxURL::ConvertFromURI(const wxString& uri)
1330 {
1331 return wxURI::Unescape(uri);
1332 }
1333
1334 #endif //WXWIN_COMPATIBILITY_2_4
1335
1336 #endif //wxUSE_URL
1337
1338 //end of uri.cpp
1339
1340
1341