]> git.saurik.com Git - wxWidgets.git/blob - src/common/uri.cpp
Unicode fixes
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21 #endif
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #include "wx/uri.h"
31
32 // ---------------------------------------------------------------------------
33 // definitions
34 // ---------------------------------------------------------------------------
35
36 IMPLEMENT_CLASS(wxURI, wxObject);
37
38 // ===========================================================================
39 // implementation
40 // ===========================================================================
41
42 // ---------------------------------------------------------------------------
43 // utilities
44 // ---------------------------------------------------------------------------
45
46 // ---------------------------------------------------------------------------
47 //
48 // wxURI
49 //
50 // ---------------------------------------------------------------------------
51
52 // ---------------------------------------------------------------------------
53 // Constructors
54 // ---------------------------------------------------------------------------
55
56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 }
59
60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61 {
62 Create(uri);
63 }
64
65 wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 Assign(uri);
68 }
69
70 // ---------------------------------------------------------------------------
71 // Destructor and cleanup
72 // ---------------------------------------------------------------------------
73
74 wxURI::~wxURI()
75 {
76 Clear();
77 }
78
79 void wxURI::Clear()
80 {
81 m_scheme = m_userinfo = m_server = m_port = m_path =
82 m_query = m_fragment = wxEmptyString;
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87 }
88
89 // ---------------------------------------------------------------------------
90 // Create
91 //
92 // This creates the URI - all we do here is call the main parsing method
93 // ---------------------------------------------------------------------------
94
95 const wxChar* wxURI::Create(const wxString& uri)
96 {
97 if (m_fields)
98 Clear();
99
100 return Parse(uri);
101 }
102
103 // ---------------------------------------------------------------------------
104 // Escape Methods
105 //
106 // TranslateEscape unencodes a 3 character URL escape sequence
107 //
108 // Escape encodes an invalid URI character into a 3 character sequence
109 //
110 // IsEscape determines if the input string contains an escape sequence,
111 // if it does, then it moves the input string past the escape sequence
112 //
113 // Unescape unencodes all 3 character URL escape sequences in a wxString
114 // ---------------------------------------------------------------------------
115
116 wxChar wxURI::TranslateEscape(const wxChar* s)
117 {
118 wxASSERT_MSG( IsHex(s[0]) && IsHex(s[1]), wxT("Invalid escape sequence!"));
119
120 return (wxChar)( CharToHex(s[0]) << 4 ) | CharToHex(s[1]);
121 }
122
123 wxString wxURI::Unescape(const wxString& uri)
124 {
125 wxString new_uri;
126
127 for(size_t i = 0; i < uri.length(); ++i)
128 {
129 if (uri[i] == wxT('%'))
130 {
131 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
132 i += 2;
133 }
134 else
135 new_uri += uri[i];
136 }
137
138 return new_uri;
139 }
140
141 void wxURI::Escape(wxString& s, const wxChar& c)
142 {
143 const wxChar* hdig = wxT("0123456789abcdef");
144 s += wxT('%');
145 s += hdig[(c >> 4) & 15];
146 s += hdig[c & 15];
147 }
148
149 bool wxURI::IsEscape(const wxChar*& uri)
150 {
151 // pct-encoded = "%" HEXDIG HEXDIG
152 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
153 {
154 uri += 3;
155 return true;
156 }
157 else
158 return false;
159 }
160
161 // ---------------------------------------------------------------------------
162 // GetUser
163 // GetPassword
164 //
165 // Gets the username and password via the old URL method.
166 // ---------------------------------------------------------------------------
167 wxString wxURI::GetUser() const
168 {
169 size_t dwPasswordPos = m_userinfo.find(':');
170
171 if (dwPasswordPos == wxString::npos)
172 dwPasswordPos = 0;
173
174 return m_userinfo(0, dwPasswordPos);
175 }
176
177 wxString wxURI::GetPassword() const
178 {
179 size_t dwPasswordPos = m_userinfo.find(':');
180
181 if (dwPasswordPos == wxString::npos)
182 return wxT("");
183 else
184 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
185 }
186
187 // ---------------------------------------------------------------------------
188 // BuildURI
189 //
190 // BuildURI() builds the entire URI into a useable
191 // representation, including proper identification characters such as slashes
192 //
193 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
194 // the components that accept escape sequences
195 // ---------------------------------------------------------------------------
196
197 wxString wxURI::BuildURI() const
198 {
199 wxString ret;
200
201 if (HasScheme())
202 ret = ret + m_scheme + wxT(":");
203
204 if (HasServer())
205 {
206 ret += wxT("//");
207
208 if (HasUserInfo())
209 ret = ret + m_userinfo + wxT("@");
210
211 ret += m_server;
212
213 if (HasPort())
214 ret = ret + wxT(":") + m_port;
215 }
216
217 ret += m_path;
218
219 if (HasQuery())
220 ret = ret + wxT("?") + m_query;
221
222 if (HasFragment())
223 ret = ret + wxT("#") + m_fragment;
224
225 return ret;
226 }
227
228 wxString wxURI::BuildUnescapedURI() const
229 {
230 wxString ret;
231
232 if (HasScheme())
233 ret = ret + m_scheme + wxT(":");
234
235 if (HasServer())
236 {
237 ret += wxT("//");
238
239 if (HasUserInfo())
240 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
241
242 if (m_hostType == wxURI_REGNAME)
243 ret += wxURI::Unescape(m_server);
244 else
245 ret += m_server;
246
247 if (HasPort())
248 ret = ret + wxT(":") + m_port;
249 }
250
251 ret += wxURI::Unescape(m_path);
252
253 if (HasQuery())
254 ret = ret + wxT("?") + wxURI::Unescape(m_query);
255
256 if (HasFragment())
257 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
258
259 return ret;
260 }
261
262 // ---------------------------------------------------------------------------
263 // Assignment
264 // ---------------------------------------------------------------------------
265
266 wxURI& wxURI::Assign(const wxURI& uri)
267 {
268 //assign fields
269 m_fields = uri.m_fields;
270
271 //ref over components
272 m_scheme = uri.m_scheme;
273 m_userinfo = uri.m_userinfo;
274 m_server = uri.m_server;
275 m_hostType = uri.m_hostType;
276 m_port = uri.m_port;
277 m_path = uri.m_path;
278 m_query = uri.m_query;
279 m_fragment = uri.m_fragment;
280
281 return *this;
282 }
283
284 wxURI& wxURI::operator = (const wxURI& uri)
285 {
286 return Assign(uri);
287 }
288
289 wxURI& wxURI::operator = (const wxString& string)
290 {
291 Create(string);
292 return *this;
293 }
294
295 // ---------------------------------------------------------------------------
296 // Comparison
297 // ---------------------------------------------------------------------------
298
299 bool wxURI::operator == (const wxURI& uri) const
300 {
301 if (HasScheme())
302 {
303 if(m_scheme != uri.m_scheme)
304 return false;
305 }
306 else if (uri.HasScheme())
307 return false;
308
309
310 if (HasServer())
311 {
312 if (HasUserInfo())
313 {
314 if (m_userinfo != uri.m_userinfo)
315 return false;
316 }
317 else if (uri.HasUserInfo())
318 return false;
319
320 if (m_server != uri.m_server ||
321 m_hostType != uri.m_hostType)
322 return false;
323
324 if (HasPort())
325 {
326 if(m_port != uri.m_port)
327 return false;
328 }
329 else if (uri.HasPort())
330 return false;
331 }
332 else if (uri.HasServer())
333 return false;
334
335
336 if (HasPath())
337 {
338 if(m_path != uri.m_path)
339 return false;
340 }
341 else if (uri.HasPath())
342 return false;
343
344 if (HasQuery())
345 {
346 if (m_query != uri.m_query)
347 return false;
348 }
349 else if (uri.HasQuery())
350 return false;
351
352 if (HasFragment())
353 {
354 if (m_fragment != uri.m_fragment)
355 return false;
356 }
357 else if (uri.HasFragment())
358 return false;
359
360 return true;
361 }
362
363 // ---------------------------------------------------------------------------
364 // IsReference
365 //
366 // if there is no authority or scheme, it is a reference
367 // ---------------------------------------------------------------------------
368
369 bool wxURI::IsReference() const
370 { return !HasScheme() || !HasServer(); }
371
372 // ---------------------------------------------------------------------------
373 // Parse
374 //
375 // Master URI parsing method. Just calls the individual parsing methods
376 //
377 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
378 // URI-reference = URI / relative
379 // ---------------------------------------------------------------------------
380
381 const wxChar* wxURI::Parse(const wxChar* uri)
382 {
383 uri = ParseScheme(uri);
384 uri = ParseAuthority(uri);
385 uri = ParsePath(uri);
386 uri = ParseQuery(uri);
387 return ParseFragment(uri);
388 }
389
390 // ---------------------------------------------------------------------------
391 // ParseXXX
392 //
393 // Individual parsers for each URI component
394 // ---------------------------------------------------------------------------
395
396 const wxChar* wxURI::ParseScheme(const wxChar* uri)
397 {
398 wxASSERT(uri != NULL);
399
400 //copy of the uri - used for figuring out
401 //length of each component
402 const wxChar* uricopy = uri;
403
404 //Does the uri have a scheme (first character alpha)?
405 if (IsAlpha(*uri))
406 {
407 m_scheme += *uri++;
408
409 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
410 while (IsAlpha(*uri) || IsDigit(*uri) ||
411 *uri == wxT('+') ||
412 *uri == wxT('-') ||
413 *uri == wxT('.'))
414 {
415 m_scheme += *uri++;
416 }
417
418 //valid scheme?
419 if (*uri == wxT(':'))
420 {
421 //mark the scheme as valid
422 m_fields |= wxURI_SCHEME;
423
424 //move reference point up to input buffer
425 uricopy = ++uri;
426 }
427 else
428 //relative uri with relative path reference
429 m_scheme = wxEmptyString;
430 }
431 // else
432 //relative uri with _possible_ relative path reference
433
434 return uricopy;
435 }
436
437 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
438 {
439 // authority = [ userinfo "@" ] host [ ":" port ]
440 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
441 {
442 uri += 2;
443
444 uri = ParseUserInfo(uri);
445 uri = ParseServer(uri);
446 return ParsePort(uri);
447 }
448
449 return uri;
450 }
451
452 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
453 {
454 wxASSERT(uri != NULL);
455
456 //copy of the uri - used for figuring out
457 //length of each component
458 const wxChar* uricopy = uri;
459
460 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
461 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
462 {
463 if(IsUnreserved(*uri) || IsEscape(uri) ||
464 IsSubDelim(*uri) || *uri == wxT(':'))
465 m_userinfo += *uri++;
466 else
467 Escape(m_userinfo, *uri++);
468 }
469
470 if(*uri == wxT('@'))
471 {
472 //valid userinfo
473 m_fields |= wxURI_USERINFO;
474
475 uricopy = ++uri;
476 }
477 else
478 m_userinfo = wxEmptyString;
479
480 return uricopy;
481 }
482
483 const wxChar* wxURI::ParseServer(const wxChar* uri)
484 {
485 wxASSERT(uri != NULL);
486
487 //copy of the uri - used for figuring out
488 //length of each component
489 const wxChar* uricopy = uri;
490
491 // host = IP-literal / IPv4address / reg-name
492 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
493 if (*uri == wxT('['))
494 {
495 ++uri; //some compilers don't support *&ing a ++*
496 if (ParseIPv6address(uri) && *uri == wxT(']'))
497 {
498 ++uri;
499 m_hostType = wxURI_IPV6ADDRESS;
500
501 wxStringBufferLength theBuffer(m_server, uri - uricopy);
502 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
503 theBuffer.SetLength(uri-uricopy);
504 }
505 else
506 {
507 uri = uricopy;
508
509 ++uri; //some compilers don't support *&ing a ++*
510 if (ParseIPvFuture(uri) && *uri == wxT(']'))
511 {
512 ++uri;
513 m_hostType = wxURI_IPVFUTURE;
514
515 wxStringBufferLength theBuffer(m_server, uri - uricopy);
516 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
517 theBuffer.SetLength(uri-uricopy);
518 }
519 else
520 uri = uricopy;
521 }
522 }
523 else
524 {
525 if (ParseIPv4address(uri))
526 {
527 m_hostType = wxURI_IPV4ADDRESS;
528
529 wxStringBufferLength theBuffer(m_server, uri - uricopy);
530 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
531 theBuffer.SetLength(uri-uricopy);
532 }
533 else
534 uri = uricopy;
535 }
536
537 if(m_hostType == wxURI_REGNAME)
538 {
539 uri = uricopy;
540 // reg-name = *( unreserved / pct-encoded / sub-delims )
541 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
542 {
543 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
544 m_server += *uri++;
545 else
546 Escape(m_server, *uri++);
547 }
548 }
549
550 //mark the server as valid
551 m_fields |= wxURI_SERVER;
552
553 return uri;
554 }
555
556
557 const wxChar* wxURI::ParsePort(const wxChar* uri)
558 {
559 wxASSERT(uri != NULL);
560
561 // port = *DIGIT
562 if(*uri == wxT(':'))
563 {
564 ++uri;
565 while(IsDigit(*uri))
566 {
567 m_port += *uri++;
568 }
569
570 //mark the port as valid
571 m_fields |= wxURI_PORT;
572 }
573
574 return uri;
575 }
576
577 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
578 {
579 wxASSERT(uri != NULL);
580
581 //copy of the uri - used for figuring out
582 //length of each component
583 const wxChar* uricopy = uri;
584
585 /// hier-part = "//" authority path-abempty
586 /// / path-absolute
587 /// / path-rootless
588 /// / path-empty
589 ///
590 /// relative-part = "//" authority path-abempty
591 /// / path-absolute
592 /// / path-noscheme
593 /// / path-empty
594 ///
595 /// path-abempty = *( "/" segment )
596 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
597 /// path-noscheme = segment-nz-nc *( "/" segment )
598 /// path-rootless = segment-nz *( "/" segment )
599 /// path-empty = 0<pchar>
600 ///
601 /// segment = *pchar
602 /// segment-nz = 1*pchar
603 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
604 /// ; non-zero-length segment without any colon ":"
605 ///
606 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
607 if (*uri == wxT('/'))
608 {
609 m_path += *uri++;
610
611 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
612 {
613 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
614 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
615 m_path += *uri++;
616 else
617 Escape(m_path, *uri++);
618 }
619
620 if (bNormalize)
621 {
622 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
623 #if wxUSE_STL
624 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
625 #endif
626 Normalize(theBuffer, true);
627 theBuffer.SetLength(wxStrlen(theBuffer));
628 }
629 //mark the path as valid
630 m_fields |= wxURI_PATH;
631 }
632 else if(*uri) //Relative path
633 {
634 if (bReference)
635 {
636 //no colon allowed
637 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
638 {
639 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
640 *uri == wxT('@') || *uri == wxT('/'))
641 m_path += *uri++;
642 else
643 Escape(m_path, *uri++);
644 }
645 }
646 else
647 {
648 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
649 {
650 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
651 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
652 m_path += *uri++;
653 else
654 Escape(m_path, *uri++);
655 }
656 }
657
658 if (uri != uricopy)
659 {
660 if (bNormalize)
661 {
662 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
663 #if wxUSE_STL
664 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
665 #endif
666 Normalize(theBuffer);
667 theBuffer.SetLength(wxStrlen(theBuffer));
668 }
669
670 //mark the path as valid
671 m_fields |= wxURI_PATH;
672 }
673 }
674
675 return uri;
676 }
677
678
679 const wxChar* wxURI::ParseQuery(const wxChar* uri)
680 {
681 wxASSERT(uri != NULL);
682
683 // query = *( pchar / "/" / "?" )
684 if (*uri == wxT('?'))
685 {
686 ++uri;
687 while(*uri && *uri != wxT('#'))
688 {
689 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
690 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
691 m_query += *uri++;
692 else
693 Escape(m_query, *uri++);
694 }
695
696 //mark the server as valid
697 m_fields |= wxURI_QUERY;
698 }
699
700 return uri;
701 }
702
703
704 const wxChar* wxURI::ParseFragment(const wxChar* uri)
705 {
706 wxASSERT(uri != NULL);
707
708 // fragment = *( pchar / "/" / "?" )
709 if (*uri == wxT('#'))
710 {
711 ++uri;
712 while(*uri)
713 {
714 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
715 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
716 m_fragment += *uri++;
717 else
718 Escape(m_fragment, *uri++);
719 }
720
721 //mark the server as valid
722 m_fields |= wxURI_FRAGMENT;
723 }
724
725 return uri;
726 }
727
728 // ---------------------------------------------------------------------------
729 // Resolve
730 //
731 // Builds missing components of this uri from a base uri
732 //
733 // A version of the algorithm outlined in the RFC is used here
734 // (it is shown in comments)
735 //
736 // Note that an empty URI inherits all components
737 // ---------------------------------------------------------------------------
738
739 void wxURI::Resolve(const wxURI& base, int flags)
740 {
741 wxASSERT_MSG(!base.IsReference(),
742 wxT("wxURI to inherit from must not be a reference!"));
743
744 // If we arn't being strict, enable the older (pre-RFC2396)
745 // loophole that allows this uri to inherit other
746 // properties from the base uri - even if the scheme
747 // is defined
748 if ( !(flags & wxURI_STRICT) &&
749 HasScheme() && base.HasScheme() &&
750 m_scheme == base.m_scheme )
751 {
752 m_fields -= wxURI_SCHEME;
753 }
754
755
756 // Do nothing if this is an absolute wxURI
757 // if defined(R.scheme) then
758 // T.scheme = R.scheme;
759 // T.authority = R.authority;
760 // T.path = remove_dot_segments(R.path);
761 // T.query = R.query;
762 if (HasScheme())
763 {
764 return;
765 }
766
767 //No scheme - inherit
768 m_scheme = base.m_scheme;
769 m_fields |= wxURI_SCHEME;
770
771 // All we need to do for relative URIs with an
772 // authority component is just inherit the scheme
773 // if defined(R.authority) then
774 // T.authority = R.authority;
775 // T.path = remove_dot_segments(R.path);
776 // T.query = R.query;
777 if (HasServer())
778 {
779 return;
780 }
781
782 //No authority - inherit
783 if (base.HasUserInfo())
784 {
785 m_userinfo = base.m_userinfo;
786 m_fields |= wxURI_USERINFO;
787 }
788
789 m_server = base.m_server;
790 m_hostType = base.m_hostType;
791 m_fields |= wxURI_SERVER;
792
793 if (base.HasPort())
794 {
795 m_port = base.m_port;
796 m_fields |= wxURI_PORT;
797 }
798
799
800 // Simple path inheritance from base
801 if (!HasPath())
802 {
803 // T.path = Base.path;
804 m_path = base.m_path;
805 m_fields |= wxURI_PATH;
806
807
808 // if defined(R.query) then
809 // T.query = R.query;
810 // else
811 // T.query = Base.query;
812 // endif;
813 if (!HasQuery())
814 {
815 m_query = base.m_query;
816 m_fields |= wxURI_QUERY;
817 }
818 }
819 else
820 {
821 // if (R.path starts-with "/") then
822 // T.path = remove_dot_segments(R.path);
823 // else
824 // T.path = merge(Base.path, R.path);
825 // T.path = remove_dot_segments(T.path);
826 // endif;
827 // T.query = R.query;
828 if (m_path[0u] != wxT('/'))
829 {
830 //Merge paths
831 const wxChar* op = m_path.c_str();
832 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
833
834 //not a ending directory? move up
835 if (base.m_path[0] && *(bp-1) != wxT('/'))
836 UpTree(base.m_path, bp);
837
838 //normalize directories
839 while(*op == wxT('.') && *(op+1) == wxT('.') &&
840 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
841 {
842 UpTree(base.m_path, bp);
843
844 if (*(op+2) == '\0')
845 op += 2;
846 else
847 op += 3;
848 }
849
850 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
851 m_path.substr((op - m_path.c_str()), m_path.Length());
852 }
853 }
854
855 //T.fragment = R.fragment;
856 }
857
858 // ---------------------------------------------------------------------------
859 // UpTree
860 //
861 // Moves a URI path up a directory
862 // ---------------------------------------------------------------------------
863
864 //static
865 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
866 {
867 if (uri != uristart && *(uri-1) == wxT('/'))
868 {
869 uri -= 2;
870 }
871
872 for(;uri != uristart; --uri)
873 {
874 if (*uri == wxT('/'))
875 {
876 ++uri;
877 break;
878 }
879 }
880
881 //!!!TODO:HACK!!!//
882 if (uri == uristart && *uri == wxT('/'))
883 ++uri;
884 //!!!//
885 }
886
887 // ---------------------------------------------------------------------------
888 // Normalize
889 //
890 // Normalizes directories in-place
891 //
892 // I.E. ./ and . are ignored
893 //
894 // ../ and .. are removed if a directory is before it, along
895 // with that directory (leading .. and ../ are kept)
896 // ---------------------------------------------------------------------------
897
898 //static
// Rewrites the path in s in place with "." and ".." segments resolved.
//
// s             NUL terminated, writable path buffer; the result is never
//               longer than the input
// bIgnoreLeads  what to do with a ".." that has nothing in front of it to
//               remove: when true it is dropped, when false it is kept
//
// Three cursors are used: cp reads the input, s itself is reused as the
// write position, and bp marks the position the output may not be cut
// back beyond (just past a leading '/', and past any leading ".." kept).
void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
{
    wxChar* cp = s;
    wxChar* bp = s;

    //the leading slash of an absolute path is never removed
    if(s[0] == wxT('/'))
        ++bp;

    while(*cp)
    {
        //a '.' only counts as a segment when it starts one, i.e. when it
        //sits at bp or directly behind a slash
        if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
            && (bp == cp || *(cp-1) == wxT('/')))
        {
            //. _or_ ./ - ignore
            if (*(cp+1) == '\0')
                cp += 1;
            else
                cp += 2;
        }
        else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
                (*(cp+2) == wxT('/') || *(cp+2) == '\0')
                && (bp == cp || *(cp-1) == wxT('/')))
        {
            //.. _or_ ../ - go up the tree
            if (s != bp)
            {
                //something has been written past bp: move the write
                //position back by one directory
                UpTree((const wxChar*)bp, (const wxChar*&)s);

                if (*(cp+2) == '\0')
                    cp += 2;
                else
                    cp += 3;
            }
            else if (!bIgnoreLeads)

            {
                //nothing to remove: keep the ".." (and the slash after
                //it, if any) and raise bp past it so it is never removed
                *bp++ = *cp++;
                *bp++ = *cp++;
                if (*cp)
                    *bp++ = *cp++;

                s = bp;
            }
            else
            {
                //nothing to remove and leading ".." are unwanted: skip it
                if (*(cp+2) == '\0')
                    cp += 2;
                else
                    cp += 3;
            }
        }
        else
            *s++ = *cp++;   //ordinary character: copy it down
    }

    //terminate the (possibly shortened) result
    *s = '\0';
}
956
957 // ---------------------------------------------------------------------------
958 // ParseH16
959 //
// Parses 1 to 4 hex digits, starting at the character AFTER the one the input
// string currently points to (the pointer is incremented before anything is
// examined). Returns true if that first character is a valid hex character.
// It is the caller's responsibility to move the input string back to its
// original position on failure.
963 // ---------------------------------------------------------------------------
964
// Parses a group of 1 to 4 hex digits.
//
// uri is incremented BEFORE the first character is examined, so on entry
// it has to point at the character in front of the group. On success it
// is left on the first character after the digits that were consumed; on
// failure it is left on the offending character (one past where it
// started) and the caller has to move it back.
bool wxURI::ParseH16(const wxChar*& uri)
{
    // h16           = 1*4HEXDIG
    if(!IsHex(*++uri))
        return false;

    //up to three more digits; the chain stops advancing at the first
    //non-digit, and only after a 4th digit is one more step needed to
    //get past it
    if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
        ++uri;

    return true;
}
976
977 // ---------------------------------------------------------------------------
978 // ParseIPXXX
979 //
// Parses a certain version of an IP address and moves the input string past
// it. Returns true if the input string contains the proper version of an IP
// address. It is the caller's responsibility to move the input string back
// to its original position on failure.
984 // ---------------------------------------------------------------------------
985
// Parses a dotted-quad IPv4 address starting at uri.
//
// Returns true once four octets have been read; uri is then left on the
// first character after the fourth octet. What follows the address is not
// checked here. On failure uri may have been moved and the caller has to
// restore it.
bool wxURI::ParseIPv4address(const wxChar*& uri)
{
    //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
    //
    //dec-octet     =      DIGIT                    ; 0-9
    //                / %x31-39 DIGIT               ; 10-99
    //                / "1" 2DIGIT                  ; 100-199
    //                / "2" %x30-34 DIGIT           ; 200-249
    //                / "25" %x30-35                ; 250-255

    //number of octets read so far
    size_t iIPv4 = 0;
    if (IsDigit(*uri))
    {
        ++iIPv4;


        //each ip part must be between 0-255 (dupe of version in for loop)
        //
        //the two IsDigit() calls advance uri over a 2nd and 3rd digit; the
        //range test only runs when all three digits are present, with uri
        //then sitting on the 3rd one
        if( IsDigit(*++uri) && IsDigit(*++uri) &&
           //first digit 0 or 1: below 200, always fine (note the !)
           !( (*(uri-2) < wxT('2')) ||
           //first digit 2: fine up to 255
             (*(uri-2) == wxT('2') &&
               (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
             )
            )
          )
        {
            return false;
        }

        //with three digits uri is still on the last one - step past it
        if(IsDigit(*uri))++uri;

        //compilers should unroll this loop
        for(; iIPv4 < 4; ++iIPv4)
        {
            //every further octet needs a '.' followed by a digit
            if (*uri != wxT('.') || !IsDigit(*++uri))
                break;

            //each ip part must be between 0-255
            if( IsDigit(*++uri) && IsDigit(*++uri) &&
               //first digit 0 or 1: below 200, always fine (note the !)
               !( (*(uri-2) < wxT('2')) ||
               //first digit 2: fine up to 255
                 (*(uri-2) == wxT('2') &&
                   (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
                 )
                )
              )
            {
                return false;
            }
            if(IsDigit(*uri))++uri;
        }
    }
    return iIPv4 == 4;
}
1041
// Attempts to parse an IPv6 address (the text between the brackets of an
// IP-literal) and moves uri along as it goes.
//
// Returns true if the input was accepted. On failure uri may have been
// moved and the caller has to restore it.
//
// NOTE(review): every h16 group is read with ParseH16(), which increments
// the pointer before looking at it, so uri is expected to sit on the
// character in front of each group. Several paths below position uri on
// the first digit instead (e.g. the "++uri" before the ls32 attempt), and
// the final statements return true regardless of the last ParseH16()
// result. Presumably not every form in the grammar is handled correctly -
// verify with tests before relying on this.
bool wxURI::ParseIPv6address(const wxChar*& uri)
{
    // IPv6address   =                            6( h16 ":" ) ls32
    //               /                       "::" 5( h16 ":" ) ls32
    //               / [               h16 ] "::" 4( h16 ":" ) ls32
    //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    //               / [ *4( h16 ":" ) h16 ] "::"              ls32
    //               / [ *5( h16 ":" ) h16 ] "::"              h16
    //               / [ *6( h16 ":" ) h16 ] "::"

    //numPrefix counts the h16 groups in front of a "::", maxPostfix the
    //"h16:" groups that have to follow it
    size_t numPrefix = 0,
              maxPostfix;

    //set when the prefix loop stopped because a group failed to parse
    bool bEndHex = false;

    for( ; numPrefix < 6; ++numPrefix)
    {
        if(!ParseH16(uri))
        {
            //undo the increment done by the failed ParseH16()
            --uri;
            bEndHex = true;
            break;
        }

        if(*uri != wxT(':'))
        {
            break;
        }
    }

    if(!bEndHex && !ParseH16(uri))
    {
        //undo the increment done by the failed ParseH16()
        --uri;

        if (numPrefix)
            return false;

        if (*uri == wxT(':'))
        {
            if (*++uri != wxT(':'))
                return false;

            maxPostfix = 5;
        }
        else
            maxPostfix = 6;
    }
    else
    {
        if (*uri != wxT(':') || *(uri+1) != wxT(':'))
        {
            //no "::" here: only the full form without elision is left
            if (numPrefix != 6)
                return false;

            //back up to the colon in front of the last group read
            while (*--uri != wxT(':')) {}
            ++uri;

            const wxChar* uristart = uri;
            //parse ls32
            // ls32          = ( h16 ":" h16 ) / IPv4address
            if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
                return true;

            uri = uristart;

            if (ParseIPv4address(uri))
                return true;
            else
                return false;
        }
        else
        {
            //skip the "::"
            uri += 2;

            if (numPrefix > 3)
                maxPostfix = 0;
            else
                maxPostfix = 4 - numPrefix;
        }
    }

    bool bAllowAltEnding = maxPostfix == 0;

    //the "h16:" groups that have to follow
    for(; maxPostfix != 0; --maxPostfix)
    {
        if(!ParseH16(uri) || *uri != wxT(':'))
            return false;
    }

    if(numPrefix <= 4)
    {
        const wxChar* uristart = uri;
        //parse ls32
        // ls32          = ( h16 ":" h16 ) / IPv4address
        if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
            return true;

        uri = uristart;

        if (ParseIPv4address(uri))
            return true;

        uri = uristart;

        if (!bAllowAltEnding)
            return false;
    }

    //a single trailing h16 is consumed if present; the result is true
    //either way
    if(numPrefix <= 5 && ParseH16(uri))
        return true;

    return true;
}
1157
// Attempts to parse an IPvFuture address.
//
// uri is incremented BEFORE the first character is examined, so on entry
// it has to point at the character in front of the 'v'. On success uri is
// left on the first character that is not part of the address. On failure
// uri has been moved and the caller has to restore it.
bool wxURI::ParseIPvFuture(const wxChar*& uri)
{
    // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
    if (*++uri != wxT('v') || !IsHex(*++uri))
        return false;

    //skip the remaining version digits
    while (IsHex(*++uri)) {}

    //a dot and at least one address character have to follow
    if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
        return false;

    //skip the remaining address characters
    while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}

    return true;
}
1173
1174
1175 // ---------------------------------------------------------------------------
1176 // CharToHex
1177 //
// Converts a character into a numeric hexadecimal value, or 0 if the
// passed in character is not a valid hex character
1180 // ---------------------------------------------------------------------------
1181
1182 //static
1183 wxChar wxURI::CharToHex(const wxChar& c)
1184 {
1185 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1186 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1187 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1188
1189 return 0;
1190 }
1191
1192 // ---------------------------------------------------------------------------
1193 // IsXXX
1194 //
1195 // Returns true if the passed in character meets the criteria of the method
1196 // ---------------------------------------------------------------------------
1197
1198 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1199 bool wxURI::IsUnreserved (const wxChar& c)
1200 { return IsAlpha(c) || IsDigit(c) ||
1201 c == wxT('-') ||
1202 c == wxT('.') ||
1203 c == wxT('_') ||
1204 c == wxT('~') //tilde
1205 ;
1206 }
1207
1208 bool wxURI::IsReserved (const wxChar& c)
1209 {
1210 return IsGenDelim(c) || IsSubDelim(c);
1211 }
1212
1213 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1214 bool wxURI::IsGenDelim (const wxChar& c)
1215 {
1216 return c == wxT(':') ||
1217 c == wxT('/') ||
1218 c == wxT('?') ||
1219 c == wxT('#') ||
1220 c == wxT('[') ||
1221 c == wxT(']') ||
1222 c == wxT('@');
1223 }
1224
1225 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1226 //! / "*" / "+" / "," / ";" / "="
1227 bool wxURI::IsSubDelim (const wxChar& c)
1228 {
1229 return c == wxT('!') ||
1230 c == wxT('$') ||
1231 c == wxT('&') ||
1232 c == wxT('\'') ||
1233 c == wxT('(') ||
1234 c == wxT(')') ||
1235 c == wxT('*') ||
1236 c == wxT('+') ||
1237 c == wxT(',') ||
1238 c == wxT(';') ||
1239 c == wxT('=')
1240 ;
1241 }
1242
1243 bool wxURI::IsHex(const wxChar& c)
1244 { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1245
1246 bool wxURI::IsAlpha(const wxChar& c)
1247 { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
1248
1249 bool wxURI::IsDigit(const wxChar& c)
1250 { return c >= wxT('0') && c <= wxT('9'); }
1251
1252
1253 // ---------------------------------------------------------------------------
1254 //
1255 // wxURL Compatibility
1256 //
1257 // ---------------------------------------------------------------------------
1258
1259 #if wxUSE_URL
1260
1261 #if WXWIN_COMPATIBILITY_2_4
1262
1263 #include "wx/url.h"
1264
1265 wxString wxURL::GetProtocolName() const
1266 {
1267 return m_scheme;
1268 }
1269
1270 wxString wxURL::GetHostName() const
1271 {
1272 return m_server;
1273 }
1274
1275 wxString wxURL::GetPath() const
1276 {
1277 return m_path;
1278 }
1279
1280 //Note that this old code really doesn't convert to a URI that well and looks
1281 //more like a dirty hack than anything else...
1282
1283 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1284 {
1285 wxString out_str;
1286 wxString hexa_code;
1287 size_t i;
1288
1289 for (i = 0; i < uri.Len(); i++)
1290 {
1291 wxChar c = uri.GetChar(i);
1292
1293 if (c == wxT(' '))
1294 {
1295 // GRG, Apr/2000: changed to "%20" instead of '+'
1296
1297 out_str += wxT("%20");
1298 }
1299 else
1300 {
1301 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1302 //
1303 // - Alphanumeric characters are never escaped
1304 // - Unreserved marks are never escaped
1305 // - Delimiters must be escaped if they appear within a component
1306 // but not if they are used to separate components. Here we have
1307 // no clear way to distinguish between these two cases, so they
1308 // are escaped unless they are passed in the 'delims' parameter
1309 // (allowed delimiters).
1310
1311 static const wxChar marks[] = wxT("-_.!~*()'");
1312
1313 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1314 {
1315 hexa_code.Printf(wxT("%%%02X"), c);
1316 out_str += hexa_code;
1317 }
1318 else
1319 {
1320 out_str += c;
1321 }
1322 }
1323 }
1324
1325 return out_str;
1326 }
1327
1328 wxString wxURL::ConvertFromURI(const wxString& uri)
1329 {
1330 return wxURI::Unescape(uri);
1331 }
1332
1333 #endif //WXWIN_COMPATIBILITY_2_4
1334
1335 #endif //wxUSE_URL
1336
1337 //end of uri.cpp
1338
1339
1340