]> git.saurik.com Git - wxWidgets.git/blob - src/common/uri.cpp
added our own implementation of strto[u]ll() if the system doesn't have one (patch...
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 // For compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #include "wx/uri.h"
27
28 // ---------------------------------------------------------------------------
29 // definitions
30 // ---------------------------------------------------------------------------
31
// wxWidgets class-implementation macro naming wxURI and its base wxObject.
// NOTE(review): presumably this supplies the run-time class information that
// pairs with a DECLARE_* macro in wx/uri.h -- confirm against the header.
32 IMPLEMENT_CLASS(wxURI, wxObject)
33
34 // ===========================================================================
35 // implementation
36 // ===========================================================================
37
38 // ---------------------------------------------------------------------------
39 // utilities
40 // ---------------------------------------------------------------------------
41
42 // ---------------------------------------------------------------------------
43 //
44 // wxURI
45 //
46 // ---------------------------------------------------------------------------
47
48 // ---------------------------------------------------------------------------
49 // Constructors
50 // ---------------------------------------------------------------------------
51
52 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
53 {
54 }
55
56 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 Create(uri);
59 }
60
61 wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
62 {
63 Assign(uri);
64 }
65
66 // ---------------------------------------------------------------------------
67 // Destructor and cleanup
68 // ---------------------------------------------------------------------------
69
70 wxURI::~wxURI()
71 {
72 Clear();
73 }
74
75 void wxURI::Clear()
76 {
77 m_scheme = m_userinfo = m_server = m_port = m_path =
78 m_query = m_fragment = wxEmptyString;
79
80 m_hostType = wxURI_REGNAME;
81
82 m_fields = 0;
83 }
84
85 // ---------------------------------------------------------------------------
86 // Create
87 //
88 // This creates the URI - all we do here is call the main parsing method
89 // ---------------------------------------------------------------------------
90
91 const wxChar* wxURI::Create(const wxString& uri)
92 {
93 if (m_fields)
94 Clear();
95
96 return Parse(uri);
97 }
98
99 // ---------------------------------------------------------------------------
100 // Escape Methods
101 //
102 // TranslateEscape unencodes a 3 character URL escape sequence
103 //
104 // Escape encodes an invalid URI character into a 3 character sequence
105 //
106 // IsEscape determines if the input string contains an escape sequence,
107 // if it does, then it moves the input string past the escape sequence
108 //
109 // Unescape unencodes all 3 character URL escape sequences in a wxString
110 // ---------------------------------------------------------------------------
111
112 wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
113 {
114 wxChar c1(*s);
115 wxChar c2(*(s + 1));
116
117 wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
118
119 return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
120 }
121
122 wxString wxURI::Unescape(const wxString& uri)
123 {
124 wxString new_uri;
125
126 for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
127 {
128 if ( *i == wxT('%') )
129 {
130 new_uri += wxURI::TranslateEscape(i + 1);
131 i += 2;
132 }
133 else
134 new_uri += *i;
135 }
136
137 return new_uri;
138 }
139
140 void wxURI::Escape(wxString& s, const wxChar& c)
141 {
142 const wxChar* hdig = wxT("0123456789abcdef");
143 s += wxT('%');
144 s += hdig[(c >> 4) & 15];
145 s += hdig[c & 15];
146 }
147
148 bool wxURI::IsEscape(const wxChar*& uri)
149 {
150 // pct-encoded = "%" HEXDIG HEXDIG
151 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
152 return true;
153 else
154 return false;
155 }
156
157 // ---------------------------------------------------------------------------
158 // GetUser
159 // GetPassword
160 //
161 // Gets the username and password via the old URL method.
162 // ---------------------------------------------------------------------------
163 wxString wxURI::GetUser() const
164 {
165 size_t dwPasswordPos = m_userinfo.find(':');
166
167 if (dwPasswordPos == wxString::npos)
168 dwPasswordPos = 0;
169
170 return m_userinfo(0, dwPasswordPos);
171 }
172
173 wxString wxURI::GetPassword() const
174 {
175 size_t dwPasswordPos = m_userinfo.find(':');
176
177 if (dwPasswordPos == wxString::npos)
178 return wxT("");
179 else
180 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
181 }
182
183 // ---------------------------------------------------------------------------
184 // BuildURI
185 //
186 // BuildURI() builds the entire URI into a useable
187 // representation, including proper identification characters such as slashes
188 //
189 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
190 // the components that accept escape sequences
191 // ---------------------------------------------------------------------------
192
193 wxString wxURI::BuildURI() const
194 {
195 wxString ret;
196
197 if (HasScheme())
198 ret = ret + m_scheme + wxT(":");
199
200 if (HasServer())
201 {
202 ret += wxT("//");
203
204 if (HasUserInfo())
205 ret = ret + m_userinfo + wxT("@");
206
207 ret += m_server;
208
209 if (HasPort())
210 ret = ret + wxT(":") + m_port;
211 }
212
213 ret += m_path;
214
215 if (HasQuery())
216 ret = ret + wxT("?") + m_query;
217
218 if (HasFragment())
219 ret = ret + wxT("#") + m_fragment;
220
221 return ret;
222 }
223
224 wxString wxURI::BuildUnescapedURI() const
225 {
226 wxString ret;
227
228 if (HasScheme())
229 ret = ret + m_scheme + wxT(":");
230
231 if (HasServer())
232 {
233 ret += wxT("//");
234
235 if (HasUserInfo())
236 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
237
238 if (m_hostType == wxURI_REGNAME)
239 ret += wxURI::Unescape(m_server);
240 else
241 ret += m_server;
242
243 if (HasPort())
244 ret = ret + wxT(":") + m_port;
245 }
246
247 ret += wxURI::Unescape(m_path);
248
249 if (HasQuery())
250 ret = ret + wxT("?") + wxURI::Unescape(m_query);
251
252 if (HasFragment())
253 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
254
255 return ret;
256 }
257
258 // ---------------------------------------------------------------------------
259 // Assignment
260 // ---------------------------------------------------------------------------
261
262 wxURI& wxURI::Assign(const wxURI& uri)
263 {
264 //assign fields
265 m_fields = uri.m_fields;
266
267 //ref over components
268 m_scheme = uri.m_scheme;
269 m_userinfo = uri.m_userinfo;
270 m_server = uri.m_server;
271 m_hostType = uri.m_hostType;
272 m_port = uri.m_port;
273 m_path = uri.m_path;
274 m_query = uri.m_query;
275 m_fragment = uri.m_fragment;
276
277 return *this;
278 }
279
280 wxURI& wxURI::operator = (const wxURI& uri)
281 {
282 return Assign(uri);
283 }
284
285 wxURI& wxURI::operator = (const wxString& string)
286 {
287 Create(string);
288 return *this;
289 }
290
291 // ---------------------------------------------------------------------------
292 // Comparison
293 // ---------------------------------------------------------------------------
294
295 bool wxURI::operator == (const wxURI& uri) const
296 {
297 if (HasScheme())
298 {
299 if(m_scheme != uri.m_scheme)
300 return false;
301 }
302 else if (uri.HasScheme())
303 return false;
304
305
306 if (HasServer())
307 {
308 if (HasUserInfo())
309 {
310 if (m_userinfo != uri.m_userinfo)
311 return false;
312 }
313 else if (uri.HasUserInfo())
314 return false;
315
316 if (m_server != uri.m_server ||
317 m_hostType != uri.m_hostType)
318 return false;
319
320 if (HasPort())
321 {
322 if(m_port != uri.m_port)
323 return false;
324 }
325 else if (uri.HasPort())
326 return false;
327 }
328 else if (uri.HasServer())
329 return false;
330
331
332 if (HasPath())
333 {
334 if(m_path != uri.m_path)
335 return false;
336 }
337 else if (uri.HasPath())
338 return false;
339
340 if (HasQuery())
341 {
342 if (m_query != uri.m_query)
343 return false;
344 }
345 else if (uri.HasQuery())
346 return false;
347
348 if (HasFragment())
349 {
350 if (m_fragment != uri.m_fragment)
351 return false;
352 }
353 else if (uri.HasFragment())
354 return false;
355
356 return true;
357 }
358
359 // ---------------------------------------------------------------------------
360 // IsReference
361 //
362 // if there is no authority or scheme, it is a reference
363 // ---------------------------------------------------------------------------
364
365 bool wxURI::IsReference() const
366 { return !HasScheme() || !HasServer(); }
367
368 // ---------------------------------------------------------------------------
369 // Parse
370 //
371 // Master URI parsing method. Just calls the individual parsing methods
372 //
373 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
374 // URI-reference = URI / relative
375 // ---------------------------------------------------------------------------
376
377 const wxChar* wxURI::Parse(const wxChar* uri)
378 {
379 uri = ParseScheme(uri);
380 uri = ParseAuthority(uri);
381 uri = ParsePath(uri);
382 uri = ParseQuery(uri);
383 return ParseFragment(uri);
384 }
385
386 // ---------------------------------------------------------------------------
387 // ParseXXX
388 //
389 // Individual parsers for each URI component
390 // ---------------------------------------------------------------------------
391
392 const wxChar* wxURI::ParseScheme(const wxChar* uri)
393 {
394 wxASSERT(uri != NULL);
395
396 //copy of the uri - used for figuring out
397 //length of each component
398 const wxChar* uricopy = uri;
399
400 //Does the uri have a scheme (first character alpha)?
401 if (IsAlpha(*uri))
402 {
403 m_scheme += *uri++;
404
405 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
406 while (IsAlpha(*uri) || IsDigit(*uri) ||
407 *uri == wxT('+') ||
408 *uri == wxT('-') ||
409 *uri == wxT('.'))
410 {
411 m_scheme += *uri++;
412 }
413
414 //valid scheme?
415 if (*uri == wxT(':'))
416 {
417 //mark the scheme as valid
418 m_fields |= wxURI_SCHEME;
419
420 //move reference point up to input buffer
421 uricopy = ++uri;
422 }
423 else
424 //relative uri with relative path reference
425 m_scheme = wxEmptyString;
426 }
427 // else
428 //relative uri with _possible_ relative path reference
429
430 return uricopy;
431 }
432
433 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
434 {
435 // authority = [ userinfo "@" ] host [ ":" port ]
436 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
437 {
438 //skip past the two slashes
439 uri += 2;
440
441 // ############# DEVIATION FROM RFC #########################
442 // Don't parse the server component for file URIs
443 if(m_scheme != wxT("file"))
444 {
445 //normal way
446 uri = ParseUserInfo(uri);
447 uri = ParseServer(uri);
448 return ParsePort(uri);
449 }
450 }
451
452 return uri;
453 }
454
455 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
456 {
457 wxASSERT(uri != NULL);
458
459 //copy of the uri - used for figuring out
460 //length of each component
461 const wxChar* uricopy = uri;
462
463 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
464 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
465 {
466 if(IsUnreserved(*uri) ||
467 IsSubDelim(*uri) || *uri == wxT(':'))
468 m_userinfo += *uri++;
469 else if (IsEscape(uri))
470 {
471 m_userinfo += *uri++;
472 m_userinfo += *uri++;
473 m_userinfo += *uri++;
474 }
475 else
476 Escape(m_userinfo, *uri++);
477 }
478
479 if(*uri == wxT('@'))
480 {
481 //valid userinfo
482 m_fields |= wxURI_USERINFO;
483
484 uricopy = ++uri;
485 }
486 else
487 m_userinfo = wxEmptyString;
488
489 return uricopy;
490 }
491
492 const wxChar* wxURI::ParseServer(const wxChar* uri)
493 {
494 wxASSERT(uri != NULL);
495
496 //copy of the uri - used for figuring out
497 //length of each component
498 const wxChar* uricopy = uri;
499
500 // host = IP-literal / IPv4address / reg-name
501 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
502 if (*uri == wxT('['))
503 {
504 ++uri; //some compilers don't support *&ing a ++*
505 if (ParseIPv6address(uri) && *uri == wxT(']'))
506 {
507 ++uri;
508 m_hostType = wxURI_IPV6ADDRESS;
509
510 wxStringBufferLength theBuffer(m_server, uri - uricopy);
511 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
512 theBuffer.SetLength(uri-uricopy);
513 }
514 else
515 {
516 uri = uricopy;
517
518 ++uri; //some compilers don't support *&ing a ++*
519 if (ParseIPvFuture(uri) && *uri == wxT(']'))
520 {
521 ++uri;
522 m_hostType = wxURI_IPVFUTURE;
523
524 wxStringBufferLength theBuffer(m_server, uri - uricopy);
525 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
526 theBuffer.SetLength(uri-uricopy);
527 }
528 else
529 uri = uricopy;
530 }
531 }
532 else
533 {
534 if (ParseIPv4address(uri))
535 {
536 m_hostType = wxURI_IPV4ADDRESS;
537
538 wxStringBufferLength theBuffer(m_server, uri - uricopy);
539 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
540 theBuffer.SetLength(uri-uricopy);
541 }
542 else
543 uri = uricopy;
544 }
545
546 if(m_hostType == wxURI_REGNAME)
547 {
548 uri = uricopy;
549 // reg-name = *( unreserved / pct-encoded / sub-delims )
550 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
551 {
552 if(IsUnreserved(*uri) || IsSubDelim(*uri))
553 m_server += *uri++;
554 else if (IsEscape(uri))
555 {
556 m_server += *uri++;
557 m_server += *uri++;
558 m_server += *uri++;
559 }
560 else
561 Escape(m_server, *uri++);
562 }
563 }
564
565 //mark the server as valid
566 m_fields |= wxURI_SERVER;
567
568 return uri;
569 }
570
571
572 const wxChar* wxURI::ParsePort(const wxChar* uri)
573 {
574 wxASSERT(uri != NULL);
575
576 // port = *DIGIT
577 if(*uri == wxT(':'))
578 {
579 ++uri;
580 while(IsDigit(*uri))
581 {
582 m_port += *uri++;
583 }
584
585 //mark the port as valid
586 m_fields |= wxURI_PORT;
587 }
588
589 return uri;
590 }
591
592 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
593 {
594 wxASSERT(uri != NULL);
595
596 //copy of the uri - used for figuring out
597 //length of each component
598 const wxChar* uricopy = uri;
599
600 /// hier-part = "//" authority path-abempty
601 /// / path-absolute
602 /// / path-rootless
603 /// / path-empty
604 ///
605 /// relative-part = "//" authority path-abempty
606 /// / path-absolute
607 /// / path-noscheme
608 /// / path-empty
609 ///
610 /// path-abempty = *( "/" segment )
611 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
612 /// path-noscheme = segment-nz-nc *( "/" segment )
613 /// path-rootless = segment-nz *( "/" segment )
614 /// path-empty = 0<pchar>
615 ///
616 /// segment = *pchar
617 /// segment-nz = 1*pchar
618 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
619 /// ; non-zero-length segment without any colon ":"
620 ///
621 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
622 if (*uri == wxT('/'))
623 {
624 m_path += *uri++;
625
626 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
627 {
628 if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
629 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
630 m_path += *uri++;
631 else if (IsEscape(uri))
632 {
633 m_path += *uri++;
634 m_path += *uri++;
635 m_path += *uri++;
636 }
637 else
638 Escape(m_path, *uri++);
639 }
640
641 if (bNormalize)
642 {
643 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
644 #if wxUSE_STL
645 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
646 #endif
647 Normalize(theBuffer, true);
648 theBuffer.SetLength(wxStrlen(theBuffer));
649 }
650 //mark the path as valid
651 m_fields |= wxURI_PATH;
652 }
653 else if(*uri) //Relative path
654 {
655 if (bReference)
656 {
657 //no colon allowed
658 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
659 {
660 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
661 *uri == wxT('@') || *uri == wxT('/'))
662 m_path += *uri++;
663 else if (IsEscape(uri))
664 {
665 m_path += *uri++;
666 m_path += *uri++;
667 m_path += *uri++;
668 }
669 else
670 Escape(m_path, *uri++);
671 }
672 }
673 else
674 {
675 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
676 {
677 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
678 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
679 m_path += *uri++;
680 else if (IsEscape(uri))
681 {
682 m_path += *uri++;
683 m_path += *uri++;
684 m_path += *uri++;
685 }
686 else
687 Escape(m_path, *uri++);
688 }
689 }
690
691 if (uri != uricopy)
692 {
693 if (bNormalize)
694 {
695 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
696 #if wxUSE_STL
697 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
698 #endif
699 Normalize(theBuffer);
700 theBuffer.SetLength(wxStrlen(theBuffer));
701 }
702
703 //mark the path as valid
704 m_fields |= wxURI_PATH;
705 }
706 }
707
708 return uri;
709 }
710
711
712 const wxChar* wxURI::ParseQuery(const wxChar* uri)
713 {
714 wxASSERT(uri != NULL);
715
716 // query = *( pchar / "/" / "?" )
717 if (*uri == wxT('?'))
718 {
719 ++uri;
720 while(*uri && *uri != wxT('#'))
721 {
722 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
723 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
724 m_query += *uri++;
725 else if (IsEscape(uri))
726 {
727 m_query += *uri++;
728 m_query += *uri++;
729 m_query += *uri++;
730 }
731 else
732 Escape(m_query, *uri++);
733 }
734
735 //mark the server as valid
736 m_fields |= wxURI_QUERY;
737 }
738
739 return uri;
740 }
741
742
743 const wxChar* wxURI::ParseFragment(const wxChar* uri)
744 {
745 wxASSERT(uri != NULL);
746
747 // fragment = *( pchar / "/" / "?" )
748 if (*uri == wxT('#'))
749 {
750 ++uri;
751 while(*uri)
752 {
753 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
754 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
755 m_fragment += *uri++;
756 else if (IsEscape(uri))
757 {
758 m_fragment += *uri++;
759 m_fragment += *uri++;
760 m_fragment += *uri++;
761 }
762 else
763 Escape(m_fragment, *uri++);
764 }
765
766 //mark the server as valid
767 m_fields |= wxURI_FRAGMENT;
768 }
769
770 return uri;
771 }
772
773 // ---------------------------------------------------------------------------
774 // Resolve
775 //
776 // Builds missing components of this uri from a base uri
777 //
778 // A version of the algorithm outlined in the RFC is used here
779 // (it is shown in comments)
780 //
781 // Note that an empty URI inherits all components
782 // ---------------------------------------------------------------------------
783
784 void wxURI::Resolve(const wxURI& base, int flags)
785 {
786 wxASSERT_MSG(!base.IsReference(),
787 wxT("wxURI to inherit from must not be a reference!"));
788
789 // If we arn't being strict, enable the older (pre-RFC2396)
790 // loophole that allows this uri to inherit other
791 // properties from the base uri - even if the scheme
792 // is defined
793 if ( !(flags & wxURI_STRICT) &&
794 HasScheme() && base.HasScheme() &&
795 m_scheme == base.m_scheme )
796 {
797 m_fields -= wxURI_SCHEME;
798 }
799
800
801 // Do nothing if this is an absolute wxURI
802 // if defined(R.scheme) then
803 // T.scheme = R.scheme;
804 // T.authority = R.authority;
805 // T.path = remove_dot_segments(R.path);
806 // T.query = R.query;
807 if (HasScheme())
808 {
809 return;
810 }
811
812 //No scheme - inherit
813 m_scheme = base.m_scheme;
814 m_fields |= wxURI_SCHEME;
815
816 // All we need to do for relative URIs with an
817 // authority component is just inherit the scheme
818 // if defined(R.authority) then
819 // T.authority = R.authority;
820 // T.path = remove_dot_segments(R.path);
821 // T.query = R.query;
822 if (HasServer())
823 {
824 return;
825 }
826
827 //No authority - inherit
828 if (base.HasUserInfo())
829 {
830 m_userinfo = base.m_userinfo;
831 m_fields |= wxURI_USERINFO;
832 }
833
834 m_server = base.m_server;
835 m_hostType = base.m_hostType;
836 m_fields |= wxURI_SERVER;
837
838 if (base.HasPort())
839 {
840 m_port = base.m_port;
841 m_fields |= wxURI_PORT;
842 }
843
844
845 // Simple path inheritance from base
846 if (!HasPath())
847 {
848 // T.path = Base.path;
849 m_path = base.m_path;
850 m_fields |= wxURI_PATH;
851
852
853 // if defined(R.query) then
854 // T.query = R.query;
855 // else
856 // T.query = Base.query;
857 // endif;
858 if (!HasQuery())
859 {
860 m_query = base.m_query;
861 m_fields |= wxURI_QUERY;
862 }
863 }
864 else
865 {
866 // if (R.path starts-with "/") then
867 // T.path = remove_dot_segments(R.path);
868 // else
869 // T.path = merge(Base.path, R.path);
870 // T.path = remove_dot_segments(T.path);
871 // endif;
872 // T.query = R.query;
873 if (m_path[0u] != wxT('/'))
874 {
875 //Merge paths
876 wxString::const_iterator op = m_path.begin();
877 wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
878
879 //not a ending directory? move up
880 if (base.m_path[0] && *(bp-1) != wxT('/'))
881 UpTree(base.m_path.begin(), bp);
882
883 //normalize directories
884 while(*op == wxT('.') && *(op+1) == wxT('.') &&
885 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
886 {
887 UpTree(base.m_path.begin(), bp);
888
889 if (*(op+2) == '\0')
890 op += 2;
891 else
892 op += 3;
893 }
894
895 m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
896 m_path.substr((op - m_path.begin()), m_path.length());
897 }
898 }
899
900 //T.fragment = R.fragment;
901 }
902
903 // ---------------------------------------------------------------------------
904 // UpTree
905 //
906 // Moves a URI path up a directory
907 // ---------------------------------------------------------------------------
908
909 //static
910 void wxURI::UpTree(wxString::const_iterator uristart,
911 wxString::const_iterator& uri)
912 {
913 if (uri != uristart && *(uri-1) == wxT('/'))
914 {
915 uri -= 2;
916 }
917
918 for(;uri != uristart; --uri)
919 {
920 if (*uri == wxT('/'))
921 {
922 ++uri;
923 break;
924 }
925 }
926
927 //!!!TODO:HACK!!!//
928 if (uri == uristart && *uri == wxT('/'))
929 ++uri;
930 //!!!//
931 }
932
933 // FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
934 /*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
935 {
936 if (uri != uristart && *(uri-1) == wxT('/'))
937 {
938 uri -= 2;
939 }
940
941 for(;uri != uristart; --uri)
942 {
943 if (*uri == wxT('/'))
944 {
945 ++uri;
946 break;
947 }
948 }
949
950 //!!!TODO:HACK!!!//
951 if (uri == uristart && *uri == wxT('/'))
952 ++uri;
953 //!!!//
954 }
955 // end of FIXME-UTF8
956
957 // ---------------------------------------------------------------------------
958 // Normalize
959 //
960 // Normalizes directories in-place
961 //
962 // I.E. ./ and . are ignored
963 //
964 // ../ and .. are removed if a directory is before it, along
965 // with that directory (leading .. and ../ are kept)
966 // ---------------------------------------------------------------------------
967
968 //static
969 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
970 {
971 wxChar* cp = s;
972 wxChar* bp = s;
973
974 if(s[0] == wxT('/'))
975 ++bp;
976
977 while(*cp)
978 {
979 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
980 && (bp == cp || *(cp-1) == wxT('/')))
981 {
982 //. _or_ ./ - ignore
983 if (*(cp+1) == '\0')
984 cp += 1;
985 else
986 cp += 2;
987 }
988 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
989 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
990 && (bp == cp || *(cp-1) == wxT('/')))
991 {
992 //.. _or_ ../ - go up the tree
993 if (s != bp)
994 {
995 UpTree((const wxChar*)bp, (const wxChar*&)s);
996
997 if (*(cp+2) == '\0')
998 cp += 2;
999 else
1000 cp += 3;
1001 }
1002 else if (!bIgnoreLeads)
1003
1004 {
1005 *bp++ = *cp++;
1006 *bp++ = *cp++;
1007 if (*cp)
1008 *bp++ = *cp++;
1009
1010 s = bp;
1011 }
1012 else
1013 {
1014 if (*(cp+2) == '\0')
1015 cp += 2;
1016 else
1017 cp += 3;
1018 }
1019 }
1020 else
1021 *s++ = *cp++;
1022 }
1023
1024 *s = '\0';
1025 }
1026
1027 // ---------------------------------------------------------------------------
1028 // ParseH16
1029 //
1030 // Parses 1 to 4 hex values. Returns true if the first character of the input
1031 // string is a valid hex character. It is the caller's responsibility to move
1032 // the input string back to its original position on failure.
1033 // ---------------------------------------------------------------------------
1034
1035 bool wxURI::ParseH16(const wxChar*& uri)
1036 {
1037 // h16 = 1*4HEXDIG
1038 if(!IsHex(*++uri))
1039 return false;
1040
1041 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1042 ++uri;
1043
1044 return true;
1045 }
1046
1047 // ---------------------------------------------------------------------------
1048 // ParseIPXXX
1049 //
1050 // Parses a certain version of an IP address and moves the input string past
1051 // it. Returns true if the input string contains the proper version of an ip
1052 // address. It is the caller's responsibility to move the input string back
1053 // to its original position on failure.
1054 // ---------------------------------------------------------------------------
1055
1056 bool wxURI::ParseIPv4address(const wxChar*& uri)
1057 {
1058 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1059 //
1060 //dec-octet = DIGIT ; 0-9
1061 // / %x31-39 DIGIT ; 10-99
1062 // / "1" 2DIGIT ; 100-199
1063 // / "2" %x30-34 DIGIT ; 200-249
1064 // / "25" %x30-35 ; 250-255
1065 size_t iIPv4 = 0;
1066 if (IsDigit(*uri))
1067 {
1068 ++iIPv4;
1069
1070
1071 //each ip part must be between 0-255 (dupe of version in for loop)
1072 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1073 //100 or less (note !)
1074 !( (*(uri-2) < wxT('2')) ||
1075 //240 or less
1076 (*(uri-2) == wxT('2') &&
1077 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1078 )
1079 )
1080 )
1081 {
1082 return false;
1083 }
1084
1085 if(IsDigit(*uri))++uri;
1086
1087 //compilers should unroll this loop
1088 for(; iIPv4 < 4; ++iIPv4)
1089 {
1090 if (*uri != wxT('.') || !IsDigit(*++uri))
1091 break;
1092
1093 //each ip part must be between 0-255
1094 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1095 //100 or less (note !)
1096 !( (*(uri-2) < wxT('2')) ||
1097 //240 or less
1098 (*(uri-2) == wxT('2') &&
1099 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1100 )
1101 )
1102 )
1103 {
1104 return false;
1105 }
1106 if(IsDigit(*uri))++uri;
1107 }
1108 }
1109 return iIPv4 == 4;
1110 }
1111
// Tries to match an IPv6 address (grammar below), advancing uri while doing so.
// Returns true when the text was accepted. On failure uri is left wherever
// parsing stopped; ParseServer() rewinds it itself before trying another form.
//
// Every h16 group is read with ParseH16(), which advances uri before testing
// the first character, hence the "--uri" corrections after a failed call.
1112 bool wxURI::ParseIPv6address(const wxChar*& uri)
1113 {
1114 // IPv6address = 6( h16 ":" ) ls32
1115 // / "::" 5( h16 ":" ) ls32
1116 // / [ h16 ] "::" 4( h16 ":" ) ls32
1117 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1118 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1119 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1120 // / [ *4( h16 ":" ) h16 ] "::" ls32
1121 // / [ *5( h16 ":" ) h16 ] "::" h16
1122 // / [ *6( h16 ":" ) h16 ] "::"
1123
// numPrefix counts the h16 groups read before any "::"; maxPostfix is the
// number of "h16 :" groups that are then required after it.
1124 size_t numPrefix = 0,
1125 maxPostfix;
1126
// set when the prefix loop stopped because ParseH16() failed
1127 bool bEndHex = false;
1128
// Read up to six leading h16 groups, stopping at the first failed group or at
// the first group that is not followed by ':'.
1129 for( ; numPrefix < 6; ++numPrefix)
1130 {
1131 if(!ParseH16(uri))
1132 {
// undo the single step ParseH16() took before failing
1133 --uri;
1134 bEndHex = true;
1135 break;
1136 }
1137
1138 if(*uri != wxT(':'))
1139 {
1140 break;
1141 }
1142 }
1143
// Note the short-circuit: ParseH16() is only called again here when the loop
// above did not end on a failed group.
1144 if(!bEndHex && !ParseH16(uri))
1145 {
1146 --uri;
1147
1148 if (numPrefix)
1149 return false;
1150
// no prefix group was read: a ':' here must be the first half of "::"
1151 if (*uri == wxT(':'))
1152 {
1153 if (*++uri != wxT(':'))
1154 return false;
1155
1156 maxPostfix = 5;
1157 }
1158 else
1159 maxPostfix = 6;
1160 }
1161 else
1162 {
1163 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1164 {
// no "::" follows, so only the full form 6( h16 ":" ) ls32 can match
1165 if (numPrefix != 6)
1166 return false;
1167
// back up to the most recent ':' and resume just after it
1168 while (*--uri != wxT(':')) {}
1169 ++uri;
1170
1171 const wxChar* uristart = uri;
1172 //parse ls32
1173 // ls32 = ( h16 ":" h16 ) / IPv4address
1174 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1175 return true;
1176
1177 uri = uristart;
1178
1179 if (ParseIPv4address(uri))
1180 return true;
1181 else
1182 return false;
1183 }
1184 else
1185 {
// skip the "::"
1186 uri += 2;
1187
1188 if (numPrefix > 3)
1189 maxPostfix = 0;
1190 else
1191 maxPostfix = 4 - numPrefix;
1192 }
1193 }
1194
// when no "h16 :" groups are required, the shorter endings tried at the bottom
// of the function are permitted as well
1195 bool bAllowAltEnding = maxPostfix == 0;
1196
// each required group after the "::" must be an h16 followed by ':'
1197 for(; maxPostfix != 0; --maxPostfix)
1198 {
1199 if(!ParseH16(uri) || *uri != wxT(':'))
1200 return false;
1201 }
1202
1203 if(numPrefix <= 4)
1204 {
1205 const wxChar* uristart = uri;
1206 //parse ls32
1207 // ls32 = ( h16 ":" h16 ) / IPv4address
1208 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1209 return true;
1210
1211 uri = uristart;
1212
1213 if (ParseIPv4address(uri))
1214 return true;
1215
1216 uri = uristart;
1217
1218 if (!bAllowAltEnding)
1219 return false;
1220 }
1221
// ParseH16() is called here only for its effect on uri: the function returns
// true whether or not a trailing h16 is found.
1222 if(numPrefix <= 5 && ParseH16(uri))
1223 return true;
1224
1225 return true;
1226 }
1227
1228 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1229 {
1230 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1231 if (*++uri != wxT('v') || !IsHex(*++uri))
1232 return false;
1233
1234 while (IsHex(*++uri)) {}
1235
1236 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1237 return false;
1238
1239 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1240
1241 return true;
1242 }
1243
1244
1245 // ---------------------------------------------------------------------------
1246 // CharToHex
1247 //
1248 // Converts a character into a numeric hexadecimal value, or 0 if the
1249 // passed in character is not a valid hex character
1250 // ---------------------------------------------------------------------------
1251
1252 //static
1253 wxChar wxURI::CharToHex(const wxChar& c)
1254 {
1255 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1256 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1257 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1258
1259 return 0;
1260 }
1261
1262 // ---------------------------------------------------------------------------
1263 // IsXXX
1264 //
1265 // Returns true if the passed in character meets the criteria of the method
1266 // ---------------------------------------------------------------------------
1267
1268 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1269 bool wxURI::IsUnreserved (const wxChar& c)
1270 { return IsAlpha(c) || IsDigit(c) ||
1271 c == wxT('-') ||
1272 c == wxT('.') ||
1273 c == wxT('_') ||
1274 c == wxT('~') //tilde
1275 ;
1276 }
1277
1278 bool wxURI::IsReserved (const wxChar& c)
1279 {
1280 return IsGenDelim(c) || IsSubDelim(c);
1281 }
1282
1283 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1284 bool wxURI::IsGenDelim (const wxChar& c)
1285 {
1286 return c == wxT(':') ||
1287 c == wxT('/') ||
1288 c == wxT('?') ||
1289 c == wxT('#') ||
1290 c == wxT('[') ||
1291 c == wxT(']') ||
1292 c == wxT('@');
1293 }
1294
1295 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1296 //! / "*" / "+" / "," / ";" / "="
1297 bool wxURI::IsSubDelim (const wxChar& c)
1298 {
1299 return c == wxT('!') ||
1300 c == wxT('$') ||
1301 c == wxT('&') ||
1302 c == wxT('\'') ||
1303 c == wxT('(') ||
1304 c == wxT(')') ||
1305 c == wxT('*') ||
1306 c == wxT('+') ||
1307 c == wxT(',') ||
1308 c == wxT(';') ||
1309 c == wxT('=')
1310 ;
1311 }
1312
1313 bool wxURI::IsHex(const wxChar& c)
1314 { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1315
1316 bool wxURI::IsAlpha(const wxChar& c)
1317 { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
1318
1319 bool wxURI::IsDigit(const wxChar& c)
1320 { return c >= wxT('0') && c <= wxT('9'); }
1321
1322
1323 //end of uri.cpp
1324
1325
1326