]> git.saurik.com Git - wxWidgets.git/blob - src/common/uri.cpp
make test failures easier to debug by using more informative failure messages
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 // For compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #ifndef WX_PRECOMP
27 #include "wx/crt.h"
28 #endif
29
30 #include "wx/uri.h"
31
32 // ---------------------------------------------------------------------------
33 // definitions
34 // ---------------------------------------------------------------------------
35
// Class-implementation macro for wxURI, naming wxObject as its base.
// NOTE(review): presumably pairs with a matching declaration macro in
// wx/uri.h -- confirm against the header.
36 IMPLEMENT_CLASS(wxURI, wxObject)
37
38 // ===========================================================================
39 // implementation
40 // ===========================================================================
41
42 // ---------------------------------------------------------------------------
43 // utilities
44 // ---------------------------------------------------------------------------
45
46 // ---------------------------------------------------------------------------
47 //
48 // wxURI
49 //
50 // ---------------------------------------------------------------------------
51
52 // ---------------------------------------------------------------------------
53 // Constructors
54 // ---------------------------------------------------------------------------
55
56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 }
59
60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61 {
62 Create(uri);
63 }
64
65 wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 Assign(uri);
68 }
69
70 // ---------------------------------------------------------------------------
71 // Destructor and cleanup
72 // ---------------------------------------------------------------------------
73
74 wxURI::~wxURI()
75 {
76 Clear();
77 }
78
79 void wxURI::Clear()
80 {
81 m_scheme = m_userinfo = m_server = m_port = m_path =
82 m_query = m_fragment = wxEmptyString;
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87 }
88
89 // ---------------------------------------------------------------------------
90 // Create
91 //
92 // This creates the URI - all we do here is call the main parsing method
93 // ---------------------------------------------------------------------------
94
95 const wxChar* wxURI::Create(const wxString& uri)
96 {
97 if (m_fields)
98 Clear();
99
100 // FIXME-UTF8: rewrite ParseXXX() methods using iterators
101 // NB: using wxWxCharBuffer instead of just c_str() avoids keeping
102 // converted string in memory for longer than needed
103 return Parse(wxWxCharBuffer(uri.c_str()));
104 }
105
106 // ---------------------------------------------------------------------------
107 // Escape Methods
108 //
109 // TranslateEscape unencodes a 3 character URL escape sequence
110 //
111 // Escape encodes an invalid URI character into a 3 character sequence
112 //
113 // IsEscape determines if the input string starts with an escape sequence;
114 // it only tests the input and does not move it past the escape sequence
115 //
116 // Unescape unencodes all 3 character URL escape sequences in a wxString
117 // ---------------------------------------------------------------------------
118
119 wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
120 {
121 wxChar c1(*s);
122 wxChar c2(*(s + 1));
123
124 wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
125
126 return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
127 }
128
129 wxString wxURI::Unescape(const wxString& uri)
130 {
131 wxString new_uri;
132
133 for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
134 {
135 if ( *i == wxT('%') )
136 {
137 new_uri += wxURI::TranslateEscape(i + 1);
138 i += 2;
139 }
140 else
141 new_uri += *i;
142 }
143
144 return new_uri;
145 }
146
147 void wxURI::Escape(wxString& s, const wxChar& c)
148 {
149 const wxChar* hdig = wxT("0123456789abcdef");
150 s += wxT('%');
151 s += hdig[(c >> 4) & 15];
152 s += hdig[c & 15];
153 }
154
155 bool wxURI::IsEscape(const wxChar*& uri)
156 {
157 // pct-encoded = "%" HEXDIG HEXDIG
158 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
159 return true;
160 else
161 return false;
162 }
163
164 // ---------------------------------------------------------------------------
165 // GetUser
166 // GetPassword
167 //
168 // Gets the username and password via the old URL method.
169 // ---------------------------------------------------------------------------
170 wxString wxURI::GetUser() const
171 {
172 size_t dwPasswordPos = m_userinfo.find(':');
173
174 if (dwPasswordPos == wxString::npos)
175 dwPasswordPos = 0;
176
177 return m_userinfo(0, dwPasswordPos);
178 }
179
180 wxString wxURI::GetPassword() const
181 {
182 size_t dwPasswordPos = m_userinfo.find(':');
183
184 if (dwPasswordPos == wxString::npos)
185 return wxT("");
186 else
187 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
188 }
189
190 // ---------------------------------------------------------------------------
191 // BuildURI
192 //
193 // BuildURI() builds the entire URI into a useable
194 // representation, including proper identification characters such as slashes
195 //
196 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
197 // the components that accept escape sequences
198 // ---------------------------------------------------------------------------
199
200 wxString wxURI::BuildURI() const
201 {
202 wxString ret;
203
204 if (HasScheme())
205 ret = ret + m_scheme + wxT(":");
206
207 if (HasServer())
208 {
209 ret += wxT("//");
210
211 if (HasUserInfo())
212 ret = ret + m_userinfo + wxT("@");
213
214 ret += m_server;
215
216 if (HasPort())
217 ret = ret + wxT(":") + m_port;
218 }
219
220 ret += m_path;
221
222 if (HasQuery())
223 ret = ret + wxT("?") + m_query;
224
225 if (HasFragment())
226 ret = ret + wxT("#") + m_fragment;
227
228 return ret;
229 }
230
231 wxString wxURI::BuildUnescapedURI() const
232 {
233 wxString ret;
234
235 if (HasScheme())
236 ret = ret + m_scheme + wxT(":");
237
238 if (HasServer())
239 {
240 ret += wxT("//");
241
242 if (HasUserInfo())
243 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
244
245 if (m_hostType == wxURI_REGNAME)
246 ret += wxURI::Unescape(m_server);
247 else
248 ret += m_server;
249
250 if (HasPort())
251 ret = ret + wxT(":") + m_port;
252 }
253
254 ret += wxURI::Unescape(m_path);
255
256 if (HasQuery())
257 ret = ret + wxT("?") + wxURI::Unescape(m_query);
258
259 if (HasFragment())
260 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
261
262 return ret;
263 }
264
265 // ---------------------------------------------------------------------------
266 // Assignment
267 // ---------------------------------------------------------------------------
268
269 wxURI& wxURI::Assign(const wxURI& uri)
270 {
271 //assign fields
272 m_fields = uri.m_fields;
273
274 //ref over components
275 m_scheme = uri.m_scheme;
276 m_userinfo = uri.m_userinfo;
277 m_server = uri.m_server;
278 m_hostType = uri.m_hostType;
279 m_port = uri.m_port;
280 m_path = uri.m_path;
281 m_query = uri.m_query;
282 m_fragment = uri.m_fragment;
283
284 return *this;
285 }
286
287 wxURI& wxURI::operator = (const wxURI& uri)
288 {
289 return Assign(uri);
290 }
291
292 wxURI& wxURI::operator = (const wxString& string)
293 {
294 Create(string);
295 return *this;
296 }
297
298 // ---------------------------------------------------------------------------
299 // Comparison
300 // ---------------------------------------------------------------------------
301
302 bool wxURI::operator == (const wxURI& uri) const
303 {
304 if (HasScheme())
305 {
306 if(m_scheme != uri.m_scheme)
307 return false;
308 }
309 else if (uri.HasScheme())
310 return false;
311
312
313 if (HasServer())
314 {
315 if (HasUserInfo())
316 {
317 if (m_userinfo != uri.m_userinfo)
318 return false;
319 }
320 else if (uri.HasUserInfo())
321 return false;
322
323 if (m_server != uri.m_server ||
324 m_hostType != uri.m_hostType)
325 return false;
326
327 if (HasPort())
328 {
329 if(m_port != uri.m_port)
330 return false;
331 }
332 else if (uri.HasPort())
333 return false;
334 }
335 else if (uri.HasServer())
336 return false;
337
338
339 if (HasPath())
340 {
341 if(m_path != uri.m_path)
342 return false;
343 }
344 else if (uri.HasPath())
345 return false;
346
347 if (HasQuery())
348 {
349 if (m_query != uri.m_query)
350 return false;
351 }
352 else if (uri.HasQuery())
353 return false;
354
355 if (HasFragment())
356 {
357 if (m_fragment != uri.m_fragment)
358 return false;
359 }
360 else if (uri.HasFragment())
361 return false;
362
363 return true;
364 }
365
366 // ---------------------------------------------------------------------------
367 // IsReference
368 //
369 // if there is no authority or scheme, it is a reference
370 // ---------------------------------------------------------------------------
371
372 bool wxURI::IsReference() const
373 { return !HasScheme() || !HasServer(); }
374
375 // ---------------------------------------------------------------------------
376 // Parse
377 //
378 // Master URI parsing method. Just calls the individual parsing methods
379 //
380 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
381 // URI-reference = URI / relative
382 // ---------------------------------------------------------------------------
383
384 const wxChar* wxURI::Parse(const wxChar *uri)
385 {
386 uri = ParseScheme(uri);
387 uri = ParseAuthority(uri);
388 uri = ParsePath(uri);
389 uri = ParseQuery(uri);
390 return ParseFragment(uri);
391 }
392
393 // ---------------------------------------------------------------------------
394 // ParseXXX
395 //
396 // Individual parsers for each URI component
397 // ---------------------------------------------------------------------------
398
399 const wxChar* wxURI::ParseScheme(const wxChar *uri)
400 {
401 wxASSERT(uri != NULL);
402
403 //copy of the uri - used for figuring out
404 //length of each component
405 const wxChar* uricopy = uri;
406
407 //Does the uri have a scheme (first character alpha)?
408 if (IsAlpha(*uri))
409 {
410 m_scheme += *uri++;
411
412 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
413 while (IsAlpha(*uri) || IsDigit(*uri) ||
414 *uri == wxT('+') ||
415 *uri == wxT('-') ||
416 *uri == wxT('.'))
417 {
418 m_scheme += *uri++;
419 }
420
421 //valid scheme?
422 if (*uri == wxT(':'))
423 {
424 //mark the scheme as valid
425 m_fields |= wxURI_SCHEME;
426
427 //move reference point up to input buffer
428 uricopy = ++uri;
429 }
430 else
431 //relative uri with relative path reference
432 m_scheme = wxEmptyString;
433 }
434 // else
435 //relative uri with _possible_ relative path reference
436
437 return uricopy;
438 }
439
440 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
441 {
442 // authority = [ userinfo "@" ] host [ ":" port ]
443 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
444 {
445 //skip past the two slashes
446 uri += 2;
447
448 // ############# DEVIATION FROM RFC #########################
449 // Don't parse the server component for file URIs
450 if(m_scheme != wxT("file"))
451 {
452 //normal way
453 uri = ParseUserInfo(uri);
454 uri = ParseServer(uri);
455 return ParsePort(uri);
456 }
457 }
458
459 return uri;
460 }
461
462 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
463 {
464 wxASSERT(uri != NULL);
465
466 //copy of the uri - used for figuring out
467 //length of each component
468 const wxChar* uricopy = uri;
469
470 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
471 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
472 {
473 if(IsUnreserved(*uri) ||
474 IsSubDelim(*uri) || *uri == wxT(':'))
475 m_userinfo += *uri++;
476 else if (IsEscape(uri))
477 {
478 m_userinfo += *uri++;
479 m_userinfo += *uri++;
480 m_userinfo += *uri++;
481 }
482 else
483 Escape(m_userinfo, *uri++);
484 }
485
486 if(*uri == wxT('@'))
487 {
488 //valid userinfo
489 m_fields |= wxURI_USERINFO;
490
491 uricopy = ++uri;
492 }
493 else
494 m_userinfo = wxEmptyString;
495
496 return uricopy;
497 }
498
// Parses the host part of the authority and stores it in m_server.
//
// A host starting with '[' is tried as an IPv6 literal and then as an
// IPvFuture literal; any other host is tried as an IPv4 address. On a match
// m_hostType is set accordingly and the matched text (brackets included for
// the literal forms) is copied into m_server. If nothing matched, m_hostType
// is still wxURI_REGNAME (its value after construction or Clear()) and the
// host is re-read from the start as a reg-name, escaping characters that are
// not allowed. wxURI_SERVER is set unconditionally. Returns the position
// right after the host.
//
// NOTE(review): the cursor is advanced past '[' before the IPv6/IPvFuture
// helpers are called, and those helpers pre-increment again before looking at
// the first character -- confirm the first character of the literal is not
// being skipped.
499 const wxChar* wxURI::ParseServer(const wxChar* uri)
500 {
501 wxASSERT(uri != NULL);
502
503 //copy of the uri - used for figuring out
504 //length of each component
505 const wxChar* uricopy = uri;
506
507 // host = IP-literal / IPv4address / reg-name
508 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
509 if (*uri == wxT('['))
510 {
511 ++uri; //some compilers don't support *&ing a ++*
512 if (ParseIPv6address(uri) && *uri == wxT(']'))
513 {
514 ++uri;
515 m_hostType = wxURI_IPV6ADDRESS;
516
// copy everything from '[' through ']' into m_server
517 wxStringBufferLength theBuffer(m_server, uri - uricopy);
518 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
519 theBuffer.SetLength(uri-uricopy);
520 }
521 else
522 {
// not IPv6: rewind to '[' and retry as IPvFuture
523 uri = uricopy;
524
525 ++uri; //some compilers don't support *&ing a ++*
526 if (ParseIPvFuture(uri) && *uri == wxT(']'))
527 {
528 ++uri;
529 m_hostType = wxURI_IPVFUTURE;
530
531 wxStringBufferLength theBuffer(m_server, uri - uricopy);
532 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
533 theBuffer.SetLength(uri-uricopy);
534 }
535 else
536 uri = uricopy;
537 }
538 }
539 else
540 {
541 if (ParseIPv4address(uri))
542 {
543 m_hostType = wxURI_IPV4ADDRESS;
544
545 wxStringBufferLength theBuffer(m_server, uri - uricopy);
546 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
547 theBuffer.SetLength(uri-uricopy);
548 }
549 else
550 uri = uricopy;
551 }
552
// none of the IP forms matched: treat the host as a registered name
553 if(m_hostType == wxURI_REGNAME)
554 {
555 uri = uricopy;
556 // reg-name = *( unreserved / pct-encoded / sub-delims )
557 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
558 {
559 if(IsUnreserved(*uri) || IsSubDelim(*uri))
560 m_server += *uri++;
561 else if (IsEscape(uri))
562 {
563 m_server += *uri++;
564 m_server += *uri++;
565 m_server += *uri++;
566 }
567 else
568 Escape(m_server, *uri++);
569 }
570 }
571
572 //mark the server as valid
573 m_fields |= wxURI_SERVER;
574
575 return uri;
576 }
577
578
579 const wxChar* wxURI::ParsePort(const wxChar* uri)
580 {
581 wxASSERT(uri != NULL);
582
583 // port = *DIGIT
584 if(*uri == wxT(':'))
585 {
586 ++uri;
587 while(IsDigit(*uri))
588 {
589 m_port += *uri++;
590 }
591
592 //mark the port as valid
593 m_fields |= wxURI_PORT;
594 }
595
596 return uri;
597 }
598
599 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
600 {
601 wxASSERT(uri != NULL);
602
603 //copy of the uri - used for figuring out
604 //length of each component
605 const wxChar* uricopy = uri;
606
607 /// hier-part = "//" authority path-abempty
608 /// / path-absolute
609 /// / path-rootless
610 /// / path-empty
611 ///
612 /// relative-part = "//" authority path-abempty
613 /// / path-absolute
614 /// / path-noscheme
615 /// / path-empty
616 ///
617 /// path-abempty = *( "/" segment )
618 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
619 /// path-noscheme = segment-nz-nc *( "/" segment )
620 /// path-rootless = segment-nz *( "/" segment )
621 /// path-empty = 0<pchar>
622 ///
623 /// segment = *pchar
624 /// segment-nz = 1*pchar
625 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
626 /// ; non-zero-length segment without any colon ":"
627 ///
628 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
629 if (*uri == wxT('/'))
630 {
631 m_path += *uri++;
632
633 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
634 {
635 if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
636 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
637 m_path += *uri++;
638 else if (IsEscape(uri))
639 {
640 m_path += *uri++;
641 m_path += *uri++;
642 m_path += *uri++;
643 }
644 else
645 Escape(m_path, *uri++);
646 }
647
648 if (bNormalize)
649 {
650 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
651 Normalize(theBuffer, true);
652 theBuffer.SetLength(wxStrlen(theBuffer));
653 }
654 //mark the path as valid
655 m_fields |= wxURI_PATH;
656 }
657 else if(*uri) //Relative path
658 {
659 if (bReference)
660 {
661 //no colon allowed
662 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
663 {
664 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
665 *uri == wxT('@') || *uri == wxT('/'))
666 m_path += *uri++;
667 else if (IsEscape(uri))
668 {
669 m_path += *uri++;
670 m_path += *uri++;
671 m_path += *uri++;
672 }
673 else
674 Escape(m_path, *uri++);
675 }
676 }
677 else
678 {
679 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
680 {
681 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
682 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
683 m_path += *uri++;
684 else if (IsEscape(uri))
685 {
686 m_path += *uri++;
687 m_path += *uri++;
688 m_path += *uri++;
689 }
690 else
691 Escape(m_path, *uri++);
692 }
693 }
694
695 if (uri != uricopy)
696 {
697 if (bNormalize)
698 {
699 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
700 Normalize(theBuffer);
701 theBuffer.SetLength(wxStrlen(theBuffer));
702 }
703
704 //mark the path as valid
705 m_fields |= wxURI_PATH;
706 }
707 }
708
709 return uri;
710 }
711
712
713 const wxChar* wxURI::ParseQuery(const wxChar* uri)
714 {
715 wxASSERT(uri != NULL);
716
717 // query = *( pchar / "/" / "?" )
718 if (*uri == wxT('?'))
719 {
720 ++uri;
721 while(*uri && *uri != wxT('#'))
722 {
723 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
724 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
725 m_query += *uri++;
726 else if (IsEscape(uri))
727 {
728 m_query += *uri++;
729 m_query += *uri++;
730 m_query += *uri++;
731 }
732 else
733 Escape(m_query, *uri++);
734 }
735
736 //mark the server as valid
737 m_fields |= wxURI_QUERY;
738 }
739
740 return uri;
741 }
742
743
744 const wxChar* wxURI::ParseFragment(const wxChar* uri)
745 {
746 wxASSERT(uri != NULL);
747
748 // fragment = *( pchar / "/" / "?" )
749 if (*uri == wxT('#'))
750 {
751 ++uri;
752 while(*uri)
753 {
754 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
755 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
756 m_fragment += *uri++;
757 else if (IsEscape(uri))
758 {
759 m_fragment += *uri++;
760 m_fragment += *uri++;
761 m_fragment += *uri++;
762 }
763 else
764 Escape(m_fragment, *uri++);
765 }
766
767 //mark the server as valid
768 m_fields |= wxURI_FRAGMENT;
769 }
770
771 return uri;
772 }
773
774 // ---------------------------------------------------------------------------
775 // Resolve
776 //
777 // Builds missing components of this uri from a base uri
778 //
779 // A version of the algorithm outlined in the RFC is used here
780 // (it is shown in comments)
781 //
782 // Note that an empty URI inherits all components
783 // ---------------------------------------------------------------------------
784
// Fills in the components this URI is missing from `base`.
//
//   base  - URI to inherit from; asserted not to be a reference
//   flags - when wxURI_STRICT is not set, a scheme equal to the base's
//           scheme is treated as if it were absent
//
// Steps, each returning early once this URI is specific enough:
//   1. a URI with a scheme is left alone;
//   2. otherwise the scheme is inherited; a URI with a server stops here;
//   3. otherwise userinfo, server, host type and port are inherited;
//   4. with no path, the base path (and, if this URI has no query, the base
//      query) is inherited; with a relative path, the path is merged onto
//      the base path, consuming leading ".." segments.
// The fragment is never inherited.
//
// NOTE(review): the merge code reads m_path[0u], base.m_path[0], *(op+1)
// and *(op+2) without checking the string lengths first -- confirm these are
// safe for short or empty paths.
785 void wxURI::Resolve(const wxURI& base, int flags)
786 {
787 wxASSERT_MSG(!base.IsReference(),
788 wxT("wxURI to inherit from must not be a reference!"));
789
790 // If we aren't being strict, enable the older (pre-RFC2396)
791 // loophole that allows this uri to inherit other
792 // properties from the base uri - even if the scheme
793 // is defined
794 if ( !(flags & wxURI_STRICT) &&
795 HasScheme() && base.HasScheme() &&
796 m_scheme == base.m_scheme )
797 {
// drop the scheme flag so the HasScheme() test below falls through
798 m_fields -= wxURI_SCHEME;
799 }
800
801
802 // Do nothing if this is an absolute wxURI
803 // if defined(R.scheme) then
804 // T.scheme = R.scheme;
805 // T.authority = R.authority;
806 // T.path = remove_dot_segments(R.path);
807 // T.query = R.query;
808 if (HasScheme())
809 {
810 return;
811 }
812
813 //No scheme - inherit
814 m_scheme = base.m_scheme;
815 m_fields |= wxURI_SCHEME;
816
817 // All we need to do for relative URIs with an
818 // authority component is just inherit the scheme
819 // if defined(R.authority) then
820 // T.authority = R.authority;
821 // T.path = remove_dot_segments(R.path);
822 // T.query = R.query;
823 if (HasServer())
824 {
825 return;
826 }
827
828 //No authority - inherit
829 if (base.HasUserInfo())
830 {
831 m_userinfo = base.m_userinfo;
832 m_fields |= wxURI_USERINFO;
833 }
834
835 m_server = base.m_server;
836 m_hostType = base.m_hostType;
837 m_fields |= wxURI_SERVER;
838
839 if (base.HasPort())
840 {
841 m_port = base.m_port;
842 m_fields |= wxURI_PORT;
843 }
844
845
846 // Simple path inheritance from base
847 if (!HasPath())
848 {
849 // T.path = Base.path;
850 m_path = base.m_path;
851 m_fields |= wxURI_PATH;
852
853
854 // if defined(R.query) then
855 // T.query = R.query;
856 // else
857 // T.query = Base.query;
858 // endif;
859 if (!HasQuery())
860 {
861 m_query = base.m_query;
862 m_fields |= wxURI_QUERY;
863 }
864 }
865 else
866 {
867 // if (R.path starts-with "/") then
868 // T.path = remove_dot_segments(R.path);
869 // else
870 // T.path = merge(Base.path, R.path);
871 // T.path = remove_dot_segments(T.path);
872 // endif;
873 // T.query = R.query;
874 if (m_path[0u] != wxT('/'))
875 {
876 //Merge paths
// op: read position in our own path; bp: end of the part of the base
// path that is kept
877 wxString::const_iterator op = m_path.begin();
878 wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
879
880 //not a ending directory? move up
881 if (base.m_path[0] && *(bp-1) != wxT('/'))
882 UpTree(base.m_path.begin(), bp);
883
884 //normalize directories
// each leading ".." or "../" of our path removes one more base directory
885 while(*op == wxT('.') && *(op+1) == wxT('.') &&
886 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
887 {
888 UpTree(base.m_path.begin(), bp);
889
890 if (*(op+2) == '\0')
891 op += 2;
892 else
893 op += 3;
894 }
895
// kept base prefix + what is left of our own path
896 m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
897 m_path.substr((op - m_path.begin()), m_path.length());
898 }
899 }
900
901 //T.fragment = R.fragment;
902 }
903
904 // ---------------------------------------------------------------------------
905 // UpTree
906 //
907 // Moves a URI path up a directory
908 // ---------------------------------------------------------------------------
909
910 //static
911 void wxURI::UpTree(wxString::const_iterator uristart,
912 wxString::const_iterator& uri)
913 {
914 if (uri != uristart && *(uri-1) == wxT('/'))
915 {
916 uri -= 2;
917 }
918
919 for(;uri != uristart; --uri)
920 {
921 if (*uri == wxT('/'))
922 {
923 ++uri;
924 break;
925 }
926 }
927
928 //!!!TODO:HACK!!!//
929 if (uri == uristart && *uri == wxT('/'))
930 ++uri;
931 //!!!//
932 }
933
934 // FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
// Raw-pointer twin of the iterator UpTree(): moves `uri` one directory up
// within the path starting at `uristart`.
//
//   uristart - lower bound used by the backwards search
//   uri      - in/out cursor; on return it points just past the '/' ending
//              the parent directory, or at/after `uristart` if none is found
//
// NOTE(review): when `uri` is exactly one past `uristart` and follows a '/',
// the "-= 2" below steps one position before `uristart` -- confirm every
// caller passes a `uristart` with at least one valid character in front of it.
935 /*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
936 {
937 if (uri != uristart && *(uri-1) == wxT('/'))
938 {
// skip the trailing slash and the character before it
939 uri -= 2;
940 }
941
// walk back to the previous '/' and stop right after it
942 for(;uri != uristart; --uri)
943 {
944 if (*uri == wxT('/'))
945 {
946 ++uri;
947 break;
948 }
949 }
950
951 //!!!TODO:HACK!!!//
// reached the start and it is a '/': stay behind it
952 if (uri == uristart && *uri == wxT('/'))
953 ++uri;
954 //!!!//
955 }
956 // end of FIXME-UTF8
957
958 // ---------------------------------------------------------------------------
959 // Normalize
960 //
961 // Normalizes directories in-place
962 //
963 // I.E. ./ and . are ignored
964 //
965 // ../ and .. are removed if a directory is before it, along
966 // with that directory (leading .. and ../ are kept)
967 // ---------------------------------------------------------------------------
968
969 //static
// Removes "." and ".." segments from the NUL-terminated path `s`, in place.
//
//   s            - path buffer; also used as the write cursor while the
//                  result is written back over the input
//   bIgnoreLeads - when true, ".." segments that have no directory left to
//                  remove are dropped; when false they are kept in the output
//
// Three cursors are used: `cp` reads the input, `s` writes the output and
// `bp` marks the lowest position the output may be rolled back to (just past
// a leading '/', and past any leading ".." segments that were kept).
970 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
971 {
972 wxChar* cp = s;
973 wxChar* bp = s;
974
// never roll the output back over the root slash
975 if(s[0] == wxT('/'))
976 ++bp;
977
978 while(*cp)
979 {
// a "." segment: a dot at the start of a segment followed by '/' or the end
980 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
981 && (bp == cp || *(cp-1) == wxT('/')))
982 {
983 //. _or_ ./ - ignore
984 if (*(cp+1) == '\0')
985 cp += 1;
986 else
987 cp += 2;
988 }
989 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
990 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
991 && (bp == cp || *(cp-1) == wxT('/')))
992 {
993 //.. _or_ ../ - go up the tree
// something has been written above bp: remove the last directory
994 if (s != bp)
995 {
996 UpTree((const wxChar*)bp, (const wxChar*&)s);
997
998 if (*(cp+2) == '\0')
999 cp += 2;
1000 else
1001 cp += 3;
1002 }
// nothing to remove and leads are kept: copy "../" (or "..") through
// and raise bp so it cannot be removed later
1003 else if (!bIgnoreLeads)
1004
1005 {
1006 *bp++ = *cp++;
1007 *bp++ = *cp++;
1008 if (*cp)
1009 *bp++ = *cp++;
1010
1011 s = bp;
1012 }
// nothing to remove and leads are ignored: just skip the segment
1013 else
1014 {
1015 if (*(cp+2) == '\0')
1016 cp += 2;
1017 else
1018 cp += 3;
1019 }
1020 }
// ordinary character: copy it to the output
1021 else
1022 *s++ = *cp++;
1023 }
1024
// terminate the (possibly shorter) result
1025 *s = '\0';
1026 }
1027
1028 // ---------------------------------------------------------------------------
1029 // ParseH16
1030 //
1031 // Parses 1 to 4 hex digits. Returns true if the first character of the input
1032 // string is a valid hex character. It is the caller's responsibility to move
1033 // the input string back to its original position on failure.
1034 // ---------------------------------------------------------------------------
1035
// Consumes one group of up to four hex digits.
//
// The cursor is advanced BEFORE each character is examined, so the character
// `uri` points at on entry is skipped and the group is taken to start at the
// following character. Returns false (with `uri` already advanced by one) if
// that character is not a hex digit; otherwise returns true with `uri` on the
// first character after the group.
// NOTE(review): callers appear to rely on the skipped character being a
// separator such as ':' -- confirm at each call site.
1036 bool wxURI::ParseH16(const wxChar*& uri)
1037 {
1038 // h16 = 1*4HEXDIG
1039 if(!IsHex(*++uri))
1040 return false;
1041
// up to three more digits; each test stops on the first non-hex character,
// and after a fourth digit the cursor is moved past it
1042 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1043 ++uri;
1044
1045 return true;
1046 }
1047
1048 // ---------------------------------------------------------------------------
1049 // ParseIPXXX
1050 //
1051 // Parses a certain version of an IP address and moves the input string past
1052 // it. Returns true if the input string contains the proper version of an IP
1053 // address. It is the caller's responsibility to move the input string back
1054 // to its original position on failure.
1055 // ---------------------------------------------------------------------------
1056
// Tries to consume a dotted-quad IPv4 address at `uri`.
//
// `iIPv4` counts the octets accepted so far. Each octet is one to three
// digits; a three-digit octet above 255 makes the function return false
// immediately. Returns true only if exactly four octets were read. `uri` is
// advanced as characters are consumed and is NOT restored on failure.
// NOTE(review): nothing here checks what follows the fourth octet -- confirm
// callers validate the next character.
1057 bool wxURI::ParseIPv4address(const wxChar*& uri)
1058 {
1059 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1060 //
1061 //dec-octet = DIGIT ; 0-9
1062 // / %x31-39 DIGIT ; 10-99
1063 // / "1" 2DIGIT ; 100-199
1064 // / "2" %x30-34 DIGIT ; 200-249
1065 // / "25" %x30-35 ; 250-255
1066 size_t iIPv4 = 0;
1067 if (IsDigit(*uri))
1068 {
1069 ++iIPv4;
1070
1071
1072 //each ip part must be between 0-255 (dupe of version in for loop)
// the two IsDigit() tests advance the cursor; the range check only runs when
// the octet has three digits, with uri then on the third one
1073 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1074 //first digit is 0 or 1, i.e. 199 or less (note the ! in front)
1075 !( (*(uri-2) < wxT('2')) ||
1076 //first digit is 2: 255 or less
1077 (*(uri-2) == wxT('2') &&
1078 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1079 )
1080 )
1081 )
1082 {
1083 return false;
1084 }
1085
// step over the third digit, if there was one
1086 if(IsDigit(*uri))++uri;
1087
1088 //compilers should unroll this loop
1089 for(; iIPv4 < 4; ++iIPv4)
1090 {
// every further octet must be introduced by '.' and start with a digit
1091 if (*uri != wxT('.') || !IsDigit(*++uri))
1092 break;
1093
1094 //each ip part must be between 0-255
1095 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1096 //first digit is 0 or 1, i.e. 199 or less (note the ! in front)
1097 !( (*(uri-2) < wxT('2')) ||
1098 //first digit is 2: 255 or less
1099 (*(uri-2) == wxT('2') &&
1100 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1101 )
1102 )
1103 )
1104 {
1105 return false;
1106 }
1107 if(IsDigit(*uri))++uri;
1108 }
1109 }
1110 return iIPv4 == 4;
1111 }
1112
// Tries to consume an IPv6 address at `uri`, following the grammar quoted
// below.
//
// `numPrefix` counts the h16 groups read before a "::" (at most 6 are tried),
// `maxPostfix` is the number of "h16:" groups then required after the "::",
// and `bEndHex` records that the prefix loop stopped because a group failed
// to parse. `uri` is advanced while parsing and is NOT restored on failure.
//
// NOTE(review): ParseH16() pre-increments the cursor before testing a
// character; the "--uri" statements after a failed ParseH16() undo that one
// step. Confirm the cursor position callers pass in matches this convention.
// NOTE(review): the final "if" returns true on both paths, so its condition
// has no effect on the result (only on the cursor) -- confirm this is
// intended.
1113 bool wxURI::ParseIPv6address(const wxChar*& uri)
1114 {
1115 // IPv6address = 6( h16 ":" ) ls32
1116 // / "::" 5( h16 ":" ) ls32
1117 // / [ h16 ] "::" 4( h16 ":" ) ls32
1118 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1119 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1120 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1121 // / [ *4( h16 ":" ) h16 ] "::" ls32
1122 // / [ *5( h16 ":" ) h16 ] "::" h16
1123 // / [ *6( h16 ":" ) h16 ] "::"
1124
1125 size_t numPrefix = 0,
1126 maxPostfix;
1127
1128 bool bEndHex = false;
1129
// read leading "h16:" groups until one fails or is not followed by ':'
1130 for( ; numPrefix < 6; ++numPrefix)
1131 {
1132 if(!ParseH16(uri))
1133 {
1134 --uri;
1135 bEndHex = true;
1136 break;
1137 }
1138
1139 if(*uri != wxT(':'))
1140 {
1141 break;
1142 }
1143 }
1144
1145 if(!bEndHex && !ParseH16(uri))
1146 {
1147 --uri;
1148
1149 if (numPrefix)
1150 return false;
1151
1152 if (*uri == wxT(':'))
1153 {
1154 if (*++uri != wxT(':'))
1155 return false;
1156
1157 maxPostfix = 5;
1158 }
1159 else
1160 maxPostfix = 6;
1161 }
1162 else
1163 {
// no "::" at the cursor: only the full form 6( h16 ":" ) ls32 is possible
1164 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1165 {
1166 if (numPrefix != 6)
1167 return false;
1168
// back up to just after the previous ':' and re-read the tail as ls32
1169 while (*--uri != wxT(':')) {}
1170 ++uri;
1171
1172 const wxChar* uristart = uri;
1173 //parse ls32
1174 // ls32 = ( h16 ":" h16 ) / IPv4address
1175 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1176 return true;
1177
1178 uri = uristart;
1179
1180 if (ParseIPv4address(uri))
1181 return true;
1182 else
1183 return false;
1184 }
1185 else
1186 {
// skip the "::"
1187 uri += 2;
1188
1189 if (numPrefix > 3)
1190 maxPostfix = 0;
1191 else
1192 maxPostfix = 4 - numPrefix;
1193 }
1194 }
1195
1196 bool bAllowAltEnding = maxPostfix == 0;
1197
// the required "h16:" groups after the "::"
1198 for(; maxPostfix != 0; --maxPostfix)
1199 {
1200 if(!ParseH16(uri) || *uri != wxT(':'))
1201 return false;
1202 }
1203
1204 if(numPrefix <= 4)
1205 {
1206 const wxChar* uristart = uri;
1207 //parse ls32
1208 // ls32 = ( h16 ":" h16 ) / IPv4address
1209 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1210 return true;
1211
1212 uri = uristart;
1213
1214 if (ParseIPv4address(uri))
1215 return true;
1216
1217 uri = uristart;
1218
1219 if (!bAllowAltEnding)
1220 return false;
1221 }
1222
1223 if(numPrefix <= 5 && ParseH16(uri))
1224 return true;
1225
1226 return true;
1227 }
1228
// Tries to consume an IPvFuture literal ("v" hex-digits "." characters).
//
// The cursor is advanced BEFORE each character is examined, so the character
// `uri` points at on entry is skipped. Returns true once at least one hex
// digit, a '.', and at least one allowed character have been read, leaving
// `uri` on the first character that is not part of the literal. `uri` is NOT
// restored on failure.
1229 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1230 {
1231 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1232 if (*++uri != wxT('v') || !IsHex(*++uri))
1233 return false;
1234
// remaining hex digits of the version
1235 while (IsHex(*++uri)) {}
1236
// the '.' must be followed by at least one allowed character
1237 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1238 return false;
1239
1240 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1241
1242 return true;
1243 }
1244
1245
1246 // ---------------------------------------------------------------------------
1247 // CharToHex
1248 //
1249 // Converts a character into a numeric hexadecimal value, or 0 if the
1250 // passed in character is not a valid hex character
1251 // ---------------------------------------------------------------------------
1252
1253 //static
1254 wxChar wxURI::CharToHex(const wxChar& c)
1255 {
1256 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1257 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1258 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1259
1260 return 0;
1261 }
1262
1263 // ---------------------------------------------------------------------------
1264 // IsXXX
1265 //
1266 // Returns true if the passed in character meets the criteria of the method
1267 // ---------------------------------------------------------------------------
1268
1269 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1270 bool wxURI::IsUnreserved (const wxChar& c)
1271 { return IsAlpha(c) || IsDigit(c) ||
1272 c == wxT('-') ||
1273 c == wxT('.') ||
1274 c == wxT('_') ||
1275 c == wxT('~') //tilde
1276 ;
1277 }
1278
1279 bool wxURI::IsReserved (const wxChar& c)
1280 {
1281 return IsGenDelim(c) || IsSubDelim(c);
1282 }
1283
1284 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1285 bool wxURI::IsGenDelim (const wxChar& c)
1286 {
1287 return c == wxT(':') ||
1288 c == wxT('/') ||
1289 c == wxT('?') ||
1290 c == wxT('#') ||
1291 c == wxT('[') ||
1292 c == wxT(']') ||
1293 c == wxT('@');
1294 }
1295
1296 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1297 //! / "*" / "+" / "," / ";" / "="
1298 bool wxURI::IsSubDelim (const wxChar& c)
1299 {
1300 return c == wxT('!') ||
1301 c == wxT('$') ||
1302 c == wxT('&') ||
1303 c == wxT('\'') ||
1304 c == wxT('(') ||
1305 c == wxT(')') ||
1306 c == wxT('*') ||
1307 c == wxT('+') ||
1308 c == wxT(',') ||
1309 c == wxT(';') ||
1310 c == wxT('=')
1311 ;
1312 }
1313
1314 bool wxURI::IsHex(const wxChar& c)
1315 { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1316
1317 bool wxURI::IsAlpha(const wxChar& c)
1318 { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
1319
1320 bool wxURI::IsDigit(const wxChar& c)
1321 { return c >= wxT('0') && c <= wxT('9'); }
1322
1323
1324 //end of uri.cpp
1325
1326
1327