]> git.saurik.com Git - wxWidgets.git/blob - src/common/uri.cpp
5ddfbc3cbc88806824fbf2ef1b75ba9a814dbee3
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 // For compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #include "wx/uri.h"
27
28 // ---------------------------------------------------------------------------
29 // definitions
30 // ---------------------------------------------------------------------------
31
32 IMPLEMENT_CLASS(wxURI, wxObject)
33
34 // ===========================================================================
35 // implementation
36 // ===========================================================================
37
38 // ---------------------------------------------------------------------------
39 // utilities
40 // ---------------------------------------------------------------------------
41
42 // ---------------------------------------------------------------------------
43 //
44 // wxURI
45 //
46 // ---------------------------------------------------------------------------
47
48 // ---------------------------------------------------------------------------
49 // Constructors
50 // ---------------------------------------------------------------------------
51
52 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
53 {
54 }
55
56 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 Create(uri);
59 }
60
61 wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
62 {
63 Assign(uri);
64 }
65
66 // ---------------------------------------------------------------------------
67 // Destructor and cleanup
68 // ---------------------------------------------------------------------------
69
70 wxURI::~wxURI()
71 {
72 Clear();
73 }
74
75 void wxURI::Clear()
76 {
77 m_scheme = m_userinfo = m_server = m_port = m_path =
78 m_query = m_fragment = wxEmptyString;
79
80 m_hostType = wxURI_REGNAME;
81
82 m_fields = 0;
83 }
84
85 // ---------------------------------------------------------------------------
86 // Create
87 //
88 // This creates the URI - all we do here is call the main parsing method
89 // ---------------------------------------------------------------------------
90
91 const wxChar* wxURI::Create(const wxString& uri)
92 {
93 if (m_fields)
94 Clear();
95
96 // FIXME-UTF8: rewrite ParseXXX() methods using iterators
97 // NB: using wxWxCharBuffer instead of just c_str() avoids keeping
98 // converted string in memory for longer than needed
99 return Parse(wxWxCharBuffer(uri.c_str()));
100 }
101
102 // ---------------------------------------------------------------------------
103 // Escape Methods
104 //
105 // TranslateEscape unencodes a 3 character URL escape sequence
106 //
107 // Escape encodes an invalid URI character into a 3 character sequence
108 //
// IsEscape determines if the input string starts with an escape sequence
// ("%" followed by two hex digits); it does not advance the input string
111 //
112 // Unescape unencodes all 3 character URL escape sequences in a wxString
113 // ---------------------------------------------------------------------------
114
115 wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
116 {
117 wxChar c1(*s);
118 wxChar c2(*(s + 1));
119
120 wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
121
122 return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
123 }
124
125 wxString wxURI::Unescape(const wxString& uri)
126 {
127 wxString new_uri;
128
129 for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
130 {
131 if ( *i == wxT('%') )
132 {
133 new_uri += wxURI::TranslateEscape(i + 1);
134 i += 2;
135 }
136 else
137 new_uri += *i;
138 }
139
140 return new_uri;
141 }
142
143 void wxURI::Escape(wxString& s, const wxChar& c)
144 {
145 const wxChar* hdig = wxT("0123456789abcdef");
146 s += wxT('%');
147 s += hdig[(c >> 4) & 15];
148 s += hdig[c & 15];
149 }
150
151 bool wxURI::IsEscape(const wxChar*& uri)
152 {
153 // pct-encoded = "%" HEXDIG HEXDIG
154 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
155 return true;
156 else
157 return false;
158 }
159
160 // ---------------------------------------------------------------------------
161 // GetUser
162 // GetPassword
163 //
164 // Gets the username and password via the old URL method.
165 // ---------------------------------------------------------------------------
166 wxString wxURI::GetUser() const
167 {
168 size_t dwPasswordPos = m_userinfo.find(':');
169
170 if (dwPasswordPos == wxString::npos)
171 dwPasswordPos = 0;
172
173 return m_userinfo(0, dwPasswordPos);
174 }
175
176 wxString wxURI::GetPassword() const
177 {
178 size_t dwPasswordPos = m_userinfo.find(':');
179
180 if (dwPasswordPos == wxString::npos)
181 return wxT("");
182 else
183 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
184 }
185
186 // ---------------------------------------------------------------------------
187 // BuildURI
188 //
189 // BuildURI() builds the entire URI into a useable
190 // representation, including proper identification characters such as slashes
191 //
192 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
193 // the components that accept escape sequences
194 // ---------------------------------------------------------------------------
195
196 wxString wxURI::BuildURI() const
197 {
198 wxString ret;
199
200 if (HasScheme())
201 ret = ret + m_scheme + wxT(":");
202
203 if (HasServer())
204 {
205 ret += wxT("//");
206
207 if (HasUserInfo())
208 ret = ret + m_userinfo + wxT("@");
209
210 ret += m_server;
211
212 if (HasPort())
213 ret = ret + wxT(":") + m_port;
214 }
215
216 ret += m_path;
217
218 if (HasQuery())
219 ret = ret + wxT("?") + m_query;
220
221 if (HasFragment())
222 ret = ret + wxT("#") + m_fragment;
223
224 return ret;
225 }
226
227 wxString wxURI::BuildUnescapedURI() const
228 {
229 wxString ret;
230
231 if (HasScheme())
232 ret = ret + m_scheme + wxT(":");
233
234 if (HasServer())
235 {
236 ret += wxT("//");
237
238 if (HasUserInfo())
239 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
240
241 if (m_hostType == wxURI_REGNAME)
242 ret += wxURI::Unescape(m_server);
243 else
244 ret += m_server;
245
246 if (HasPort())
247 ret = ret + wxT(":") + m_port;
248 }
249
250 ret += wxURI::Unescape(m_path);
251
252 if (HasQuery())
253 ret = ret + wxT("?") + wxURI::Unescape(m_query);
254
255 if (HasFragment())
256 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
257
258 return ret;
259 }
260
261 // ---------------------------------------------------------------------------
262 // Assignment
263 // ---------------------------------------------------------------------------
264
265 wxURI& wxURI::Assign(const wxURI& uri)
266 {
267 //assign fields
268 m_fields = uri.m_fields;
269
270 //ref over components
271 m_scheme = uri.m_scheme;
272 m_userinfo = uri.m_userinfo;
273 m_server = uri.m_server;
274 m_hostType = uri.m_hostType;
275 m_port = uri.m_port;
276 m_path = uri.m_path;
277 m_query = uri.m_query;
278 m_fragment = uri.m_fragment;
279
280 return *this;
281 }
282
283 wxURI& wxURI::operator = (const wxURI& uri)
284 {
285 return Assign(uri);
286 }
287
288 wxURI& wxURI::operator = (const wxString& string)
289 {
290 Create(string);
291 return *this;
292 }
293
294 // ---------------------------------------------------------------------------
295 // Comparison
296 // ---------------------------------------------------------------------------
297
298 bool wxURI::operator == (const wxURI& uri) const
299 {
300 if (HasScheme())
301 {
302 if(m_scheme != uri.m_scheme)
303 return false;
304 }
305 else if (uri.HasScheme())
306 return false;
307
308
309 if (HasServer())
310 {
311 if (HasUserInfo())
312 {
313 if (m_userinfo != uri.m_userinfo)
314 return false;
315 }
316 else if (uri.HasUserInfo())
317 return false;
318
319 if (m_server != uri.m_server ||
320 m_hostType != uri.m_hostType)
321 return false;
322
323 if (HasPort())
324 {
325 if(m_port != uri.m_port)
326 return false;
327 }
328 else if (uri.HasPort())
329 return false;
330 }
331 else if (uri.HasServer())
332 return false;
333
334
335 if (HasPath())
336 {
337 if(m_path != uri.m_path)
338 return false;
339 }
340 else if (uri.HasPath())
341 return false;
342
343 if (HasQuery())
344 {
345 if (m_query != uri.m_query)
346 return false;
347 }
348 else if (uri.HasQuery())
349 return false;
350
351 if (HasFragment())
352 {
353 if (m_fragment != uri.m_fragment)
354 return false;
355 }
356 else if (uri.HasFragment())
357 return false;
358
359 return true;
360 }
361
362 // ---------------------------------------------------------------------------
363 // IsReference
364 //
365 // if there is no authority or scheme, it is a reference
366 // ---------------------------------------------------------------------------
367
368 bool wxURI::IsReference() const
369 { return !HasScheme() || !HasServer(); }
370
371 // ---------------------------------------------------------------------------
372 // Parse
373 //
374 // Master URI parsing method. Just calls the individual parsing methods
375 //
376 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
377 // URI-reference = URI / relative
378 // ---------------------------------------------------------------------------
379
380 const wxChar* wxURI::Parse(const wxChar *uri)
381 {
382 uri = ParseScheme(uri);
383 uri = ParseAuthority(uri);
384 uri = ParsePath(uri);
385 uri = ParseQuery(uri);
386 return ParseFragment(uri);
387 }
388
389 // ---------------------------------------------------------------------------
390 // ParseXXX
391 //
392 // Individual parsers for each URI component
393 // ---------------------------------------------------------------------------
394
395 const wxChar* wxURI::ParseScheme(const wxChar *uri)
396 {
397 wxASSERT(uri != NULL);
398
399 //copy of the uri - used for figuring out
400 //length of each component
401 const wxChar* uricopy = uri;
402
403 //Does the uri have a scheme (first character alpha)?
404 if (IsAlpha(*uri))
405 {
406 m_scheme += *uri++;
407
408 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
409 while (IsAlpha(*uri) || IsDigit(*uri) ||
410 *uri == wxT('+') ||
411 *uri == wxT('-') ||
412 *uri == wxT('.'))
413 {
414 m_scheme += *uri++;
415 }
416
417 //valid scheme?
418 if (*uri == wxT(':'))
419 {
420 //mark the scheme as valid
421 m_fields |= wxURI_SCHEME;
422
423 //move reference point up to input buffer
424 uricopy = ++uri;
425 }
426 else
427 //relative uri with relative path reference
428 m_scheme = wxEmptyString;
429 }
430 // else
431 //relative uri with _possible_ relative path reference
432
433 return uricopy;
434 }
435
436 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
437 {
438 // authority = [ userinfo "@" ] host [ ":" port ]
439 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
440 {
441 //skip past the two slashes
442 uri += 2;
443
444 // ############# DEVIATION FROM RFC #########################
445 // Don't parse the server component for file URIs
446 if(m_scheme != wxT("file"))
447 {
448 //normal way
449 uri = ParseUserInfo(uri);
450 uri = ParseServer(uri);
451 return ParsePort(uri);
452 }
453 }
454
455 return uri;
456 }
457
458 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
459 {
460 wxASSERT(uri != NULL);
461
462 //copy of the uri - used for figuring out
463 //length of each component
464 const wxChar* uricopy = uri;
465
466 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
467 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
468 {
469 if(IsUnreserved(*uri) ||
470 IsSubDelim(*uri) || *uri == wxT(':'))
471 m_userinfo += *uri++;
472 else if (IsEscape(uri))
473 {
474 m_userinfo += *uri++;
475 m_userinfo += *uri++;
476 m_userinfo += *uri++;
477 }
478 else
479 Escape(m_userinfo, *uri++);
480 }
481
482 if(*uri == wxT('@'))
483 {
484 //valid userinfo
485 m_fields |= wxURI_USERINFO;
486
487 uricopy = ++uri;
488 }
489 else
490 m_userinfo = wxEmptyString;
491
492 return uricopy;
493 }
494
495 const wxChar* wxURI::ParseServer(const wxChar* uri)
496 {
497 wxASSERT(uri != NULL);
498
499 //copy of the uri - used for figuring out
500 //length of each component
501 const wxChar* uricopy = uri;
502
503 // host = IP-literal / IPv4address / reg-name
504 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
505 if (*uri == wxT('['))
506 {
507 ++uri; //some compilers don't support *&ing a ++*
508 if (ParseIPv6address(uri) && *uri == wxT(']'))
509 {
510 ++uri;
511 m_hostType = wxURI_IPV6ADDRESS;
512
513 wxStringBufferLength theBuffer(m_server, uri - uricopy);
514 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
515 theBuffer.SetLength(uri-uricopy);
516 }
517 else
518 {
519 uri = uricopy;
520
521 ++uri; //some compilers don't support *&ing a ++*
522 if (ParseIPvFuture(uri) && *uri == wxT(']'))
523 {
524 ++uri;
525 m_hostType = wxURI_IPVFUTURE;
526
527 wxStringBufferLength theBuffer(m_server, uri - uricopy);
528 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
529 theBuffer.SetLength(uri-uricopy);
530 }
531 else
532 uri = uricopy;
533 }
534 }
535 else
536 {
537 if (ParseIPv4address(uri))
538 {
539 m_hostType = wxURI_IPV4ADDRESS;
540
541 wxStringBufferLength theBuffer(m_server, uri - uricopy);
542 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
543 theBuffer.SetLength(uri-uricopy);
544 }
545 else
546 uri = uricopy;
547 }
548
549 if(m_hostType == wxURI_REGNAME)
550 {
551 uri = uricopy;
552 // reg-name = *( unreserved / pct-encoded / sub-delims )
553 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
554 {
555 if(IsUnreserved(*uri) || IsSubDelim(*uri))
556 m_server += *uri++;
557 else if (IsEscape(uri))
558 {
559 m_server += *uri++;
560 m_server += *uri++;
561 m_server += *uri++;
562 }
563 else
564 Escape(m_server, *uri++);
565 }
566 }
567
568 //mark the server as valid
569 m_fields |= wxURI_SERVER;
570
571 return uri;
572 }
573
574
575 const wxChar* wxURI::ParsePort(const wxChar* uri)
576 {
577 wxASSERT(uri != NULL);
578
579 // port = *DIGIT
580 if(*uri == wxT(':'))
581 {
582 ++uri;
583 while(IsDigit(*uri))
584 {
585 m_port += *uri++;
586 }
587
588 //mark the port as valid
589 m_fields |= wxURI_PORT;
590 }
591
592 return uri;
593 }
594
595 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
596 {
597 wxASSERT(uri != NULL);
598
599 //copy of the uri - used for figuring out
600 //length of each component
601 const wxChar* uricopy = uri;
602
603 /// hier-part = "//" authority path-abempty
604 /// / path-absolute
605 /// / path-rootless
606 /// / path-empty
607 ///
608 /// relative-part = "//" authority path-abempty
609 /// / path-absolute
610 /// / path-noscheme
611 /// / path-empty
612 ///
613 /// path-abempty = *( "/" segment )
614 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
615 /// path-noscheme = segment-nz-nc *( "/" segment )
616 /// path-rootless = segment-nz *( "/" segment )
617 /// path-empty = 0<pchar>
618 ///
619 /// segment = *pchar
620 /// segment-nz = 1*pchar
621 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
622 /// ; non-zero-length segment without any colon ":"
623 ///
624 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
625 if (*uri == wxT('/'))
626 {
627 m_path += *uri++;
628
629 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
630 {
631 if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
632 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
633 m_path += *uri++;
634 else if (IsEscape(uri))
635 {
636 m_path += *uri++;
637 m_path += *uri++;
638 m_path += *uri++;
639 }
640 else
641 Escape(m_path, *uri++);
642 }
643
644 if (bNormalize)
645 {
646 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
647 #if wxUSE_STL || wxUSE_UNICODE_UTF8
648 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
649 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
650 #endif
651 Normalize(theBuffer, true);
652 theBuffer.SetLength(wxStrlen(theBuffer));
653 }
654 //mark the path as valid
655 m_fields |= wxURI_PATH;
656 }
657 else if(*uri) //Relative path
658 {
659 if (bReference)
660 {
661 //no colon allowed
662 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
663 {
664 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
665 *uri == wxT('@') || *uri == wxT('/'))
666 m_path += *uri++;
667 else if (IsEscape(uri))
668 {
669 m_path += *uri++;
670 m_path += *uri++;
671 m_path += *uri++;
672 }
673 else
674 Escape(m_path, *uri++);
675 }
676 }
677 else
678 {
679 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
680 {
681 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
682 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
683 m_path += *uri++;
684 else if (IsEscape(uri))
685 {
686 m_path += *uri++;
687 m_path += *uri++;
688 m_path += *uri++;
689 }
690 else
691 Escape(m_path, *uri++);
692 }
693 }
694
695 if (uri != uricopy)
696 {
697 if (bNormalize)
698 {
699 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
700 #if wxUSE_STL || wxUSE_UNICODE_UTF8
701 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
702 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
703 #endif
704 Normalize(theBuffer);
705 theBuffer.SetLength(wxStrlen(theBuffer));
706 }
707
708 //mark the path as valid
709 m_fields |= wxURI_PATH;
710 }
711 }
712
713 return uri;
714 }
715
716
717 const wxChar* wxURI::ParseQuery(const wxChar* uri)
718 {
719 wxASSERT(uri != NULL);
720
721 // query = *( pchar / "/" / "?" )
722 if (*uri == wxT('?'))
723 {
724 ++uri;
725 while(*uri && *uri != wxT('#'))
726 {
727 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
728 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
729 m_query += *uri++;
730 else if (IsEscape(uri))
731 {
732 m_query += *uri++;
733 m_query += *uri++;
734 m_query += *uri++;
735 }
736 else
737 Escape(m_query, *uri++);
738 }
739
740 //mark the server as valid
741 m_fields |= wxURI_QUERY;
742 }
743
744 return uri;
745 }
746
747
748 const wxChar* wxURI::ParseFragment(const wxChar* uri)
749 {
750 wxASSERT(uri != NULL);
751
752 // fragment = *( pchar / "/" / "?" )
753 if (*uri == wxT('#'))
754 {
755 ++uri;
756 while(*uri)
757 {
758 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
759 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
760 m_fragment += *uri++;
761 else if (IsEscape(uri))
762 {
763 m_fragment += *uri++;
764 m_fragment += *uri++;
765 m_fragment += *uri++;
766 }
767 else
768 Escape(m_fragment, *uri++);
769 }
770
771 //mark the server as valid
772 m_fields |= wxURI_FRAGMENT;
773 }
774
775 return uri;
776 }
777
778 // ---------------------------------------------------------------------------
779 // Resolve
780 //
781 // Builds missing components of this uri from a base uri
782 //
783 // A version of the algorithm outlined in the RFC is used here
784 // (it is shown in comments)
785 //
786 // Note that an empty URI inherits all components
787 // ---------------------------------------------------------------------------
788
789 void wxURI::Resolve(const wxURI& base, int flags)
790 {
791 wxASSERT_MSG(!base.IsReference(),
792 wxT("wxURI to inherit from must not be a reference!"));
793
794 // If we arn't being strict, enable the older (pre-RFC2396)
795 // loophole that allows this uri to inherit other
796 // properties from the base uri - even if the scheme
797 // is defined
798 if ( !(flags & wxURI_STRICT) &&
799 HasScheme() && base.HasScheme() &&
800 m_scheme == base.m_scheme )
801 {
802 m_fields -= wxURI_SCHEME;
803 }
804
805
806 // Do nothing if this is an absolute wxURI
807 // if defined(R.scheme) then
808 // T.scheme = R.scheme;
809 // T.authority = R.authority;
810 // T.path = remove_dot_segments(R.path);
811 // T.query = R.query;
812 if (HasScheme())
813 {
814 return;
815 }
816
817 //No scheme - inherit
818 m_scheme = base.m_scheme;
819 m_fields |= wxURI_SCHEME;
820
821 // All we need to do for relative URIs with an
822 // authority component is just inherit the scheme
823 // if defined(R.authority) then
824 // T.authority = R.authority;
825 // T.path = remove_dot_segments(R.path);
826 // T.query = R.query;
827 if (HasServer())
828 {
829 return;
830 }
831
832 //No authority - inherit
833 if (base.HasUserInfo())
834 {
835 m_userinfo = base.m_userinfo;
836 m_fields |= wxURI_USERINFO;
837 }
838
839 m_server = base.m_server;
840 m_hostType = base.m_hostType;
841 m_fields |= wxURI_SERVER;
842
843 if (base.HasPort())
844 {
845 m_port = base.m_port;
846 m_fields |= wxURI_PORT;
847 }
848
849
850 // Simple path inheritance from base
851 if (!HasPath())
852 {
853 // T.path = Base.path;
854 m_path = base.m_path;
855 m_fields |= wxURI_PATH;
856
857
858 // if defined(R.query) then
859 // T.query = R.query;
860 // else
861 // T.query = Base.query;
862 // endif;
863 if (!HasQuery())
864 {
865 m_query = base.m_query;
866 m_fields |= wxURI_QUERY;
867 }
868 }
869 else
870 {
871 // if (R.path starts-with "/") then
872 // T.path = remove_dot_segments(R.path);
873 // else
874 // T.path = merge(Base.path, R.path);
875 // T.path = remove_dot_segments(T.path);
876 // endif;
877 // T.query = R.query;
878 if (m_path[0u] != wxT('/'))
879 {
880 //Merge paths
881 wxString::const_iterator op = m_path.begin();
882 wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
883
884 //not a ending directory? move up
885 if (base.m_path[0] && *(bp-1) != wxT('/'))
886 UpTree(base.m_path.begin(), bp);
887
888 //normalize directories
889 while(*op == wxT('.') && *(op+1) == wxT('.') &&
890 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
891 {
892 UpTree(base.m_path.begin(), bp);
893
894 if (*(op+2) == '\0')
895 op += 2;
896 else
897 op += 3;
898 }
899
900 m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
901 m_path.substr((op - m_path.begin()), m_path.length());
902 }
903 }
904
905 //T.fragment = R.fragment;
906 }
907
908 // ---------------------------------------------------------------------------
909 // UpTree
910 //
911 // Moves a URI path up a directory
912 // ---------------------------------------------------------------------------
913
914 //static
915 void wxURI::UpTree(wxString::const_iterator uristart,
916 wxString::const_iterator& uri)
917 {
918 if (uri != uristart && *(uri-1) == wxT('/'))
919 {
920 uri -= 2;
921 }
922
923 for(;uri != uristart; --uri)
924 {
925 if (*uri == wxT('/'))
926 {
927 ++uri;
928 break;
929 }
930 }
931
932 //!!!TODO:HACK!!!//
933 if (uri == uristart && *uri == wxT('/'))
934 ++uri;
935 //!!!//
936 }
937
938 // FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
939 /*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
940 {
941 if (uri != uristart && *(uri-1) == wxT('/'))
942 {
943 uri -= 2;
944 }
945
946 for(;uri != uristart; --uri)
947 {
948 if (*uri == wxT('/'))
949 {
950 ++uri;
951 break;
952 }
953 }
954
955 //!!!TODO:HACK!!!//
956 if (uri == uristart && *uri == wxT('/'))
957 ++uri;
958 //!!!//
959 }
960 // end of FIXME-UTF8
961
962 // ---------------------------------------------------------------------------
963 // Normalize
964 //
965 // Normalizes directories in-place
966 //
967 // I.E. ./ and . are ignored
968 //
969 // ../ and .. are removed if a directory is before it, along
970 // with that directory (leading .. and ../ are kept)
971 // ---------------------------------------------------------------------------
972
973 //static
974 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
975 {
976 wxChar* cp = s;
977 wxChar* bp = s;
978
979 if(s[0] == wxT('/'))
980 ++bp;
981
982 while(*cp)
983 {
984 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
985 && (bp == cp || *(cp-1) == wxT('/')))
986 {
987 //. _or_ ./ - ignore
988 if (*(cp+1) == '\0')
989 cp += 1;
990 else
991 cp += 2;
992 }
993 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
994 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
995 && (bp == cp || *(cp-1) == wxT('/')))
996 {
997 //.. _or_ ../ - go up the tree
998 if (s != bp)
999 {
1000 UpTree((const wxChar*)bp, (const wxChar*&)s);
1001
1002 if (*(cp+2) == '\0')
1003 cp += 2;
1004 else
1005 cp += 3;
1006 }
1007 else if (!bIgnoreLeads)
1008
1009 {
1010 *bp++ = *cp++;
1011 *bp++ = *cp++;
1012 if (*cp)
1013 *bp++ = *cp++;
1014
1015 s = bp;
1016 }
1017 else
1018 {
1019 if (*(cp+2) == '\0')
1020 cp += 2;
1021 else
1022 cp += 3;
1023 }
1024 }
1025 else
1026 *s++ = *cp++;
1027 }
1028
1029 *s = '\0';
1030 }
1031
1032 // ---------------------------------------------------------------------------
1033 // ParseH16
1034 //
// Parses 1 to 4 hex values. Returns true if the first character of the input
// string is a valid hex character. It is the caller's responsibility to move
// the input string back to its original position on failure.
1038 // ---------------------------------------------------------------------------
1039
1040 bool wxURI::ParseH16(const wxChar*& uri)
1041 {
1042 // h16 = 1*4HEXDIG
1043 if(!IsHex(*++uri))
1044 return false;
1045
1046 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1047 ++uri;
1048
1049 return true;
1050 }
1051
1052 // ---------------------------------------------------------------------------
1053 // ParseIPXXX
1054 //
// Parses a certain version of an IP address and moves the input string past
// it. Returns true if the input string contains the proper version of an IP
// address. It is the caller's responsibility to move the input string back
// to its original position on failure.
1059 // ---------------------------------------------------------------------------
1060
1061 bool wxURI::ParseIPv4address(const wxChar*& uri)
1062 {
1063 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1064 //
1065 //dec-octet = DIGIT ; 0-9
1066 // / %x31-39 DIGIT ; 10-99
1067 // / "1" 2DIGIT ; 100-199
1068 // / "2" %x30-34 DIGIT ; 200-249
1069 // / "25" %x30-35 ; 250-255
1070 size_t iIPv4 = 0;
1071 if (IsDigit(*uri))
1072 {
1073 ++iIPv4;
1074
1075
1076 //each ip part must be between 0-255 (dupe of version in for loop)
1077 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1078 //100 or less (note !)
1079 !( (*(uri-2) < wxT('2')) ||
1080 //240 or less
1081 (*(uri-2) == wxT('2') &&
1082 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1083 )
1084 )
1085 )
1086 {
1087 return false;
1088 }
1089
1090 if(IsDigit(*uri))++uri;
1091
1092 //compilers should unroll this loop
1093 for(; iIPv4 < 4; ++iIPv4)
1094 {
1095 if (*uri != wxT('.') || !IsDigit(*++uri))
1096 break;
1097
1098 //each ip part must be between 0-255
1099 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1100 //100 or less (note !)
1101 !( (*(uri-2) < wxT('2')) ||
1102 //240 or less
1103 (*(uri-2) == wxT('2') &&
1104 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1105 )
1106 )
1107 )
1108 {
1109 return false;
1110 }
1111 if(IsDigit(*uri))++uri;
1112 }
1113 }
1114 return iIPv4 == 4;
1115 }
1116
1117 bool wxURI::ParseIPv6address(const wxChar*& uri)
1118 {
1119 // IPv6address = 6( h16 ":" ) ls32
1120 // / "::" 5( h16 ":" ) ls32
1121 // / [ h16 ] "::" 4( h16 ":" ) ls32
1122 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1123 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1124 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1125 // / [ *4( h16 ":" ) h16 ] "::" ls32
1126 // / [ *5( h16 ":" ) h16 ] "::" h16
1127 // / [ *6( h16 ":" ) h16 ] "::"
1128
1129 size_t numPrefix = 0,
1130 maxPostfix;
1131
1132 bool bEndHex = false;
1133
1134 for( ; numPrefix < 6; ++numPrefix)
1135 {
1136 if(!ParseH16(uri))
1137 {
1138 --uri;
1139 bEndHex = true;
1140 break;
1141 }
1142
1143 if(*uri != wxT(':'))
1144 {
1145 break;
1146 }
1147 }
1148
1149 if(!bEndHex && !ParseH16(uri))
1150 {
1151 --uri;
1152
1153 if (numPrefix)
1154 return false;
1155
1156 if (*uri == wxT(':'))
1157 {
1158 if (*++uri != wxT(':'))
1159 return false;
1160
1161 maxPostfix = 5;
1162 }
1163 else
1164 maxPostfix = 6;
1165 }
1166 else
1167 {
1168 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1169 {
1170 if (numPrefix != 6)
1171 return false;
1172
1173 while (*--uri != wxT(':')) {}
1174 ++uri;
1175
1176 const wxChar* uristart = uri;
1177 //parse ls32
1178 // ls32 = ( h16 ":" h16 ) / IPv4address
1179 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1180 return true;
1181
1182 uri = uristart;
1183
1184 if (ParseIPv4address(uri))
1185 return true;
1186 else
1187 return false;
1188 }
1189 else
1190 {
1191 uri += 2;
1192
1193 if (numPrefix > 3)
1194 maxPostfix = 0;
1195 else
1196 maxPostfix = 4 - numPrefix;
1197 }
1198 }
1199
1200 bool bAllowAltEnding = maxPostfix == 0;
1201
1202 for(; maxPostfix != 0; --maxPostfix)
1203 {
1204 if(!ParseH16(uri) || *uri != wxT(':'))
1205 return false;
1206 }
1207
1208 if(numPrefix <= 4)
1209 {
1210 const wxChar* uristart = uri;
1211 //parse ls32
1212 // ls32 = ( h16 ":" h16 ) / IPv4address
1213 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1214 return true;
1215
1216 uri = uristart;
1217
1218 if (ParseIPv4address(uri))
1219 return true;
1220
1221 uri = uristart;
1222
1223 if (!bAllowAltEnding)
1224 return false;
1225 }
1226
1227 if(numPrefix <= 5 && ParseH16(uri))
1228 return true;
1229
1230 return true;
1231 }
1232
1233 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1234 {
1235 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1236 if (*++uri != wxT('v') || !IsHex(*++uri))
1237 return false;
1238
1239 while (IsHex(*++uri)) {}
1240
1241 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1242 return false;
1243
1244 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1245
1246 return true;
1247 }
1248
1249
1250 // ---------------------------------------------------------------------------
1251 // CharToHex
1252 //
// Converts a character into a numeric hexadecimal value, or 0 if the
// passed in character is not a valid hex character
1255 // ---------------------------------------------------------------------------
1256
1257 //static
1258 wxChar wxURI::CharToHex(const wxChar& c)
1259 {
1260 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1261 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1262 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1263
1264 return 0;
1265 }
1266
1267 // ---------------------------------------------------------------------------
1268 // IsXXX
1269 //
1270 // Returns true if the passed in character meets the criteria of the method
1271 // ---------------------------------------------------------------------------
1272
1273 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1274 bool wxURI::IsUnreserved (const wxChar& c)
1275 { return IsAlpha(c) || IsDigit(c) ||
1276 c == wxT('-') ||
1277 c == wxT('.') ||
1278 c == wxT('_') ||
1279 c == wxT('~') //tilde
1280 ;
1281 }
1282
1283 bool wxURI::IsReserved (const wxChar& c)
1284 {
1285 return IsGenDelim(c) || IsSubDelim(c);
1286 }
1287
1288 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1289 bool wxURI::IsGenDelim (const wxChar& c)
1290 {
1291 return c == wxT(':') ||
1292 c == wxT('/') ||
1293 c == wxT('?') ||
1294 c == wxT('#') ||
1295 c == wxT('[') ||
1296 c == wxT(']') ||
1297 c == wxT('@');
1298 }
1299
1300 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1301 //! / "*" / "+" / "," / ";" / "="
1302 bool wxURI::IsSubDelim (const wxChar& c)
1303 {
1304 return c == wxT('!') ||
1305 c == wxT('$') ||
1306 c == wxT('&') ||
1307 c == wxT('\'') ||
1308 c == wxT('(') ||
1309 c == wxT(')') ||
1310 c == wxT('*') ||
1311 c == wxT('+') ||
1312 c == wxT(',') ||
1313 c == wxT(';') ||
1314 c == wxT('=')
1315 ;
1316 }
1317
1318 bool wxURI::IsHex(const wxChar& c)
1319 { return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1320
1321 bool wxURI::IsAlpha(const wxChar& c)
1322 { return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
1323
1324 bool wxURI::IsDigit(const wxChar& c)
1325 { return c >= wxT('0') && c <= wxT('9'); }
1326
1327
1328 //end of uri.cpp
1329
1330
1331