]>
git.saurik.com Git - wxWidgets.git/blob - src/common/uri.cpp
23c5317d458c82013d2fcbb02d1fa9747c6cca10
1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: Implementation of a uri parser
7 // Copyright: (c) 2004 Ryan Norton
9 /////////////////////////////////////////////////////////////////////////////
12 //TODO: RN: I had some massive doxygen docs, I need to move these
13 //in a presentable form in these sources
16 // ===========================================================================
18 // ===========================================================================
20 // ---------------------------------------------------------------------------
22 // ---------------------------------------------------------------------------
24 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
25 #pragma implementation "uri.h"
28 // For compilers that support precompilation, includes "wx.h".
29 #include "wx/wxprec.h"
37 // ---------------------------------------------------------------------------
39 // ---------------------------------------------------------------------------
41 IMPLEMENT_CLASS(wxURI
, wxObject
);
43 // ===========================================================================
45 // ===========================================================================
47 // ---------------------------------------------------------------------------
49 // ---------------------------------------------------------------------------
51 // ---------------------------------------------------------------------------
55 // ---------------------------------------------------------------------------
57 // ---------------------------------------------------------------------------
59 // ---------------------------------------------------------------------------
61 wxURI::wxURI() : m_hostType(wxURI_REGNAME
), m_fields(0)
65 wxURI::wxURI(const wxString
& uri
) : m_hostType(wxURI_REGNAME
), m_fields(0)
70 wxURI::wxURI(const wxURI
& uri
) : m_hostType(wxURI_REGNAME
), m_fields(0)
75 // ---------------------------------------------------------------------------
76 // Destructor and cleanup
77 // ---------------------------------------------------------------------------
86 m_scheme
= m_user
= m_server
= m_port
= m_path
=
87 m_query
= m_fragment
= wxT("");
89 m_hostType
= wxURI_REGNAME
;
94 // ---------------------------------------------------------------------------
97 // This creates the URI - all we do here is call the main parsing method
98 // ---------------------------------------------------------------------------
100 const wxChar
* wxURI::Create(const wxString
& uri
)
108 // ---------------------------------------------------------------------------
109 // Escape/TranslateEscape/IsEscape
111 // TranslateEscape unencodes a 3 character URL escape sequence
112 // Escape encodes an invalid URI character into a 3 character sequence
113 // IsEscape determines if the input string contains an escape sequence,
114 // if it does, then it moves the input string past the escape sequence
115 // ---------------------------------------------------------------------------
117 wxChar
wxURI::TranslateEscape(const wxChar
* s
)
119 wxASSERT_MSG(IsHex(*s
) && IsHex(*(s
+1)), wxT("Invalid escape!"));
121 return CharToHex(*s
) * 0x10 + CharToHex(*++s
);
124 wxString
wxURI::Unescape(const wxString
& uri
)
128 for(size_t i
= 0; i
< uri
.length(); ++i
)
130 if (uri
[i
] == wxT('%'))
132 new_uri
+= wxURI::TranslateEscape( &(uri
.c_str()[i
+1]) );
140 void wxURI::Escape(wxString
& s
, const wxChar
& c
)
142 const wxChar
* hdig
= wxT("0123456789abcdef");
144 s
+= hdig
[(c
>> 4) & 15];
148 bool wxURI::IsEscape(const wxChar
*& uri
)
150 if(*uri
== '%' && IsHex(*(uri
+1)) && IsHex(*(uri
+2)))
159 // ---------------------------------------------------------------------------
162 // BuildURI() builds the entire URI into a useable
163 // representation, including proper identification characters such as slashes
164 // ---------------------------------------------------------------------------
166 wxString
wxURI::BuildURI() const
171 ret
= ret
+ m_scheme
+ wxT(":");
178 ret
= ret
+ m_user
+ wxT("@");
183 ret
= ret
+ wxT(":") + m_port
;
189 ret
= ret
+ wxT("?") + m_query
;
192 ret
= ret
+ wxT("#") + m_fragment
;
197 wxString
wxURI::BuildUnescapedURI() const
202 ret
= ret
+ m_scheme
+ wxT(":");
209 ret
= ret
+ wxURI::Unescape(m_user
) + wxT("@");
211 if (m_hostType
== wxURI_REGNAME
)
212 ret
+= wxURI::Unescape(m_server
);
217 ret
= ret
+ wxT(":") + m_port
;
220 ret
+= wxURI::Unescape(m_path
);
223 ret
= ret
+ wxT("?") + wxURI::Unescape(m_query
);
226 ret
= ret
+ wxT("#") + wxURI::Unescape(m_fragment
);
231 // ---------------------------------------------------------------------------
233 // ---------------------------------------------------------------------------
235 wxURI
& wxURI::operator = (const wxURI
& uri
)
240 wxURI
& wxURI::Assign(const wxURI
& uri
)
243 m_fields
= uri
.m_fields
;
245 //ref over components
246 m_scheme
= uri
.m_scheme
;
248 m_server
= uri
.m_server
;
249 m_hostType
= uri
.m_hostType
;
252 m_query
= uri
.m_query
;
253 m_fragment
= uri
.m_fragment
;
258 wxURI
& wxURI::operator = (const wxString
& string
)
264 bool wxURI::operator == (const wxURI
& uri
) const
268 if(m_scheme
!= uri
.m_scheme
)
271 else if (uri
.HasScheme())
279 if (m_user
!= uri
.m_user
)
282 else if (uri
.HasUser())
285 if (m_server
!= uri
.m_server
||
286 m_hostType
!= uri
.m_hostType
)
291 if(m_port
!= uri
.m_port
)
294 else if (uri
.HasPort())
297 else if (uri
.HasServer())
303 if(m_path
!= uri
.m_path
)
306 else if (uri
.HasPath())
311 if (m_query
!= uri
.m_query
)
314 else if (uri
.HasQuery())
319 if (m_fragment
!= uri
.m_fragment
)
322 else if (uri
.HasFragment())
328 // ---------------------------------------------------------------------------
331 // if there is no authority or scheme, it is a reference
332 // ---------------------------------------------------------------------------
334 bool wxURI::IsReference() const
335 { return !HasScheme() || !HasServer(); }
337 // ---------------------------------------------------------------------------
340 // Master URI parsing method. Just calls the individual parsing methods
342 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
343 // URI-reference = URI / relative-URI
344 // ---------------------------------------------------------------------------
346 const wxChar
* wxURI::Parse(const wxChar
* uri
)
348 uri
= ParseScheme(uri
);
349 uri
= ParseAuthority(uri
);
350 uri
= ParsePath(uri
);
351 uri
= ParseQuery(uri
);
352 return ParseFragment(uri
);
355 // ---------------------------------------------------------------------------
358 // Individual parsers for each URI component
359 // ---------------------------------------------------------------------------
361 const wxChar
* wxURI::ParseScheme(const wxChar
* uri
)
363 wxASSERT(uri
!= NULL
);
365 //copy of the uri - used for figuring out
366 //length of each component
367 const wxChar
* uricopy
= uri
;
369 //Does the uri have a scheme (first character alpha)?
374 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
375 //RN: Scheme can not be escaped
376 while (IsAlpha(*uri
) || IsDigit(*uri
) ||
387 //mark the scheme as valid
388 m_fields
|= wxURI_SCHEME
;
390 //move reference point up to input buffer
394 //relative uri with relative path reference
398 //relative uri with _possible_ relative path reference
403 const wxChar
* wxURI::ParseAuthority(const wxChar
* uri
)
405 // authority = [ userinfo "@" ] host [ ":" port ]
406 if (*uri
== '/' && *(uri
+1) == '/')
410 uri
= ParseUser(uri
);
411 uri
= ParseServer(uri
);
412 return ParsePort(uri
);
418 const wxChar
* wxURI::ParseUser(const wxChar
* uri
)
420 wxASSERT(uri
!= NULL
);
422 //copy of the uri - used for figuring out
423 //length of each component
424 const wxChar
* uricopy
= uri
;
426 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
427 while(*uri
&& *uri
!= '@' && *uri
!= '/' && *uri
!= '#' && *uri
!= '?')
429 if(IsUnreserved(*uri
) || IsEscape(uri
) ||
430 IsSubDelim(*uri
) || *uri
== ':')
433 Escape(m_user
, *uri
++);
439 m_fields
|= wxURI_USER
;
449 const wxChar
* wxURI::ParseServer(const wxChar
* uri
)
451 wxASSERT(uri
!= NULL
);
453 //copy of the uri - used for figuring out
454 //length of each component
455 const wxChar
* uricopy
= uri
;
457 // host = IP-literal / IPv4address / reg-name
458 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
461 if (ParseIPv6address(++uri
) && *uri
== ']')
464 m_hostType
= wxURI_IPV6ADDRESS
;
466 wxStringBufferLength
theBuffer(m_server
, uri
- uricopy
);
467 wxMemcpy(theBuffer
, uricopy
, uri
-uricopy
);
468 theBuffer
.SetLength(uri
-uricopy
);
474 if (ParseIPvFuture(++uri
) && *uri
== ']')
477 m_hostType
= wxURI_IPVFUTURE
;
479 wxStringBufferLength
theBuffer(m_server
, uri
- uricopy
);
480 wxMemcpy(theBuffer
, uricopy
, uri
-uricopy
);
481 theBuffer
.SetLength(uri
-uricopy
);
489 if (ParseIPv4address(uri
))
491 m_hostType
= wxURI_IPV4ADDRESS
;
493 wxStringBufferLength
theBuffer(m_server
, uri
- uricopy
);
494 wxMemcpy(theBuffer
, uricopy
, uri
-uricopy
);
495 theBuffer
.SetLength(uri
-uricopy
);
501 if(m_hostType
== wxURI_REGNAME
)
504 // reg-name = *( unreserved / pct-encoded / sub-delims )
505 while(*uri
&& *uri
!= '/' && *uri
!= ':' && *uri
!= '#' && *uri
!= '?')
507 if(IsUnreserved(*uri
) || IsEscape(uri
) || IsSubDelim(*uri
))
510 Escape(m_server
, *uri
++);
514 //mark the server as valid
515 m_fields
|= wxURI_SERVER
;
521 const wxChar
* wxURI::ParsePort(const wxChar
* uri
)
523 wxASSERT(uri
!= NULL
);
534 //mark the port as valid
535 m_fields
|= wxURI_PORT
;
541 const wxChar
* wxURI::ParsePath(const wxChar
* uri
, bool bReference
, bool bNormalize
)
543 wxASSERT(uri
!= NULL
);
545 //copy of the uri - used for figuring out
546 //length of each component
547 const wxChar
* uricopy
= uri
;
549 /// hier-part = "//" authority path-abempty
554 /// relative-part = "//" authority path-abempty
559 /// path-abempty = *( "/" segment )
560 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
561 /// path-noscheme = segment-nz-nc *( "/" segment )
562 /// path-rootless = segment-nz *( "/" segment )
563 /// path-empty = 0<pchar>
566 /// segment-nz = 1*pchar
567 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
568 /// ; non-zero-length segment without any colon ":"
570 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
575 while(*uri
&& *uri
!= '#' && *uri
!= '?')
577 if( IsUnreserved(*uri
) || IsSubDelim(*uri
) || IsEscape(uri
) ||
578 *uri
== ':' || *uri
== '@' || *uri
== '/')
581 Escape(m_path
, *uri
++);
586 wxStringBufferLength
theBuffer(m_path
, m_path
.length() + 1);
587 Normalize(theBuffer
, true);
588 theBuffer
.SetLength(wxStrlen(theBuffer
));
590 //mark the path as valid
591 m_fields
|= wxURI_PATH
;
593 else if(*uri
) //Relative path
598 while(*uri
&& *uri
!= '#' && *uri
!= '?')
600 if(IsUnreserved(*uri
) || IsSubDelim(*uri
) || IsEscape(uri
) ||
601 *uri
== '@' || *uri
== '/')
604 Escape(m_path
, *uri
++);
609 while(*uri
&& *uri
!= '#' && *uri
!= '?')
611 if(IsUnreserved(*uri
) || IsSubDelim(*uri
) || IsEscape(uri
) ||
612 *uri
== ':' || *uri
== '@' || *uri
== '/')
615 Escape(m_path
, *uri
++);
623 wxStringBufferLength
theBuffer(m_path
, m_path
.length() + 1);
624 Normalize(theBuffer
);
625 theBuffer
.SetLength(wxStrlen(theBuffer
));
628 //mark the path as valid
629 m_fields
|= wxURI_PATH
;
637 const wxChar
* wxURI::ParseQuery(const wxChar
* uri
)
639 wxASSERT(uri
!= NULL
);
641 // query = *( pchar / "/" / "?" )
645 while(*uri
&& *uri
!= '#')
647 if (IsUnreserved(*uri
) || IsSubDelim(*uri
) || IsEscape(uri
) ||
648 *uri
== ':' || *uri
== '@' || *uri
== '/' || *uri
== '?')
651 Escape(m_query
, *uri
++);
654 //mark the query as valid
655 m_fields
|= wxURI_QUERY
;
662 const wxChar
* wxURI::ParseFragment(const wxChar
* uri
)
664 wxASSERT(uri
!= NULL
);
666 // fragment = *( pchar / "/" / "?" )
672 if (IsUnreserved(*uri
) || IsSubDelim(*uri
) || IsEscape(uri
) ||
673 *uri
== ':' || *uri
== '@' || *uri
== '/' || *uri
== '?')
674 m_fragment
+= *uri
++;
676 Escape(m_fragment
, *uri
++);
679 //mark the fragment as valid
680 m_fields
|= wxURI_FRAGMENT
;
686 // ---------------------------------------------------------------------------
689 // Builds missing components of this uri from a base uri
691 // A version of the algorithm outlined in the RFC is used here
692 // (it is shown in comments)
693 // ---------------------------------------------------------------------------
695 void wxURI::Resolve(const wxURI
& base
, int flags
)
697 wxASSERT_MSG(!base
.IsReference(),
698 wxT("wxURI to inherit from must not be a reference!"));
700 // If we aren't being strict, enable the older
701 // loophole that allows this uri to inherit other
702 // properties from the base uri - even if the scheme
704 if ( !(flags
& wxURI_STRICT
) &&
705 HasScheme() && base
.HasScheme() &&
706 m_scheme
== base
.m_scheme
)
708 m_fields
-= wxURI_SCHEME
;
712 // Do nothing if this is an absolute wxURI
713 // if defined(R.scheme) then
714 // T.scheme = R.scheme;
715 // T.authority = R.authority;
716 // T.path = remove_dot_segments(R.path);
717 // T.query = R.query;
724 m_scheme
= base
.m_scheme
;
725 m_fields
|= wxURI_SCHEME
;
727 // All we need to do for relative URIs with an
728 // authority component is just inherit the scheme
729 // if defined(R.authority) then
730 // T.authority = R.authority;
731 // T.path = remove_dot_segments(R.path);
732 // T.query = R.query;
738 //No authority - inherit
741 m_user
= base
.m_user
;
742 m_fields
|= wxURI_USER
;
745 m_server
= base
.m_server
;
746 m_hostType
= base
.m_hostType
;
747 m_fields
|= wxURI_SERVER
;
751 m_port
= base
.m_port
;
752 m_fields
|= wxURI_PORT
;
756 // Simple path inheritance from base
759 // T.path = Base.path;
760 m_path
= base
.m_path
;
761 m_fields
|= wxURI_PATH
;
764 // if defined(R.query) then
765 // T.query = R.query;
767 // T.query = Base.query;
771 m_query
= base
.m_query
;
772 m_fields
|= wxURI_QUERY
;
777 // if (R.path starts-with "/") then
778 // T.path = remove_dot_segments(R.path);
780 // T.path = merge(Base.path, R.path);
781 // T.path = remove_dot_segments(T.path);
783 // T.query = R.query;
784 if (m_path
[(const size_t&)0] != '/')
787 const wxChar
* op
= m_path
.c_str();
788 const wxChar
* bp
= base
.m_path
.c_str() + base
.m_path
.Length();
790 //not a ending directory? move up
791 if (base
.m_path
[0] && *(bp
-1) != '/')
792 UpTree(base
.m_path
, bp
);
794 //normalize directories
795 while(*op
== '.' && *(op
+1) == '.' &&
796 (*(op
+2) == '\0' || *(op
+2) == '/') )
798 UpTree(base
.m_path
, bp
);
806 m_path
= base
.m_path
.substr(0, bp
- base
.m_path
.c_str()) +
807 m_path
.Mid((op
- m_path
.c_str()), m_path
.Length());
812 // ---------------------------------------------------------------------------
813 // Directory Normalization (static)
815 // UpTree goes up a directory in a string and moves the pointer as such,
816 // while Normalize gets rid of duplicate/erroneous directories in a URI
817 // according to RFC 2396 and modified quite a bit to meet the unit tests
819 // ---------------------------------------------------------------------------
821 void wxURI::UpTree(const wxChar
* uristart
, const wxChar
*& uri
)
823 if (uri
!= uristart
&& *(uri
-1) == '/')
828 for(;uri
!= uristart
; --uri
)
838 if (uri
== uristart
&& *uri
== '/')
843 void wxURI::Normalize(wxChar
* s
, bool bIgnoreLeads
)
853 if (*cp
== '.' && (*(cp
+1) == '/' || *(cp
+1) == '\0')
854 && (bp
== cp
|| *(cp
-1) == '/'))
862 else if (*cp
== '.' && *(cp
+1) == '.' &&
863 (*(cp
+2) == '/' || *(cp
+2) == '\0')
864 && (bp
== cp
|| *(cp
-1) == '/'))
866 //.. _or_ ../ - go up the tree
869 UpTree((const wxChar
*)bp
, (const wxChar
*&)s
);
876 else if (!bIgnoreLeads
)
901 // ---------------------------------------------------------------------------
902 // Misc. Parsing Methods
903 // ---------------------------------------------------------------------------
905 bool wxURI::ParseIPv4address(const wxChar
*& uri
)
907 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
909 //dec-octet = DIGIT ; 0-9
910 // / %x31-39 DIGIT ; 10-99
911 // / "1" 2DIGIT ; 100-199
912 // / "2" %x30-34 DIGIT ; 200-249
913 // / "25" %x30-35 ; 250-255
920 //each ip part must be between 0-255 (dupe of version in for loop)
921 if( IsDigit(*++uri
) && IsDigit(*++uri
) &&
922 //100 or less (note !)
923 !( (*(uri
-2) < '2') ||
926 (*(uri
-1) < '5' || (*(uri
-1) == '5' && *uri
<= '5'))
934 if(IsDigit(*uri
))++uri
;
936 //compilers should unroll this loop
937 for(; iIPv4
< 4; ++iIPv4
)
939 if (*uri
!= '.' || !IsDigit(*++uri
))
942 //each ip part must be between 0-255
943 if( IsDigit(*++uri
) && IsDigit(*++uri
) &&
944 //100 or less (note !)
945 !( (*(uri
-2) < '2') ||
948 (*(uri
-1) < '5' || (*(uri
-1) == '5' && *uri
<= '5'))
955 if(IsDigit(*uri
))++uri
;
961 bool wxURI::ParseH16(const wxChar
*& uri
)
967 if(IsHex(*++uri
) && IsHex(*++uri
) && IsHex(*++uri
))
973 bool wxURI::ParseIPv6address(const wxChar
*& uri
)
975 // IPv6address = 6( h16 ":" ) ls32
976 // / "::" 5( h16 ":" ) ls32
977 // / [ h16 ] "::" 4( h16 ":" ) ls32
978 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
979 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
980 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
981 // / [ *4( h16 ":" ) h16 ] "::" ls32
982 // / [ *5( h16 ":" ) h16 ] "::" h16
983 // / [ *6( h16 ":" ) h16 ] "::"
985 size_t numPrefix
= 0,
988 bool bEndHex
= false;
990 for( ; numPrefix
< 6; ++numPrefix
)
1005 if(!bEndHex
&& !ParseH16(uri
))
1024 if (*uri
!= ':' || *(uri
+1) != ':')
1029 while (*--uri
!= ':') {}
1032 const wxChar
* uristart
= uri
;
1034 // ls32 = ( h16 ":" h16 ) / IPv4address
1035 if (ParseH16(uri
) && *uri
== ':' && ParseH16(uri
))
1040 if (ParseIPv4address(uri
))
1052 maxPostfix
= 4 - numPrefix
;
1056 bool bAllowAltEnding
= maxPostfix
== 0;
1058 for(; maxPostfix
!= 0; --maxPostfix
)
1060 if(!ParseH16(uri
) || *uri
!= ':')
1066 const wxChar
* uristart
= uri
;
1068 // ls32 = ( h16 ":" h16 ) / IPv4address
1069 if (ParseH16(uri
) && *uri
== ':' && ParseH16(uri
))
1074 if (ParseIPv4address(uri
))
1079 if (!bAllowAltEnding
)
1083 if(numPrefix
<= 5 && ParseH16(uri
))
1089 bool wxURI::ParseIPvFuture(const wxChar
*& uri
)
1091 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1092 if (*++uri
!= 'v' || !IsHex(*++uri
))
1095 while (IsHex(*++uri
)) {}
1097 if (*uri
!= '.' || !(IsUnreserved(*++uri
) || IsSubDelim(*uri
) || *uri
== ':'))
1100 while(IsUnreserved(*++uri
) || IsSubDelim(*uri
) || *uri
== ':') {}
1106 // ---------------------------------------------------------------------------
1107 // Misc methods - IsXXX and CharToHex
1108 // ---------------------------------------------------------------------------
1110 wxInt32
wxURI::CharToHex(const wxChar
& c
)
1112 if ((c
>= 'A') && (c
<= 'Z')) return c
- 'A' + 0x0A;
1113 if ((c
>= 'a') && (c
<= 'z')) return c
- 'a' + 0x0a;
1114 if ((c
>= '0') && (c
<= '9')) return c
- '0' + 0x00;
1119 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1120 bool wxURI::IsUnreserved (const wxChar
& c
)
1121 { return IsAlpha(c
) || IsDigit(c
) ||
1129 bool wxURI::IsReserved (const wxChar
& c
)
1131 return IsGenDelim(c
) || IsSubDelim(c
);
1134 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1135 bool wxURI::IsGenDelim (const wxChar
& c
)
1146 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1147 //! / "*" / "+" / "," / ";" / "="
1148 bool wxURI::IsSubDelim (const wxChar
& c
)
1164 bool wxURI::IsHex(const wxChar
& c
)
1165 { return IsDigit(c
) || (c
>= 'a' && c
<= 'f') || (c
>= 'A' && c
<= 'F'); }
1167 bool wxURI::IsAlpha(const wxChar
& c
)
1168 { return (c
>= 'a' && c
<= 'z') || (c
>= 'A' && c
<= 'Z'); }
1170 bool wxURI::IsDigit(const wxChar
& c
)
1171 { return c
>= '0' && c
<= '9'; }
1174 // ---------------------------------------------------------------------------
1176 // wxURL Compatibility
1178 // ---------------------------------------------------------------------------
1182 #if WXWIN_COMPATIBILITY_2_4
1186 wxString
wxURL::ConvertToValidURI(const wxString
& uri
, const wxChar
* WXUNUSED(delims
))
1188 return wxURI(uri
).BuildURI();
1191 wxString
wxURL::ConvertFromURI(const wxString
& uri
)
1193 return wxURI::Unescape(uri
);
1196 #endif //WXWIN_COMPATIBILITY_2_4