X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/ba5a47aed8025996fa5c103a151cc19fe1119ac6..29efc6e4a478652d6f59fb3f5ca7990d78a8ead4:/src/common/uri.cpp?ds=sidebyside diff --git a/src/common/uri.cpp b/src/common/uri.cpp index 0608b4c82f..fd5b69caaf 100644 --- a/src/common/uri.cpp +++ b/src/common/uri.cpp @@ -1,11 +1,13 @@ ///////////////////////////////////////////////////////////////////////////// // Name: uri.cpp -// Purpose: Implementation of a uri parser -// Author: Ryan Norton +// Purpose: Implementation of a URI parser +// Author: Ryan Norton, +// Vadim Zeitlin (UTF-8 URI support, many other changes) // Created: 10/26/04 // RCS-ID: $Id$ -// Copyright: (c) 2004 Ryan Norton -// Licence: wxWindows +// Copyright: (c) 2004 Ryan Norton, +// 2008 Vadim Zeitlin +// Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// // =========================================================================== @@ -16,10 +18,6 @@ // headers // --------------------------------------------------------------------------- -#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) - #pragma implementation "uri.h" -#endif - // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" @@ -27,59 +25,56 @@ #pragma hdrstop #endif +#ifndef WX_PRECOMP + #include "wx/crt.h" +#endif + #include "wx/uri.h" // --------------------------------------------------------------------------- // definitions // --------------------------------------------------------------------------- -IMPLEMENT_CLASS(wxURI, wxObject); +IMPLEMENT_CLASS(wxURI, wxObject) // =========================================================================== -// implementation +// wxURI implementation // =========================================================================== // --------------------------------------------------------------------------- -// utilities +// Constructors and cleanup // --------------------------------------------------------------------------- -// --------------------------------------------------------------------------- -// -// wxURI -// -// --------------------------------------------------------------------------- - -// --------------------------------------------------------------------------- -// Constructors -// --------------------------------------------------------------------------- - -wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0) +wxURI::wxURI() + : m_hostType(wxURI_REGNAME), + m_fields(0) { } - -wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0) + +wxURI::wxURI(const wxString& uri) + : m_hostType(wxURI_REGNAME), + m_fields(0) { Create(uri); } -wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0) +bool wxURI::Create(const wxString& uri) { - Assign(uri); -} - -// --------------------------------------------------------------------------- -// Destructor and cleanup -// --------------------------------------------------------------------------- + if (m_fields) + Clear(); -wxURI::~wxURI() -{ - Clear(); + return Parse(uri.utf8_str()); } void wxURI::Clear() { - m_scheme = m_user = m_server = m_port = m_path = - m_query = m_fragment = wxT(""); + m_scheme = + m_userinfo = + m_server = + m_port = + m_path = + m_query = + m_fragment = wxEmptyString; m_hostType = wxURI_REGNAME; @@ -87,189 +82,168 @@ void wxURI::Clear() } // --------------------------------------------------------------------------- -// Create -// -// This creates the URI - all we do here is call the main parsing method +// Escaped characters handling // --------------------------------------------------------------------------- -const wxChar* wxURI::Create(const wxString& uri) -{ - if (m_fields) - Clear(); +// Converts a character into a numeric hexadecimal value, or -1 if the passed +// in character is not a valid hex character - return Parse(uri); -} - -// --------------------------------------------------------------------------- -// Escape Methods -// -// TranslateEscape unencodes a 3 character URL escape sequence -// -// Escape encodes an invalid URI character into a 3 character sequence -// -// IsEscape determines if the input string contains an escape sequence, -// if it does, then it moves the input string past the escape sequence -// -// Unescape unencodes all 3 character URL escape sequences in a wxString -// --------------------------------------------------------------------------- +/* static */ +int wxURI::CharToHex(char c) +{ + if ((c >= 'A') && (c <= 'Z')) + return c - 'A' + 10; + if ((c >= 'a') && (c <= 'z')) + return c - 'a' + 10; + if ((c >= '0') && (c <= '9')) + return c - '0'; + + return -1; +} -wxChar wxURI::TranslateEscape(const wxChar* s) +int wxURI::DecodeEscape(wxString::const_iterator& i) { - wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!")); + int hi = CharToHex(*++i); + if ( hi == -1 ) + return -1; - return CharToHex(*s) * 0x10 + CharToHex(*++s); + int lo = CharToHex(*++i); + if ( lo == -1 ) + return -1; + + return (hi << 4) | lo; } +/* static */ wxString wxURI::Unescape(const wxString& uri) { - wxString new_uri; + // the unescaped version can't be longer than the original one + wxCharBuffer buf(uri.length()); + char *p = buf.data(); - for(size_t i = 0; i < uri.length(); ++i) + for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p ) { - if (uri[i] == wxT('%')) + char c = *i; + if ( c == '%' ) { - new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) ); - i += 2; + int n = wxURI::DecodeEscape(i); + if ( n == -1 ) + return wxString(); + + wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" ); + + c = static_cast(n); } + + *p = c; } - return new_uri; -} + *p = '\0'; -void wxURI::Escape(wxString& s, const wxChar& c) -{ - const wxChar* hdig = wxT("0123456789abcdef"); - s += wxT('%'); - s += hdig[(c >> 4) & 15]; - s += hdig[c & 15]; + // by default assume that the URI is in UTF-8, this is the most common + // practice + wxString s = wxString::FromUTF8(buf); + if ( s.empty() ) + { + // if it isn't, use latin-1 as a fallback -- at least this always + // succeeds + s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf); + } + + return s; } -bool wxURI::IsEscape(const wxChar*& uri) +void wxURI::AppendNextEscaped(wxString& s, const char *& p) { + // check for an already encoded character: + // // pct-encoded = "%" HEXDIG HEXDIG - if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2))) + if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) ) { - uri += 3; - return true; + s += *p++; + s += *p++; + s += *p++; + } + else // really needs escaping + { + static const char* hexDigits = "0123456789abcdef"; + + const char c = *p++; + + s += '%'; + s += hexDigits[(c >> 4) & 15]; + s += hexDigits[c & 15]; } - else - return false; } // --------------------------------------------------------------------------- -// BuildURI +// GetUser +// GetPassword // -// BuildURI() builds the entire URI into a useable -// representation, including proper identification characters such as slashes -// -// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes -// the components that accept escape sequences +// Gets the username and password via the old URL method. // --------------------------------------------------------------------------- +wxString wxURI::GetUser() const +{ + size_t dwPasswordPos = m_userinfo.find(':'); -wxString wxURI::BuildURI() const -{ - wxString ret; - - if (HasScheme()) - ret = ret + m_scheme + wxT(":"); - - if (HasServer()) - { - ret += wxT("//"); - - if (HasUser()) - ret = ret + m_user + wxT("@"); - - ret += m_server; - - if (HasPort()) - ret = ret + wxT(":") + m_port; - } - - ret += m_path; + if (dwPasswordPos == wxString::npos) + dwPasswordPos = 0; - if (HasQuery()) - ret = ret + wxT("?") + m_query; + return m_userinfo(0, dwPasswordPos); +} - if (HasFragment()) - ret = ret + wxT("#") + m_fragment; +wxString wxURI::GetPassword() const +{ + size_t dwPasswordPos = m_userinfo.find(':'); - return ret; + if (dwPasswordPos == wxString::npos) + return ""; + else + return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1); } -wxString wxURI::BuildUnescapedURI() const +// combine all URI fields in a single string, applying funcDecode to each +// component which it may make sense to decode (i.e. "unescape") +wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const { wxString ret; if (HasScheme()) - ret = ret + m_scheme + wxT(":"); + ret += m_scheme + ":"; if (HasServer()) { - ret += wxT("//"); + ret += "//"; - if (HasUser()) - ret = ret + wxURI::Unescape(m_user) + wxT("@"); + if (HasUserInfo()) + ret += funcDecode(m_userinfo) + "@"; if (m_hostType == wxURI_REGNAME) - ret += wxURI::Unescape(m_server); + ret += funcDecode(m_server); else ret += m_server; if (HasPort()) - ret = ret + wxT(":") + m_port; + ret += ":" + m_port; } - ret += wxURI::Unescape(m_path); + ret += funcDecode(m_path); if (HasQuery()) - ret = ret + wxT("?") + wxURI::Unescape(m_query); + ret += "?" + funcDecode(m_query); if (HasFragment()) - ret = ret + wxT("#") + wxURI::Unescape(m_fragment); + ret += "#" + funcDecode(m_fragment); return ret; } -// --------------------------------------------------------------------------- -// Assignment -// --------------------------------------------------------------------------- - -wxURI& wxURI::Assign(const wxURI& uri) -{ - //assign fields - m_fields = uri.m_fields; - - //ref over components - m_scheme = uri.m_scheme; - m_user = uri.m_user; - m_server = uri.m_server; - m_hostType = uri.m_hostType; - m_port = uri.m_port; - m_path = uri.m_path; - m_query = uri.m_query; - m_fragment = uri.m_fragment; - - return *this; -} - -wxURI& wxURI::operator = (const wxURI& uri) -{ - return Assign(uri); -} - -wxURI& wxURI::operator = (const wxString& string) -{ - Create(string); - return *this; -} - // --------------------------------------------------------------------------- // Comparison // --------------------------------------------------------------------------- -bool wxURI::operator == (const wxURI& uri) const -{ +bool wxURI::operator==(const wxURI& uri) const +{ if (HasScheme()) { if(m_scheme != uri.m_scheme) @@ -281,12 +255,12 @@ bool wxURI::operator == (const wxURI& uri) const if (HasServer()) { - if (HasUser()) + if (HasUserInfo()) { - if (m_user != uri.m_user) + if (m_userinfo != uri.m_userinfo) return false; } - else if (uri.HasUser()) + else if (uri.HasUserInfo()) return false; if (m_server != uri.m_server || @@ -339,7 +313,9 @@ bool wxURI::operator == (const wxURI& uri) const // --------------------------------------------------------------------------- bool wxURI::IsReference() const -{ return !HasScheme() || !HasServer(); } +{ + return !HasScheme() || !HasServer(); +} // --------------------------------------------------------------------------- // Parse @@ -347,211 +323,197 @@ bool wxURI::IsReference() const // Master URI parsing method. Just calls the individual parsing methods // // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] -// URI-reference = URI / relative-URITestCase +// URI-reference = URI / relative // --------------------------------------------------------------------------- -const wxChar* wxURI::Parse(const wxChar* uri) +bool wxURI::Parse(const char *uri) { uri = ParseScheme(uri); - uri = ParseAuthority(uri); - uri = ParsePath(uri); - uri = ParseQuery(uri); - return ParseFragment(uri); + if ( uri ) + uri = ParseAuthority(uri); + if ( uri ) + uri = ParsePath(uri); + if ( uri ) + uri = ParseQuery(uri); + if ( uri ) + uri = ParseFragment(uri); + + // we only succeed if we parsed the entire string + return uri && *uri == '\0'; } -// --------------------------------------------------------------------------- -// ParseXXX -// -// Individual parsers for each URI component -// --------------------------------------------------------------------------- - -const wxChar* wxURI::ParseScheme(const wxChar* uri) +const char* wxURI::ParseScheme(const char *uri) { - wxASSERT(uri != NULL); + const char * const start = uri; - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; - - //Does the uri have a scheme (first character alpha)? - if (IsAlpha(*uri)) + // assume that we have a scheme if we have the valid start of it + if ( IsAlpha(*uri) ) { m_scheme += *uri++; //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - while (IsAlpha(*uri) || IsDigit(*uri) || - *uri == wxT('+') || - *uri == wxT('-') || - *uri == wxT('.')) - { - m_scheme += *uri++; + while (IsAlpha(*uri) || IsDigit(*uri) || + *uri == '+' || + *uri == '-' || + *uri == '.') + { + m_scheme += *uri++; } //valid scheme? - if (*uri == wxT(':')) - { + if (*uri == ':') + { //mark the scheme as valid m_fields |= wxURI_SCHEME; //move reference point up to input buffer - uricopy = ++uri; + ++uri; + } + else // no valid scheme finally + { + uri = start; // rewind + m_scheme.clear(); } - else - //relative uri with relative path reference - m_scheme = wxT(""); } -// else - //relative uri with _possible_ relative path reference + //else: can't have schema, possible a relative URI - return uricopy; + return uri; } -const wxChar* wxURI::ParseAuthority(const wxChar* uri) +const char* wxURI::ParseAuthority(const char* uri) { // authority = [ userinfo "@" ] host [ ":" port ] - if (*uri == wxT('/') && *(uri+1) == wxT('/')) + if ( uri[0] == '/' && uri[1] == '/' ) { + //skip past the two slashes uri += 2; - uri = ParseUser(uri); - uri = ParseServer(uri); - return ParsePort(uri); + // ############# DEVIATION FROM RFC ######################### + // Don't parse the server component for file URIs + if(m_scheme != "file") + { + //normal way + uri = ParseUserInfo(uri); + uri = ParseServer(uri); + return ParsePort(uri); + } } return uri; } -const wxChar* wxURI::ParseUser(const wxChar* uri) +const char* wxURI::ParseUserInfo(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; + const char * const start = uri; // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) - while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?')) + while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' ) { - if(IsUnreserved(*uri) || IsEscape(uri) || - IsSubDelim(*uri) || *uri == wxT(':')) - m_user += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' ) + m_userinfo += *uri++; else - Escape(m_user, *uri++); + AppendNextEscaped(m_userinfo, uri); } - if(*uri == wxT('@')) + if ( *uri++ == '@' ) { - //valid userinfo - m_fields |= wxURI_USER; - - uricopy = ++uri; + // valid userinfo + m_fields |= wxURI_USERINFO; } else - m_user = wxT(""); + { + uri = start; // rewind + m_userinfo.clear(); + } - return uricopy; + return uri; } -const wxChar* wxURI::ParseServer(const wxChar* uri) +const char* wxURI::ParseServer(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; + const char * const start = uri; // host = IP-literal / IPv4address / reg-name // IP-literal = "[" ( IPv6address / IPvFuture ) "]" - if (*uri == wxT('[')) + if (*uri == '[') { - if (ParseIPv6address(++uri) && *uri == wxT(']')) + ++uri; + if (ParseIPv6address(uri) && *uri == ']') { - ++uri; m_hostType = wxURI_IPV6ADDRESS; - - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxMemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); + + m_server.assign(start + 1, uri - start - 1); + ++uri; } else { - uri = uricopy; + uri = start + 1; // skip the leading '[' again - if (ParseIPvFuture(++uri) && *uri == wxT(']')) + if (ParseIPvFuture(uri) && *uri == ']') { + m_hostType = wxURI_IPVFUTURE; + + m_server.assign(start + 1, uri - start - 1); ++uri; - m_hostType = wxURI_IPVFUTURE; - - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxMemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); } - else - uri = uricopy; + else // unrecognized IP literal + { + uri = start; + } } } - else + else // IPv4 or a reg-name { if (ParseIPv4address(uri)) { m_hostType = wxURI_IPV4ADDRESS; - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxMemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); + m_server.assign(start, uri - start); + } + else + { + uri = start; } - else - uri = uricopy; } - if(m_hostType == wxURI_REGNAME) + if ( m_hostType == wxURI_REGNAME ) { - uri = uricopy; + uri = start; // reg-name = *( unreserved / pct-encoded / sub-delims ) - while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?')) + while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' ) { - if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri)) + if ( IsUnreserved(*uri) || IsSubDelim(*uri) ) m_server += *uri++; else - Escape(m_server, *uri++); - } + AppendNextEscaped(m_server, uri); + } } - //mark the server as valid m_fields |= wxURI_SERVER; return uri; } - -const wxChar* wxURI::ParsePort(const wxChar* uri) -{ - wxASSERT(uri != NULL); +const char* wxURI::ParsePort(const char* uri) +{ // port = *DIGIT - if(*uri == wxT(':')) + if( *uri == ':' ) { ++uri; - while(IsDigit(*uri)) + while ( IsDigit(*uri) ) { m_port += *uri++; - } + } - //mark the port as valid m_fields |= wxURI_PORT; } return uri; } -const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize) +const char* wxURI::ParsePath(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; - /// hier-part = "//" authority path-abempty /// / path-absolute /// / path-rootless @@ -574,90 +536,77 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali /// ; non-zero-length segment without any colon ":" /// /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - if (*uri == wxT('/')) - { - m_path += *uri++; - while(*uri && *uri != wxT('#') && *uri != wxT('?')) - { - if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/')) - m_path += *uri++; - else - Escape(m_path, *uri++); - } + if ( IsEndPath(*uri) ) + return uri; - if (bNormalize) - { - wxStringBufferLength theBuffer(m_path, m_path.length() + 1); - Normalize(theBuffer, true); - theBuffer.SetLength(wxStrlen(theBuffer)); - } - //mark the path as valid - m_fields |= wxURI_PATH; - } - else if(*uri) //Relative path + const bool isAbs = *uri == '/'; + if ( isAbs ) + m_path += *uri++; + + wxArrayString segments; + wxString segment; + for ( ;; ) { - if (bReference) + const bool endPath = IsEndPath(*uri); + if ( endPath || *uri == '/' ) { - //no colon allowed - while(*uri && *uri != wxT('#') && *uri != wxT('?')) + // end of a segment, look at what we got + if ( segment == ".." ) { - if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == wxT('@') || *uri == wxT('/')) - m_path += *uri++; - else - Escape(m_path, *uri++); + if ( !segments.empty() && *segments.rbegin() != ".." ) + segments.pop_back(); + else if ( !isAbs ) + segments.push_back(".."); } - } - else - { - while(*uri && *uri != wxT('#') && *uri != wxT('?')) + else if ( segment == "." ) { - if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/')) - m_path += *uri++; - else - Escape(m_path, *uri++); + // normally we ignore "." but the last one should be taken into + // account as "path/." is the same as "path/" and not just "path" + if ( endPath ) + segments.push_back(""); } - } - - if (uri != uricopy) - { - if (bNormalize) + else // normal segment { - wxStringBufferLength theBuffer(m_path, m_path.length() + 1); - Normalize(theBuffer); - theBuffer.SetLength(wxStrlen(theBuffer)); + segments.push_back(segment); } - //mark the path as valid - m_fields |= wxURI_PATH; + if ( endPath ) + break; + + segment.clear(); + ++uri; + continue; } + + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' ) + segment += *uri++; + else + AppendNextEscaped(segment, uri); } + m_path += wxJoin(segments, '/', '\0'); + m_fields |= wxURI_PATH; + return uri; } -const wxChar* wxURI::ParseQuery(const wxChar* uri) +const char* wxURI::ParseQuery(const char* uri) { - wxASSERT(uri != NULL); - // query = *( pchar / "/" / "?" ) - if (*uri == wxT('?')) + if ( *uri == '?' ) { ++uri; - while(*uri && *uri != wxT('#')) + while ( *uri && *uri != '#' ) { - if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?')) - m_query += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || + *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' ) + m_query += *uri++; else - Escape(m_query, *uri++); + AppendNextEscaped(m_query, uri); } - //mark the server as valid m_fields |= wxURI_QUERY; } @@ -665,24 +614,21 @@ const wxChar* wxURI::ParseQuery(const wxChar* uri) } -const wxChar* wxURI::ParseFragment(const wxChar* uri) +const char* wxURI::ParseFragment(const char* uri) { - wxASSERT(uri != NULL); - // fragment = *( pchar / "/" / "?" ) - if (*uri == wxT('#')) + if ( *uri == '#' ) { ++uri; - while(*uri) + while ( *uri ) { - if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?')) - m_fragment += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || + *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') + m_fragment += *uri++; else - Escape(m_fragment, *uri++); + AppendNextEscaped(m_fragment, uri); } - //mark the server as valid m_fields |= wxURI_FRAGMENT; } @@ -697,22 +643,27 @@ const wxChar* wxURI::ParseFragment(const wxChar* uri) // A version of the algorithm outlined in the RFC is used here // (it is shown in comments) // -// Note that an empty URI inherits all components +// Note that an empty URI inherits all components // --------------------------------------------------------------------------- +/* static */ +wxArrayString wxURI::SplitInSegments(const wxString& path) +{ + return wxSplit(path, '/', '\0' /* no escape character */); +} + void wxURI::Resolve(const wxURI& base, int flags) { - wxASSERT_MSG(!base.IsReference(), - wxT("wxURI to inherit from must not be a reference!")); + wxASSERT_MSG(!base.IsReference(), + "wxURI to inherit from must not be a reference!"); - // If we arn't being strict, enable the older (pre-RFC2396) - // loophole that allows this uri to inherit other - // properties from the base uri - even if the scheme - // is defined + // If we aren't being strict, enable the older (pre-RFC2396) loophole that + // allows this uri to inherit other properties from the base uri - even if + // the scheme is defined if ( !(flags & wxURI_STRICT) && HasScheme() && base.HasScheme() && m_scheme == base.m_scheme ) - { + { m_fields -= wxURI_SCHEME; } @@ -724,11 +675,9 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(R.path); // T.query = R.query; if (HasScheme()) - { return; - } - //No sheme - inherit + //No scheme - inherit m_scheme = base.m_scheme; m_fields |= wxURI_SCHEME; @@ -739,27 +688,25 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(R.path); // T.query = R.query; if (HasServer()) - { return; - } //No authority - inherit - if (base.HasUser()) + if (base.HasUserInfo()) { - m_user = base.m_user; - m_fields |= wxURI_USER; + m_userinfo = base.m_userinfo; + m_fields |= wxURI_USERINFO; } - + m_server = base.m_server; m_hostType = base.m_hostType; m_fields |= wxURI_SERVER; - + if (base.HasPort()) { m_port = base.m_port; m_fields |= wxURI_PORT; } - + // Simple path inheritance from base if (!HasPath()) @@ -767,7 +714,7 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = Base.path; m_path = base.m_path; m_fields |= wxURI_PATH; - + // if defined(R.query) then // T.query = R.query; @@ -780,7 +727,7 @@ void wxURI::Resolve(const wxURI& base, int flags) m_fields |= wxURI_QUERY; } } - else + else if ( m_path.empty() || m_path[0u] != '/' ) { // if (R.path starts-with "/") then // T.path = remove_dot_segments(R.path); @@ -789,144 +736,74 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(T.path); // endif; // T.query = R.query; - if (m_path[0u] != wxT('/')) - { - //Marge paths - const wxChar* op = m_path.c_str(); - const wxChar* bp = base.m_path.c_str() + base.m_path.Length(); + // + // So we don't do anything for absolute paths and implement merge for + // the relative ones - //not a ending directory? move up - if (base.m_path[0] && *(bp-1) != wxT('/')) - UpTree(base.m_path, bp); - - //normalize directories - while(*op == wxT('.') && *(op+1) == wxT('.') && - (*(op+2) == '\0' || *(op+2) == wxT('/')) ) - { - UpTree(base.m_path, bp); - - if (*(op+2) == '\0') - op += 2; - else - op += 3; - } + wxArrayString our(SplitInSegments(m_path)), + result(SplitInSegments(base.m_path)); - m_path = base.m_path.substr(0, bp - base.m_path.c_str()) + - m_path.substr((op - m_path.c_str()), m_path.Length()); - } - } - - //T.fragment = R.fragment; -} - -// --------------------------------------------------------------------------- -// UpTree -// -// Moves a URI path up a directory -// --------------------------------------------------------------------------- + if ( !result.empty() ) + result.pop_back(); -//static -void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri) -{ - if (uri != uristart && *(uri-1) == wxT('/')) - { - uri -= 2; - } - - for(;uri != uristart; --uri) - { - if (*uri == wxT('/')) + if ( our.empty() ) { - ++uri; - break; + // if we have an empty path it means we were constructed from a "." + // string or something similar (e.g. "././././"), it should count + // as (empty) segment + our.push_back(""); } - } - //!!!TODO:HACK!!!// - if (uri == uristart && *uri == wxT('/')) - ++uri; - //!!!// -} - -// --------------------------------------------------------------------------- -// Normalize -// -// Normalizes directories in-place -// -// I.E. ./ and . are ignored -// -// ../ and .. are removed if a directory is before it, along -// with that directory (leading .. and ../ are kept) -// --------------------------------------------------------------------------- - -//static -void wxURI::Normalize(wxChar* s, bool bIgnoreLeads) -{ - wxChar* cp = s; - wxChar* bp = s; - - if(s[0] == wxT('/')) - ++bp; - - while(*cp) - { - if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0') - && (bp == cp || *(cp-1) == wxT('/'))) + const wxArrayString::const_iterator end = our.end(); + for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i ) { - //. _or_ ./ - ignore - if (*(cp+1) == '\0') - cp += 1; - else - cp += 2; - } - else if (*cp == wxT('.') && *(cp+1) == wxT('.') && - (*(cp+2) == wxT('/') || *(cp+2) == '\0') - && (bp == cp || *(cp-1) == wxT('/'))) - { - //.. _or_ ../ - go up the tree - if (s != bp) + if ( i->empty() || *i == "." ) { - UpTree((const wxChar*)bp, (const wxChar*&)s); - - if (*(cp+2) == '\0') - cp += 2; - else - cp += 3; + // as in ParsePath(), while normally we ignore the empty + // segments, we need to take account of them at the end + if ( i == end - 1 ) + result.push_back(""); + continue; } - else if (!bIgnoreLeads) + if ( *i == ".." ) { - *bp++ = *cp++; - *bp++ = *cp++; - if (*cp) - *bp++ = *cp++; - - s = bp; + if ( !result.empty() ) + { + result.pop_back(); + + if ( i == end - 1 ) + result.push_back(""); + } + //else: just ignore, extra ".." don't accumulate } else { - if (*(cp+2) == '\0') - cp += 2; - else - cp += 3; + if ( result.empty() ) + { + // ensure that the resulting path will always be absolute + result.push_back(""); + } + + result.push_back(*i); } } - else - *s++ = *cp++; + + m_path = wxJoin(result, '/', '\0'); } - *s = '\0'; + //T.fragment = R.fragment; } // --------------------------------------------------------------------------- // ParseH16 // // Parses 1 to 4 hex values. Returns true if the first character of the input -// string is a valid hex character. It is the caller's responsability to move +// string is a valid hex character. It is the caller's responsibility to move // the input string back to its original position on failure. // --------------------------------------------------------------------------- -bool wxURI::ParseH16(const wxChar*& uri) +bool wxURI::ParseH16(const char*& uri) { // h16 = 1*4HEXDIG if(!IsHex(*++uri)) @@ -941,13 +818,13 @@ bool wxURI::ParseH16(const wxChar*& uri) // --------------------------------------------------------------------------- // ParseIPXXX // -// Parses a certain version of an IP address and moves the input string past -// it. Returns true if the input string contains the proper version of an ip -// address. It is the caller's responsability to move the input string back +// Parses a certain version of an IP address and moves the input string past +// it. Returns true if the input string contains the proper version of an ip +// address. It is the caller's responsability to move the input string back // to its original position on failure. // --------------------------------------------------------------------------- -bool wxURI::ParseIPv4address(const wxChar*& uri) +bool wxURI::ParseIPv4address(const char*& uri) { //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet // @@ -961,14 +838,14 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) { ++iIPv4; - + //each ip part must be between 0-255 (dupe of version in for loop) if( IsDigit(*++uri) && IsDigit(*++uri) && //100 or less (note !) - !( (*(uri-2) < wxT('2')) || - //240 or less - (*(uri-2) == wxT('2') && - (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5'))) + !( (*(uri-2) < '2') || + //240 or less + (*(uri-2) == '2' && + (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) ) ) ) @@ -981,16 +858,16 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) //compilers should unroll this loop for(; iIPv4 < 4; ++iIPv4) { - if (*uri != wxT('.') || !IsDigit(*++uri)) + if (*uri != '.' || !IsDigit(*++uri)) break; //each ip part must be between 0-255 if( IsDigit(*++uri) && IsDigit(*++uri) && //100 or less (note !) - !( (*(uri-2) < wxT('2')) || - //240 or less - (*(uri-2) == wxT('2') && - (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5'))) + !( (*(uri-2) < '2') || + //240 or less + (*(uri-2) == '2' && + (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) ) ) ) @@ -1003,7 +880,7 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) return iIPv4 == 4; } -bool wxURI::ParseIPv6address(const wxChar*& uri) +bool wxURI::ParseIPv6address(const char*& uri) { // IPv6address = 6( h16 ":" ) ls32 // / "::" 5( h16 ":" ) ls32 @@ -1028,8 +905,8 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) bEndHex = true; break; } - - if(*uri != wxT(':')) + + if(*uri != ':') { break; } @@ -1042,9 +919,9 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) if (numPrefix) return false; - if (*uri == wxT(':')) + if (*uri == ':') { - if (*++uri != wxT(':')) + if (*++uri != ':') return false; maxPostfix = 5; @@ -1054,21 +931,21 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) } else { - if (*uri != wxT(':') || *(uri+1) != wxT(':')) + if (*uri != ':' || *(uri+1) != ':') { if (numPrefix != 6) return false; - while (*--uri != wxT(':')) {} + while (*--uri != ':') {} ++uri; - const wxChar* uristart = uri; + const char * const start = uri; //parse ls32 // ls32 = ( h16 ":" h16 ) / IPv4address - if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri)) + if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) return true; - uri = uristart; + uri = start; if (ParseIPv4address(uri)) return true; @@ -1078,7 +955,7 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) else { uri += 2; - + if (numPrefix > 3) maxPostfix = 0; else @@ -1090,25 +967,25 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) for(; maxPostfix != 0; --maxPostfix) { - if(!ParseH16(uri) || *uri != wxT(':')) + if(!ParseH16(uri) || *uri != ':') return false; } if(numPrefix <= 4) { - const wxChar* uristart = uri; + const char * const start = uri; //parse ls32 // ls32 = ( h16 ":" h16 ) / IPv4address - if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri)) + if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) return true; - uri = uristart; + uri = start; if (ParseIPv4address(uri)) return true; - uri = uristart; - + uri = start; + if (!bAllowAltEnding) return false; } @@ -1119,171 +996,96 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) return true; } -bool wxURI::ParseIPvFuture(const wxChar*& uri) +bool wxURI::ParseIPvFuture(const char*& uri) { // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) - if (*++uri != wxT('v') || !IsHex(*++uri)) + if (*++uri != 'v' || !IsHex(*++uri)) return false; - while (IsHex(*++uri)) {} + while (IsHex(*++uri)) + ; - if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':'))) + if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':')) return false; - while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {} + while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {} return true; } -// --------------------------------------------------------------------------- -// CharToHex -// -// Converts a character into a numeric hexidecimal value, or 0 if the -// passed in character is not a valid hex character -// --------------------------------------------------------------------------- - -//static -wxInt32 wxURI::CharToHex(const wxChar& c) -{ - if ((c >= wxT('A')) && (c <= wxT('Z'))) return c - wxT('A') + 0x0A; - if ((c >= wxT('a')) && (c <= wxT('z'))) return c - wxT('a') + 0x0a; - if ((c >= wxT('0')) && (c <= wxT('9'))) return c - wxT('0') + 0x00; - - return 0; -} - // --------------------------------------------------------------------------- // IsXXX // // Returns true if the passed in character meets the criteria of the method // --------------------------------------------------------------------------- -//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -bool wxURI::IsUnreserved (const wxChar& c) -{ return IsAlpha(c) || IsDigit(c) || - c == wxT('-') || - c == wxT('.') || - c == wxT('_') || - c == wxT('~') //tilde - ; +// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +bool wxURI::IsUnreserved(char c) +{ + return IsAlpha(c) || + IsDigit(c) || + c == '-' || + c == '.' || + c == '_' || + c == '~' + ; } -bool wxURI::IsReserved (const wxChar& c) -{ +bool wxURI::IsReserved(char c) +{ return IsGenDelim(c) || IsSubDelim(c); } -//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" -bool wxURI::IsGenDelim (const wxChar& c) +// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +bool wxURI::IsGenDelim(char c) { - return c == wxT(':') || - c == wxT('/') || - c == wxT('?') || - c == wxT('#') || - c == wxT('[') || - c == wxT(']') || - c == wxT('@'); + return c == ':' || + c == '/' || + c == '?' || + c == '#' || + c == '[' || + c == ']' || + c == '@'; } -//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -//! / "*" / "+" / "," / ";" / "=" -bool wxURI::IsSubDelim (const wxChar& c) +// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +// / "*" / "+" / "," / ";" / "=" +bool wxURI::IsSubDelim(char c) { - return c == wxT('!') || - c == wxT('$') || - c == wxT('&') || - c == wxT('\'') || - c == wxT('(') || - c == wxT(')') || - c == wxT('*') || - c == wxT('+') || - c == wxT(',') || - c == wxT(';') || - c == wxT('=') + return c == '!' || + c == '$' || + c == '&' || + c == '\'' || + c == '(' || + c == ')' || + c == '*' || + c == '+' || + c == ',' || + c == ';' || + c == '=' ; } -bool wxURI::IsHex(const wxChar& c) -{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); } - -bool wxURI::IsAlpha(const wxChar& c) -{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); } - -bool wxURI::IsDigit(const wxChar& c) -{ return c >= wxT('0') && c <= wxT('9'); } - - -// --------------------------------------------------------------------------- -// -// wxURL Compatability -// -// --------------------------------------------------------------------------- - -#if wxUSE_URL - -#if WXWIN_COMPATIBILITY_2_4 - -#include "wx/url.h" - -//Note that this old code really doesn't convert to a URI that well and looks -//more like a dirty hack than anything else... - -wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims) +bool wxURI::IsHex(char c) { - wxString out_str; - wxString hexa_code; - size_t i; - - for (i = 0; i < uri.Len(); i++) - { - wxChar c = uri.GetChar(i); - - if (c == wxT(' ')) - { - // GRG, Apr/2000: changed to "%20" instead of '+' - - out_str += wxT("%20"); - } - else - { - // GRG, Apr/2000: modified according to the URI definition (RFC 2396) - // - // - Alphanumeric characters are never escaped - // - Unreserved marks are never escaped - // - Delimiters must be escaped if they appear within a component - // but not if they are used to separate components. Here we have - // no clear way to distinguish between these two cases, so they - // are escaped unless they are passed in the 'delims' parameter - // (allowed delimiters). - - static const wxChar marks[] = wxT("-_.!~*()'"); - - if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) ) - { - hexa_code.Printf(wxT("%%%02X"), c); - out_str += hexa_code; - } - else - { - out_str += c; - } - } - } - - return out_str; + return IsDigit(c) || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F'); } -wxString wxURL::ConvertFromURI(const wxString& uri) +bool wxURI::IsAlpha(char c) { - return wxURI::Unescape(uri); + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } -#endif //WXWIN_COMPATIBILITY_2_4 - -#endif //wxUSE_URL - -//end of uri.cpp - +bool wxURI::IsDigit(char c) +{ + return c >= '0' && c <= '9'; +} +bool wxURI::IsEndPath(char c) +{ + return c == '\0' || c == '#' || c == '?'; +}