X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/8404931e0d899429c99c0081b18703e8609eb94a..87bf626584b14a57f9da0eee0d43a60ab6661301:/src/common/uri.cpp diff --git a/src/common/uri.cpp b/src/common/uri.cpp index e37c99ccd0..aac7aec3d1 100644 --- a/src/common/uri.cpp +++ b/src/common/uri.cpp @@ -1,11 +1,13 @@ ///////////////////////////////////////////////////////////////////////////// // Name: uri.cpp -// Purpose: Implementation of a uri parser -// Author: Ryan Norton +// Purpose: Implementation of a URI parser +// Author: Ryan Norton, +// Vadim Zeitlin (UTF-8 URI support, many other changes) // Created: 10/26/04 // RCS-ID: $Id$ -// Copyright: (c) 2004 Ryan Norton -// Licence: wxWindows +// Copyright: (c) 2004 Ryan Norton, +// 2008 Vadim Zeitlin +// Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// // =========================================================================== @@ -16,10 +18,6 @@ // headers // --------------------------------------------------------------------------- -#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA) - #pragma implementation "uri.h" -#endif - // For compilers that support precompilation, includes "wx.h". #include "wx/wxprec.h" @@ -27,59 +25,56 @@ #pragma hdrstop #endif +#ifndef WX_PRECOMP + #include "wx/crt.h" +#endif + #include "wx/uri.h" // --------------------------------------------------------------------------- // definitions // --------------------------------------------------------------------------- -IMPLEMENT_CLASS(wxURI, wxObject); +IMPLEMENT_CLASS(wxURI, wxObject) // =========================================================================== -// implementation +// wxURI implementation // =========================================================================== // --------------------------------------------------------------------------- -// utilities -// --------------------------------------------------------------------------- - -// --------------------------------------------------------------------------- -// -// wxURI -// +// Constructors and cleanup // --------------------------------------------------------------------------- -// --------------------------------------------------------------------------- -// Constructors -// --------------------------------------------------------------------------- - -wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0) +wxURI::wxURI() + : m_hostType(wxURI_REGNAME), + m_fields(0) { } - -wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0) + +wxURI::wxURI(const wxString& uri) + : m_hostType(wxURI_REGNAME), + m_fields(0) { Create(uri); } -wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0) +bool wxURI::Create(const wxString& uri) { - *this = uri; -} - -// --------------------------------------------------------------------------- -// Destructor and cleanup -// --------------------------------------------------------------------------- + if (m_fields) + Clear(); -wxURI::~wxURI() -{ - Clear(); + return Parse(uri.utf8_str()); } void wxURI::Clear() { - m_scheme = m_user = m_server = m_port = m_path = - m_query = m_fragment = wxT(""); + m_scheme = + m_userinfo = + m_server = + m_port = + m_path = + m_query = + m_fragment = wxEmptyString; m_hostType = wxURI_REGNAME; @@ -87,184 +82,165 @@ void wxURI::Clear() } // --------------------------------------------------------------------------- -// Create -// -// This creates the URI - all we do here is call the main parsing method +// Escaped characters handling // --------------------------------------------------------------------------- -void wxURI::Create(const wxString& uri) -{ - if (m_fields) - Clear(); - - Parse(uri); -} - -// --------------------------------------------------------------------------- -// Escape/Unescape/IsEscape -// -// Unescape unencodes a 3 character URL escape sequence -// Escape encodes an invalid URI character into a 3 character sequence -// IsEscape determines if the input string contains an escape sequence, -// if it does, then it moves the input string past the escape sequence -// --------------------------------------------------------------------------- +// Converts a character into a numeric hexadecimal value, or -1 if the passed +// in character is not a valid hex character -wxChar wxURI::Unescape(const wxChar* s) +/* static */ +int wxURI::CharToHex(char c) { - wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!")); - - return CharToHex(*s) * 0x10 + CharToHex(*++s); + if ((c >= 'A') && (c <= 'Z')) + return c - 'A' + 10; + if ((c >= 'a') && (c <= 'z')) + return c - 'a' + 10; + if ((c >= '0') && (c <= '9')) + return c - '0'; + + return -1; } -void wxURI::Escape(wxString& s, const wxChar& c) +int wxURI::DecodeEscape(wxString::const_iterator& i) { - const wxChar* hdig = wxT("0123456789abcdef"); - s += '%'; - s += hdig[(c >> 4) & 15]; - s += hdig[c & 15]; + int hi = CharToHex(*++i); + if ( hi == -1 ) + return -1; + + int lo = CharToHex(*++i); + if ( lo == -1 ) + return -1; + + return (hi << 4) | lo; } -bool wxURI::IsEscape(const wxChar*& uri) +/* static */ +wxString wxURI::Unescape(const wxString& uri) { - if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2))) + // the unescaped version can't be longer than the original one + wxCharBuffer buf(uri.length()); + char *p = buf.data(); + + for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p ) { - uri += 3; - return true; - } - else - return false; -} + char c = *i; + if ( c == '%' ) + { + int n = wxURI::DecodeEscape(i); + if ( n == -1 ) + return wxString(); -// --------------------------------------------------------------------------- -// HasXXX -// --------------------------------------------------------------------------- + wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" ); + + c = static_cast(n); + } -bool wxURI::HasScheme() const -{ return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; } + *p = c; + } -bool wxURI::HasUser() const -{ return (m_fields & wxURI_USER) == wxURI_USER; } + *p = '\0'; -bool wxURI::HasServer() const -{ return (m_fields & wxURI_SERVER) == wxURI_SERVER; } + // by default assume that the URI is in UTF-8, this is the most common + // practice + wxString s = wxString::FromUTF8(buf); + if ( s.empty() ) + { + // if it isn't, use latin-1 as a fallback -- at least this always + // succeeds + s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf); + } -bool wxURI::HasPort() const -{ return (m_fields & wxURI_PORT) == wxURI_PORT; } + return s; +} -bool wxURI::HasPath() const -{ return (m_fields & wxURI_PATH) == wxURI_PATH; } +void wxURI::AppendNextEscaped(wxString& s, const char *& p) +{ + // check for an already encoded character: + // + // pct-encoded = "%" HEXDIG HEXDIG + if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) ) + { + s += *p++; + s += *p++; + s += *p++; + } + else // really needs escaping + { + static const char* hexDigits = "0123456789abcdef"; -bool wxURI::HasQuery() const -{ return (m_fields & wxURI_QUERY) == wxURI_QUERY; } + const char c = *p++; -bool wxURI::HasFragment() const -{ return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; } + s += '%'; + s += hexDigits[(c >> 4) & 15]; + s += hexDigits[c & 15]; + } +} // --------------------------------------------------------------------------- -// GetXXX +// GetUser +// GetPassword // -// The normal Get() actually builds the entire URI into a useable -// representation, including proper identification characters such as slashes +// Gets the username and password via the old URL method. // --------------------------------------------------------------------------- +wxString wxURI::GetUser() const +{ + // if there is no colon at all, find() returns npos and this method returns + // the entire string which is correct as it means that password was omitted + return m_userinfo(0, m_userinfo.find(':')); +} -const wxString& wxURI::GetScheme() const -{ return m_scheme; } - -const wxString& wxURI::GetPath() const -{ return m_path; } - -const wxString& wxURI::GetQuery() const -{ return m_query; } - -const wxString& wxURI::GetFragment() const -{ return m_fragment; } - -const wxString& wxURI::GetPort() const -{ return m_port; } - -const wxString& wxURI::GetUser() const -{ return m_user; } +wxString wxURI::GetPassword() const +{ + size_t posColon = m_userinfo.find(':'); -const wxString& wxURI::GetServer() const -{ return m_server; } + if ( posColon == wxString::npos ) + return ""; -const wxURIHostType& wxURI::GetHostType() const -{ return m_hostType; } + return m_userinfo(posColon + 1, wxString::npos); +} -wxString wxURI::Get() const -{ +// combine all URI fields in a single string, applying funcDecode to each +// component which it may make sense to decode (i.e. "unescape") +wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const +{ wxString ret; if (HasScheme()) - ret = ret + m_scheme + wxT(":"); + ret += m_scheme + ":"; if (HasServer()) { - ret += wxT("//"); + ret += "//"; - if (HasUser()) - ret = ret + m_user + wxT("@"); + if (HasUserInfo()) + ret += funcDecode(m_userinfo) + "@"; - ret += m_server; + if (m_hostType == wxURI_REGNAME) + ret += funcDecode(m_server); + else + ret += m_server; if (HasPort()) - ret = ret + wxT(":") + m_port; + ret += ":" + m_port; } - ret += m_path; + ret += funcDecode(m_path); if (HasQuery()) - ret = ret + wxT("?") + m_query; + ret += "?" + funcDecode(m_query); if (HasFragment()) - ret = ret + wxT("#") + m_fragment; + ret += "#" + funcDecode(m_fragment); return ret; } // --------------------------------------------------------------------------- -// operator = and == +// Comparison // --------------------------------------------------------------------------- -wxURI& wxURI::operator = (const wxURI& uri) +bool wxURI::operator==(const wxURI& uri) const { - if (HasScheme()) - m_scheme = uri.m_scheme; - - - if (HasServer()) - { - if (HasUser()) - m_user = uri.m_user; - - m_server = uri.m_server; - m_hostType = uri.m_hostType; - - if (HasPort()) - m_port = uri.m_port; - } - - - if (HasPath()) - m_path = uri.m_path; - - if (HasQuery()) - m_query = uri.m_query; - - if (HasFragment()) - m_fragment = uri.m_fragment; - - return *this; -} - -wxURI& wxURI::operator = (const wxChar* string) -{ - Create(string); - return *this; -} - -bool wxURI::operator == (const wxURI& uri) const -{ if (HasScheme()) { if(m_scheme != uri.m_scheme) @@ -276,12 +252,12 @@ bool wxURI::operator == (const wxURI& uri) const if (HasServer()) { - if (HasUser()) + if (HasUserInfo()) { - if (m_user != uri.m_user) + if (m_userinfo != uri.m_userinfo) return false; } - else if (uri.HasUser()) + else if (uri.HasUserInfo()) return false; if (m_server != uri.m_server || @@ -334,7 +310,9 @@ bool wxURI::operator == (const wxURI& uri) const // --------------------------------------------------------------------------- bool wxURI::IsReference() const -{ return !HasScheme() || !HasServer(); } +{ + return !HasScheme() || !HasServer(); +} // --------------------------------------------------------------------------- // Parse @@ -342,212 +320,197 @@ bool wxURI::IsReference() const // Master URI parsing method. Just calls the individual parsing methods // // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] -// URI-reference = URI / relative-URITestCase +// URI-reference = URI / relative // --------------------------------------------------------------------------- -const wxChar* wxURI::Parse(const wxChar* uri) +bool wxURI::Parse(const char *uri) { uri = ParseScheme(uri); - uri = ParseAuthority(uri); - uri = ParsePath(uri); - uri = ParseQuery(uri); - return ParseFragment(uri); + if ( uri ) + uri = ParseAuthority(uri); + if ( uri ) + uri = ParsePath(uri); + if ( uri ) + uri = ParseQuery(uri); + if ( uri ) + uri = ParseFragment(uri); + + // we only succeed if we parsed the entire string + return uri && *uri == '\0'; } -// --------------------------------------------------------------------------- -// ParseXXX -// -// Individual parsers for each URI component -// --------------------------------------------------------------------------- - -const wxChar* wxURI::ParseScheme(const wxChar* uri) +const char* wxURI::ParseScheme(const char *uri) { - wxASSERT(uri != NULL); + const char * const start = uri; - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; - - //Does the uri have a scheme (first character alpha)? - if (IsAlpha(*uri)) + // assume that we have a scheme if we have the valid start of it + if ( IsAlpha(*uri) ) { m_scheme += *uri++; //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - //RN: Scheme can not be escaped - while (IsAlpha(*uri) || IsDigit(*uri) || + while (IsAlpha(*uri) || IsDigit(*uri) || *uri == '+' || *uri == '-' || - *uri == '.') - { - m_scheme += *uri++; + *uri == '.') + { + m_scheme += *uri++; } //valid scheme? if (*uri == ':') - { + { //mark the scheme as valid m_fields |= wxURI_SCHEME; //move reference point up to input buffer - uricopy = ++uri; + ++uri; + } + else // no valid scheme finally + { + uri = start; // rewind + m_scheme.clear(); } - else - //relative uri with relative path reference - m_scheme = wxT(""); } -// else - //relative uri with _possible_ relative path reference + //else: can't have schema, possible a relative URI - return uricopy; + return uri; } -const wxChar* wxURI::ParseAuthority(const wxChar* uri) +const char* wxURI::ParseAuthority(const char* uri) { // authority = [ userinfo "@" ] host [ ":" port ] - if (*uri == '/' && *(uri+1) == '/') + if ( uri[0] == '/' && uri[1] == '/' ) { + //skip past the two slashes uri += 2; - uri = ParseUser(uri); - uri = ParseServer(uri); - return ParsePort(uri); + // ############# DEVIATION FROM RFC ######################### + // Don't parse the server component for file URIs + if(m_scheme != "file") + { + //normal way + uri = ParseUserInfo(uri); + uri = ParseServer(uri); + return ParsePort(uri); + } } return uri; } -const wxChar* wxURI::ParseUser(const wxChar* uri) +const char* wxURI::ParseUserInfo(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; + const char * const start = uri; // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) - while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?') + while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' ) { - if(IsUnreserved(*uri) || IsEscape(uri) || - IsSubDelim(*uri) || *uri == ':') - m_user += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' ) + m_userinfo += *uri++; else - Escape(m_user, *uri++); + AppendNextEscaped(m_userinfo, uri); } - if(*uri == '@') + if ( *uri++ == '@' ) { - //valid userinfo - m_fields |= wxURI_USER; - - uricopy = ++uri; + // valid userinfo + m_fields |= wxURI_USERINFO; } else - m_user = wxT(""); + { + uri = start; // rewind + m_userinfo.clear(); + } - return uricopy; + return uri; } -const wxChar* wxURI::ParseServer(const wxChar* uri) +const char* wxURI::ParseServer(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; + const char * const start = uri; // host = IP-literal / IPv4address / reg-name // IP-literal = "[" ( IPv6address / IPvFuture ) "]" if (*uri == '[') { - if (ParseIPv6address(++uri) && *uri == ']') + ++uri; + if (ParseIPv6address(uri) && *uri == ']') { - ++uri; m_hostType = wxURI_IPV6ADDRESS; - - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxMemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); + + m_server.assign(start + 1, uri - start - 1); + ++uri; } else { - uri = uricopy; + uri = start + 1; // skip the leading '[' again - if (ParseIPvFuture(++uri) && *uri == ']') + if (ParseIPvFuture(uri) && *uri == ']') { + m_hostType = wxURI_IPVFUTURE; + + m_server.assign(start + 1, uri - start - 1); ++uri; - m_hostType = wxURI_IPVFUTURE; - - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxMemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); } - else - uri = uricopy; + else // unrecognized IP literal + { + uri = start; + } } } - else + else // IPv4 or a reg-name { if (ParseIPv4address(uri)) { m_hostType = wxURI_IPV4ADDRESS; - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxMemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); + m_server.assign(start, uri - start); + } + else + { + uri = start; } - else - uri = uricopy; } - if(m_hostType == wxURI_REGNAME) + if ( m_hostType == wxURI_REGNAME ) { - uri = uricopy; + uri = start; // reg-name = *( unreserved / pct-encoded / sub-delims ) - while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?') + while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' ) { - if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri)) + if ( IsUnreserved(*uri) || IsSubDelim(*uri) ) m_server += *uri++; else - Escape(m_server, *uri++); - } + AppendNextEscaped(m_server, uri); + } } - //mark the server as valid m_fields |= wxURI_SERVER; return uri; } - -const wxChar* wxURI::ParsePort(const wxChar* uri) -{ - wxASSERT(uri != NULL); +const char* wxURI::ParsePort(const char* uri) +{ // port = *DIGIT - if(*uri == ':') + if( *uri == ':' ) { ++uri; - while(IsDigit(*uri)) + while ( IsDigit(*uri) ) { m_port += *uri++; - } + } - //mark the port as valid m_fields |= wxURI_PORT; } return uri; } -const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize) +const char* wxURI::ParsePath(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; - /// hier-part = "//" authority path-abempty /// / path-absolute /// / path-rootless @@ -570,90 +533,77 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali /// ; non-zero-length segment without any colon ":" /// /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - if (*uri == '/') - { - m_path += *uri++; - while(*uri && *uri != '#' && *uri != '?') - { - if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == ':' || *uri == '@' || *uri == '/') - m_path += *uri++; - else - Escape(m_path, *uri++); - } + if ( IsEndPath(*uri) ) + return uri; - if (bNormalize) - { - wxStringBufferLength theBuffer(m_path, m_path.length() + 1); - Normalize(theBuffer, true); - theBuffer.SetLength(wxStrlen(theBuffer)); - } - //mark the path as valid - m_fields |= wxURI_PATH; - } - else if(*uri) //Relative path + const bool isAbs = *uri == '/'; + if ( isAbs ) + m_path += *uri++; + + wxArrayString segments; + wxString segment; + for ( ;; ) { - if (bReference) + const bool endPath = IsEndPath(*uri); + if ( endPath || *uri == '/' ) { - //no colon allowed - while(*uri && *uri != '#' && *uri != '?') + // end of a segment, look at what we got + if ( segment == ".." ) { - if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == '@' || *uri == '/') - m_path += *uri++; - else - Escape(m_path, *uri++); + if ( !segments.empty() && *segments.rbegin() != ".." ) + segments.pop_back(); + else if ( !isAbs ) + segments.push_back(".."); } - } - else - { - while(*uri && *uri != '#' && *uri != '?') + else if ( segment == "." ) { - if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == ':' || *uri == '@' || *uri == '/') - m_path += *uri++; - else - Escape(m_path, *uri++); + // normally we ignore "." but the last one should be taken into + // account as "path/." is the same as "path/" and not just "path" + if ( endPath ) + segments.push_back(""); } - } - - if (uri != uricopy) - { - if (bNormalize) + else // normal segment { - wxStringBufferLength theBuffer(m_path, m_path.length() + 1); - Normalize(theBuffer); - theBuffer.SetLength(wxStrlen(theBuffer)); + segments.push_back(segment); } - //mark the path as valid - m_fields |= wxURI_PATH; + if ( endPath ) + break; + + segment.clear(); + ++uri; + continue; } + + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' ) + segment += *uri++; + else + AppendNextEscaped(segment, uri); } + m_path += wxJoin(segments, '/', '\0'); + m_fields |= wxURI_PATH; + return uri; } -const wxChar* wxURI::ParseQuery(const wxChar* uri) +const char* wxURI::ParseQuery(const char* uri) { - wxASSERT(uri != NULL); - // query = *( pchar / "/" / "?" ) - if (*uri == '?') + if ( *uri == '?' ) { ++uri; - while(*uri && *uri != '#') + while ( *uri && *uri != '#' ) { - if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') - m_query += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || + *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' ) + m_query += *uri++; else - Escape(m_query, *uri++); + AppendNextEscaped(m_query, uri); } - //mark the server as valid m_fields |= wxURI_QUERY; } @@ -661,24 +611,21 @@ const wxChar* wxURI::ParseQuery(const wxChar* uri) } -const wxChar* wxURI::ParseFragment(const wxChar* uri) +const char* wxURI::ParseFragment(const char* uri) { - wxASSERT(uri != NULL); - // fragment = *( pchar / "/" / "?" ) - if (*uri == '#') + if ( *uri == '#' ) { ++uri; - while(*uri) + while ( *uri ) { - if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) || - *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') - m_fragment += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || + *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') + m_fragment += *uri++; else - Escape(m_fragment, *uri++); + AppendNextEscaped(m_fragment, uri); } - //mark the server as valid m_fields |= wxURI_FRAGMENT; } @@ -686,27 +633,34 @@ const wxChar* wxURI::ParseFragment(const wxChar* uri) } // --------------------------------------------------------------------------- -// Resolve URI +// Resolve // -// Builds missing components of this uri from a base uri +// Builds missing components of this uri from a base uri // -// A version of the algorithm outlined in the RFC is used here -// (it is shown in comments) +// A version of the algorithm outlined in the RFC is used here +// (it is shown in comments) +// +// Note that an empty URI inherits all components // --------------------------------------------------------------------------- +/* static */ +wxArrayString wxURI::SplitInSegments(const wxString& path) +{ + return wxSplit(path, '/', '\0' /* no escape character */); +} + void wxURI::Resolve(const wxURI& base, int flags) { - wxASSERT_MSG(!base.IsReference(), - wxT("wxURI to inherit from must not be a reference!")); + wxASSERT_MSG(!base.IsReference(), + "wxURI to inherit from must not be a reference!"); - // If we arn't being strict, enable the older - // loophole that allows this uri to inherit other - // properties from the base uri - even if the scheme - // is defined + // If we aren't being strict, enable the older (pre-RFC2396) loophole that + // allows this uri to inherit other properties from the base uri - even if + // the scheme is defined if ( !(flags & wxURI_STRICT) && HasScheme() && base.HasScheme() && m_scheme == base.m_scheme ) - { + { m_fields -= wxURI_SCHEME; } @@ -718,11 +672,9 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(R.path); // T.query = R.query; if (HasScheme()) - { return; - } - //No sheme - inherit + //No scheme - inherit m_scheme = base.m_scheme; m_fields |= wxURI_SCHEME; @@ -733,27 +685,25 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(R.path); // T.query = R.query; if (HasServer()) - { return; - } //No authority - inherit - if (base.HasUser()) + if (base.HasUserInfo()) { - m_user = base.m_user; - m_fields |= wxURI_USER; + m_userinfo = base.m_userinfo; + m_fields |= wxURI_USERINFO; } - + m_server = base.m_server; m_hostType = base.m_hostType; m_fields |= wxURI_SERVER; - + if (base.HasPort()) { m_port = base.m_port; m_fields |= wxURI_PORT; } - + // Simple path inheritance from base if (!HasPath()) @@ -761,7 +711,7 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = Base.path; m_path = base.m_path; m_fields |= wxURI_PATH; - + // if defined(R.query) then // T.query = R.query; @@ -774,7 +724,7 @@ void wxURI::Resolve(const wxURI& base, int flags) m_fields |= wxURI_QUERY; } } - else + else if ( m_path.empty() || m_path[0u] != '/' ) { // if (R.path starts-with "/") then // T.path = remove_dot_segments(R.path); @@ -783,128 +733,95 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(T.path); // endif; // T.query = R.query; - if (m_path[(const size_t&)0] != '/') - { - //Marge paths - const wxChar* op = m_path.c_str(); - const wxChar* bp = base.m_path.c_str() + base.m_path.Length(); - - //not a ending directory? move up - if (base.m_path[0] && *(bp-1) != '/') - UpTree(base.m_path, bp); - - //normalize directories - while(*op == '.' && *(op+1) == '.' && - (*(op+2) == '\0' || *(op+2) == '/') ) - { - UpTree(base.m_path, bp); + // + // So we don't do anything for absolute paths and implement merge for + // the relative ones - if (*(op+2) == '\0') - op += 2; - else - op += 3; - } - - m_path = base.m_path.substr(0, bp - base.m_path.c_str()) + - m_path.Mid((op - m_path.c_str()), m_path.Length()); - } - } -} + wxArrayString our(SplitInSegments(m_path)), + result(SplitInSegments(base.m_path)); -// --------------------------------------------------------------------------- -// Directory Normalization (static) -// -// UpTree goes up a directory in a string and moves the pointer as such, -// while Normalize gets rid of duplicate/erronues directories in a URI -// according to RFC 2396 and modified quite a bit to meet the unit tests -// in it. -// --------------------------------------------------------------------------- + if ( !result.empty() ) + result.pop_back(); -void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri) -{ - if (uri != uristart && *(uri-1) == '/') - { - uri -= 2; - } - - for(;uri != uristart; --uri) - { - if (*uri == '/') + if ( our.empty() ) { - ++uri; - break; + // if we have an empty path it means we were constructed from a "." + // string or something similar (e.g. "././././"), it should count + // as (empty) segment + our.push_back(""); } - } - - //!!!TODO:HACK!!!// - if (uri == uristart && *uri == '/') - ++uri; - //!!!// -} - -void wxURI::Normalize(wxChar* s, bool bIgnoreLeads) -{ - wxChar* cp = s; - wxChar* bp = s; - if(s[0] == '/') - ++bp; - - while(*cp) - { - if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0') - && (bp == cp || *(cp-1) == '/')) - { - //. _or_ ./ - ignore - if (*(cp+1) == '\0') - cp += 1; - else - cp += 2; - } - else if (*cp == '.' && *(cp+1) == '.' && - (*(cp+2) == '/' || *(cp+2) == '\0') - && (bp == cp || *(cp-1) == '/')) + const wxArrayString::const_iterator end = our.end(); + for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i ) { - //.. _or_ ../ - go up the tree - if (s != bp) + if ( i->empty() || *i == "." ) { - UpTree((const wxChar*)bp, (const wxChar*&)s); - - if (*(cp+2) == '\0') - cp += 2; - else - cp += 3; + // as in ParsePath(), while normally we ignore the empty + // segments, we need to take account of them at the end + if ( i == end - 1 ) + result.push_back(""); + continue; } - else if (!bIgnoreLeads) + if ( *i == ".." ) { - *bp++ = *cp++; - *bp++ = *cp++; - if (*cp) - *bp++ = *cp++; - - s = bp; + if ( !result.empty() ) + { + result.pop_back(); + + if ( i == end - 1 ) + result.push_back(""); + } + //else: just ignore, extra ".." don't accumulate } else { - if (*(cp+2) == '\0') - cp += 2; - else - cp += 3; + if ( result.empty() ) + { + // ensure that the resulting path will always be absolute + result.push_back(""); + } + + result.push_back(*i); } } - else - *s++ = *cp++; + + m_path = wxJoin(result, '/', '\0'); } - *s = '\0'; + //T.fragment = R.fragment; +} + +// --------------------------------------------------------------------------- +// ParseH16 +// +// Parses 1 to 4 hex values. Returns true if the first character of the input +// string is a valid hex character. It is the caller's responsibility to move +// the input string back to its original position on failure. +// --------------------------------------------------------------------------- + +bool wxURI::ParseH16(const char*& uri) +{ + // h16 = 1*4HEXDIG + if(!IsHex(*++uri)) + return false; + + if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri)) + ++uri; + + return true; } // --------------------------------------------------------------------------- -// Misc. Parsing Methods +// ParseIPXXX +// +// Parses a certain version of an IP address and moves the input string past +// it. Returns true if the input string contains the proper version of an ip +// address. It is the caller's responsability to move the input string back +// to its original position on failure. // --------------------------------------------------------------------------- -bool wxURI::ParseIPv4address(const wxChar*& uri) +bool wxURI::ParseIPv4address(const char*& uri) { //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet // @@ -918,13 +835,13 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) { ++iIPv4; - + //each ip part must be between 0-255 (dupe of version in for loop) if( IsDigit(*++uri) && IsDigit(*++uri) && //100 or less (note !) - !( (*(uri-2) < '2') || - //240 or less - (*(uri-2) == '2' && + !( (*(uri-2) < '2') || + //240 or less + (*(uri-2) == '2' && (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) ) ) @@ -944,9 +861,9 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) //each ip part must be between 0-255 if( IsDigit(*++uri) && IsDigit(*++uri) && //100 or less (note !) - !( (*(uri-2) < '2') || - //240 or less - (*(uri-2) == '2' && + !( (*(uri-2) < '2') || + //240 or less + (*(uri-2) == '2' && (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) ) ) @@ -960,19 +877,7 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) return iIPv4 == 4; } -bool wxURI::ParseH16(const wxChar*& uri) -{ - // h16 = 1*4HEXDIG - if(!IsHex(*++uri)) - return false; - - if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri)) - ++uri; - - return true; -} - -bool wxURI::ParseIPv6address(const wxChar*& uri) +bool wxURI::ParseIPv6address(const char*& uri) { // IPv6address = 6( h16 ":" ) ls32 // / "::" 5( h16 ":" ) ls32 @@ -997,7 +902,7 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) bEndHex = true; break; } - + if(*uri != ':') { break; @@ -1031,13 +936,13 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) while (*--uri != ':') {} ++uri; - const wxChar* uristart = uri; + const char * const start = uri; //parse ls32 // ls32 = ( h16 ":" h16 ) / IPv4address - if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) + if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) return true; - uri = uristart; + uri = start; if (ParseIPv4address(uri)) return true; @@ -1047,7 +952,7 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) else { uri += 2; - + if (numPrefix > 3) maxPostfix = 0; else @@ -1065,19 +970,19 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) if(numPrefix <= 4) { - const wxChar* uristart = uri; + const char * const start = uri; //parse ls32 // ls32 = ( h16 ":" h16 ) / IPv4address - if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) + if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) return true; - uri = uristart; + uri = start; if (ParseIPv4address(uri)) return true; - uri = uristart; - + uri = start; + if (!bAllowAltEnding) return false; } @@ -1088,13 +993,14 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) return true; } -bool wxURI::ParseIPvFuture(const wxChar*& uri) +bool wxURI::ParseIPvFuture(const char*& uri) { // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) if (*++uri != 'v' || !IsHex(*++uri)) return false; - while (IsHex(*++uri)) {} + while (IsHex(*++uri)) + ; if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':')) return false; @@ -1106,35 +1012,30 @@ bool wxURI::ParseIPvFuture(const wxChar*& uri) // --------------------------------------------------------------------------- -// Misc methods - IsXXX and CharToHex +// IsXXX +// +// Returns true if the passed in character meets the criteria of the method // --------------------------------------------------------------------------- -int wxURI::CharToHex(const wxChar& c) +// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +bool wxURI::IsUnreserved(char c) { - if ((c >= 'A') && (c <= 'Z')) return c - 'A' + 0x0A; - if ((c >= 'a') && (c <= 'z')) return c - 'a' + 0x0a; - if ((c >= '0') && (c <= '9')) return c - '0' + 0x00; - - return 0; -} - -//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -bool wxURI::IsUnreserved (const wxChar& c) -{ return IsAlpha(c) || IsDigit(c) || + return IsAlpha(c) || + IsDigit(c) || c == '-' || c == '.' || c == '_' || - c == '~' //tilde - ; + c == '~' + ; } -bool wxURI::IsReserved (const wxChar& c) -{ +bool wxURI::IsReserved(char c) +{ return IsGenDelim(c) || IsSubDelim(c); } -//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" -bool wxURI::IsGenDelim (const wxChar& c) +// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +bool wxURI::IsGenDelim(char c) { return c == ':' || c == '/' || @@ -1145,9 +1046,9 @@ bool wxURI::IsGenDelim (const wxChar& c) c == '@'; } -//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -//! / "*" / "+" / "," / ";" / "=" -bool wxURI::IsSubDelim (const wxChar& c) +// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +// / "*" / "+" / "," / ";" / "=" +bool wxURI::IsSubDelim(char c) { return c == '!' || c == '$' || @@ -1159,115 +1060,29 @@ bool wxURI::IsSubDelim (const wxChar& c) c == '+' || c == ',' || c == ';' || - c == '=' + c == '=' ; } -bool wxURI::IsHex(const wxChar& c) -{ return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); } - -bool wxURI::IsAlpha(const wxChar& c) -{ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } - -bool wxURI::IsDigit(const wxChar& c) -{ return c >= '0' && c <= '9'; } - - -// --------------------------------------------------------------------------- -// -// wxURL Compatability -// -// TODO: Use wxURI instead here... -// --------------------------------------------------------------------------- - -#if wxUSE_URL - -#include "wx/url.h" - -wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims) +bool wxURI::IsHex(char c) { - wxString out_str; - wxString hexa_code; - size_t i; - - for (i = 0; i < uri.Len(); i++) - { - wxChar c = uri.GetChar(i); - - if (c == wxT(' ')) - { - // GRG, Apr/2000: changed to "%20" instead of '+' - - out_str += wxT("%20"); - } - else - { - // GRG, Apr/2000: modified according to the URI definition (RFC 2396) - // - // - Alphanumeric characters are never escaped - // - Unreserved marks are never escaped - // - Delimiters must be escaped if they appear within a component - // but not if they are used to separate components. Here we have - // no clear way to distinguish between these two cases, so they - // are escaped unless they are passed in the 'delims' parameter - // (allowed delimiters). - - static const wxChar marks[] = wxT("-_.!~*()'"); - - if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) ) - { - hexa_code.Printf(wxT("%%%02X"), c); - out_str += hexa_code; - } - else - { - out_str += c; - } - } - } - - return out_str; + return IsDigit(c) || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F'); } -wxString wxURL::ConvertFromURI(const wxString& uri) +bool wxURI::IsAlpha(char c) { - wxString new_uri; - - size_t i = 0; - while (i < uri.Len()) - { - int code; - if (uri[i] == wxT('%')) - { - i++; - if (uri[i] >= wxT('A') && uri[i] <= wxT('F')) - code = (uri[i] - wxT('A') + 10) * 16; - else if (uri[i] >= wxT('a') && uri[i] <= wxT('f')) - code = (uri[i] - wxT('a') + 10) * 16; - else - code = (uri[i] - wxT('0')) * 16; - - i++; - if (uri[i] >= wxT('A') && uri[i] <= wxT('F')) - code += (uri[i] - wxT('A')) + 10; - else if (uri[i] >= wxT('a') && uri[i] <= wxT('f')) - code += (uri[i] - wxT('a')) + 10; - else - code += (uri[i] - wxT('0')); - - i++; - new_uri += (wxChar)code; - continue; - } - new_uri += uri[i]; - i++; - } - return new_uri; + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } -#endif //wxUSE_URL - -//end of uri.cpp - +bool wxURI::IsDigit(char c) +{ + return c >= '0' && c <= '9'; +} +bool wxURI::IsEndPath(char c) +{ + return c == '\0' || c == '#' || c == '?'; +}