From 2186321ff5525cfc61c7085a298685b8afc669ab Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Sat, 19 Jul 2008 23:14:51 +0000 Subject: [PATCH] big wxURI cleanup; it now handles Unicode characters correctly (#3874) git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@54723 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- docs/changes.txt | 1 + include/wx/uri.h | 180 ++++---- src/common/uri.cpp | 974 +++++++++++++++++--------------------------- tests/uris/uris.cpp | 108 ++--- 4 files changed, 534 insertions(+), 729 deletions(-) diff --git a/docs/changes.txt b/docs/changes.txt index 572750c2d8..6545f2c96f 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -249,6 +249,7 @@ Major new features in this release All: - Added (experimental) IPv6 support to wxSocket (Arcen) +- Cleaned up wxURI and made it Unicode-friendly. - Add support for wxExecute(wxEXEC_ASYNC) in wxBase (Lukasz Michalski) - Added wxXLocale class and xlocale-like functions using it - Allow loading message catalogs from wxFileSystem (Axel Gembe) diff --git a/include/wx/uri.h b/include/wx/uri.h index b273c37a12..3ce9811a55 100644 --- a/include/wx/uri.h +++ b/include/wx/uri.h @@ -1,11 +1,12 @@ ///////////////////////////////////////////////////////////////////////////// -// Name: uri.h +// Name: wx/uri.h // Purpose: wxURI - Class for parsing URIs // Author: Ryan Norton -// Modified By: +// Vadim Zeitlin (UTF-8 URI support, many other changes) // Created: 07/01/2004 // RCS-ID: $Id$ -// Copyright: (c) Ryan Norton +// Copyright: (c) 2004 Ryan Norton +// 2008 Vadim Zeitlin // Licence: wxWindows Licence ///////////////////////////////////////////////////////////////////////////// @@ -52,86 +53,119 @@ class WXDLLIMPEXP_BASE wxURI : public wxObject public: wxURI(); wxURI(const wxString& uri); - wxURI(const wxURI& uri); - - virtual ~wxURI(); - - const wxChar* Create(const wxString& uri); - - bool HasScheme() const { return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; } - bool HasUserInfo() const { return 
(m_fields & wxURI_USERINFO) == wxURI_USERINFO; } - bool HasServer() const { return (m_fields & wxURI_SERVER) == wxURI_SERVER; } - bool HasPort() const { return (m_fields & wxURI_PORT) == wxURI_PORT; } - bool HasPath() const { return (m_fields & wxURI_PATH) == wxURI_PATH; } - bool HasQuery() const { return (m_fields & wxURI_QUERY) == wxURI_QUERY; } - bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; } - - const wxString& GetScheme() const { return m_scheme; } - const wxString& GetPath() const { return m_path; } - const wxString& GetQuery() const { return m_query; } - const wxString& GetFragment() const { return m_fragment; } - const wxString& GetPort() const { return m_port; } - const wxString& GetUserInfo() const { return m_userinfo; } - const wxString& GetServer() const { return m_server; } - const wxURIHostType& GetHostType() const { return m_hostType; } - - //Note that the following two get functions are explicitly depreciated by RFC 2396 + + // default copy ctor, assignment operator and dtor are ok + + bool Create(const wxString& uri); + + wxURI& operator=(const wxString& string) + { + Create(string); + return *this; + } + + bool operator==(const wxURI& uri) const; + + // various accessors + + bool HasScheme() const { return (m_fields & wxURI_SCHEME) != 0; } + bool HasUserInfo() const { return (m_fields & wxURI_USERINFO) != 0; } + bool HasServer() const { return (m_fields & wxURI_SERVER) != 0; } + bool HasPort() const { return (m_fields & wxURI_PORT) != 0; } + bool HasPath() const { return (m_fields & wxURI_PATH) != 0; } + bool HasQuery() const { return (m_fields & wxURI_QUERY) != 0; } + bool HasFragment() const { return (m_fields & wxURI_FRAGMENT) != 0; } + + const wxString& GetScheme() const { return m_scheme; } + const wxString& GetPath() const { return m_path; } + const wxString& GetQuery() const { return m_query; } + const wxString& GetFragment() const { return m_fragment; } + const wxString& GetPort() const { return m_port; 
} + const wxString& GetUserInfo() const { return m_userinfo; } + const wxString& GetServer() const { return m_server; } + wxURIHostType GetHostType() const { return m_hostType; } + + // these functions only work if the user information part of the URI is in + // the usual (but insecure and hence explicitly recommended against by the + // RFC) "user:password" form wxString GetUser() const; wxString GetPassword() const; - wxString BuildURI() const; - wxString BuildUnescapedURI() const; - void Resolve(const wxURI& base, int flags = wxURI_STRICT); - bool IsReference() const; + // combine all URI components into a single string + // + // BuildURI() returns the real URI suitable for use with network libraries, + // for example, while BuildUnescapedURI() returns a string suitable to be + // shown to the user. + wxString BuildURI() const { return DoBuildURI(&wxURI::Nothing); } + wxString BuildUnescapedURI() const { return DoBuildURI(&wxURI::Unescape); } - wxURI& operator = (const wxURI& uri); - wxURI& operator = (const wxString& string); - bool operator == (const wxURI& uri) const; + // the escaped URI should contain only ASCII characters, including possible + // escape sequences + static wxString Unescape(const wxString& escapedURI); - static wxString Unescape (const wxString& szEscapedURI); -protected: - wxURI& Assign(const wxURI& uri); + void Resolve(const wxURI& base, int flags = wxURI_STRICT); + bool IsReference() const; +protected: void Clear(); - const wxChar* Parse (const wxChar* uri); - const wxChar* ParseAuthority (const wxChar* uri); - const wxChar* ParseScheme (const wxChar* uri); - const wxChar* ParseUserInfo (const wxChar* uri); - const wxChar* ParseServer (const wxChar* uri); - const wxChar* ParsePort (const wxChar* uri); - const wxChar* ParsePath (const wxChar* uri, - bool bReference = false, - bool bNormalize = true); - const wxChar* ParseQuery (const wxChar* uri); - const wxChar* ParseFragment (const wxChar* uri); - - - static bool ParseH16(const wxChar*& 
uri); - static bool ParseIPv4address(const wxChar*& uri); - static bool ParseIPv6address(const wxChar*& uri); - static bool ParseIPvFuture(const wxChar*& uri); - - static void Normalize(wxChar* uri, bool bIgnoreLeads = false); - static void UpTree(const wxChar* uristart, const wxChar*& uri); - static void UpTree(wxString::const_iterator uristart, - wxString::const_iterator& uri); - - static wxUniChar TranslateEscape(const wxString::const_iterator& s); - static void Escape(wxString& s, const wxChar& c); - static bool IsEscape(const wxChar*& uri); - - static wxChar CharToHex(const wxChar& c); - - static bool IsUnreserved (const wxChar& c); - static bool IsReserved (const wxChar& c); - static bool IsGenDelim (const wxChar& c); - static bool IsSubDelim (const wxChar& c); - static bool IsHex(const wxChar& c); - static bool IsAlpha(const wxChar& c); - static bool IsDigit(const wxChar& c); + // common part of BuildURI() and BuildUnescapedURI() + wxString DoBuildURI(wxString (*funcDecode)(const wxString&)) const; + + // function which returns its argument unmodified, this is used by + // BuildURI() to tell DoBuildURI() that nothing needs to be done with the + // URI components + static wxString Nothing(const wxString& value) { return value; } + + bool Parse(const char* uri); + + const char* ParseAuthority (const char* uri); + const char* ParseScheme (const char* uri); + const char* ParseUserInfo (const char* uri); + const char* ParseServer (const char* uri); + const char* ParsePort (const char* uri); + const char* ParsePath (const char* uri); + const char* ParseQuery (const char* uri); + const char* ParseFragment (const char* uri); + + + static bool ParseH16(const char*& uri); + static bool ParseIPv4address(const char*& uri); + static bool ParseIPv6address(const char*& uri); + static bool ParseIPvFuture(const char*& uri); + + // should be called with i pointing to '%', returns the encoded character + // following it or -1 if invalid and advances i past it (so that it 
points + // to the last character consumed on return) + static int DecodeEscape(wxString::const_iterator& i); + + // append next character pointed to by p to the string in an escaped form + // and advance p past it + // + // if the next character is '%' and it's followed by 2 hex digits, they are + // not escaped (again) by this function, this allows to keep (backwards- + // compatible) ambiguity about the input format to wxURI::Create(): it can + // be either already escaped or not + void AppendNextEscaped(wxString& s, const char *& p); + + // convert hexadecimal digit to its value; return -1 if c isn't valid + static int CharToHex(char c); + + // split a URI path string in its component segments (including empty and + // "." ones, no post-processing is done) + static wxArrayString SplitInSegments(const wxString& path); + + // various URI grammar helpers + static bool IsUnreserved(char c); + static bool IsReserved(char c); + static bool IsGenDelim(char c); + static bool IsSubDelim(char c); + static bool IsHex(char c); + static bool IsAlpha(char c); + static bool IsDigit(char c); + static bool IsEndPath(char c); wxString m_scheme; wxString m_path; diff --git a/src/common/uri.cpp b/src/common/uri.cpp index 7ddc18d1dd..b8b569ab1b 100644 --- a/src/common/uri.cpp +++ b/src/common/uri.cpp @@ -1,11 +1,13 @@ ///////////////////////////////////////////////////////////////////////////// // Name: uri.cpp -// Purpose: Implementation of a uri parser -// Author: Ryan Norton +// Purpose: Implementation of a URI parser +// Author: Ryan Norton, +// Vadim Zeitlin (UTF-8 URI support, many other changes) // Created: 10/26/04 // RCS-ID: $Id$ -// Copyright: (c) 2004 Ryan Norton -// Licence: wxWindows +// Copyright: (c) 2004 Ryan Norton, +// 2008 Vadim Zeitlin +// Licence: wxWindows licence ///////////////////////////////////////////////////////////////////////////// // =========================================================================== @@ -36,50 +38,43 @@ 
IMPLEMENT_CLASS(wxURI, wxObject) // =========================================================================== -// implementation +// wxURI implementation // =========================================================================== // --------------------------------------------------------------------------- -// utilities +// Constructors and cleanup // --------------------------------------------------------------------------- -// --------------------------------------------------------------------------- -// -// wxURI -// -// --------------------------------------------------------------------------- - -// --------------------------------------------------------------------------- -// Constructors -// --------------------------------------------------------------------------- - -wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0) +wxURI::wxURI() + : m_hostType(wxURI_REGNAME), + m_fields(0) { } -wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0) +wxURI::wxURI(const wxString& uri) + : m_hostType(wxURI_REGNAME), + m_fields(0) { Create(uri); } -wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0) +bool wxURI::Create(const wxString& uri) { - Assign(uri); -} - -// --------------------------------------------------------------------------- -// Destructor and cleanup -// --------------------------------------------------------------------------- + if (m_fields) + Clear(); -wxURI::~wxURI() -{ - Clear(); + return Parse(uri.utf8_str()); } void wxURI::Clear() { - m_scheme = m_userinfo = m_server = m_port = m_path = - m_query = m_fragment = wxEmptyString; + m_scheme = + m_userinfo = + m_server = + m_port = + m_path = + m_query = + m_fragment = wxEmptyString; m_hostType = wxURI_REGNAME; @@ -87,78 +82,98 @@ void wxURI::Clear() } // --------------------------------------------------------------------------- -// Create -// -// This creates the URI - all we do here is call the main parsing method +// Escaped characters 
handling // --------------------------------------------------------------------------- -const wxChar* wxURI::Create(const wxString& uri) -{ - if (m_fields) - Clear(); +// Converts a character into a numeric hexadecimal value, or -1 if the passed +// in character is not a valid hex character - // FIXME-UTF8: rewrite ParseXXX() methods using iterators - // NB: using wxWxCharBuffer instead of just c_str() avoids keeping - // converted string in memory for longer than needed - return Parse(wxWxCharBuffer(uri.c_str())); +/* static */ +int wxURI::CharToHex(char c) +{ + if ((c >= 'A') && (c <= 'Z')) + return c - 'A' + 10; + if ((c >= 'a') && (c <= 'z')) + return c - 'a' + 10; + if ((c >= '0') && (c <= '9')) + return c - '0'; + + return -1; } -// --------------------------------------------------------------------------- -// Escape Methods -// -// TranslateEscape unencodes a 3 character URL escape sequence -// -// Escape encodes an invalid URI character into a 3 character sequence -// -// IsEscape determines if the input string contains an escape sequence, -// if it does, then it moves the input string past the escape sequence -// -// Unescape unencodes all 3 character URL escape sequences in a wxString -// --------------------------------------------------------------------------- - -wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s) +int wxURI::DecodeEscape(wxString::const_iterator& i) { - wxChar c1(*s); - wxChar c2(*(s + 1)); + int hi = CharToHex(*++i); + if ( hi == -1 ) + return -1; - wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!")); + int lo = CharToHex(*++i); + if ( lo == -1 ) + return -1; - return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2)); + return (hi << 4) | lo; } +/* static */ wxString wxURI::Unescape(const wxString& uri) { - wxString new_uri; + // the unescaped version can't be longer than the original one + wxCharBuffer buf(uri.length()); + char *p = buf.data(); - for (wxString::const_iterator i = 
uri.begin(); i != uri.end(); ++i) + for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p ) { - if ( *i == wxT('%') ) + char c = *i; + if ( c == '%' ) { - new_uri += wxURI::TranslateEscape(i + 1); - i += 2; + int n = wxURI::DecodeEscape(i); + if ( n == -1 ) + return wxString(); + + wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" ); + + c = wx_static_cast(char, n); } - else - new_uri += *i; + + *p = c; } - return new_uri; -} + *p = '\0'; -void wxURI::Escape(wxString& s, const wxChar& c) -{ - const wxChar* hdig = wxT("0123456789abcdef"); - s += wxT('%'); - s += hdig[(c >> 4) & 15]; - s += hdig[c & 15]; + // by default assume that the URI is in UTF-8, this is the most common + // practice + wxString s = wxString::FromUTF8(buf); + if ( s.empty() ) + { + // if it isn't, use latin-1 as a fallback -- at least this always + // succeeds + s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf); + } + + return s; } -bool wxURI::IsEscape(const wxChar*& uri) +void wxURI::AppendNextEscaped(wxString& s, const char *& p) { + // check for an already encoded character: + // // pct-encoded = "%" HEXDIG HEXDIG - if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2))) - return true; - else - return false; + if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) ) + { + s += *p++; + s += *p++; + s += *p++; + } + else // really needs escaping + { + static const char* hexDigits = "0123456789abcdef"; + + const char c = *p++; + + s += '%'; + s += hexDigits[(c >> 4) & 15]; + s += hexDigits[c & 15]; + } } // --------------------------------------------------------------------------- @@ -173,7 +188,7 @@ wxString wxURI::GetUser() const if (dwPasswordPos == wxString::npos) dwPasswordPos = 0; - + return m_userinfo(0, dwPasswordPos); } @@ -182,124 +197,52 @@ wxString wxURI::GetPassword() const size_t dwPasswordPos = m_userinfo.find(':'); if (dwPasswordPos == wxString::npos) - return wxT(""); + return ""; else - return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1); 
-} - -// --------------------------------------------------------------------------- -// BuildURI -// -// BuildURI() builds the entire URI into a useable -// representation, including proper identification characters such as slashes -// -// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes -// the components that accept escape sequences -// --------------------------------------------------------------------------- - -wxString wxURI::BuildURI() const -{ - wxString ret; - - if (HasScheme()) - ret = ret + m_scheme + wxT(":"); - - if (HasServer()) - { - ret += wxT("//"); - - if (HasUserInfo()) - ret = ret + m_userinfo + wxT("@"); - - ret += m_server; - - if (HasPort()) - ret = ret + wxT(":") + m_port; - } - - ret += m_path; - - if (HasQuery()) - ret = ret + wxT("?") + m_query; - - if (HasFragment()) - ret = ret + wxT("#") + m_fragment; - - return ret; + return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1); } -wxString wxURI::BuildUnescapedURI() const +// combine all URI fields in a single string, applying funcDecode to each +// component which it may make sense to decode (i.e. "unescape") +wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const { wxString ret; if (HasScheme()) - ret = ret + m_scheme + wxT(":"); + ret += m_scheme + ":"; if (HasServer()) { - ret += wxT("//"); + ret += "//"; if (HasUserInfo()) - ret = ret + wxURI::Unescape(m_userinfo) + wxT("@"); + ret += funcDecode(m_userinfo) + "@"; if (m_hostType == wxURI_REGNAME) - ret += wxURI::Unescape(m_server); + ret += funcDecode(m_server); else ret += m_server; if (HasPort()) - ret = ret + wxT(":") + m_port; + ret += ":" + m_port; } - ret += wxURI::Unescape(m_path); + ret += funcDecode(m_path); if (HasQuery()) - ret = ret + wxT("?") + wxURI::Unescape(m_query); + ret += "?" 
+ funcDecode(m_query); if (HasFragment()) - ret = ret + wxT("#") + wxURI::Unescape(m_fragment); + ret += "#" + funcDecode(m_fragment); return ret; } -// --------------------------------------------------------------------------- -// Assignment -// --------------------------------------------------------------------------- - -wxURI& wxURI::Assign(const wxURI& uri) -{ - //assign fields - m_fields = uri.m_fields; - - //ref over components - m_scheme = uri.m_scheme; - m_userinfo = uri.m_userinfo; - m_server = uri.m_server; - m_hostType = uri.m_hostType; - m_port = uri.m_port; - m_path = uri.m_path; - m_query = uri.m_query; - m_fragment = uri.m_fragment; - - return *this; -} - -wxURI& wxURI::operator = (const wxURI& uri) -{ - return Assign(uri); -} - -wxURI& wxURI::operator = (const wxString& string) -{ - Create(string); - return *this; -} - // --------------------------------------------------------------------------- // Comparison // --------------------------------------------------------------------------- -bool wxURI::operator == (const wxURI& uri) const +bool wxURI::operator==(const wxURI& uri) const { if (HasScheme()) { @@ -370,7 +313,9 @@ bool wxURI::operator == (const wxURI& uri) const // --------------------------------------------------------------------------- bool wxURI::IsReference() const -{ return !HasScheme() || !HasServer(); } +{ + return !HasScheme() || !HasServer(); +} // --------------------------------------------------------------------------- // Parse @@ -381,229 +326,194 @@ bool wxURI::IsReference() const // URI-reference = URI / relative // --------------------------------------------------------------------------- -const wxChar* wxURI::Parse(const wxChar *uri) +bool wxURI::Parse(const char *uri) { uri = ParseScheme(uri); - uri = ParseAuthority(uri); - uri = ParsePath(uri); - uri = ParseQuery(uri); - return ParseFragment(uri); + if ( uri ) + uri = ParseAuthority(uri); + if ( uri ) + uri = ParsePath(uri); + if ( uri ) + uri = ParseQuery(uri); + 
if ( uri ) + uri = ParseFragment(uri); + + // we only succeed if we parsed the entire string + return uri && *uri == '\0'; } -// --------------------------------------------------------------------------- -// ParseXXX -// -// Individual parsers for each URI component -// --------------------------------------------------------------------------- - -const wxChar* wxURI::ParseScheme(const wxChar *uri) +const char* wxURI::ParseScheme(const char *uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; + const char * const start = uri; - //Does the uri have a scheme (first character alpha)? - if (IsAlpha(*uri)) + // assume that we have a scheme if we have the valid start of it + if ( IsAlpha(*uri) ) { m_scheme += *uri++; //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) while (IsAlpha(*uri) || IsDigit(*uri) || - *uri == wxT('+') || - *uri == wxT('-') || - *uri == wxT('.')) + *uri == '+' || + *uri == '-' || + *uri == '.') { m_scheme += *uri++; } //valid scheme? 
- if (*uri == wxT(':')) + if (*uri == ':') { //mark the scheme as valid m_fields |= wxURI_SCHEME; //move reference point up to input buffer - uricopy = ++uri; + ++uri; + } + else // no valid scheme finally + { + uri = start; // rewind + m_scheme.clear(); } - else - //relative uri with relative path reference - m_scheme = wxEmptyString; } -// else - //relative uri with _possible_ relative path reference + //else: can't have a scheme, possibly a relative URI - return uricopy; + return uri; } -const wxChar* wxURI::ParseAuthority(const wxChar* uri) +const char* wxURI::ParseAuthority(const char* uri) { // authority = [ userinfo "@" ] host [ ":" port ] - if (*uri == wxT('/') && *(uri+1) == wxT('/')) + if ( uri[0] == '/' && uri[1] == '/' ) { //skip past the two slashes uri += 2; // ############# DEVIATION FROM RFC ######################### // Don't parse the server component for file URIs - if(m_scheme != wxT("file")) + if(m_scheme != "file") { //normal way - uri = ParseUserInfo(uri); - uri = ParseServer(uri); - return ParsePort(uri); + uri = ParseUserInfo(uri); + uri = ParseServer(uri); + return ParsePort(uri); } } return uri; } -const wxChar* wxURI::ParseUserInfo(const wxChar* uri) +const char* wxURI::ParseUserInfo(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; + const char * const start = uri; // userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) - while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?')) + while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' 
) { - if(IsUnreserved(*uri) || - IsSubDelim(*uri) || *uri == wxT(':')) - m_userinfo += *uri++; - else if (IsEscape(uri)) - { - m_userinfo += *uri++; - m_userinfo += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' ) m_userinfo += *uri++; - } else - Escape(m_userinfo, *uri++); + AppendNextEscaped(m_userinfo, uri); } - if(*uri == wxT('@')) + if ( *uri++ == '@' ) { - //valid userinfo + // valid userinfo m_fields |= wxURI_USERINFO; - - uricopy = ++uri; } else - m_userinfo = wxEmptyString; + { + uri = start; // rewind + m_userinfo.clear(); + } - return uricopy; + return uri; } -const wxChar* wxURI::ParseServer(const wxChar* uri) +const char* wxURI::ParseServer(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; + const char * const start = uri; // host = IP-literal / IPv4address / reg-name // IP-literal = "[" ( IPv6address / IPvFuture ) "]" - if (*uri == wxT('[')) + if (*uri == '[') { - ++uri; //some compilers don't support *&ing a ++* - if (ParseIPv6address(uri) && *uri == wxT(']')) + ++uri; + if (ParseIPv6address(uri) && *uri == ']') { - ++uri; m_hostType = wxURI_IPV6ADDRESS; - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxTmemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); + m_server.assign(start, uri - start - 1); + ++uri; } else { - uri = uricopy; + uri = start + 1; // skip the leading '[' again - ++uri; //some compilers don't support *&ing a ++* - if (ParseIPvFuture(uri) && *uri == wxT(']')) + if (ParseIPvFuture(uri) && *uri == ']') { - ++uri; m_hostType = wxURI_IPVFUTURE; - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxTmemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); + m_server.assign(start, uri - start - 1); + ++uri; + } + else // unrecognized IP literal + { + uri = start; } - else - uri = uricopy; } } - else + else // IPv4 or a reg-name { if 
(ParseIPv4address(uri)) { m_hostType = wxURI_IPV4ADDRESS; - wxStringBufferLength theBuffer(m_server, uri - uricopy); - wxTmemcpy(theBuffer, uricopy, uri-uricopy); - theBuffer.SetLength(uri-uricopy); + m_server.assign(start, uri - start - 1); } else - uri = uricopy; + { + uri = start; + } } - if(m_hostType == wxURI_REGNAME) + if ( m_hostType == wxURI_REGNAME ) { - uri = uricopy; + uri = start; // reg-name = *( unreserved / pct-encoded / sub-delims ) - while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?')) + while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' ) { - if(IsUnreserved(*uri) || IsSubDelim(*uri)) - m_server += *uri++; - else if (IsEscape(uri)) - { - m_server += *uri++; + if ( IsUnreserved(*uri) || IsSubDelim(*uri) ) m_server += *uri++; - m_server += *uri++; - } else - Escape(m_server, *uri++); + AppendNextEscaped(m_server, uri); } } - //mark the server as valid m_fields |= wxURI_SERVER; return uri; } -const wxChar* wxURI::ParsePort(const wxChar* uri) +const char* wxURI::ParsePort(const char* uri) { - wxASSERT(uri != NULL); - // port = *DIGIT - if(*uri == wxT(':')) + if( *uri == ':' ) { ++uri; - while(IsDigit(*uri)) + while ( IsDigit(*uri) ) { m_port += *uri++; } - //mark the port as valid m_fields |= wxURI_PORT; } return uri; } -const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize) +const char* wxURI::ParsePath(const char* uri) { - wxASSERT(uri != NULL); - - //copy of the uri - used for figuring out - //length of each component - const wxChar* uricopy = uri; - /// hier-part = "//" authority path-abempty /// / path-absolute /// / path-rootless @@ -626,114 +536,77 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali /// ; non-zero-length segment without any colon ":" /// /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - if (*uri == wxT('/')) - { - m_path += *uri++; - while(*uri && *uri != wxT('#') && *uri != wxT('?')) - { - if( 
IsUnreserved(*uri) || IsSubDelim(*uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/')) - m_path += *uri++; - else if (IsEscape(uri)) - { - m_path += *uri++; - m_path += *uri++; - m_path += *uri++; - } - else - Escape(m_path, *uri++); - } + if ( IsEndPath(*uri) ) + return uri; - if (bNormalize) - { - wxStringBufferLength theBuffer(m_path, m_path.length() + 1); - Normalize(theBuffer, true); - theBuffer.SetLength(wxStrlen(theBuffer)); - } - //mark the path as valid - m_fields |= wxURI_PATH; - } - else if(*uri) //Relative path + const bool isAbs = *uri == '/'; + if ( isAbs ) + m_path += *uri++; + + wxArrayString segments; + wxString segment; + for ( ;; ) { - if (bReference) + const bool endPath = IsEndPath(*uri); + if ( endPath || *uri == '/' ) { - //no colon allowed - while(*uri && *uri != wxT('#') && *uri != wxT('?')) + // end of a segment, look at what we got + if ( segment == ".." ) { - if(IsUnreserved(*uri) || IsSubDelim(*uri) || - *uri == wxT('@') || *uri == wxT('/')) - m_path += *uri++; - else if (IsEscape(uri)) - { - m_path += *uri++; - m_path += *uri++; - m_path += *uri++; - } - else - Escape(m_path, *uri++); + if ( !segments.empty() && *segments.rbegin() != ".." ) + segments.pop_back(); + else if ( !isAbs ) + segments.push_back(".."); } - } - else - { - while(*uri && *uri != wxT('#') && *uri != wxT('?')) + else if ( segment == "." ) { - if(IsUnreserved(*uri) || IsSubDelim(*uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/')) - m_path += *uri++; - else if (IsEscape(uri)) - { - m_path += *uri++; - m_path += *uri++; - m_path += *uri++; - } - else - Escape(m_path, *uri++); + // normally we ignore "." but the last one should be taken into + // account as "path/." 
is the same as "path/" and not just "path" + if ( endPath ) + segments.push_back(""); } - } - - if (uri != uricopy) - { - if (bNormalize) + else // normal segment { - wxStringBufferLength theBuffer(m_path, m_path.length() + 1); - Normalize(theBuffer); - theBuffer.SetLength(wxStrlen(theBuffer)); + segments.push_back(segment); } - //mark the path as valid - m_fields |= wxURI_PATH; + if ( endPath ) + break; + + segment.clear(); + ++uri; + continue; } + + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' ) + segment += *uri++; + else + AppendNextEscaped(segment, uri); } + m_path += wxJoin(segments, '/', '\0'); + m_fields |= wxURI_PATH; + return uri; } -const wxChar* wxURI::ParseQuery(const wxChar* uri) +const char* wxURI::ParseQuery(const char* uri) { - wxASSERT(uri != NULL); - // query = *( pchar / "/" / "?" ) - if (*uri == wxT('?')) + if ( *uri == '?' ) { ++uri; - while(*uri && *uri != wxT('#')) + while ( *uri && *uri != '#' ) { - if (IsUnreserved(*uri) || IsSubDelim(*uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?')) - m_query += *uri++; - else if (IsEscape(uri)) - { - m_query += *uri++; - m_query += *uri++; - m_query += *uri++; - } + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || + *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' ) + m_query += *uri++; else - Escape(m_query, *uri++); + AppendNextEscaped(m_query, uri); } - //mark the server as valid m_fields |= wxURI_QUERY; } @@ -741,30 +614,21 @@ const wxChar* wxURI::ParseQuery(const wxChar* uri) } -const wxChar* wxURI::ParseFragment(const wxChar* uri) +const char* wxURI::ParseFragment(const char* uri) { - wxASSERT(uri != NULL); - // fragment = *( pchar / "/" / "?" 
) - if (*uri == wxT('#')) + if ( *uri == '#' ) { ++uri; - while(*uri) + while ( *uri ) { - if (IsUnreserved(*uri) || IsSubDelim(*uri) || - *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?')) - m_fragment += *uri++; - else if (IsEscape(uri)) - { - m_fragment += *uri++; - m_fragment += *uri++; - m_fragment += *uri++; - } + if ( IsUnreserved(*uri) || IsSubDelim(*uri) || + *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?') + m_fragment += *uri++; else - Escape(m_fragment, *uri++); + AppendNextEscaped(m_fragment, uri); } - //mark the server as valid m_fields |= wxURI_FRAGMENT; } @@ -782,15 +646,20 @@ const wxChar* wxURI::ParseFragment(const wxChar* uri) // Note that an empty URI inherits all components // --------------------------------------------------------------------------- +/* static */ +wxArrayString wxURI::SplitInSegments(const wxString& path) +{ + return wxSplit(path, '/', '\0' /* no escape character */); +} + void wxURI::Resolve(const wxURI& base, int flags) { wxASSERT_MSG(!base.IsReference(), - wxT("wxURI to inherit from must not be a reference!")); + "wxURI to inherit from must not be a reference!"); - // If we arn't being strict, enable the older (pre-RFC2396) - // loophole that allows this uri to inherit other - // properties from the base uri - even if the scheme - // is defined + // If we aren't being strict, enable the older (pre-RFC2396) loophole that + // allows this uri to inherit other properties from the base uri - even if + // the scheme is defined if ( !(flags & wxURI_STRICT) && HasScheme() && base.HasScheme() && m_scheme == base.m_scheme ) @@ -806,9 +675,7 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(R.path); // T.query = R.query; if (HasScheme()) - { return; - } //No scheme - inherit m_scheme = base.m_scheme; @@ -821,9 +688,7 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(R.path); // T.query = R.query; if (HasServer()) - { return; - } 
//No authority - inherit if (base.HasUserInfo()) @@ -862,7 +727,7 @@ void wxURI::Resolve(const wxURI& base, int flags) m_fields |= wxURI_QUERY; } } - else + else if ( m_path.empty() || m_path[0u] != '/' ) { // if (R.path starts-with "/") then // T.path = remove_dot_segments(R.path); @@ -871,169 +736,74 @@ void wxURI::Resolve(const wxURI& base, int flags) // T.path = remove_dot_segments(T.path); // endif; // T.query = R.query; - if (m_path[0u] != wxT('/')) - { - //Merge paths - wxString::const_iterator op = m_path.begin(); - wxString::const_iterator bp = base.m_path.begin() + base.m_path.length(); - - //not a ending directory? move up - if (base.m_path[0] && *(bp-1) != wxT('/')) - UpTree(base.m_path.begin(), bp); - - //normalize directories - while(*op == wxT('.') && *(op+1) == wxT('.') && - (*(op+2) == '\0' || *(op+2) == wxT('/')) ) - { - UpTree(base.m_path.begin(), bp); - - if (*(op+2) == '\0') - op += 2; - else - op += 3; - } - - m_path = base.m_path.substr(0, bp - base.m_path.begin()) + - m_path.substr((op - m_path.begin()), m_path.length()); - } - } - - //T.fragment = R.fragment; -} - -// --------------------------------------------------------------------------- -// UpTree -// -// Moves a URI path up a directory -// --------------------------------------------------------------------------- - -//static -void wxURI::UpTree(wxString::const_iterator uristart, - wxString::const_iterator& uri) -{ - if (uri != uristart && *(uri-1) == wxT('/')) - { - uri -= 2; - } - - for(;uri != uristart; --uri) - { - if (*uri == wxT('/')) - { - ++uri; - break; - } - } + // + // So we don't do anything for absolute paths and implement merge for + // the relative ones - //!!!TODO:HACK!!!// - if (uri == uristart && *uri == wxT('/')) - ++uri; - //!!!// -} + wxArrayString our(SplitInSegments(m_path)), + result(SplitInSegments(base.m_path)); -// FIXME-UTF8: fix Normalize() to use iterators instead of having this method! 
-/*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri) -{ - if (uri != uristart && *(uri-1) == wxT('/')) - { - uri -= 2; - } + if ( !result.empty() ) + result.pop_back(); - for(;uri != uristart; --uri) - { - if (*uri == wxT('/')) + if ( our.empty() ) { - ++uri; - break; + // if we have an empty path it means we were constructed from a "." + // string or something similar (e.g. "././././"), it should count + // as (empty) segment + our.push_back(""); } - } - - //!!!TODO:HACK!!!// - if (uri == uristart && *uri == wxT('/')) - ++uri; - //!!!// -} -// end of FIXME-UTF8 - -// --------------------------------------------------------------------------- -// Normalize -// -// Normalizes directories in-place -// -// I.E. ./ and . are ignored -// -// ../ and .. are removed if a directory is before it, along -// with that directory (leading .. and ../ are kept) -// --------------------------------------------------------------------------- - -//static -void wxURI::Normalize(wxChar* s, bool bIgnoreLeads) -{ - wxChar* cp = s; - wxChar* bp = s; - - if(s[0] == wxT('/')) - ++bp; - while(*cp) - { - if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0') - && (bp == cp || *(cp-1) == wxT('/'))) + const wxArrayString::const_iterator end = our.end(); + for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i ) { - //. _or_ ./ - ignore - if (*(cp+1) == '\0') - cp += 1; - else - cp += 2; - } - else if (*cp == wxT('.') && *(cp+1) == wxT('.') && - (*(cp+2) == wxT('/') || *(cp+2) == '\0') - && (bp == cp || *(cp-1) == wxT('/'))) - { - //.. _or_ ../ - go up the tree - if (s != bp) + if ( i->empty() || *i == "." ) { - UpTree((const wxChar*)bp, (const wxChar*&)s); - - if (*(cp+2) == '\0') - cp += 2; - else - cp += 3; + // as in ParsePath(), while normally we ignore the empty + // segments, we need to take account of them at the end + if ( i == end - 1 ) + result.push_back(""); + continue; } - else if (!bIgnoreLeads) + if ( *i == ".." 
) { - *bp++ = *cp++; - *bp++ = *cp++; - if (*cp) - *bp++ = *cp++; + if ( !result.empty() ) + { + result.pop_back(); - s = bp; + if ( i == end - 1 ) + result.push_back(""); + } + //else: just ignore, extra ".." don't accumulate } else { - if (*(cp+2) == '\0') - cp += 2; - else - cp += 3; + if ( result.empty() ) + { + // ensure that the resulting path will always be absolute + result.push_back(""); + } + + result.push_back(*i); } } - else - *s++ = *cp++; + + m_path = wxJoin(result, '/', '\0'); } - *s = '\0'; + //T.fragment = R.fragment; } // --------------------------------------------------------------------------- // ParseH16 // // Parses 1 to 4 hex values. Returns true if the first character of the input -// string is a valid hex character. It is the caller's responsability to move +// string is a valid hex character. It is the caller's responsibility to move // the input string back to its original position on failure. // --------------------------------------------------------------------------- -bool wxURI::ParseH16(const wxChar*& uri) +bool wxURI::ParseH16(const char*& uri) { // h16 = 1*4HEXDIG if(!IsHex(*++uri)) @@ -1054,7 +824,7 @@ bool wxURI::ParseH16(const wxChar*& uri) // to its original position on failure. // --------------------------------------------------------------------------- -bool wxURI::ParseIPv4address(const wxChar*& uri) +bool wxURI::ParseIPv4address(const char*& uri) { //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet // @@ -1072,10 +842,10 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) //each ip part must be between 0-255 (dupe of version in for loop) if( IsDigit(*++uri) && IsDigit(*++uri) && //100 or less (note !) 
- !( (*(uri-2) < wxT('2')) || + !( (*(uri-2) < '2') || //240 or less - (*(uri-2) == wxT('2') && - (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5'))) + (*(uri-2) == '2' && + (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) ) ) ) @@ -1088,16 +858,16 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) //compilers should unroll this loop for(; iIPv4 < 4; ++iIPv4) { - if (*uri != wxT('.') || !IsDigit(*++uri)) + if (*uri != '.' || !IsDigit(*++uri)) break; //each ip part must be between 0-255 if( IsDigit(*++uri) && IsDigit(*++uri) && //100 or less (note !) - !( (*(uri-2) < wxT('2')) || + !( (*(uri-2) < '2') || //240 or less - (*(uri-2) == wxT('2') && - (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5'))) + (*(uri-2) == '2' && + (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5')) ) ) ) @@ -1110,7 +880,7 @@ bool wxURI::ParseIPv4address(const wxChar*& uri) return iIPv4 == 4; } -bool wxURI::ParseIPv6address(const wxChar*& uri) +bool wxURI::ParseIPv6address(const char*& uri) { // IPv6address = 6( h16 ":" ) ls32 // / "::" 5( h16 ":" ) ls32 @@ -1136,7 +906,7 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) break; } - if(*uri != wxT(':')) + if(*uri != ':') { break; } @@ -1149,9 +919,9 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) if (numPrefix) return false; - if (*uri == wxT(':')) + if (*uri == ':') { - if (*++uri != wxT(':')) + if (*++uri != ':') return false; maxPostfix = 5; @@ -1161,21 +931,21 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) } else { - if (*uri != wxT(':') || *(uri+1) != wxT(':')) + if (*uri != ':' || *(uri+1) != ':') { if (numPrefix != 6) return false; - while (*--uri != wxT(':')) {} + while (*--uri != ':') {} ++uri; - const wxChar* uristart = uri; + const char * const start = uri; //parse ls32 // ls32 = ( h16 ":" h16 ) / IPv4address - if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri)) + if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) return true; - uri = uristart; + uri = start; if 
(ParseIPv4address(uri)) return true; @@ -1197,24 +967,24 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) for(; maxPostfix != 0; --maxPostfix) { - if(!ParseH16(uri) || *uri != wxT(':')) + if(!ParseH16(uri) || *uri != ':') return false; } if(numPrefix <= 4) { - const wxChar* uristart = uri; + const char * const start = uri; //parse ls32 // ls32 = ( h16 ":" h16 ) / IPv4address - if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri)) + if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) return true; - uri = uristart; + uri = start; if (ParseIPv4address(uri)) return true; - uri = uristart; + uri = start; if (!bAllowAltEnding) return false; @@ -1226,102 +996,96 @@ bool wxURI::ParseIPv6address(const wxChar*& uri) return true; } -bool wxURI::ParseIPvFuture(const wxChar*& uri) +bool wxURI::ParseIPvFuture(const char*& uri) { // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) - if (*++uri != wxT('v') || !IsHex(*++uri)) + if (*++uri != 'v' || !IsHex(*++uri)) return false; - while (IsHex(*++uri)) {} + while (IsHex(*++uri)) + ; - if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':'))) + if (*uri != '.' 
|| !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':')) return false; - while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {} + while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {} return true; } -// --------------------------------------------------------------------------- -// CharToHex -// -// Converts a character into a numeric hexidecimal value, or 0 if the -// passed in character is not a valid hex character -// --------------------------------------------------------------------------- - -//static -wxChar wxURI::CharToHex(const wxChar& c) -{ - if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A); - if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a); - if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00); - - return 0; -} - // --------------------------------------------------------------------------- // IsXXX // // Returns true if the passed in character meets the criteria of the method // --------------------------------------------------------------------------- -//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -bool wxURI::IsUnreserved (const wxChar& c) -{ return IsAlpha(c) || IsDigit(c) || - c == wxT('-') || - c == wxT('.') || - c == wxT('_') || - c == wxT('~') //tilde +// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" +bool wxURI::IsUnreserved(char c) +{ + return IsAlpha(c) || + IsDigit(c) || + c == '-' || + c == '.' || + c == '_' || + c == '~' ; } -bool wxURI::IsReserved (const wxChar& c) +bool wxURI::IsReserved(char c) { return IsGenDelim(c) || IsSubDelim(c); } -//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" -bool wxURI::IsGenDelim (const wxChar& c) +// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" +bool wxURI::IsGenDelim(char c) { - return c == wxT(':') || - c == wxT('/') || - c == wxT('?') || - c == wxT('#') || - c == wxT('[') || - c == wxT(']') || - c == wxT('@'); + return c == ':' || + c == '/' || + c == '?' 
|| + c == '#' || + c == '[' || + c == ']' || + c == '@'; } -//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")" -//! / "*" / "+" / "," / ";" / "=" -bool wxURI::IsSubDelim (const wxChar& c) +// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" +// / "*" / "+" / "," / ";" / "=" +bool wxURI::IsSubDelim(char c) { - return c == wxT('!') || - c == wxT('$') || - c == wxT('&') || - c == wxT('\'') || - c == wxT('(') || - c == wxT(')') || - c == wxT('*') || - c == wxT('+') || - c == wxT(',') || - c == wxT(';') || - c == wxT('=') + return c == '!' || + c == '$' || + c == '&' || + c == '\'' || + c == '(' || + c == ')' || + c == '*' || + c == '+' || + c == ',' || + c == ';' || + c == '=' ; } -bool wxURI::IsHex(const wxChar& c) -{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); } - -bool wxURI::IsAlpha(const wxChar& c) -{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); } - -bool wxURI::IsDigit(const wxChar& c) -{ return c >= wxT('0') && c <= wxT('9'); } - +bool wxURI::IsHex(char c) +{ + return IsDigit(c) || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F'); +} -//end of uri.cpp +bool wxURI::IsAlpha(char c) +{ + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} +bool wxURI::IsDigit(char c) +{ + return c >= '0' && c <= '9'; +} +bool wxURI::IsEndPath(char c) +{ + return c == '\0' || c == '#' || c == '?'; +} diff --git a/tests/uris/uris.cpp b/tests/uris/uris.cpp index d0d30e0b21..3948ef784e 100644 --- a/tests/uris/uris.cpp +++ b/tests/uris/uris.cpp @@ -158,10 +158,11 @@ void URITestCase::Paths() } #define URI_TEST_RESOLVE_IMPL(string, eq, strict) \ - uri = new wxURI(wxT(string));\ - uri->Resolve(masteruri, strict);\ - CPPUNIT_ASSERT(uri->BuildURI() == wxT(eq));\ - delete uri; + { \ + wxURI uri(string); \ + uri.Resolve(masteruri, strict); \ + CPPUNIT_ASSERT_EQUAL(eq, uri.BuildURI()); \ + } #define URI_TEST_RESOLVE(string, eq) \ URI_TEST_RESOLVE_IMPL(string, eq, true); @@ -174,8 +175,7 @@ void 
URITestCase::Paths() void URITestCase::NormalResolving() { - wxURI masteruri(wxT("http://a/b/c/d;p?q")); - wxURI* uri; + wxURI masteruri("http://a/b/c/d;p?q"); URI_TEST_RESOLVE("g:h" ,"g:h") URI_TEST_RESOLVE("g" ,"http://a/b/c/g") @@ -205,10 +205,12 @@ void URITestCase::NormalResolving() void URITestCase::ComplexResolving() { - wxURI masteruri(wxT("http://a/b/c/d;p?q")); - wxURI* uri; + wxURI masteruri("http://a/b/c/d;p?q"); //odd path examples + URI_TEST_RESOLVE("../../../g" , "http://a/g") + URI_TEST_RESOLVE("../../../../g", "http://a/g") + URI_TEST_RESOLVE("/./g" ,"http://a/g") URI_TEST_RESOLVE("/../g" ,"http://a/g") URI_TEST_RESOLVE("g." ,"http://a/b/c/g.") @@ -216,14 +218,10 @@ void URITestCase::ComplexResolving() URI_TEST_RESOLVE("g.." ,"http://a/b/c/g..") URI_TEST_RESOLVE("..g" ,"http://a/b/c/..g") } - //Should Fail - //"../../../g" = "http://a/g" - //"../../../../g" = "http://a/g" void URITestCase::ReallyComplexResolving() { - wxURI masteruri(wxT("http://a/b/c/d;p?q")); - wxURI* uri; + wxURI masteruri("http://a/b/c/d;p?q"); //even more odder path examples URI_TEST_RESOLVE("./../g" ,"http://a/b/g") @@ -236,8 +234,7 @@ void URITestCase::ReallyComplexResolving() void URITestCase::QueryFragmentResolving() { - wxURI masteruri(wxT("http://a/b/c/d;p?q")); - wxURI* uri; + wxURI masteruri("http://a/b/c/d;p?q"); //query/fragment ambigiousness URI_TEST_RESOLVE("g?y/./x","http://a/b/c/g?y/./x") @@ -248,8 +245,7 @@ void URITestCase::QueryFragmentResolving() void URITestCase::BackwardsResolving() { - wxURI masteruri(wxT("http://a/b/c/d;p?q")); - wxURI* uri; + wxURI masteruri("http://a/b/c/d;p?q"); //"NEW" URI_TEST_RESOLVE("http:g" , "http:g") //strict @@ -259,70 +255,76 @@ void URITestCase::BackwardsResolving() void URITestCase::Assignment() { - wxURI uri1(wxT("http://mysite.com")), - uri2(wxT("http://mysite2.com")); + wxURI uri1("http://mysite.com"), + uri2("http://mysite2.com"); uri2 = uri1; - CPPUNIT_ASSERT(uri1.BuildURI() == uri2.BuildURI()); + 
CPPUNIT_ASSERT_EQUAL(uri1.BuildURI(), uri2.BuildURI()); } void URITestCase::Comparison() { - CPPUNIT_ASSERT(wxURI(wxT("http://mysite.com")) == wxURI(wxT("http://mysite.com"))); + CPPUNIT_ASSERT(wxURI("http://mysite.com") == wxURI("http://mysite.com")); } void URITestCase::Unescaping() { - wxString orig = wxT("http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C") - wxT("escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss"); + wxString escaped, + unescaped; + + escaped = "http://test.com/of/file%3A%2F%2FC%3A%5Curi%5C" + "escaping%5Cthat%5Cseems%5Cbroken%5Csadly%5B1%5D.rss"; - wxString works= wxURI(orig).BuildUnescapedURI(); + unescaped = wxURI(escaped).BuildUnescapedURI(); - CPPUNIT_ASSERT(orig.IsSameAs(works) == false); + CPPUNIT_ASSERT_EQUAL( "http://test.com/of/file://C:\\uri\\" + "escaping\\that\\seems\\broken\\sadly[1].rss", + unescaped ); - wxString orig2 = wxT("http://test.com/of/file%3A%2F%") - wxT("2FC%3A%5Curi%5Cescaping%5Cthat%5Cseems%") - wxT("5Cbroken%5Csadly%5B1%5D.rss"); + CPPUNIT_ASSERT_EQUAL( unescaped, wxURI::Unescape(escaped) ); - wxString works2 = wxURI::Unescape(orig2); - wxString broken2 = wxURI(orig2).BuildUnescapedURI(); - CPPUNIT_ASSERT(works2.IsSameAs(broken2)); + escaped = "http://ru.wikipedia.org/wiki/" + "%D0%A6%D0%B5%D0%BB%D0%BE%D0%B5_%D1%87%D0%B8%D1%81%D0%BB%D0%BE"; + unescaped = wxURI::Unescape(escaped); + + CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8( + "http://ru.wikipedia.org/wiki/" + "\xD0\xA6\xD0\xB5\xD0\xBB\xD0\xBE\xD0\xB5_" + "\xD1\x87\xD0\xB8\xD1\x81\xD0\xBB\xD0\xBE" + ), + unescaped ); } void URITestCase::FileScheme() { //file:// variety (NOT CONFORMANT TO THE RFC) - CPPUNIT_ASSERT(wxURI(wxString(wxT("file://e:/wxcode/script1.xml"))).GetPath() - == wxT("e:/wxcode/script1.xml") ); + URI_TEST_EQUAL( "file://e:/wxcode/script1.xml", + "e:/wxcode/script1.xml", GetPath() ); //file:/// variety - CPPUNIT_ASSERT(wxURI(wxString(wxT("file:///e:/wxcode/script1.xml"))).GetPath() - == wxT("/e:/wxcode/script1.xml") ); + URI_TEST_EQUAL( 
"file:///e:/wxcode/script1.xml", + "/e:/wxcode/script1.xml", GetPath() ); //file:/ variety - CPPUNIT_ASSERT(wxURI(wxString(wxT("file:/e:/wxcode/script1.xml"))).GetPath() - == wxT("/e:/wxcode/script1.xml") ); + URI_TEST_EQUAL( "file:/e:/wxcode/script1.xml", + "/e:/wxcode/script1.xml", GetPath() ); //file: variety - CPPUNIT_ASSERT(wxURI(wxString(wxT("file:e:/wxcode/script1.xml"))).GetPath() - == wxT("e:/wxcode/script1.xml") ); + URI_TEST_EQUAL( "file:e:/wxcode/script1.xml", + "e:/wxcode/script1.xml", GetPath() ); } #if TEST_URL -const wxChar* pszProblemUrls[] = { wxT("http://www.csdn.net"), - wxT("http://www.163.com"), - wxT("http://www.sina.com.cn") }; - #include "wx/url.h" #include "wx/file.h" void URITestCase::URLCompat() { - wxURL url(wxT("http://user:password@wxwidgets.org")); + wxURL url("http://user:password@wxwidgets.org"); CPPUNIT_ASSERT(url.GetError() == wxURL_NOERR); @@ -332,9 +334,9 @@ void URITestCase::URLCompat() CPPUNIT_ASSERT( pInput != NULL ); #endif - CPPUNIT_ASSERT( url == wxURL(wxT("http://user:password@wxwidgets.org")) ); + CPPUNIT_ASSERT( url == wxURL("http://user:password@wxwidgets.org") ); - wxURI uri(wxT("http://user:password@wxwidgets.org")); + wxURI uri("http://user:password@wxwidgets.org"); CPPUNIT_ASSERT( url == uri ); @@ -348,18 +350,22 @@ void URITestCase::URLCompat() CPPUNIT_ASSERT( uricopy == url ); CPPUNIT_ASSERT( uricopy == urlcopy ); CPPUNIT_ASSERT( uricopy == uri ); - CPPUNIT_ASSERT( wxURI::Unescape(wxT("%20%41%20")) == wxT(" A ") ); + CPPUNIT_ASSERT_EQUAL( " A ", wxURI::Unescape("%20%41%20") ); - wxURI test(wxT("file:\"myf\"ile.txt")); + wxURI test("file:\"myf\"ile.txt"); - CPPUNIT_ASSERT( test.BuildURI() == wxT("file:%22myf%22ile.txt") ); - CPPUNIT_ASSERT( test.GetScheme() == wxT("file") ); - CPPUNIT_ASSERT( test.GetPath() == wxT("%22myf%22ile.txt") ); + CPPUNIT_ASSERT_EQUAL( "file:%22myf%22ile.txt" , test.BuildURI() ); + CPPUNIT_ASSERT_EQUAL( "file", test.GetScheme() ); + CPPUNIT_ASSERT_EQUAL( "%22myf%22ile.txt", test.GetPath() 
); // these could be put under a named registry since they take some // time to complete #if 0 // Test problem urls (reported not to work some time ago by a user...) + const wxChar* pszProblemUrls[] = { "http://www.csdn.net", + "http://www.163.com", + "http://www.sina.com.cn" }; + for ( size_t i = 0; i < WXSIZEOF(pszProblemUrls); ++i ) { wxURL urlProblem(pszProblemUrls[i]); -- 2.47.2