no real change: put event handlers together at the end of the file

[wxWidgets.git] / src / common / uri.cpp
diff --git a/src/common/uri.cpp b/src/common/uri.cpp

index 0c9a16f65b0b8a1c8c8881ca3642d5d077804075..aac7aec3d19d2c7a6a79ccb9b2a229ff0f4e3539 100644 (file)
--- a/src/common/uri.cpp
+++ b/src/common/uri.cpp
@@ -1,18 +1,15 @@
  /////////////////////////////////////////////////////////////////////////////
  // Name:        uri.cpp
-// Purpose:     Implementation of a uri parser
-// Author:      Ryan Norton
+// Purpose:     Implementation of a URI parser
+// Author:      Ryan Norton,
+//              Vadim Zeitlin (UTF-8 URI support, many other changes)
  // Created:     10/26/04
  // RCS-ID:      $Id$
-// Copyright:   (c) 2004 Ryan Norton
-// Licence:     wxWindows
+// Copyright:   (c) 2004 Ryan Norton,
+//                  2008 Vadim Zeitlin
+// Licence:     wxWindows licence
  /////////////////////////////////////////////////////////////////////////////
  
-//
-//TODO:  RN:  I had some massive doxygen docs, I need to move these
-//in a presentable form in these sources
-//
-
  // ===========================================================================
  // declarations
  // ===========================================================================
@@ -21,10 +18,6 @@
  // headers
  // ---------------------------------------------------------------------------
  
-#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
-    #pragma implementation "uri.h"
-#endif
-
  // For compilers that support precompilation, includes "wx.h".
  #include "wx/wxprec.h"
  
@@ -32,59 +25,56 @@
      #pragma hdrstop
  #endif
  
+#ifndef WX_PRECOMP
+    #include "wx/crt.h"
+#endif
+
  #include "wx/uri.h"
  
  // ---------------------------------------------------------------------------
  // definitions
  // ---------------------------------------------------------------------------
  
-IMPLEMENT_CLASS(wxURI, wxObject);
+IMPLEMENT_CLASS(wxURI, wxObject)
  
  // ===========================================================================
-// implementation
+// wxURI implementation
  // ===========================================================================
  
  // ---------------------------------------------------------------------------
-// utilities
-// ---------------------------------------------------------------------------
-
-// ---------------------------------------------------------------------------
-//
-//                        wxURI
-//
-// ---------------------------------------------------------------------------
-
-// ---------------------------------------------------------------------------
-//  Constructors
+// Constructors and cleanup
  // ---------------------------------------------------------------------------
  
-wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
+wxURI::wxURI()
+     : m_hostType(wxURI_REGNAME),
+       m_fields(0)
  {
  }
- 
-wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
+
+wxURI::wxURI(const wxString& uri)
+     : m_hostType(wxURI_REGNAME),
+       m_fields(0)
  {
      Create(uri);
  }
  
-wxURI::wxURI(const wxURI& uri)  : m_hostType(wxURI_REGNAME), m_fields(0)
+bool wxURI::Create(const wxString& uri)
  {
-    Assign(uri);
-}
-
-// ---------------------------------------------------------------------------
-// Destructor and cleanup
-// ---------------------------------------------------------------------------
+    if (m_fields)
+        Clear();
  
-wxURI::~wxURI()
-{
-    Clear();
+    return Parse(uri.utf8_str());
  }
  
  void wxURI::Clear()
  {
-    m_scheme = m_user = m_server = m_port = m_path =
-    m_query = m_fragment = wxT("");
+    m_scheme =
+    m_userinfo =
+    m_server =
+    m_port =
+    m_path =
+    m_query =
+    m_fragment = wxEmptyString;
  
      m_hostType = wxURI_REGNAME;
  
@@ -92,127 +82,165 @@ void wxURI::Clear()
  }
  
  // ---------------------------------------------------------------------------
-// Create
-//
-// This creates the URI - all we do here is call the main parsing method 
+// Escaped characters handling
  // ---------------------------------------------------------------------------
  
-void wxURI::Create(const wxString& uri)
-{   
-    if (m_fields)
-        Clear();
+// Converts a character into a numeric hexadecimal value, or -1 if the passed
+// in character is not a valid hex character
  
-    Parse(uri);    
-} 
-
-// ---------------------------------------------------------------------------
-// Escape/Unescape/IsEscape
-//
-// Unescape unencodes a 3 character URL escape sequence 
-// Escape encodes an invalid URI character into a 3 character sequence
-// IsEscape determines if the input string contains an escape sequence,
-// if it does, then it moves the input string past the escape sequence
-// ---------------------------------------------------------------------------
+/* static */
+int wxURI::CharToHex(char c)
+{
+    if ((c >= 'A') && (c <= 'Z'))
+        return c - 'A' + 10;
+    if ((c >= 'a') && (c <= 'z'))
+        return c - 'a' + 10;
+    if ((c >= '0') && (c <= '9'))
+        return c - '0';
+
+    return -1;
+}
  
-wxChar wxURI::Unescape(const wxChar* s)
+int wxURI::DecodeEscape(wxString::const_iterator& i)
  {
-    wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
+    int hi = CharToHex(*++i);
+    if ( hi == -1 )
+        return -1;
+
+    int lo = CharToHex(*++i);
+    if ( lo == -1 )
+        return -1;
  
-    return CharToHex(*s) * 0x10 + CharToHex(*++s);
+    return (hi << 4) | lo;
  }
  
-void wxURI::Escape(wxString& s, const wxChar& c)
+/* static */
+wxString wxURI::Unescape(const wxString& uri)
  {
-    const wxChar* hdig = wxT("0123456789abcdef");
-    s += '%';
-    s += hdig[(c >> 4) & 15];
-       s += hdig[c & 15];
+    // the unescaped version can't be longer than the original one
+    wxCharBuffer buf(uri.length());
+    char *p = buf.data();
+
+    for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p )
+    {
+        char c = *i;
+        if ( c == '%' )
+        {
+            int n = wxURI::DecodeEscape(i);
+            if ( n == -1 )
+                return wxString();
+
+            wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" );
+
+            c = static_cast<char>(n);
+        }
+
+        *p = c;
+    }
+
+    *p = '\0';
+
+    // by default assume that the URI is in UTF-8, this is the most common
+    // practice
+    wxString s = wxString::FromUTF8(buf);
+    if ( s.empty() )
+    {
+        // if it isn't, use latin-1 as a fallback -- at least this always
+        // succeeds
+        s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf);
+    }
+
+    return s;
  }
  
-bool wxURI::IsEscape(const wxChar*& uri)
+void wxURI::AppendNextEscaped(wxString& s, const char *& p)
  {
-    if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2)))
+    // check for an already encoded character:
+    //
+    // pct-encoded   = "%" HEXDIG HEXDIG
+    if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
      {
-        uri += 3;
-        return true;
+        s += *p++;
+        s += *p++;
+        s += *p++;
+    }
+    else // really needs escaping
+    {
+        static const char* hexDigits = "0123456789abcdef";
+
+        const char c = *p++;
+
+        s += '%';
+        s += hexDigits[(c >> 4) & 15];
+        s += hexDigits[c & 15];
      }
-    else
-        return false;
  }
  
  // ---------------------------------------------------------------------------
-// Get
+// GetUser
+// GetPassword
  //
-// Get() actually builds the entire URI into a useable 
-// representation, including proper identification characters such as slashes
+// Gets the username and password via the old URL method.
  // ---------------------------------------------------------------------------
+wxString wxURI::GetUser() const
+{
+    // if there is no colon at all, find() returns npos and this method returns
+    // the entire string which is correct as it means that password was omitted
+    return m_userinfo(0, m_userinfo.find(':'));
+}
+
+wxString wxURI::GetPassword() const
+{
+      size_t posColon = m_userinfo.find(':');
+
+      if ( posColon == wxString::npos )
+          return "";
+
+      return m_userinfo(posColon + 1, wxString::npos);
+}
  
-wxString wxURI::Get() const
-{   
+// combine all URI fields in a single string, applying funcDecode to each
+// component which it may make sense to decode (i.e. "unescape")
+wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
+{
      wxString ret;
  
      if (HasScheme())
-        ret = ret + m_scheme + wxT(":");
+        ret += m_scheme + ":";
  
      if (HasServer())
      {
-        ret += wxT("//");
+        ret += "//";
  
-        if (HasUser())
-            ret = ret + m_user + wxT("@");
+        if (HasUserInfo())
+            ret += funcDecode(m_userinfo) + "@";
  
-        ret += m_server;
+        if (m_hostType == wxURI_REGNAME)
+            ret += funcDecode(m_server);
+        else
+            ret += m_server;
  
          if (HasPort())
-            ret = ret + wxT(":") + m_port;
+            ret += ":" + m_port;
      }
  
-    ret += m_path;
+    ret += funcDecode(m_path);
  
      if (HasQuery())
-        ret = ret + wxT("?") + m_query;
+        ret += "?" + funcDecode(m_query);
  
      if (HasFragment())
-        ret = ret + wxT("#") + m_fragment;
+        ret += "#" + funcDecode(m_fragment);
  
      return ret;
  }
  
  // ---------------------------------------------------------------------------
-// operator = and ==
+// Comparison
  // ---------------------------------------------------------------------------
  
-wxURI& wxURI::operator = (const wxURI& uri)
-{
-    return Assign(uri);
-}
-
-wxURI& wxURI::Assign(const wxURI& uri)
+bool wxURI::operator==(const wxURI& uri) const
  {
-    //assign fields
-    m_fields = uri.m_fields;
-
-    //ref over components
-    m_scheme = uri.m_scheme;
-    m_user = uri.m_user;
-    m_server = uri.m_server;
-    m_hostType = uri.m_hostType;
-    m_port = uri.m_port;
-    m_path = uri.m_path;
-    m_query = uri.m_query;
-    m_fragment = uri.m_fragment;
-
-    return *this;
-}
-
-wxURI& wxURI::operator = (const wxString& string)
-{   
-    Create(string);
-    return *this;
-}
-
-bool wxURI::operator == (const wxURI& uri) const
-{    
      if (HasScheme())
      {
          if(m_scheme != uri.m_scheme)
@@ -224,12 +252,12 @@ bool wxURI::operator == (const wxURI& uri) const
  
      if (HasServer())
      {
-        if (HasUser())
+        if (HasUserInfo())
          {
-            if (m_user != uri.m_user)
+            if (m_userinfo != uri.m_userinfo)
                  return false;
          }
-        else if (uri.HasUser())
+        else if (uri.HasUserInfo())
              return false;
  
          if (m_server != uri.m_server ||
@@ -282,7 +310,9 @@ bool wxURI::operator == (const wxURI& uri) const
  // ---------------------------------------------------------------------------
  
  bool wxURI::IsReference() const
-{   return !HasScheme() || !HasServer();  }
+{
+    return !HasScheme() || !HasServer();
+}
  
  // ---------------------------------------------------------------------------
  // Parse
@@ -290,212 +320,197 @@ bool wxURI::IsReference() const
  // Master URI parsing method.  Just calls the individual parsing methods
  //
  // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
-// URI-reference = URI / relative-URITestCase
+// URI-reference = URI / relative-ref
  // ---------------------------------------------------------------------------
  
-const wxChar* wxURI::Parse(const wxChar* uri)
+bool wxURI::Parse(const char *uri)
  {
      uri = ParseScheme(uri);
-    uri = ParseAuthority(uri);
-    uri = ParsePath(uri);
-    uri = ParseQuery(uri);
-    return ParseFragment(uri);
+    if ( uri )
+        uri = ParseAuthority(uri);
+    if ( uri )
+        uri = ParsePath(uri);
+    if ( uri )
+        uri = ParseQuery(uri);
+    if ( uri )
+        uri = ParseFragment(uri);
+
+    // we only succeed if we parsed the entire string
+    return uri && *uri == '\0';
  }
  
-// ---------------------------------------------------------------------------
-// ParseXXX
-//
-// Individual parsers for each URI component
-// ---------------------------------------------------------------------------
-
-const wxChar* wxURI::ParseScheme(const wxChar* uri)
+const char* wxURI::ParseScheme(const char *uri)
  {
-    wxASSERT(uri != NULL);
+    const char * const start = uri;
  
-    //copy of the uri - used for figuring out
-    //length of each component
-    const wxChar* uricopy = uri;
-
-    //Does the uri have a scheme (first character alpha)?
-    if (IsAlpha(*uri))
+    // assume that we have a scheme if we have the valid start of it
+    if ( IsAlpha(*uri) )
      {
          m_scheme += *uri++;
  
          //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-        //RN: Scheme can not be escaped
-        while (IsAlpha(*uri) || IsDigit(*uri) || 
+        while (IsAlpha(*uri) || IsDigit(*uri) ||
                 *uri == '+'   ||
                 *uri == '-'   ||
-               *uri == '.') 
-        { 
-            m_scheme += *uri++; 
+               *uri == '.')
+        {
+            m_scheme += *uri++;
          }
  
          //valid scheme?
          if (*uri == ':')
-        { 
+        {
              //mark the scheme as valid
              m_fields |= wxURI_SCHEME;
  
              //move reference point up to input buffer
-            uricopy = ++uri;
+            ++uri;
+        }
+        else // no valid scheme finally
+        {
+            uri = start; // rewind
+            m_scheme.clear();
          }
-        else 
-            //relative uri with relative path reference
-            m_scheme = wxT("");
      }
-//    else 
-        //relative uri with _possible_ relative path reference
+    //else: can't have a scheme, possibly a relative URI
  
-    return uricopy;
+    return uri;
  }
  
-const wxChar* wxURI::ParseAuthority(const wxChar* uri)
+const char* wxURI::ParseAuthority(const char* uri)
  {
      // authority     = [ userinfo "@" ] host [ ":" port ]
-    if (*uri == '/' && *(uri+1) == '/') 
+    if ( uri[0] == '/' && uri[1] == '/' )
      {
+        //skip past the two slashes
          uri += 2;
  
-        uri = ParseUser(uri);
-        uri = ParseServer(uri);
-        return ParsePort(uri);
+        // ############# DEVIATION FROM RFC #########################
+        // Don't parse the server component for file URIs
+        if(m_scheme != "file")
+        {
+            //normal way
+            uri = ParseUserInfo(uri);
+            uri = ParseServer(uri);
+            return ParsePort(uri);
+        }
      }
  
      return uri;
  }
  
-const wxChar* wxURI::ParseUser(const wxChar* uri)
+const char* wxURI::ParseUserInfo(const char* uri)
  {
-    wxASSERT(uri != NULL);
-
-    //copy of the uri - used for figuring out
-    //length of each component
-    const wxChar* uricopy = uri;
+    const char * const start = uri;
  
      // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
-    while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?') 
+    while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
      {
-        if(IsUnreserved(*uri) || IsEscape(uri) || 
-           IsSubDelim(*uri) || *uri == ':')
-            m_user += *uri++;
+        if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
+            m_userinfo += *uri++;
          else
-            Escape(m_user, *uri++);
+            AppendNextEscaped(m_userinfo, uri);
      }
  
-    if(*uri == '@')
+    if ( *uri++ == '@' )
      {
-        //valid userinfo
-        m_fields |= wxURI_USER;
-
-        uricopy = ++uri;
+        // valid userinfo
+        m_fields |= wxURI_USERINFO;
      }
      else
-        m_user = wxT("");
+    {
+        uri = start; // rewind
+        m_userinfo.clear();
+    }
  
-    return uricopy;
+    return uri;
  }
  
-const wxChar* wxURI::ParseServer(const wxChar* uri)
+const char* wxURI::ParseServer(const char* uri)
  {
-    wxASSERT(uri != NULL);
-
-    //copy of the uri - used for figuring out
-    //length of each component
-    const wxChar* uricopy = uri;
+    const char * const start = uri;
  
      // host          = IP-literal / IPv4address / reg-name
      // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
      if (*uri == '[')
      {
-        if (ParseIPv6address(++uri) && *uri == ']')
+        ++uri;
+        if (ParseIPv6address(uri) && *uri == ']')
          {
-            ++uri;
              m_hostType = wxURI_IPV6ADDRESS;
-           
-            wxStringBufferLength theBuffer(m_server, uri - uricopy);
-            wxMemcpy(theBuffer, uricopy, uri-uricopy);
-            theBuffer.SetLength(uri-uricopy);
+
+            m_server.assign(start + 1, uri - start - 1);
+            ++uri;
          }
          else
          {
-            uri = uricopy;
+            uri = start + 1; // skip the leading '[' again
  
-            if (ParseIPvFuture(++uri) && *uri == ']')
+            if (ParseIPvFuture(uri) && *uri == ']')
              {
+                m_hostType = wxURI_IPVFUTURE;
+
+                m_server.assign(start + 1, uri - start - 1);
                  ++uri;
-                m_hostType = wxURI_IPVFUTURE; 
-           
-                wxStringBufferLength theBuffer(m_server, uri - uricopy);
-                wxMemcpy(theBuffer, uricopy, uri-uricopy);
-                theBuffer.SetLength(uri-uricopy);
              }
-            else 
-                uri = uricopy;
+            else // unrecognized IP literal
+            {
+                uri = start;
+            }
          }
      }
-    else 
+    else // IPv4 or a reg-name
      {
          if (ParseIPv4address(uri))
          {
              m_hostType = wxURI_IPV4ADDRESS;
  
-            wxStringBufferLength theBuffer(m_server, uri - uricopy);
-            wxMemcpy(theBuffer, uricopy, uri-uricopy);
-            theBuffer.SetLength(uri-uricopy);
+            m_server.assign(start, uri - start);
+        }
+        else
+        {
+            uri = start;
          }
-        else 
-            uri = uricopy;
      }
  
-    if(m_hostType == wxURI_REGNAME)
+    if ( m_hostType == wxURI_REGNAME )
      {
-        uri = uricopy;
+        uri = start;
          // reg-name      = *( unreserved / pct-encoded / sub-delims )
-        while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?') 
+        while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
          {
-            if(IsUnreserved(*uri) || IsEscape(uri) ||  IsSubDelim(*uri))
+            if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
                  m_server += *uri++;
              else
-                Escape(m_server, *uri++);
-        }                
+                AppendNextEscaped(m_server, uri);
+        }
      }
  
-    //mark the server as valid
      m_fields |= wxURI_SERVER;
  
      return uri;
  }
  
- 
-const wxChar* wxURI::ParsePort(const wxChar* uri)
-{
-    wxASSERT(uri != NULL);
  
+const char* wxURI::ParsePort(const char* uri)
+{
      // port          = *DIGIT
-    if(*uri == ':')
+    if( *uri == ':' )
      {
          ++uri;
-        while(IsDigit(*uri)) 
+        while ( IsDigit(*uri) )
          {
              m_port += *uri++;
-        }                
+        }
  
-        //mark the port as valid
          m_fields |= wxURI_PORT;
      }
  
      return uri;
  }
  
-const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
+const char* wxURI::ParsePath(const char* uri)
  {
-    wxASSERT(uri != NULL);
-
-    //copy of the uri - used for figuring out
-    //length of each component
-    const wxChar* uricopy = uri;
-
      /// hier-part     = "//" authority path-abempty
      ///               / path-absolute
      ///               / path-rootless
@@ -518,90 +533,77 @@ const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormali
      ///               ; non-zero-length segment without any colon ":"
      ///
      /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
-    if (*uri == '/')
-    {
-        m_path += *uri++;
  
-        while(*uri && *uri != '#' && *uri != '?') 
-        { 
-            if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
-                *uri == ':' || *uri == '@' || *uri == '/')
-                m_path += *uri++; 
-            else    
-                Escape(m_path, *uri++);    
-        }
+    if ( IsEndPath(*uri) )
+        return uri;
  
-        if (bNormalize)
-        {
-            wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
-            Normalize(theBuffer, true);
-            theBuffer.SetLength(wxStrlen(theBuffer));
-        }
-        //mark the path as valid
-        m_fields |= wxURI_PATH;
-    }
-    else if(*uri) //Relative path
+    const bool isAbs = *uri == '/';
+    if ( isAbs )
+        m_path += *uri++;
+
+    wxArrayString segments;
+    wxString segment;
+    for ( ;; )
      {
-        if (bReference)
+        const bool endPath = IsEndPath(*uri);
+        if ( endPath || *uri == '/' )
          {
-            //no colon allowed
-            while(*uri && *uri != '#' && *uri != '?') 
+            // end of a segment, look at what we got
+            if ( segment == ".." )
              {
-                if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
-                  *uri == '@' || *uri == '/')
-                    m_path += *uri++; 
-                else    
-                    Escape(m_path, *uri++);    
+                if ( !segments.empty() && *segments.rbegin() != ".." )
+                    segments.pop_back();
+                else if ( !isAbs )
+                    segments.push_back("..");
              }
-        } 
-        else
-        {
-            while(*uri && *uri != '#' && *uri != '?') 
+            else if ( segment == "." )
              {
-                if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
-                   *uri == ':' || *uri == '@' || *uri == '/')
-                    m_path += *uri++; 
-                else    
-                    Escape(m_path, *uri++);    
+                // normally we ignore "." but the last one should be taken into
+                // account as "path/." is the same as "path/" and not just "path"
+                if ( endPath )
+                    segments.push_back("");
              }
-        }
-
-        if (uri != uricopy)
-        {         
-            if (bNormalize)
+            else // normal segment
              {
-                wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
-                Normalize(theBuffer);
-                theBuffer.SetLength(wxStrlen(theBuffer));
+                segments.push_back(segment);
              }
  
-            //mark the path as valid
-            m_fields |= wxURI_PATH;
+            if ( endPath )
+                break;
+
+            segment.clear();
+            ++uri;
+            continue;
          }
+
+        if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
+            segment += *uri++;
+        else
+            AppendNextEscaped(segment, uri);
      }
  
+    m_path += wxJoin(segments, '/', '\0');
+    m_fields |= wxURI_PATH;
+
      return uri;
  }
  
  
-const wxChar* wxURI::ParseQuery(const wxChar* uri)
+const char* wxURI::ParseQuery(const char* uri)
  {
-    wxASSERT(uri != NULL);
-
      // query         = *( pchar / "/" / "?" )
-    if (*uri == '?')
+    if ( *uri == '?' )
      {
          ++uri;
-        while(*uri && *uri != '#')
+        while ( *uri && *uri != '#' )
          {
-            if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
-                *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
-                  m_query += *uri++;  
+            if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
+                    *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
+                m_query += *uri++;
              else
-                  Escape(m_query, *uri++); 
+                AppendNextEscaped(m_query, uri);
          }
  
-        //mark the server as valid
          m_fields |= wxURI_QUERY;
      }
  
@@ -609,24 +611,21 @@ const wxChar* wxURI::ParseQuery(const wxChar* uri)
  }
  
  
-const wxChar* wxURI::ParseFragment(const wxChar* uri)
+const char* wxURI::ParseFragment(const char* uri)
  {
-    wxASSERT(uri != NULL);
-
      // fragment      = *( pchar / "/" / "?" )
-    if (*uri == '#')
+    if ( *uri == '#' )
      {
          ++uri;
-        while(*uri)
+        while ( *uri )
          {
-            if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
-                *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
-                  m_fragment += *uri++;  
+            if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
+                    *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
+                m_fragment += *uri++;
              else
-                  Escape(m_fragment, *uri++); 
+                AppendNextEscaped(m_fragment, uri);
          }
  
-        //mark the server as valid
          m_fields |= wxURI_FRAGMENT;
      }
  
@@ -634,27 +633,34 @@ const wxChar* wxURI::ParseFragment(const wxChar* uri)
  }
  
  // ---------------------------------------------------------------------------
-//  Resolve URI
+// Resolve
+//
+// Builds missing components of this uri from a base uri
  //
-//  Builds missing components of this uri from a base uri
+// A version of the algorithm outlined in the RFC is used here
+// (it is shown in comments)
  //
-//  A version of the algorithm outlined in the RFC is used here
-//  (it is shown in comments)
+// Note that an empty URI inherits all components
  // ---------------------------------------------------------------------------
  
+/* static */
+wxArrayString wxURI::SplitInSegments(const wxString& path)
+{
+    return wxSplit(path, '/', '\0' /* no escape character */);
+}
+
  void wxURI::Resolve(const wxURI& base, int flags)
  {
-    wxASSERT_MSG(!base.IsReference(), 
-                wxT("wxURI to inherit from must not be a reference!"));
+    wxASSERT_MSG(!base.IsReference(),
+                "wxURI to inherit from must not be a reference!");
  
-    // If we arn't being strict, enable the older
-    // loophole that allows this uri to inherit other
-    // properties from the base uri - even if the scheme
-    // is defined
+    // If we aren't being strict, enable the older (pre-RFC2396) loophole that
+    // allows this uri to inherit other properties from the base uri - even if
+    // the scheme is defined
      if ( !(flags & wxURI_STRICT) &&
              HasScheme() && base.HasScheme() &&
                  m_scheme == base.m_scheme )
-    {   
+    {
          m_fields -= wxURI_SCHEME;
      }
  
@@ -666,11 +672,9 @@ void wxURI::Resolve(const wxURI& base, int flags)
      //       T.path      = remove_dot_segments(R.path);
      //       T.query     = R.query;
      if (HasScheme())
-    {
          return;
-    }
  
-    //No sheme - inherit
+    //No scheme - inherit
      m_scheme = base.m_scheme;
      m_fields |= wxURI_SCHEME;
  
@@ -681,27 +685,25 @@ void wxURI::Resolve(const wxURI& base, int flags)
      //          T.path      = remove_dot_segments(R.path);
      //          T.query     = R.query;
      if (HasServer())
-    {
          return;
-    }
  
      //No authority - inherit
-    if (base.HasUser())
+    if (base.HasUserInfo())
      {
-        m_user = base.m_user;
-        m_fields |= wxURI_USER;
+        m_userinfo = base.m_userinfo;
+        m_fields |= wxURI_USERINFO;
      }
-    
+
      m_server = base.m_server;
      m_hostType = base.m_hostType;
      m_fields |= wxURI_SERVER;
-    
+
      if (base.HasPort())
      {
          m_port = base.m_port;
          m_fields |= wxURI_PORT;
      }
-    
+
  
      // Simple path inheritance from base
      if (!HasPath())
@@ -709,7 +711,7 @@ void wxURI::Resolve(const wxURI& base, int flags)
          //             T.path = Base.path;
          m_path = base.m_path;
          m_fields |= wxURI_PATH;
-        
+
  
          //             if defined(R.query) then
          //                T.query = R.query;
@@ -722,7 +724,7 @@ void wxURI::Resolve(const wxURI& base, int flags)
              m_fields |= wxURI_QUERY;
          }
      }
-    else
+    else if ( m_path.empty() || m_path[0u] != '/' )
      {
          //             if (R.path starts-with "/") then
          //                T.path = remove_dot_segments(R.path);
@@ -731,128 +733,95 @@ void wxURI::Resolve(const wxURI& base, int flags)
          //                T.path = remove_dot_segments(T.path);
          //             endif;
          //             T.query = R.query;
-        if (m_path[(const size_t&)0] != '/')
-        {
-            //Marge paths
-            const wxChar* op = m_path.c_str();
-            const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
+        //
+        // So we don't do anything for absolute paths and implement merge for
+        // the relative ones
  
-            //not a ending directory?  move up
-            if (base.m_path[0] && *(bp-1) != '/')
-                UpTree(base.m_path, bp);
+        wxArrayString our(SplitInSegments(m_path)),
+                      result(SplitInSegments(base.m_path));
  
-            //normalize directories
-            while(*op == '.' && *(op+1) == '.' && 
-                       (*(op+2) == '\0' || *(op+2) == '/') )
-            {
-                UpTree(base.m_path, bp);
+        if ( !result.empty() )
+            result.pop_back();
  
-                if (*(op+2) == '\0')
-                    op += 2;
-                else
-                    op += 3;
-            }
-
-            m_path = base.m_path.substr(0, bp - base.m_path.c_str()) + 
-                    m_path.Mid((op - m_path.c_str()), m_path.Length());
-        }
-    }
-}
-
-// ---------------------------------------------------------------------------
-// Directory Normalization (static)
-//
-// UpTree goes up a directory in a string and moves the pointer as such,
-// while Normalize gets rid of duplicate/erronues directories in a URI
-// according to RFC 2396 and modified quite a bit to meet the unit tests
-// in it.
-// ---------------------------------------------------------------------------
-
-void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
-{
-    if (uri != uristart && *(uri-1) == '/')
-    {
-        uri -= 2;
-    }
-    
-    for(;uri != uristart; --uri)
-    {
-        if (*uri == '/')
+        if ( our.empty() )
          {
-            ++uri;
-            break;
+            // if we have an empty path it means we were constructed from a "."
+            // string or something similar (e.g. "././././"), it should count
+            // as an (empty) segment
+            our.push_back("");
          }
-    }
-
-    //!!!TODO:HACK!!!//
-    if (uri == uristart && *uri == '/')
-        ++uri;
-    //!!!//
-}
-
-void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
-{
-    wxChar* cp = s;
-    wxChar* bp = s;
-
-    if(s[0] == '/')
-        ++bp;
  
-    while(*cp)
-    {
-        if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0')
-            && (bp == cp || *(cp-1) == '/'))
-        {
-            //. _or_ ./  - ignore
-            if (*(cp+1) == '\0')
-                cp += 1;
-            else
-                cp += 2;
-        }
-        else if (*cp == '.' && *(cp+1) == '.' && 
-                (*(cp+2) == '/' || *(cp+2) == '\0')
-                && (bp == cp || *(cp-1) == '/'))
+        const wxArrayString::const_iterator end = our.end();
+        for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
          {
-            //.. _or_ ../ - go up the tree
-            if (s != bp)
+            if ( i->empty() || *i == "." )
              {
-                UpTree((const wxChar*)bp, (const wxChar*&)s);
-
-                if (*(cp+2) == '\0')
-                    cp += 2;
-                else
-                    cp += 3;
+                // as in ParsePath(), while normally we ignore the empty
+                // segments, we need to take account of them at the end
+                if ( i == end - 1 )
+                    result.push_back("");
+                continue;
              }
-            else if (!bIgnoreLeads)
  
+            if ( *i == ".." )
              {
-                *bp++ = *cp++;
-                *bp++ = *cp++;
-                if (*cp)
-                    *bp++ = *cp++;
-
-                s = bp;
+                if ( !result.empty() )
+                {
+                    result.pop_back();
+
+                    if ( i == end - 1 )
+                        result.push_back("");
+                }
+                //else: just ignore, extra ".." don't accumulate
              }
              else
              {
-                if (*(cp+2) == '\0')
-                    cp += 2;
-                else
-                    cp += 3;
+                if ( result.empty() )
+                {
+                    // ensure that the resulting path will always be absolute
+                    result.push_back("");
+                }
+
+                result.push_back(*i);
              }
          }
-        else
-            *s++ = *cp++; 
+
+        m_path = wxJoin(result, '/', '\0');
      }
  
-    *s = '\0';
+    //T.fragment = R.fragment;
  }
  
  // ---------------------------------------------------------------------------
-// Misc. Parsing Methods
+// ParseH16
+//
+// Parses 1 to 4 hex values.  Returns true if the first character of the input
+// string is a valid hex character.  It is the caller's responsibility to move
+// the input string back to its original position on failure.
  // ---------------------------------------------------------------------------
  
-bool wxURI::ParseIPv4address(const wxChar*& uri)
+bool wxURI::ParseH16(const char*& uri)
+{
+    // h16           = 1*4HEXDIG
+    if(!IsHex(*++uri))
+        return false;
+
+    if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
+        ++uri;
+
+    return true;
+}
+
+// ---------------------------------------------------------------------------
+// ParseIPXXX
+//
+// Parses a certain version of an IP address and moves the input string past
+// it.  Returns true if the input string contains the proper version of an IP
+// address.  It is the caller's responsibility to move the input string back
+// to its original position on failure.
+// ---------------------------------------------------------------------------
+
+bool wxURI::ParseIPv4address(const char*& uri)
  {
      //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
      //
@@ -866,13 +835,13 @@ bool wxURI::ParseIPv4address(const wxChar*& uri)
      {
          ++iIPv4;
  
-        
+
          //each ip part must be between 0-255 (dupe of version in for loop)
          if( IsDigit(*++uri) && IsDigit(*++uri) &&
             //100 or less  (note !)
-           !( (*(uri-2) < '2') || 
-           //240 or less   
-             (*(uri-2) == '2' && 
+           !( (*(uri-2) < '2') ||
+           //240 or less
+             (*(uri-2) == '2' &&
                 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
               )
              )
@@ -892,9 +861,9 @@ bool wxURI::ParseIPv4address(const wxChar*& uri)
              //each ip part must be between 0-255
              if( IsDigit(*++uri) && IsDigit(*++uri) &&
                 //100 or less  (note !)
-               !( (*(uri-2) < '2') || 
-               //240 or less   
-                 (*(uri-2) == '2' && 
+               !( (*(uri-2) < '2') ||
+               //240 or less
+                 (*(uri-2) == '2' &&
                     (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
                   )
                  )
@@ -908,19 +877,7 @@ bool wxURI::ParseIPv4address(const wxChar*& uri)
      return iIPv4 == 4;
  }
  
-bool wxURI::ParseH16(const wxChar*& uri)
-{
-    // h16           = 1*4HEXDIG
-    if(!IsHex(*++uri))
-        return false;
-
-    if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
-        ++uri;
-
-    return true;
-}
-
-bool wxURI::ParseIPv6address(const wxChar*& uri)
+bool wxURI::ParseIPv6address(const char*& uri)
  {
      // IPv6address   =                            6( h16 ":" ) ls32
      //               /                       "::" 5( h16 ":" ) ls32
@@ -945,7 +902,7 @@ bool wxURI::ParseIPv6address(const wxChar*& uri)
              bEndHex = true;
              break;
          }
-        
+
          if(*uri != ':')
          {
              break;
@@ -979,13 +936,13 @@ bool wxURI::ParseIPv6address(const wxChar*& uri)
              while (*--uri != ':') {}
              ++uri;
  
-            const wxChar* uristart = uri;
+            const char * const start = uri;
              //parse ls32
              // ls32          = ( h16 ":" h16 ) / IPv4address
-            if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) 
+            if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
                  return true;
  
-            uri = uristart;
+            uri = start;
  
              if (ParseIPv4address(uri))
                  return true;
@@ -995,7 +952,7 @@ bool wxURI::ParseIPv6address(const wxChar*& uri)
          else
          {
              uri += 2;
-    
+
              if (numPrefix > 3)
                  maxPostfix = 0;
              else
@@ -1013,19 +970,19 @@ bool wxURI::ParseIPv6address(const wxChar*& uri)
  
      if(numPrefix <= 4)
      {
-        const wxChar* uristart = uri;
+        const char * const start = uri;
          //parse ls32
          // ls32          = ( h16 ":" h16 ) / IPv4address
-        if (ParseH16(uri) && *uri == ':' && ParseH16(uri)) 
+        if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
              return true;
  
-        uri = uristart;
+        uri = start;
  
          if (ParseIPv4address(uri))
              return true;
  
-        uri = uristart;
-        
+        uri = start;
+
          if (!bAllowAltEnding)
              return false;
      }
@@ -1036,13 +993,14 @@ bool wxURI::ParseIPv6address(const wxChar*& uri)
      return true;
  }
  
-bool wxURI::ParseIPvFuture(const wxChar*& uri)
+bool wxURI::ParseIPvFuture(const char*& uri)
  {
      // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
      if (*++uri != 'v' || !IsHex(*++uri))
          return false;
  
-    while (IsHex(*++uri)) {}
+    while (IsHex(*++uri))
+        ;
  
      if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
          return false;
@@ -1054,35 +1012,30 @@ bool wxURI::ParseIPvFuture(const wxChar*& uri)
  
  
  // ---------------------------------------------------------------------------
-// Misc methods - IsXXX and CharToHex
+// IsXXX
+//
+// Returns true if the passed in character meets the criteria of the method
  // ---------------------------------------------------------------------------
  
-int wxURI::CharToHex(const wxChar& c)
+// unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+bool wxURI::IsUnreserved(char c)
  {
-       if ((c >= 'A') && (c <= 'Z'))   return c - 'A' + 0x0A;
-       if ((c >= 'a') && (c <= 'z'))   return c - 'a' + 0x0a;
-       if ((c >= '0') && (c <= '9'))   return c - '0' + 0x00;
-
-       return 0;
-}
-
-//! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
-bool wxURI::IsUnreserved (const wxChar& c)
-{   return IsAlpha(c) || IsDigit(c) || 
+    return IsAlpha(c) ||
+           IsDigit(c) ||
             c == '-' ||
             c == '.' ||
             c == '_' ||
-           c == '~' //tilde
-           ;  
+           c == '~'
+           ;
  }
  
-bool wxURI::IsReserved (const wxChar& c)
-{   
+bool wxURI::IsReserved(char c)
+{
      return IsGenDelim(c) || IsSubDelim(c);
  }
  
-//! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
-bool wxURI::IsGenDelim (const wxChar& c)
+// gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+bool wxURI::IsGenDelim(char c)
  {
      return c == ':' ||
             c == '/' ||
@@ -1093,9 +1046,9 @@ bool wxURI::IsGenDelim (const wxChar& c)
             c == '@';
  }
  
-//! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
-//!               / "*" / "+" / "," / ";" / "="
-bool wxURI::IsSubDelim (const wxChar& c)
+// sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+//               / "*" / "+" / "," / ";" / "="
+bool wxURI::IsSubDelim(char c)
  {
      return c == '!' ||
             c == '$' ||
@@ -1107,115 +1060,29 @@ bool wxURI::IsSubDelim (const wxChar& c)
             c == '+' ||
             c == ',' ||
             c == ';' ||
-           c == '=' 
+           c == '='
             ;
  }
  
-bool wxURI::IsHex(const wxChar& c)
-{   return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
-
-bool wxURI::IsAlpha(const wxChar& c)
-{   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');  }
-
-bool wxURI::IsDigit(const wxChar& c)
-{   return c >= '0' && c <= '9';        }
-
-
-// ---------------------------------------------------------------------------
-//
-//                        wxURL Compatability
-//
-// TODO:  Use wxURI instead here...
-// ---------------------------------------------------------------------------
-
-#if wxUSE_URL
-
-#include "wx/url.h"
-
-wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
+bool wxURI::IsHex(char c)
  {
-  wxString out_str;
-  wxString hexa_code;
-  size_t i;
-
-  for (i = 0; i < uri.Len(); i++)
-  {
-    wxChar c = uri.GetChar(i);
-
-    if (c == wxT(' '))
-    {
-      // GRG, Apr/2000: changed to "%20" instead of '+'
-
-      out_str += wxT("%20");
-    }
-    else
-    {
-      // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
-      //
-      // - Alphanumeric characters are never escaped
-      // - Unreserved marks are never escaped
-      // - Delimiters must be escaped if they appear within a component
-      //     but not if they are used to separate components. Here we have
-      //     no clear way to distinguish between these two cases, so they
-      //     are escaped unless they are passed in the 'delims' parameter
-      //     (allowed delimiters).
-
-      static const wxChar marks[] = wxT("-_.!~*()'");
-
-      if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
-      {
-        hexa_code.Printf(wxT("%%%02X"), c);
-        out_str += hexa_code;
-      }
-      else
-      {
-        out_str += c;
-      }
-    }
-  }
-
-  return out_str;
+    return IsDigit(c) ||
+           (c >= 'a' && c <= 'f') ||
+           (c >= 'A' && c <= 'F');
  }
  
-wxString wxURL::ConvertFromURI(const wxString& uri)
+bool wxURI::IsAlpha(char c)
  {
-  wxString new_uri;
-
-  size_t i = 0;
-  while (i < uri.Len())
-  {
-    int code;
-    if (uri[i] == wxT('%'))
-    {
-      i++;
-      if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
-        code = (uri[i] - wxT('A') + 10) * 16;
-      else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
-        code = (uri[i] - wxT('a') + 10) * 16;
-      else
-        code = (uri[i] - wxT('0')) * 16;
-
-      i++;
-      if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
-        code += (uri[i] - wxT('A')) + 10;
-      else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
-        code += (uri[i] - wxT('a')) + 10;
-      else
-        code += (uri[i] - wxT('0'));
-
-      i++;
-      new_uri += (wxChar)code;
-      continue;
-    }
-    new_uri += uri[i];
-    i++;
-  }
-  return new_uri;
+    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  }
  
-#endif //wxUSE_URL
-
-//end of uri.cpp
-
+bool wxURI::IsDigit(char c)
+{
+    return c >= '0' && c <= '9';
+}
  
+bool wxURI::IsEndPath(char c)
+{
+    return c == '\0' || c == '#' || c == '?';
+}