src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  20     #pragma implementation "uri.h"
  21 #endif
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27     #pragma hdrstop
  28 #endif
  29
  30 #include "wx/uri.h"
  31
  32 // ---------------------------------------------------------------------------
  33 // definitions
  34 // ---------------------------------------------------------------------------
  35
// Emits the class-information definitions for wxURI with wxObject named as
// its base class.
// NOTE(review): presumably pairs with a DECLARE_* macro in wx/uri.h - confirm.
IMPLEMENT_CLASS(wxURI, wxObject);
  37
  38 // ===========================================================================
  39 // implementation
  40 // ===========================================================================
  41
  42 // ---------------------------------------------------------------------------
  43 // utilities
  44 // ---------------------------------------------------------------------------
  45
  46 // ---------------------------------------------------------------------------
  47 //
  48 //                        wxURI
  49 //
  50 // ---------------------------------------------------------------------------
  51
  52 // ---------------------------------------------------------------------------
  53 //  Constructors
  54 // ---------------------------------------------------------------------------
  55
  56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58 }
  59
  60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  61 {
  62     Create(uri);
  63 }
  64
  65 wxURI::wxURI(const wxURI& uri)  : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     Assign(uri);
  68 }
  69
  70 // ---------------------------------------------------------------------------
  71 // Destructor and cleanup
  72 // ---------------------------------------------------------------------------
  73
  74 wxURI::~wxURI()
  75 {
  76     Clear();
  77 }
  78
  79 void wxURI::Clear()
  80 {
  81     m_scheme = m_userinfo = m_server = m_port = m_path =
  82     m_query = m_fragment = wxEmptyString;
  83
  84     m_hostType = wxURI_REGNAME;
  85
  86     m_fields = 0;
  87 }
  88
  89 // ---------------------------------------------------------------------------
  90 // Create
  91 //
  92 // This creates the URI - all we do here is call the main parsing method
  93 // ---------------------------------------------------------------------------
  94
  95 const wxChar* wxURI::Create(const wxString& uri)
  96 {
  97     if (m_fields)
  98         Clear();
  99
 100     return Parse(uri);
 101 }
 102
 103 // ---------------------------------------------------------------------------
 104 // Escape Methods
 105 //
 106 // TranslateEscape unencodes a 3 character URL escape sequence
 107 //
 108 // Escape encodes an invalid URI character into a 3 character sequence
 109 //
// IsEscape determines if the input string starts with an escape sequence
// ("%" followed by two hex digits); it does not advance the input string
 112 //
 113 // Unescape unencodes all 3 character URL escape sequences in a wxString
 114 // ---------------------------------------------------------------------------
 115
 116 wxChar wxURI::TranslateEscape(const wxChar* s)
 117 {
 118     wxASSERT_MSG( IsHex(s[0]) && IsHex(s[1]), wxT("Invalid escape sequence!"));
 119
 120     return (wxChar)( CharToHex(s[0]) << 4 ) | CharToHex(s[1]);
 121 }
 122
 123 wxString wxURI::Unescape(const wxString& uri)
 124 {
 125     wxString new_uri;
 126
 127     for(size_t i = 0; i < uri.length(); ++i)
 128     {
 129         if (uri[i] == wxT('%'))
 130         {
 131             new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
 132             i += 2;
 133         }
 134         else
 135             new_uri += uri[i];
 136     }
 137
 138     return new_uri;
 139 }
 140
 141 void wxURI::Escape(wxString& s, const wxChar& c)
 142 {
 143     const wxChar* hdig = wxT("0123456789abcdef");
 144     s += wxT('%');
 145     s += hdig[(c >> 4) & 15];
 146     s += hdig[c & 15];
 147 }
 148
 149 bool wxURI::IsEscape(const wxChar*& uri)
 150 {
 151     // pct-encoded   = "%" HEXDIG HEXDIG
 152     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 153         return true;
 154     else
 155         return false;
 156 }
 157
 158 // ---------------------------------------------------------------------------
 159 // GetUser
 160 // GetPassword
 161 //
 162 // Gets the username and password via the old URL method.
 163 // ---------------------------------------------------------------------------
 164 wxString wxURI::GetUser() const
 165 {
 166       size_t dwPasswordPos = m_userinfo.find(':');
 167
 168       if (dwPasswordPos == wxString::npos)
 169           dwPasswordPos = 0;
 170
 171       return m_userinfo(0, dwPasswordPos);
 172 }
 173
 174 wxString wxURI::GetPassword() const
 175 {
 176       size_t dwPasswordPos = m_userinfo.find(':');
 177
 178       if (dwPasswordPos == wxString::npos)
 179           return wxT("");
 180       else
 181           return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
 182 }
 183
 184 // ---------------------------------------------------------------------------
 185 // BuildURI
 186 //
 187 // BuildURI() builds the entire URI into a useable
 188 // representation, including proper identification characters such as slashes
 189 //
 190 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 191 // the components that accept escape sequences
 192 // ---------------------------------------------------------------------------
 193
 194 wxString wxURI::BuildURI() const
 195 {
 196     wxString ret;
 197
 198     if (HasScheme())
 199         ret = ret + m_scheme + wxT(":");
 200
 201     if (HasServer())
 202     {
 203         ret += wxT("//");
 204
 205         if (HasUserInfo())
 206             ret = ret + m_userinfo + wxT("@");
 207
 208         ret += m_server;
 209
 210         if (HasPort())
 211             ret = ret + wxT(":") + m_port;
 212     }
 213
 214     ret += m_path;
 215
 216     if (HasQuery())
 217         ret = ret + wxT("?") + m_query;
 218
 219     if (HasFragment())
 220         ret = ret + wxT("#") + m_fragment;
 221
 222     return ret;
 223 }
 224
 225 wxString wxURI::BuildUnescapedURI() const
 226 {
 227     wxString ret;
 228
 229     if (HasScheme())
 230         ret = ret + m_scheme + wxT(":");
 231
 232     if (HasServer())
 233     {
 234         ret += wxT("//");
 235
 236         if (HasUserInfo())
 237             ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
 238
 239         if (m_hostType == wxURI_REGNAME)
 240             ret += wxURI::Unescape(m_server);
 241         else
 242             ret += m_server;
 243
 244         if (HasPort())
 245             ret = ret + wxT(":") + m_port;
 246     }
 247
 248     ret += wxURI::Unescape(m_path);
 249
 250     if (HasQuery())
 251         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 252
 253     if (HasFragment())
 254         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 255
 256     return ret;
 257 }
 258
 259 // ---------------------------------------------------------------------------
 260 // Assignment
 261 // ---------------------------------------------------------------------------
 262
 263 wxURI& wxURI::Assign(const wxURI& uri)
 264 {
 265     //assign fields
 266     m_fields = uri.m_fields;
 267
 268     //ref over components
 269     m_scheme = uri.m_scheme;
 270     m_userinfo = uri.m_userinfo;
 271     m_server = uri.m_server;
 272     m_hostType = uri.m_hostType;
 273     m_port = uri.m_port;
 274     m_path = uri.m_path;
 275     m_query = uri.m_query;
 276     m_fragment = uri.m_fragment;
 277
 278     return *this;
 279 }
 280
 281 wxURI& wxURI::operator = (const wxURI& uri)
 282 {
 283     return Assign(uri);
 284 }
 285
 286 wxURI& wxURI::operator = (const wxString& string)
 287 {
 288     Create(string);
 289     return *this;
 290 }
 291
 292 // ---------------------------------------------------------------------------
 293 // Comparison
 294 // ---------------------------------------------------------------------------
 295
 296 bool wxURI::operator == (const wxURI& uri) const
 297 {
 298     if (HasScheme())
 299     {
 300         if(m_scheme != uri.m_scheme)
 301             return false;
 302     }
 303     else if (uri.HasScheme())
 304         return false;
 305
 306
 307     if (HasServer())
 308     {
 309         if (HasUserInfo())
 310         {
 311             if (m_userinfo != uri.m_userinfo)
 312                 return false;
 313         }
 314         else if (uri.HasUserInfo())
 315             return false;
 316
 317         if (m_server != uri.m_server ||
 318             m_hostType != uri.m_hostType)
 319             return false;
 320
 321         if (HasPort())
 322         {
 323             if(m_port != uri.m_port)
 324                 return false;
 325         }
 326         else if (uri.HasPort())
 327             return false;
 328     }
 329     else if (uri.HasServer())
 330         return false;
 331
 332
 333     if (HasPath())
 334     {
 335         if(m_path != uri.m_path)
 336             return false;
 337     }
 338     else if (uri.HasPath())
 339         return false;
 340
 341     if (HasQuery())
 342     {
 343         if (m_query != uri.m_query)
 344             return false;
 345     }
 346     else if (uri.HasQuery())
 347         return false;
 348
 349     if (HasFragment())
 350     {
 351         if (m_fragment != uri.m_fragment)
 352             return false;
 353     }
 354     else if (uri.HasFragment())
 355         return false;
 356
 357     return true;
 358 }
 359
 360 // ---------------------------------------------------------------------------
 361 // IsReference
 362 //
 363 // if there is no authority or scheme, it is a reference
 364 // ---------------------------------------------------------------------------
 365
 366 bool wxURI::IsReference() const
 367 {   return !HasScheme() || !HasServer();  }
 368
 369 // ---------------------------------------------------------------------------
 370 // Parse
 371 //
 372 // Master URI parsing method.  Just calls the individual parsing methods
 373 //
 374 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 375 // URI-reference = URI / relative
 376 // ---------------------------------------------------------------------------
 377
 378 const wxChar* wxURI::Parse(const wxChar* uri)
 379 {
 380     uri = ParseScheme(uri);
 381     uri = ParseAuthority(uri);
 382     uri = ParsePath(uri);
 383     uri = ParseQuery(uri);
 384     return ParseFragment(uri);
 385 }
 386
 387 // ---------------------------------------------------------------------------
 388 // ParseXXX
 389 //
 390 // Individual parsers for each URI component
 391 // ---------------------------------------------------------------------------
 392
 393 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 394 {
 395     wxASSERT(uri != NULL);
 396
 397     //copy of the uri - used for figuring out
 398     //length of each component
 399     const wxChar* uricopy = uri;
 400
 401     //Does the uri have a scheme (first character alpha)?
 402     if (IsAlpha(*uri))
 403     {
 404         m_scheme += *uri++;
 405
 406         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 407         while (IsAlpha(*uri) || IsDigit(*uri) ||
 408                *uri == wxT('+')   ||
 409                *uri == wxT('-')   ||
 410                *uri == wxT('.'))
 411         {
 412             m_scheme += *uri++;
 413         }
 414
 415         //valid scheme?
 416         if (*uri == wxT(':'))
 417         {
 418             //mark the scheme as valid
 419             m_fields |= wxURI_SCHEME;
 420
 421             //move reference point up to input buffer
 422             uricopy = ++uri;
 423         }
 424         else
 425             //relative uri with relative path reference
 426             m_scheme = wxEmptyString;
 427     }
 428 //    else
 429         //relative uri with _possible_ relative path reference
 430
 431     return uricopy;
 432 }
 433
 434 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 435 {
 436     // authority     = [ userinfo "@" ] host [ ":" port ]
 437     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 438     {
 439         uri += 2;
 440
 441         uri = ParseUserInfo(uri);
 442         uri = ParseServer(uri);
 443         return ParsePort(uri);
 444     }
 445
 446     return uri;
 447 }
 448
 449 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
 450 {
 451     wxASSERT(uri != NULL);
 452
 453     //copy of the uri - used for figuring out
 454     //length of each component
 455     const wxChar* uricopy = uri;
 456
 457     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 458     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 459     {
 460         if(IsUnreserved(*uri) ||
 461            IsSubDelim(*uri) || *uri == wxT(':'))
 462             m_userinfo += *uri++;
 463         else if (IsEscape(uri))
 464         {
 465             m_userinfo += *uri++;
 466             m_userinfo += *uri++;
 467             m_userinfo += *uri++;
 468         }
 469         else
 470             Escape(m_userinfo, *uri++);
 471     }
 472
 473     if(*uri == wxT('@'))
 474     {
 475         //valid userinfo
 476         m_fields |= wxURI_USERINFO;
 477
 478         uricopy = ++uri;
 479     }
 480     else
 481         m_userinfo = wxEmptyString;
 482
 483     return uricopy;
 484 }
 485
 486 const wxChar* wxURI::ParseServer(const wxChar* uri)
 487 {
 488     wxASSERT(uri != NULL);
 489
 490     //copy of the uri - used for figuring out
 491     //length of each component
 492     const wxChar* uricopy = uri;
 493
 494     // host          = IP-literal / IPv4address / reg-name
 495     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 496     if (*uri == wxT('['))
 497     {
 498         ++uri; //some compilers don't support *&ing a ++*
 499         if (ParseIPv6address(uri) && *uri == wxT(']'))
 500         {
 501             ++uri;
 502             m_hostType = wxURI_IPV6ADDRESS;
 503
 504             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 505             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 506             theBuffer.SetLength(uri-uricopy);
 507         }
 508         else
 509         {
 510             uri = uricopy;
 511
 512             ++uri; //some compilers don't support *&ing a ++*
 513             if (ParseIPvFuture(uri) && *uri == wxT(']'))
 514             {
 515                 ++uri;
 516                 m_hostType = wxURI_IPVFUTURE;
 517
 518                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 519                 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 520                 theBuffer.SetLength(uri-uricopy);
 521             }
 522             else
 523                 uri = uricopy;
 524         }
 525     }
 526     else
 527     {
 528         if (ParseIPv4address(uri))
 529         {
 530             m_hostType = wxURI_IPV4ADDRESS;
 531
 532             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 533             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 534             theBuffer.SetLength(uri-uricopy);
 535         }
 536         else
 537             uri = uricopy;
 538     }
 539
 540     if(m_hostType == wxURI_REGNAME)
 541     {
 542         uri = uricopy;
 543         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 544         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 545         {
 546             if(IsUnreserved(*uri) ||  IsSubDelim(*uri))
 547                 m_server += *uri++;
 548             else if (IsEscape(uri))
 549             {
 550                 m_server += *uri++;
 551                 m_server += *uri++;
 552                 m_server += *uri++;
 553             }
 554             else
 555                 Escape(m_server, *uri++);
 556         }
 557     }
 558
 559     //mark the server as valid
 560     m_fields |= wxURI_SERVER;
 561
 562     return uri;
 563 }
 564
 565
 566 const wxChar* wxURI::ParsePort(const wxChar* uri)
 567 {
 568     wxASSERT(uri != NULL);
 569
 570     // port          = *DIGIT
 571     if(*uri == wxT(':'))
 572     {
 573         ++uri;
 574         while(IsDigit(*uri))
 575         {
 576             m_port += *uri++;
 577         }
 578
 579         //mark the port as valid
 580         m_fields |= wxURI_PORT;
 581     }
 582
 583     return uri;
 584 }
 585
 586 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 587 {
 588     wxASSERT(uri != NULL);
 589
 590     //copy of the uri - used for figuring out
 591     //length of each component
 592     const wxChar* uricopy = uri;
 593
 594     /// hier-part     = "//" authority path-abempty
 595     ///               / path-absolute
 596     ///               / path-rootless
 597     ///               / path-empty
 598     ///
 599     /// relative-part = "//" authority path-abempty
 600     ///               / path-absolute
 601     ///               / path-noscheme
 602     ///               / path-empty
 603     ///
 604     /// path-abempty  = *( "/" segment )
 605     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 606     /// path-noscheme = segment-nz-nc *( "/" segment )
 607     /// path-rootless = segment-nz *( "/" segment )
 608     /// path-empty    = 0<pchar>
 609     ///
 610     /// segment       = *pchar
 611     /// segment-nz    = 1*pchar
 612     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 613     ///               ; non-zero-length segment without any colon ":"
 614     ///
 615     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 616     if (*uri == wxT('/'))
 617     {
 618         m_path += *uri++;
 619
 620         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 621         {
 622             if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 623                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 624                 m_path += *uri++;
 625             else if (IsEscape(uri))
 626             {
 627                 m_path += *uri++;
 628                 m_path += *uri++;
 629                 m_path += *uri++;
 630             }
 631             else
 632                 Escape(m_path, *uri++);
 633         }
 634
 635         if (bNormalize)
 636         {
 637             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 638 #if wxUSE_STL
 639             wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 640 #endif
 641             Normalize(theBuffer, true);
 642             theBuffer.SetLength(wxStrlen(theBuffer));
 643         }
 644         //mark the path as valid
 645         m_fields |= wxURI_PATH;
 646     }
 647     else if(*uri) //Relative path
 648     {
 649         if (bReference)
 650         {
 651             //no colon allowed
 652             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 653             {
 654                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 655                   *uri == wxT('@') || *uri == wxT('/'))
 656                     m_path += *uri++;
 657                 else if (IsEscape(uri))
 658                 {
 659                     m_path += *uri++;
 660                     m_path += *uri++;
 661                     m_path += *uri++;
 662                 }
 663                 else
 664                     Escape(m_path, *uri++);
 665             }
 666         }
 667         else
 668         {
 669             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 670             {
 671                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 672                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 673                     m_path += *uri++;
 674                 else if (IsEscape(uri))
 675                 {
 676                     m_path += *uri++;
 677                     m_path += *uri++;
 678                     m_path += *uri++;
 679                 }
 680                 else
 681                     Escape(m_path, *uri++);
 682             }
 683         }
 684
 685         if (uri != uricopy)
 686         {
 687             if (bNormalize)
 688             {
 689                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 690 #if wxUSE_STL
 691                 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 692 #endif
 693                 Normalize(theBuffer);
 694                 theBuffer.SetLength(wxStrlen(theBuffer));
 695             }
 696
 697             //mark the path as valid
 698             m_fields |= wxURI_PATH;
 699         }
 700     }
 701
 702     return uri;
 703 }
 704
 705
 706 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 707 {
 708     wxASSERT(uri != NULL);
 709
 710     // query         = *( pchar / "/" / "?" )
 711     if (*uri == wxT('?'))
 712     {
 713         ++uri;
 714         while(*uri && *uri != wxT('#'))
 715         {
 716             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 717                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 718                   m_query += *uri++;
 719             else if (IsEscape(uri))
 720             {
 721                   m_query += *uri++;
 722                   m_query += *uri++;
 723                   m_query += *uri++;
 724             }
 725             else
 726                   Escape(m_query, *uri++);
 727         }
 728
 729         //mark the server as valid
 730         m_fields |= wxURI_QUERY;
 731     }
 732
 733     return uri;
 734 }
 735
 736
 737 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 738 {
 739     wxASSERT(uri != NULL);
 740
 741     // fragment      = *( pchar / "/" / "?" )
 742     if (*uri == wxT('#'))
 743     {
 744         ++uri;
 745         while(*uri)
 746         {
 747             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 748                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 749                   m_fragment += *uri++;
 750             else if (IsEscape(uri))
 751             {
 752                   m_fragment += *uri++;
 753                   m_fragment += *uri++;
 754                   m_fragment += *uri++;
 755             }
 756             else
 757                   Escape(m_fragment, *uri++);
 758         }
 759
 760         //mark the server as valid
 761         m_fields |= wxURI_FRAGMENT;
 762     }
 763
 764     return uri;
 765 }
 766
 767 // ---------------------------------------------------------------------------
 768 // Resolve
 769 //
 770 // Builds missing components of this uri from a base uri
 771 //
 772 // A version of the algorithm outlined in the RFC is used here
 773 // (it is shown in comments)
 774 //
 775 // Note that an empty URI inherits all components
 776 // ---------------------------------------------------------------------------
 777
 778 void wxURI::Resolve(const wxURI& base, int flags)
 779 {
 780     wxASSERT_MSG(!base.IsReference(),
 781                 wxT("wxURI to inherit from must not be a reference!"));
 782
 783     // If we arn't being strict, enable the older (pre-RFC2396)
 784     // loophole that allows this uri to inherit other
 785     // properties from the base uri - even if the scheme
 786     // is defined
 787     if ( !(flags & wxURI_STRICT) &&
 788             HasScheme() && base.HasScheme() &&
 789                 m_scheme == base.m_scheme )
 790     {
 791         m_fields -= wxURI_SCHEME;
 792     }
 793
 794
 795     // Do nothing if this is an absolute wxURI
 796     //    if defined(R.scheme) then
 797     //       T.scheme    = R.scheme;
 798     //       T.authority = R.authority;
 799     //       T.path      = remove_dot_segments(R.path);
 800     //       T.query     = R.query;
 801     if (HasScheme())
 802     {
 803         return;
 804     }
 805
 806     //No scheme - inherit
 807     m_scheme = base.m_scheme;
 808     m_fields |= wxURI_SCHEME;
 809
 810     // All we need to do for relative URIs with an
 811     // authority component is just inherit the scheme
 812     //       if defined(R.authority) then
 813     //          T.authority = R.authority;
 814     //          T.path      = remove_dot_segments(R.path);
 815     //          T.query     = R.query;
 816     if (HasServer())
 817     {
 818         return;
 819     }
 820
 821     //No authority - inherit
 822     if (base.HasUserInfo())
 823     {
 824         m_userinfo = base.m_userinfo;
 825         m_fields |= wxURI_USERINFO;
 826     }
 827
 828     m_server = base.m_server;
 829     m_hostType = base.m_hostType;
 830     m_fields |= wxURI_SERVER;
 831
 832     if (base.HasPort())
 833     {
 834         m_port = base.m_port;
 835         m_fields |= wxURI_PORT;
 836     }
 837
 838
 839     // Simple path inheritance from base
 840     if (!HasPath())
 841     {
 842         //             T.path = Base.path;
 843         m_path = base.m_path;
 844         m_fields |= wxURI_PATH;
 845
 846
 847         //             if defined(R.query) then
 848         //                T.query = R.query;
 849         //             else
 850         //                T.query = Base.query;
 851         //             endif;
 852         if (!HasQuery())
 853         {
 854             m_query = base.m_query;
 855             m_fields |= wxURI_QUERY;
 856         }
 857     }
 858     else
 859     {
 860         //             if (R.path starts-with "/") then
 861         //                T.path = remove_dot_segments(R.path);
 862         //             else
 863         //                T.path = merge(Base.path, R.path);
 864         //                T.path = remove_dot_segments(T.path);
 865         //             endif;
 866         //             T.query = R.query;
 867         if (m_path[0u] != wxT('/'))
 868         {
 869             //Merge paths
 870             const wxChar* op = m_path.c_str();
 871             const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
 872
 873             //not a ending directory?  move up
 874             if (base.m_path[0] && *(bp-1) != wxT('/'))
 875                 UpTree(base.m_path, bp);
 876
 877             //normalize directories
 878             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 879                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 880             {
 881                 UpTree(base.m_path, bp);
 882
 883                 if (*(op+2) == '\0')
 884                     op += 2;
 885                 else
 886                     op += 3;
 887             }
 888
 889             m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
 890                     m_path.substr((op - m_path.c_str()), m_path.Length());
 891         }
 892     }
 893
 894     //T.fragment = R.fragment;
 895 }
 896
 897 // ---------------------------------------------------------------------------
 898 // UpTree
 899 //
 900 // Moves a URI path up a directory
 901 // ---------------------------------------------------------------------------
 902
 903 //static
 904 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 905 {
 906     if (uri != uristart && *(uri-1) == wxT('/'))
 907     {
 908         uri -= 2;
 909     }
 910
 911     for(;uri != uristart; --uri)
 912     {
 913         if (*uri == wxT('/'))
 914         {
 915             ++uri;
 916             break;
 917         }
 918     }
 919
 920     //!!!TODO:HACK!!!//
 921     if (uri == uristart && *uri == wxT('/'))
 922         ++uri;
 923     //!!!//
 924 }
 925
 926 // ---------------------------------------------------------------------------
 927 // Normalize
 928 //
 929 // Normalizes directories in-place
 930 //
 931 // I.E. ./ and . are ignored
 932 //
 933 // ../ and .. are removed if a directory is before it, along
 934 // with that directory (leading .. and ../ are kept)
 935 // ---------------------------------------------------------------------------
 936
 937 //static
 938 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 939 {
 940     wxChar* cp = s;
 941     wxChar* bp = s;
 942
 943     if(s[0] == wxT('/'))
 944         ++bp;
 945
 946     while(*cp)
 947     {
 948         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 949             && (bp == cp || *(cp-1) == wxT('/')))
 950         {
 951             //. _or_ ./  - ignore
 952             if (*(cp+1) == '\0')
 953                 cp += 1;
 954             else
 955                 cp += 2;
 956         }
 957         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 958                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 959                 && (bp == cp || *(cp-1) == wxT('/')))
 960         {
 961             //.. _or_ ../ - go up the tree
 962             if (s != bp)
 963             {
 964                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 965
 966                 if (*(cp+2) == '\0')
 967                     cp += 2;
 968                 else
 969                     cp += 3;
 970             }
 971             else if (!bIgnoreLeads)
 972
 973             {
 974                 *bp++ = *cp++;
 975                 *bp++ = *cp++;
 976                 if (*cp)
 977                     *bp++ = *cp++;
 978
 979                 s = bp;
 980             }
 981             else
 982             {
 983                 if (*(cp+2) == '\0')
 984                     cp += 2;
 985                 else
 986                     cp += 3;
 987             }
 988         }
 989         else
 990             *s++ = *cp++;
 991     }
 992
 993     *s = '\0';
 994 }
 995
 996 // ---------------------------------------------------------------------------
 997 // ParseH16
 998 //
// Parses 1 to 4 hex values.  The input string is advanced by one character
// before the first digit is tested, so on entry it must point just before
// the h16.  Returns true if that first character is a valid hex character.
// It is the caller's responsibility to move the input string back to its
// original position on failure.
1002 // ---------------------------------------------------------------------------
1003
1004 bool wxURI::ParseH16(const wxChar*& uri)
1005 {
1006     // h16           = 1*4HEXDIG
1007     if(!IsHex(*++uri))
1008         return false;
1009
1010     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1011         ++uri;
1012
1013     return true;
1014 }
1015
1016 // ---------------------------------------------------------------------------
1017 // ParseIPXXX
1018 //
// Parses a certain version of an IP address and moves the input string past
// it.  Returns true if the input string contains the proper version of an IP
// address.  It is the caller's responsibility to move the input string back
// to its original position on failure.
1023 // ---------------------------------------------------------------------------
1024
1025 bool wxURI::ParseIPv4address(const wxChar*& uri)
1026 {
1027     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
1028     //
1029     //dec-octet     =      DIGIT                    ; 0-9
1030     //                / %x31-39 DIGIT               ; 10-99
1031     //                / "1" 2DIGIT                  ; 100-199
1032     //                / "2" %x30-34 DIGIT           ; 200-249
1033     //                / "25" %x30-35                ; 250-255
1034     size_t iIPv4 = 0;
1035     if (IsDigit(*uri))
1036     {
1037         ++iIPv4;
1038
1039
1040         //each ip part must be between 0-255 (dupe of version in for loop)
1041         if( IsDigit(*++uri) && IsDigit(*++uri) &&
1042            //100 or less  (note !)
1043            !( (*(uri-2) < wxT('2')) ||
1044            //240 or less
1045              (*(uri-2) == wxT('2') &&
1046                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1047              )
1048             )
1049           )
1050         {
1051             return false;
1052         }
1053
1054         if(IsDigit(*uri))++uri;
1055
1056         //compilers should unroll this loop
1057         for(; iIPv4 < 4; ++iIPv4)
1058         {
1059             if (*uri != wxT('.') || !IsDigit(*++uri))
1060                 break;
1061
1062             //each ip part must be between 0-255
1063             if( IsDigit(*++uri) && IsDigit(*++uri) &&
1064                //100 or less  (note !)
1065                !( (*(uri-2) < wxT('2')) ||
1066                //240 or less
1067                  (*(uri-2) == wxT('2') &&
1068                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1069                  )
1070                 )
1071               )
1072             {
1073                 return false;
1074             }
1075             if(IsDigit(*uri))++uri;
1076         }
1077     }
1078     return iIPv4 == 4;
1079 }
1080
// Attempts to match an IPv6 address following the grammar quoted below and
// advances uri while doing so.
//
// Every h16 group is read with ParseH16(), which advances uri by one
// character before examining anything; uri is therefore expected to sit on
// the character preceding a group whenever ParseH16() is called
// (presumably '[' or ':' on entry -- TODO confirm against the caller).
//
// Returns false on the mismatches detected below; uri is not restored in
// that case.
bool wxURI::ParseIPv6address(const wxChar*& uri)
{
    // IPv6address   =                            6( h16 ":" ) ls32
    //               /                       "::" 5( h16 ":" ) ls32
    //               / [               h16 ] "::" 4( h16 ":" ) ls32
    //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    //               / [ *4( h16 ":" ) h16 ] "::"              ls32
    //               / [ *5( h16 ":" ) h16 ] "::"              h16
    //               / [ *6( h16 ":" ) h16 ] "::"

    // numPrefix:  number of leading h16 groups that were followed by ':'
    // maxPostfix: number of "h16 :" groups still required further below
    size_t numPrefix = 0,
              maxPostfix;

    // set when the leading run stopped because ParseH16() itself failed
    bool bEndHex = false;

    // read at most 6 leading groups, stopping at the first one that either
    // fails to parse or is not followed by ':'
    for( ; numPrefix < 6; ++numPrefix)
    {
        if(!ParseH16(uri))
        {
            // undo the one character advance made by the failed ParseH16()
            --uri;
            bEndHex = true;
            break;
        }

        if(*uri != wxT(':'))
        {
            break;
        }
    }

    // one more group is attempted only if the loop above did not already
    // end on a failed ParseH16()
    if(!bEndHex && !ParseH16(uri))
    {
        // undo the one character advance made by the failed ParseH16()
        --uri;

        if (numPrefix)
            return false;

        // no leading group at all: either "::" follows (5 groups then
        // required) or nothing has been consumed (6 groups required)
        if (*uri == wxT(':'))
        {
            if (*++uri != wxT(':'))
                return false;

            maxPostfix = 5;
        }
        else
            maxPostfix = 6;
    }
    else
    {
        if (*uri != wxT(':') || *(uri+1) != wxT(':'))
        {
            // not at "::" -- only the first grammar alternative
            // (6 groups followed by ls32) can still apply
            if (numPrefix != 6)
                return false;

            // walk back to the nearest preceding ':' and step past it
            while (*--uri != wxT(':')) {}
            ++uri;

            const wxChar* uristart = uri;
            //parse ls32
            // ls32          = ( h16 ":" h16 ) / IPv4address
            // NOTE(review): ParseH16() advances before reading whereas
            // ParseIPv4address() reads *uri directly, so the two
            // alternatives start one character apart from the same uri --
            // verify this is intended.
            if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
                return true;

            uri = uristart;

            if (ParseIPv4address(uri))
                return true;
            else
                return false;
        }
        else
        {
            // skip the "::"
            uri += 2;

            // the more groups preceded "::", the fewer may follow it
            if (numPrefix > 3)
                maxPostfix = 0;
            else
                maxPostfix = 4 - numPrefix;
        }
    }

    // remembered before the loop below counts maxPostfix down to 0: true
    // when no "h16 :" group is required, in which case a failed ls32 match
    // is not fatal
    bool bAllowAltEnding = maxPostfix == 0;

    // exactly maxPostfix groups must follow, each of them followed by ':'
    for(; maxPostfix != 0; --maxPostfix)
    {
        if(!ParseH16(uri) || *uri != wxT(':'))
            return false;
    }

    if(numPrefix <= 4)
    {
        const wxChar* uristart = uri;
        //parse ls32
        // ls32          = ( h16 ":" h16 ) / IPv4address
        if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
            return true;

        uri = uristart;

        if (ParseIPv4address(uri))
            return true;

        uri = uristart;

        if (!bAllowAltEnding)
            return false;
    }

    // NOTE(review): both outcomes of this test return true, so the result
    // of ParseH16() only affects how far uri has moved; when it fails uri
    // has been advanced by one character and is not moved back -- confirm
    // whether that is intended.
    if(numPrefix <= 5 && ParseH16(uri))
        return true;

    return true;
}
1196
1197 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1198 {
1199     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1200     if (*++uri != wxT('v') || !IsHex(*++uri))
1201         return false;
1202
1203     while (IsHex(*++uri)) {}
1204
1205     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1206         return false;
1207
1208     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1209
1210     return true;
1211 }
1212
1213
1214 // ---------------------------------------------------------------------------
1215 // CharToHex
1216 //
1217 // Converts a character into a numeric hexidecimal value, or 0 if the
1218 // passed in character is not a valid hex character
1219 // ---------------------------------------------------------------------------
1220
1221 //static
1222 wxChar wxURI::CharToHex(const wxChar& c)
1223 {
1224     if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1225     if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1226     if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1227
1228     return 0;
1229 }
1230
1231 // ---------------------------------------------------------------------------
1232 // IsXXX
1233 //
1234 // Returns true if the passed in character meets the criteria of the method
1235 // ---------------------------------------------------------------------------
1236
1237 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1238 bool wxURI::IsUnreserved (const wxChar& c)
1239 {   return IsAlpha(c) || IsDigit(c) ||
1240            c == wxT('-') ||
1241            c == wxT('.') ||
1242            c == wxT('_') ||
1243            c == wxT('~') //tilde
1244            ;
1245 }
1246
1247 bool wxURI::IsReserved (const wxChar& c)
1248 {
1249     return IsGenDelim(c) || IsSubDelim(c);
1250 }
1251
1252 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1253 bool wxURI::IsGenDelim (const wxChar& c)
1254 {
1255     return c == wxT(':') ||
1256            c == wxT('/') ||
1257            c == wxT('?') ||
1258            c == wxT('#') ||
1259            c == wxT('[') ||
1260            c == wxT(']') ||
1261            c == wxT('@');
1262 }
1263
1264 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1265 //!               / "*" / "+" / "," / ";" / "="
1266 bool wxURI::IsSubDelim (const wxChar& c)
1267 {
1268     return c == wxT('!') ||
1269            c == wxT('$') ||
1270            c == wxT('&') ||
1271            c == wxT('\'') ||
1272            c == wxT('(') ||
1273            c == wxT(')') ||
1274            c == wxT('*') ||
1275            c == wxT('+') ||
1276            c == wxT(',') ||
1277            c == wxT(';') ||
1278            c == wxT('=')
1279            ;
1280 }
1281
1282 bool wxURI::IsHex(const wxChar& c)
1283 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1284
1285 bool wxURI::IsAlpha(const wxChar& c)
1286 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1287
1288 bool wxURI::IsDigit(const wxChar& c)
1289 {   return c >= wxT('0') && c <= wxT('9');        }
1290
1291
1292 // ---------------------------------------------------------------------------
1293 //
1294 //                        wxURL Compatibility
1295 //
1296 // ---------------------------------------------------------------------------
1297
1298 #if wxUSE_URL
1299
1300 #if WXWIN_COMPATIBILITY_2_4
1301
1302 #include "wx/url.h"
1303
// Accessor only compiled when WXWIN_COMPATIBILITY_2_4 is set: returns the
// m_scheme member.
wxString wxURL::GetProtocolName() const
{
    return m_scheme;
}
1308
// Accessor only compiled when WXWIN_COMPATIBILITY_2_4 is set: returns the
// m_server member.
wxString wxURL::GetHostName() const
{
    return m_server;
}
1313
// Accessor only compiled when WXWIN_COMPATIBILITY_2_4 is set: returns the
// m_path member.
wxString wxURL::GetPath() const
{
    return m_path;
}
1318
1319 //Note that this old code really doesn't convert to a URI that well and looks
1320 //more like a dirty hack than anything else...
1321
1322 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1323 {
1324   wxString out_str;
1325   wxString hexa_code;
1326   size_t i;
1327
1328   for (i = 0; i < uri.Len(); i++)
1329   {
1330     wxChar c = uri.GetChar(i);
1331
1332     if (c == wxT(' '))
1333     {
1334       // GRG, Apr/2000: changed to "%20" instead of '+'
1335
1336       out_str += wxT("%20");
1337     }
1338     else
1339     {
1340       // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1341       //
1342       // - Alphanumeric characters are never escaped
1343       // - Unreserved marks are never escaped
1344       // - Delimiters must be escaped if they appear within a component
1345       //     but not if they are used to separate components. Here we have
1346       //     no clear way to distinguish between these two cases, so they
1347       //     are escaped unless they are passed in the 'delims' parameter
1348       //     (allowed delimiters).
1349
1350       static const wxChar marks[] = wxT("-_.!~*()'");
1351
1352       if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1353       {
1354         hexa_code.Printf(wxT("%%%02X"), c);
1355         out_str += hexa_code;
1356       }
1357       else
1358       {
1359         out_str += c;
1360       }
1361     }
1362   }
1363
1364   return out_str;
1365 }
1366
// Compatibility wrapper (WXWIN_COMPATIBILITY_2_4 only): simply forwards to
// wxURI::Unescape() and returns its result.
wxString wxURL::ConvertFromURI(const wxString& uri)
{
    return wxURI::Unescape(uri);
}
1371
1372 #endif //WXWIN_COMPATIBILITY_2_4
1373
1374 #endif //wxUSE_URL
1375
1376 //end of uri.cpp
1377
1378
1379