src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 // For compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #ifndef WX_PRECOMP
  27     #include "wx/crt.h"
  28 #endif
  29
  30 #include "wx/uri.h"
  31
  32 // ---------------------------------------------------------------------------
  33 // definitions
  34 // ---------------------------------------------------------------------------
  35
  36 IMPLEMENT_CLASS(wxURI, wxObject)
  37
  38 // ===========================================================================
  39 // implementation
  40 // ===========================================================================
  41
  42 // ---------------------------------------------------------------------------
  43 // utilities
  44 // ---------------------------------------------------------------------------
  45
  46 // ---------------------------------------------------------------------------
  47 //
  48 //                        wxURI
  49 //
  50 // ---------------------------------------------------------------------------
  51
  52 // ---------------------------------------------------------------------------
  53 //  Constructors
  54 // ---------------------------------------------------------------------------
  55
  56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58 }
  59
  60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  61 {
  62     Create(uri);
  63 }
  64
  65 wxURI::wxURI(const wxURI& uri)  : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     Assign(uri);
  68 }
  69
  70 // ---------------------------------------------------------------------------
  71 // Destructor and cleanup
  72 // ---------------------------------------------------------------------------
  73
  74 wxURI::~wxURI()
  75 {
  76     Clear();
  77 }
  78
  79 void wxURI::Clear()
  80 {
  81     m_scheme = m_userinfo = m_server = m_port = m_path =
  82     m_query = m_fragment = wxEmptyString;
  83
  84     m_hostType = wxURI_REGNAME;
  85
  86     m_fields = 0;
  87 }
  88
  89 // ---------------------------------------------------------------------------
  90 // Create
  91 //
  92 // This creates the URI - all we do here is call the main parsing method
  93 // ---------------------------------------------------------------------------
  94
  95 const wxChar* wxURI::Create(const wxString& uri)
  96 {
  97     if (m_fields)
  98         Clear();
  99
 100     // FIXME-UTF8: rewrite ParseXXX() methods using iterators
 101     // NB: using wxWxCharBuffer instead of just c_str() avoids keeping
 102     //     converted string in memory for longer than needed
 103     return Parse(wxWxCharBuffer(uri.c_str()));
 104 }
 105
 106 // ---------------------------------------------------------------------------
 107 // Escape Methods
 108 //
 109 // TranslateEscape unencodes a 3 character URL escape sequence
 110 //
 111 // Escape encodes an invalid URI character into a 3 character sequence
 112 //
// IsEscape determines if the input string starts with an escape sequence;
// it only tests the input and does not move it past the sequence
 115 //
 116 // Unescape unencodes all 3 character URL escape sequences in a wxString
 117 // ---------------------------------------------------------------------------
 118
 119 wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
 120 {
 121     wxChar c1(*s);
 122     wxChar c2(*(s + 1));
 123
 124     wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
 125
 126     return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
 127 }
 128
 129 wxString wxURI::Unescape(const wxString& uri)
 130 {
 131     wxString new_uri;
 132
 133     for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
 134     {
 135         if ( *i == wxT('%') )
 136         {
 137             new_uri += wxURI::TranslateEscape(i + 1);
 138             i += 2;
 139         }
 140         else
 141             new_uri += *i;
 142     }
 143
 144     return new_uri;
 145 }
 146
 147 void wxURI::Escape(wxString& s, const wxChar& c)
 148 {
 149     const wxChar* hdig = wxT("0123456789abcdef");
 150     s += wxT('%');
 151     s += hdig[(c >> 4) & 15];
 152     s += hdig[c & 15];
 153 }
 154
 155 bool wxURI::IsEscape(const wxChar*& uri)
 156 {
 157     // pct-encoded   = "%" HEXDIG HEXDIG
 158     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 159         return true;
 160     else
 161         return false;
 162 }
 163
 164 // ---------------------------------------------------------------------------
 165 // GetUser
 166 // GetPassword
 167 //
 168 // Gets the username and password via the old URL method.
 169 // ---------------------------------------------------------------------------
 170 wxString wxURI::GetUser() const
 171 {
 172       size_t dwPasswordPos = m_userinfo.find(':');
 173
 174       if (dwPasswordPos == wxString::npos)
 175           dwPasswordPos = 0;
 176
 177       return m_userinfo(0, dwPasswordPos);
 178 }
 179
 180 wxString wxURI::GetPassword() const
 181 {
 182       size_t dwPasswordPos = m_userinfo.find(':');
 183
 184       if (dwPasswordPos == wxString::npos)
 185           return wxT("");
 186       else
 187           return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
 188 }
 189
 190 // ---------------------------------------------------------------------------
 191 // BuildURI
 192 //
 193 // BuildURI() builds the entire URI into a useable
 194 // representation, including proper identification characters such as slashes
 195 //
 196 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 197 // the components that accept escape sequences
 198 // ---------------------------------------------------------------------------
 199
 200 wxString wxURI::BuildURI() const
 201 {
 202     wxString ret;
 203
 204     if (HasScheme())
 205         ret = ret + m_scheme + wxT(":");
 206
 207     if (HasServer())
 208     {
 209         ret += wxT("//");
 210
 211         if (HasUserInfo())
 212             ret = ret + m_userinfo + wxT("@");
 213
 214         ret += m_server;
 215
 216         if (HasPort())
 217             ret = ret + wxT(":") + m_port;
 218     }
 219
 220     ret += m_path;
 221
 222     if (HasQuery())
 223         ret = ret + wxT("?") + m_query;
 224
 225     if (HasFragment())
 226         ret = ret + wxT("#") + m_fragment;
 227
 228     return ret;
 229 }
 230
 231 wxString wxURI::BuildUnescapedURI() const
 232 {
 233     wxString ret;
 234
 235     if (HasScheme())
 236         ret = ret + m_scheme + wxT(":");
 237
 238     if (HasServer())
 239     {
 240         ret += wxT("//");
 241
 242         if (HasUserInfo())
 243             ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
 244
 245         if (m_hostType == wxURI_REGNAME)
 246             ret += wxURI::Unescape(m_server);
 247         else
 248             ret += m_server;
 249
 250         if (HasPort())
 251             ret = ret + wxT(":") + m_port;
 252     }
 253
 254     ret += wxURI::Unescape(m_path);
 255
 256     if (HasQuery())
 257         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 258
 259     if (HasFragment())
 260         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 261
 262     return ret;
 263 }
 264
 265 // ---------------------------------------------------------------------------
 266 // Assignment
 267 // ---------------------------------------------------------------------------
 268
 269 wxURI& wxURI::Assign(const wxURI& uri)
 270 {
 271     //assign fields
 272     m_fields = uri.m_fields;
 273
 274     //ref over components
 275     m_scheme = uri.m_scheme;
 276     m_userinfo = uri.m_userinfo;
 277     m_server = uri.m_server;
 278     m_hostType = uri.m_hostType;
 279     m_port = uri.m_port;
 280     m_path = uri.m_path;
 281     m_query = uri.m_query;
 282     m_fragment = uri.m_fragment;
 283
 284     return *this;
 285 }
 286
 287 wxURI& wxURI::operator = (const wxURI& uri)
 288 {
 289     return Assign(uri);
 290 }
 291
 292 wxURI& wxURI::operator = (const wxString& string)
 293 {
 294     Create(string);
 295     return *this;
 296 }
 297
 298 // ---------------------------------------------------------------------------
 299 // Comparison
 300 // ---------------------------------------------------------------------------
 301
 302 bool wxURI::operator == (const wxURI& uri) const
 303 {
 304     if (HasScheme())
 305     {
 306         if(m_scheme != uri.m_scheme)
 307             return false;
 308     }
 309     else if (uri.HasScheme())
 310         return false;
 311
 312
 313     if (HasServer())
 314     {
 315         if (HasUserInfo())
 316         {
 317             if (m_userinfo != uri.m_userinfo)
 318                 return false;
 319         }
 320         else if (uri.HasUserInfo())
 321             return false;
 322
 323         if (m_server != uri.m_server ||
 324             m_hostType != uri.m_hostType)
 325             return false;
 326
 327         if (HasPort())
 328         {
 329             if(m_port != uri.m_port)
 330                 return false;
 331         }
 332         else if (uri.HasPort())
 333             return false;
 334     }
 335     else if (uri.HasServer())
 336         return false;
 337
 338
 339     if (HasPath())
 340     {
 341         if(m_path != uri.m_path)
 342             return false;
 343     }
 344     else if (uri.HasPath())
 345         return false;
 346
 347     if (HasQuery())
 348     {
 349         if (m_query != uri.m_query)
 350             return false;
 351     }
 352     else if (uri.HasQuery())
 353         return false;
 354
 355     if (HasFragment())
 356     {
 357         if (m_fragment != uri.m_fragment)
 358             return false;
 359     }
 360     else if (uri.HasFragment())
 361         return false;
 362
 363     return true;
 364 }
 365
 366 // ---------------------------------------------------------------------------
 367 // IsReference
 368 //
 369 // if there is no authority or scheme, it is a reference
 370 // ---------------------------------------------------------------------------
 371
 372 bool wxURI::IsReference() const
 373 {   return !HasScheme() || !HasServer();  }
 374
 375 // ---------------------------------------------------------------------------
 376 // Parse
 377 //
 378 // Master URI parsing method.  Just calls the individual parsing methods
 379 //
 380 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 381 // URI-reference = URI / relative
 382 // ---------------------------------------------------------------------------
 383
 384 const wxChar* wxURI::Parse(const wxChar *uri)
 385 {
 386     uri = ParseScheme(uri);
 387     uri = ParseAuthority(uri);
 388     uri = ParsePath(uri);
 389     uri = ParseQuery(uri);
 390     return ParseFragment(uri);
 391 }
 392
 393 // ---------------------------------------------------------------------------
 394 // ParseXXX
 395 //
 396 // Individual parsers for each URI component
 397 // ---------------------------------------------------------------------------
 398
 399 const wxChar* wxURI::ParseScheme(const wxChar *uri)
 400 {
 401     wxASSERT(uri != NULL);
 402
 403     //copy of the uri - used for figuring out
 404     //length of each component
 405     const wxChar* uricopy = uri;
 406
 407     //Does the uri have a scheme (first character alpha)?
 408     if (IsAlpha(*uri))
 409     {
 410         m_scheme += *uri++;
 411
 412         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 413         while (IsAlpha(*uri) || IsDigit(*uri) ||
 414                *uri == wxT('+')   ||
 415                *uri == wxT('-')   ||
 416                *uri == wxT('.'))
 417         {
 418             m_scheme += *uri++;
 419         }
 420
 421         //valid scheme?
 422         if (*uri == wxT(':'))
 423         {
 424             //mark the scheme as valid
 425             m_fields |= wxURI_SCHEME;
 426
 427             //move reference point up to input buffer
 428             uricopy = ++uri;
 429         }
 430         else
 431             //relative uri with relative path reference
 432             m_scheme = wxEmptyString;
 433     }
 434 //    else
 435         //relative uri with _possible_ relative path reference
 436
 437     return uricopy;
 438 }
 439
 440 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 441 {
 442     // authority     = [ userinfo "@" ] host [ ":" port ]
 443     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 444     {
 445         //skip past the two slashes
 446         uri += 2;
 447
 448         // ############# DEVIATION FROM RFC #########################
 449         // Don't parse the server component for file URIs
 450         if(m_scheme != wxT("file"))
 451         {
 452             //normal way
 453         uri = ParseUserInfo(uri);
 454         uri = ParseServer(uri);
 455         return ParsePort(uri);
 456         }
 457     }
 458
 459     return uri;
 460 }
 461
 462 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
 463 {
 464     wxASSERT(uri != NULL);
 465
 466     //copy of the uri - used for figuring out
 467     //length of each component
 468     const wxChar* uricopy = uri;
 469
 470     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 471     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 472     {
 473         if(IsUnreserved(*uri) ||
 474            IsSubDelim(*uri) || *uri == wxT(':'))
 475             m_userinfo += *uri++;
 476         else if (IsEscape(uri))
 477         {
 478             m_userinfo += *uri++;
 479             m_userinfo += *uri++;
 480             m_userinfo += *uri++;
 481         }
 482         else
 483             Escape(m_userinfo, *uri++);
 484     }
 485
 486     if(*uri == wxT('@'))
 487     {
 488         //valid userinfo
 489         m_fields |= wxURI_USERINFO;
 490
 491         uricopy = ++uri;
 492     }
 493     else
 494         m_userinfo = wxEmptyString;
 495
 496     return uricopy;
 497 }
 498
 499 const wxChar* wxURI::ParseServer(const wxChar* uri)
 500 {
 501     wxASSERT(uri != NULL);
 502
 503     //copy of the uri - used for figuring out
 504     //length of each component
 505     const wxChar* uricopy = uri;
 506
 507     // host          = IP-literal / IPv4address / reg-name
 508     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 509     if (*uri == wxT('['))
 510     {
 511         ++uri; //some compilers don't support *&ing a ++*
 512         if (ParseIPv6address(uri) && *uri == wxT(']'))
 513         {
 514             ++uri;
 515             m_hostType = wxURI_IPV6ADDRESS;
 516
 517             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 518             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 519             theBuffer.SetLength(uri-uricopy);
 520         }
 521         else
 522         {
 523             uri = uricopy;
 524
 525             ++uri; //some compilers don't support *&ing a ++*
 526             if (ParseIPvFuture(uri) && *uri == wxT(']'))
 527             {
 528                 ++uri;
 529                 m_hostType = wxURI_IPVFUTURE;
 530
 531                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 532                 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 533                 theBuffer.SetLength(uri-uricopy);
 534             }
 535             else
 536                 uri = uricopy;
 537         }
 538     }
 539     else
 540     {
 541         if (ParseIPv4address(uri))
 542         {
 543             m_hostType = wxURI_IPV4ADDRESS;
 544
 545             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 546             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 547             theBuffer.SetLength(uri-uricopy);
 548         }
 549         else
 550             uri = uricopy;
 551     }
 552
 553     if(m_hostType == wxURI_REGNAME)
 554     {
 555         uri = uricopy;
 556         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 557         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 558         {
 559             if(IsUnreserved(*uri) ||  IsSubDelim(*uri))
 560                 m_server += *uri++;
 561             else if (IsEscape(uri))
 562             {
 563                 m_server += *uri++;
 564                 m_server += *uri++;
 565                 m_server += *uri++;
 566             }
 567             else
 568                 Escape(m_server, *uri++);
 569         }
 570     }
 571
 572     //mark the server as valid
 573     m_fields |= wxURI_SERVER;
 574
 575     return uri;
 576 }
 577
 578
 579 const wxChar* wxURI::ParsePort(const wxChar* uri)
 580 {
 581     wxASSERT(uri != NULL);
 582
 583     // port          = *DIGIT
 584     if(*uri == wxT(':'))
 585     {
 586         ++uri;
 587         while(IsDigit(*uri))
 588         {
 589             m_port += *uri++;
 590         }
 591
 592         //mark the port as valid
 593         m_fields |= wxURI_PORT;
 594     }
 595
 596     return uri;
 597 }
 598
 599 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 600 {
 601     wxASSERT(uri != NULL);
 602
 603     //copy of the uri - used for figuring out
 604     //length of each component
 605     const wxChar* uricopy = uri;
 606
 607     /// hier-part     = "//" authority path-abempty
 608     ///               / path-absolute
 609     ///               / path-rootless
 610     ///               / path-empty
 611     ///
 612     /// relative-part = "//" authority path-abempty
 613     ///               / path-absolute
 614     ///               / path-noscheme
 615     ///               / path-empty
 616     ///
 617     /// path-abempty  = *( "/" segment )
 618     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 619     /// path-noscheme = segment-nz-nc *( "/" segment )
 620     /// path-rootless = segment-nz *( "/" segment )
 621     /// path-empty    = 0<pchar>
 622     ///
 623     /// segment       = *pchar
 624     /// segment-nz    = 1*pchar
 625     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 626     ///               ; non-zero-length segment without any colon ":"
 627     ///
 628     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 629     if (*uri == wxT('/'))
 630     {
 631         m_path += *uri++;
 632
 633         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 634         {
 635             if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 636                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 637                 m_path += *uri++;
 638             else if (IsEscape(uri))
 639             {
 640                 m_path += *uri++;
 641                 m_path += *uri++;
 642                 m_path += *uri++;
 643             }
 644             else
 645                 Escape(m_path, *uri++);
 646         }
 647
 648         if (bNormalize)
 649         {
 650             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 651             Normalize(theBuffer, true);
 652             theBuffer.SetLength(wxStrlen(theBuffer));
 653         }
 654         //mark the path as valid
 655         m_fields |= wxURI_PATH;
 656     }
 657     else if(*uri) //Relative path
 658     {
 659         if (bReference)
 660         {
 661             //no colon allowed
 662             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 663             {
 664                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 665                   *uri == wxT('@') || *uri == wxT('/'))
 666                     m_path += *uri++;
 667                 else if (IsEscape(uri))
 668                 {
 669                     m_path += *uri++;
 670                     m_path += *uri++;
 671                     m_path += *uri++;
 672                 }
 673                 else
 674                     Escape(m_path, *uri++);
 675             }
 676         }
 677         else
 678         {
 679             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 680             {
 681                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 682                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 683                     m_path += *uri++;
 684                 else if (IsEscape(uri))
 685                 {
 686                     m_path += *uri++;
 687                     m_path += *uri++;
 688                     m_path += *uri++;
 689                 }
 690                 else
 691                     Escape(m_path, *uri++);
 692             }
 693         }
 694
 695         if (uri != uricopy)
 696         {
 697             if (bNormalize)
 698             {
 699                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 700 #if wxUSE_STL || wxUSE_UNICODE_UTF8
 701                 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
 702                 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 703 #endif
 704                 Normalize(theBuffer);
 705                 theBuffer.SetLength(wxStrlen(theBuffer));
 706             }
 707
 708             //mark the path as valid
 709             m_fields |= wxURI_PATH;
 710         }
 711     }
 712
 713     return uri;
 714 }
 715
 716
 717 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 718 {
 719     wxASSERT(uri != NULL);
 720
 721     // query         = *( pchar / "/" / "?" )
 722     if (*uri == wxT('?'))
 723     {
 724         ++uri;
 725         while(*uri && *uri != wxT('#'))
 726         {
 727             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 728                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 729                   m_query += *uri++;
 730             else if (IsEscape(uri))
 731             {
 732                   m_query += *uri++;
 733                   m_query += *uri++;
 734                   m_query += *uri++;
 735             }
 736             else
 737                   Escape(m_query, *uri++);
 738         }
 739
 740         //mark the server as valid
 741         m_fields |= wxURI_QUERY;
 742     }
 743
 744     return uri;
 745 }
 746
 747
 748 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 749 {
 750     wxASSERT(uri != NULL);
 751
 752     // fragment      = *( pchar / "/" / "?" )
 753     if (*uri == wxT('#'))
 754     {
 755         ++uri;
 756         while(*uri)
 757         {
 758             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 759                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 760                   m_fragment += *uri++;
 761             else if (IsEscape(uri))
 762             {
 763                   m_fragment += *uri++;
 764                   m_fragment += *uri++;
 765                   m_fragment += *uri++;
 766             }
 767             else
 768                   Escape(m_fragment, *uri++);
 769         }
 770
 771         //mark the server as valid
 772         m_fields |= wxURI_FRAGMENT;
 773     }
 774
 775     return uri;
 776 }
 777
 778 // ---------------------------------------------------------------------------
 779 // Resolve
 780 //
 781 // Builds missing components of this uri from a base uri
 782 //
 783 // A version of the algorithm outlined in the RFC is used here
 784 // (it is shown in comments)
 785 //
 786 // Note that an empty URI inherits all components
 787 // ---------------------------------------------------------------------------
 788
 789 void wxURI::Resolve(const wxURI& base, int flags)
 790 {
 791     wxASSERT_MSG(!base.IsReference(),
 792                 wxT("wxURI to inherit from must not be a reference!"));
 793
 794     // If we arn't being strict, enable the older (pre-RFC2396)
 795     // loophole that allows this uri to inherit other
 796     // properties from the base uri - even if the scheme
 797     // is defined
 798     if ( !(flags & wxURI_STRICT) &&
 799             HasScheme() && base.HasScheme() &&
 800                 m_scheme == base.m_scheme )
 801     {
 802         m_fields -= wxURI_SCHEME;
 803     }
 804
 805
 806     // Do nothing if this is an absolute wxURI
 807     //    if defined(R.scheme) then
 808     //       T.scheme    = R.scheme;
 809     //       T.authority = R.authority;
 810     //       T.path      = remove_dot_segments(R.path);
 811     //       T.query     = R.query;
 812     if (HasScheme())
 813     {
 814         return;
 815     }
 816
 817     //No scheme - inherit
 818     m_scheme = base.m_scheme;
 819     m_fields |= wxURI_SCHEME;
 820
 821     // All we need to do for relative URIs with an
 822     // authority component is just inherit the scheme
 823     //       if defined(R.authority) then
 824     //          T.authority = R.authority;
 825     //          T.path      = remove_dot_segments(R.path);
 826     //          T.query     = R.query;
 827     if (HasServer())
 828     {
 829         return;
 830     }
 831
 832     //No authority - inherit
 833     if (base.HasUserInfo())
 834     {
 835         m_userinfo = base.m_userinfo;
 836         m_fields |= wxURI_USERINFO;
 837     }
 838
 839     m_server = base.m_server;
 840     m_hostType = base.m_hostType;
 841     m_fields |= wxURI_SERVER;
 842
 843     if (base.HasPort())
 844     {
 845         m_port = base.m_port;
 846         m_fields |= wxURI_PORT;
 847     }
 848
 849
 850     // Simple path inheritance from base
 851     if (!HasPath())
 852     {
 853         //             T.path = Base.path;
 854         m_path = base.m_path;
 855         m_fields |= wxURI_PATH;
 856
 857
 858         //             if defined(R.query) then
 859         //                T.query = R.query;
 860         //             else
 861         //                T.query = Base.query;
 862         //             endif;
 863         if (!HasQuery())
 864         {
 865             m_query = base.m_query;
 866             m_fields |= wxURI_QUERY;
 867         }
 868     }
 869     else
 870     {
 871         //             if (R.path starts-with "/") then
 872         //                T.path = remove_dot_segments(R.path);
 873         //             else
 874         //                T.path = merge(Base.path, R.path);
 875         //                T.path = remove_dot_segments(T.path);
 876         //             endif;
 877         //             T.query = R.query;
 878         if (m_path[0u] != wxT('/'))
 879         {
 880             //Merge paths
 881             wxString::const_iterator op = m_path.begin();
 882             wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
 883
 884             //not a ending directory?  move up
 885             if (base.m_path[0] && *(bp-1) != wxT('/'))
 886                 UpTree(base.m_path.begin(), bp);
 887
 888             //normalize directories
 889             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 890                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 891             {
 892                 UpTree(base.m_path.begin(), bp);
 893
 894                 if (*(op+2) == '\0')
 895                     op += 2;
 896                 else
 897                     op += 3;
 898             }
 899
 900             m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
 901                      m_path.substr((op - m_path.begin()), m_path.length());
 902         }
 903     }
 904
 905     //T.fragment = R.fragment;
 906 }
 907
 908 // ---------------------------------------------------------------------------
 909 // UpTree
 910 //
 911 // Moves a URI path up a directory
 912 // ---------------------------------------------------------------------------
 913
 914 //static
 915 void wxURI::UpTree(wxString::const_iterator uristart,
 916                    wxString::const_iterator& uri)
 917 {
 918     if (uri != uristart && *(uri-1) == wxT('/'))
 919     {
 920         uri -= 2;
 921     }
 922
 923     for(;uri != uristart; --uri)
 924     {
 925         if (*uri == wxT('/'))
 926         {
 927             ++uri;
 928             break;
 929         }
 930     }
 931
 932     //!!!TODO:HACK!!!//
 933     if (uri == uristart && *uri == wxT('/'))
 934         ++uri;
 935     //!!!//
 936 }
 937
 938 // FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
 939 /*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 940 {
 941     if (uri != uristart && *(uri-1) == wxT('/'))
 942     {
 943         uri -= 2;
 944     }
 945
 946     for(;uri != uristart; --uri)
 947     {
 948         if (*uri == wxT('/'))
 949         {
 950             ++uri;
 951             break;
 952         }
 953     }
 954
 955     //!!!TODO:HACK!!!//
 956     if (uri == uristart && *uri == wxT('/'))
 957         ++uri;
 958     //!!!//
 959 }
 960 // end of FIXME-UTF8
 961
 962 // ---------------------------------------------------------------------------
 963 // Normalize
 964 //
 965 // Normalizes directories in-place
 966 //
 967 // I.E. ./ and . are ignored
 968 //
 969 // ../ and .. are removed if a directory is before it, along
 970 // with that directory (leading .. and ../ are kept)
 971 // ---------------------------------------------------------------------------
 972
 973 //static
 974 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 975 {
 976     wxChar* cp = s;
 977     wxChar* bp = s;
 978
 979     if(s[0] == wxT('/'))
 980         ++bp;
 981
 982     while(*cp)
 983     {
 984         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 985             && (bp == cp || *(cp-1) == wxT('/')))
 986         {
 987             //. _or_ ./  - ignore
 988             if (*(cp+1) == '\0')
 989                 cp += 1;
 990             else
 991                 cp += 2;
 992         }
 993         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 994                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 995                 && (bp == cp || *(cp-1) == wxT('/')))
 996         {
 997             //.. _or_ ../ - go up the tree
 998             if (s != bp)
 999             {
1000                 UpTree((const wxChar*)bp, (const wxChar*&)s);
1001
1002                 if (*(cp+2) == '\0')
1003                     cp += 2;
1004                 else
1005                     cp += 3;
1006             }
1007             else if (!bIgnoreLeads)
1008
1009             {
1010                 *bp++ = *cp++;
1011                 *bp++ = *cp++;
1012                 if (*cp)
1013                     *bp++ = *cp++;
1014
1015                 s = bp;
1016             }
1017             else
1018             {
1019                 if (*(cp+2) == '\0')
1020                     cp += 2;
1021                 else
1022                     cp += 3;
1023             }
1024         }
1025         else
1026             *s++ = *cp++;
1027     }
1028
1029     *s = '\0';
1030 }
1031
1032 // ---------------------------------------------------------------------------
1033 // ParseH16
1034 //
1035 // Parses 1 to 4 hex values.  Returns true if the first character of the input
1036 // string is a valid hex character.  It is the caller's responsability to move
1037 // the input string back to its original position on failure.
1038 // ---------------------------------------------------------------------------
1039
1040 bool wxURI::ParseH16(const wxChar*& uri)
1041 {
1042     // h16           = 1*4HEXDIG
1043     if(!IsHex(*++uri))
1044         return false;
1045
1046     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1047         ++uri;
1048
1049     return true;
1050 }
1051
1052 // ---------------------------------------------------------------------------
1053 // ParseIPXXX
1054 //
1055 // Parses a certain version of an IP address and moves the input string past
1056 // it.  Returns true if the input  string contains the proper version of an ip
1057 // address.  It is the caller's responsability to move the input string back
1058 // to its original position on failure.
1059 // ---------------------------------------------------------------------------
1060
1061 bool wxURI::ParseIPv4address(const wxChar*& uri)
1062 {
1063     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
1064     //
1065     //dec-octet     =      DIGIT                    ; 0-9
1066     //                / %x31-39 DIGIT               ; 10-99
1067     //                / "1" 2DIGIT                  ; 100-199
1068     //                / "2" %x30-34 DIGIT           ; 200-249
1069     //                / "25" %x30-35                ; 250-255
1070     size_t iIPv4 = 0;
1071     if (IsDigit(*uri))
1072     {
1073         ++iIPv4;
1074
1075
1076         //each ip part must be between 0-255 (dupe of version in for loop)
1077         if( IsDigit(*++uri) && IsDigit(*++uri) &&
1078            //100 or less  (note !)
1079            !( (*(uri-2) < wxT('2')) ||
1080            //240 or less
1081              (*(uri-2) == wxT('2') &&
1082                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1083              )
1084             )
1085           )
1086         {
1087             return false;
1088         }
1089
1090         if(IsDigit(*uri))++uri;
1091
1092         //compilers should unroll this loop
1093         for(; iIPv4 < 4; ++iIPv4)
1094         {
1095             if (*uri != wxT('.') || !IsDigit(*++uri))
1096                 break;
1097
1098             //each ip part must be between 0-255
1099             if( IsDigit(*++uri) && IsDigit(*++uri) &&
1100                //100 or less  (note !)
1101                !( (*(uri-2) < wxT('2')) ||
1102                //240 or less
1103                  (*(uri-2) == wxT('2') &&
1104                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1105                  )
1106                 )
1107               )
1108             {
1109                 return false;
1110             }
1111             if(IsDigit(*uri))++uri;
1112         }
1113     }
1114     return iIPv4 == 4;
1115 }
1116
1117 bool wxURI::ParseIPv6address(const wxChar*& uri)
1118 {
1119     // IPv6address   =                            6( h16 ":" ) ls32
1120     //               /                       "::" 5( h16 ":" ) ls32
1121     //               / [               h16 ] "::" 4( h16 ":" ) ls32
1122     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1123     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1124     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1125     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
1126     //               / [ *5( h16 ":" ) h16 ] "::"              h16
1127     //               / [ *6( h16 ":" ) h16 ] "::"
1128
1129     size_t numPrefix = 0,
1130               maxPostfix;
1131
1132     bool bEndHex = false;
1133
1134     for( ; numPrefix < 6; ++numPrefix)
1135     {
1136         if(!ParseH16(uri))
1137         {
1138             --uri;
1139             bEndHex = true;
1140             break;
1141         }
1142
1143         if(*uri != wxT(':'))
1144         {
1145             break;
1146         }
1147     }
1148
1149     if(!bEndHex && !ParseH16(uri))
1150     {
1151         --uri;
1152
1153         if (numPrefix)
1154             return false;
1155
1156         if (*uri == wxT(':'))
1157         {
1158             if (*++uri != wxT(':'))
1159                 return false;
1160
1161             maxPostfix = 5;
1162         }
1163         else
1164             maxPostfix = 6;
1165     }
1166     else
1167     {
1168         if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1169         {
1170             if (numPrefix != 6)
1171                 return false;
1172
1173             while (*--uri != wxT(':')) {}
1174             ++uri;
1175
1176             const wxChar* uristart = uri;
1177             //parse ls32
1178             // ls32          = ( h16 ":" h16 ) / IPv4address
1179             if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1180                 return true;
1181
1182             uri = uristart;
1183
1184             if (ParseIPv4address(uri))
1185                 return true;
1186             else
1187                 return false;
1188         }
1189         else
1190         {
1191             uri += 2;
1192
1193             if (numPrefix > 3)
1194                 maxPostfix = 0;
1195             else
1196                 maxPostfix = 4 - numPrefix;
1197         }
1198     }
1199
1200     bool bAllowAltEnding = maxPostfix == 0;
1201
1202     for(; maxPostfix != 0; --maxPostfix)
1203     {
1204         if(!ParseH16(uri) || *uri != wxT(':'))
1205             return false;
1206     }
1207
1208     if(numPrefix <= 4)
1209     {
1210         const wxChar* uristart = uri;
1211         //parse ls32
1212         // ls32          = ( h16 ":" h16 ) / IPv4address
1213         if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1214             return true;
1215
1216         uri = uristart;
1217
1218         if (ParseIPv4address(uri))
1219             return true;
1220
1221         uri = uristart;
1222
1223         if (!bAllowAltEnding)
1224             return false;
1225     }
1226
1227     if(numPrefix <= 5 && ParseH16(uri))
1228         return true;
1229
1230     return true;
1231 }
1232
1233 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1234 {
1235     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1236     if (*++uri != wxT('v') || !IsHex(*++uri))
1237         return false;
1238
1239     while (IsHex(*++uri)) {}
1240
1241     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1242         return false;
1243
1244     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1245
1246     return true;
1247 }
1248
1249
1250 // ---------------------------------------------------------------------------
1251 // CharToHex
1252 //
1253 // Converts a character into a numeric hexidecimal value, or 0 if the
1254 // passed in character is not a valid hex character
1255 // ---------------------------------------------------------------------------
1256
1257 //static
1258 wxChar wxURI::CharToHex(const wxChar& c)
1259 {
1260     if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1261     if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1262     if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1263
1264     return 0;
1265 }
1266
1267 // ---------------------------------------------------------------------------
1268 // IsXXX
1269 //
1270 // Returns true if the passed in character meets the criteria of the method
1271 // ---------------------------------------------------------------------------
1272
1273 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1274 bool wxURI::IsUnreserved (const wxChar& c)
1275 {   return IsAlpha(c) || IsDigit(c) ||
1276            c == wxT('-') ||
1277            c == wxT('.') ||
1278            c == wxT('_') ||
1279            c == wxT('~') //tilde
1280            ;
1281 }
1282
1283 bool wxURI::IsReserved (const wxChar& c)
1284 {
1285     return IsGenDelim(c) || IsSubDelim(c);
1286 }
1287
1288 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1289 bool wxURI::IsGenDelim (const wxChar& c)
1290 {
1291     return c == wxT(':') ||
1292            c == wxT('/') ||
1293            c == wxT('?') ||
1294            c == wxT('#') ||
1295            c == wxT('[') ||
1296            c == wxT(']') ||
1297            c == wxT('@');
1298 }
1299
1300 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1301 //!               / "*" / "+" / "," / ";" / "="
1302 bool wxURI::IsSubDelim (const wxChar& c)
1303 {
1304     return c == wxT('!') ||
1305            c == wxT('$') ||
1306            c == wxT('&') ||
1307            c == wxT('\'') ||
1308            c == wxT('(') ||
1309            c == wxT(')') ||
1310            c == wxT('*') ||
1311            c == wxT('+') ||
1312            c == wxT(',') ||
1313            c == wxT(';') ||
1314            c == wxT('=')
1315            ;
1316 }
1317
1318 bool wxURI::IsHex(const wxChar& c)
1319 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1320
1321 bool wxURI::IsAlpha(const wxChar& c)
1322 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1323
1324 bool wxURI::IsDigit(const wxChar& c)
1325 {   return c >= wxT('0') && c <= wxT('9');        }
1326
1327
1328 //end of uri.cpp
1329
1330
1331