src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 // For compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #ifndef WX_PRECOMP
  27     #include "wx/crt.h"
  28 #endif
  29
  30 #include "wx/uri.h"
  31
  32 // ---------------------------------------------------------------------------
  33 // definitions
  34 // ---------------------------------------------------------------------------
  35
// Run-time class information for wxURI, declared with wxObject as its base
// class.
IMPLEMENT_CLASS(wxURI, wxObject)
  37
  38 // ===========================================================================
  39 // implementation
  40 // ===========================================================================
  41
  42 // ---------------------------------------------------------------------------
  43 // utilities
  44 // ---------------------------------------------------------------------------
  45
  46 // ---------------------------------------------------------------------------
  47 //
  48 //                        wxURI
  49 //
  50 // ---------------------------------------------------------------------------
  51
  52 // ---------------------------------------------------------------------------
  53 //  Constructors
  54 // ---------------------------------------------------------------------------
  55
  56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58 }
  59
  60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  61 {
  62     Create(uri);
  63 }
  64
  65 wxURI::wxURI(const wxURI& uri)  : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     Assign(uri);
  68 }
  69
  70 // ---------------------------------------------------------------------------
  71 // Destructor and cleanup
  72 // ---------------------------------------------------------------------------
  73
  74 wxURI::~wxURI()
  75 {
  76     Clear();
  77 }
  78
  79 void wxURI::Clear()
  80 {
  81     m_scheme = m_userinfo = m_server = m_port = m_path =
  82     m_query = m_fragment = wxEmptyString;
  83
  84     m_hostType = wxURI_REGNAME;
  85
  86     m_fields = 0;
  87 }
  88
  89 // ---------------------------------------------------------------------------
  90 // Create
  91 //
  92 // This creates the URI - all we do here is call the main parsing method
  93 // ---------------------------------------------------------------------------
  94
  95 const wxChar* wxURI::Create(const wxString& uri)
  96 {
  97     if (m_fields)
  98         Clear();
  99
 100     // FIXME-UTF8: rewrite ParseXXX() methods using iterators
 101     // NB: using wxWxCharBuffer instead of just c_str() avoids keeping
 102     //     converted string in memory for longer than needed
 103     return Parse(wxWxCharBuffer(uri.c_str()));
 104 }
 105
 106 // ---------------------------------------------------------------------------
 107 // Escape Methods
 108 //
 109 // TranslateEscape unencodes a 3 character URL escape sequence
 110 //
 111 // Escape encodes an invalid URI character into a 3 character sequence
 112 //
 113 // IsEscape determines if the input string contains an escape sequence,
 114 // if it does, then it moves the input string past the escape sequence
 115 //
 116 // Unescape unencodes all 3 character URL escape sequences in a wxString
 117 // ---------------------------------------------------------------------------
 118
 119 wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
 120 {
 121     wxChar c1(*s);
 122     wxChar c2(*(s + 1));
 123
 124     wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
 125
 126     return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
 127 }
 128
 129 wxString wxURI::Unescape(const wxString& uri)
 130 {
 131     wxString new_uri;
 132
 133     for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
 134     {
 135         if ( *i == wxT('%') )
 136         {
 137             new_uri += wxURI::TranslateEscape(i + 1);
 138             i += 2;
 139         }
 140         else
 141             new_uri += *i;
 142     }
 143
 144     return new_uri;
 145 }
 146
 147 void wxURI::Escape(wxString& s, const wxChar& c)
 148 {
 149     const wxChar* hdig = wxT("0123456789abcdef");
 150     s += wxT('%');
 151     s += hdig[(c >> 4) & 15];
 152     s += hdig[c & 15];
 153 }
 154
 155 bool wxURI::IsEscape(const wxChar*& uri)
 156 {
 157     // pct-encoded   = "%" HEXDIG HEXDIG
 158     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 159         return true;
 160     else
 161         return false;
 162 }
 163
 164 // ---------------------------------------------------------------------------
 165 // GetUser
 166 // GetPassword
 167 //
 168 // Gets the username and password via the old URL method.
 169 // ---------------------------------------------------------------------------
 170 wxString wxURI::GetUser() const
 171 {
 172       size_t dwPasswordPos = m_userinfo.find(':');
 173
 174       if (dwPasswordPos == wxString::npos)
 175           dwPasswordPos = 0;
 176
 177       return m_userinfo(0, dwPasswordPos);
 178 }
 179
 180 wxString wxURI::GetPassword() const
 181 {
 182       size_t dwPasswordPos = m_userinfo.find(':');
 183
 184       if (dwPasswordPos == wxString::npos)
 185           return wxT("");
 186       else
 187           return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
 188 }
 189
 190 // ---------------------------------------------------------------------------
 191 // BuildURI
 192 //
 193 // BuildURI() builds the entire URI into a useable
 194 // representation, including proper identification characters such as slashes
 195 //
 196 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 197 // the components that accept escape sequences
 198 // ---------------------------------------------------------------------------
 199
 200 wxString wxURI::BuildURI() const
 201 {
 202     wxString ret;
 203
 204     if (HasScheme())
 205         ret = ret + m_scheme + wxT(":");
 206
 207     if (HasServer())
 208     {
 209         ret += wxT("//");
 210
 211         if (HasUserInfo())
 212             ret = ret + m_userinfo + wxT("@");
 213
 214         ret += m_server;
 215
 216         if (HasPort())
 217             ret = ret + wxT(":") + m_port;
 218     }
 219
 220     ret += m_path;
 221
 222     if (HasQuery())
 223         ret = ret + wxT("?") + m_query;
 224
 225     if (HasFragment())
 226         ret = ret + wxT("#") + m_fragment;
 227
 228     return ret;
 229 }
 230
 231 wxString wxURI::BuildUnescapedURI() const
 232 {
 233     wxString ret;
 234
 235     if (HasScheme())
 236         ret = ret + m_scheme + wxT(":");
 237
 238     if (HasServer())
 239     {
 240         ret += wxT("//");
 241
 242         if (HasUserInfo())
 243             ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
 244
 245         if (m_hostType == wxURI_REGNAME)
 246             ret += wxURI::Unescape(m_server);
 247         else
 248             ret += m_server;
 249
 250         if (HasPort())
 251             ret = ret + wxT(":") + m_port;
 252     }
 253
 254     ret += wxURI::Unescape(m_path);
 255
 256     if (HasQuery())
 257         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 258
 259     if (HasFragment())
 260         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 261
 262     return ret;
 263 }
 264
 265 // ---------------------------------------------------------------------------
 266 // Assignment
 267 // ---------------------------------------------------------------------------
 268
 269 wxURI& wxURI::Assign(const wxURI& uri)
 270 {
 271     //assign fields
 272     m_fields = uri.m_fields;
 273
 274     //ref over components
 275     m_scheme = uri.m_scheme;
 276     m_userinfo = uri.m_userinfo;
 277     m_server = uri.m_server;
 278     m_hostType = uri.m_hostType;
 279     m_port = uri.m_port;
 280     m_path = uri.m_path;
 281     m_query = uri.m_query;
 282     m_fragment = uri.m_fragment;
 283
 284     return *this;
 285 }
 286
 287 wxURI& wxURI::operator = (const wxURI& uri)
 288 {
 289     return Assign(uri);
 290 }
 291
 292 wxURI& wxURI::operator = (const wxString& string)
 293 {
 294     Create(string);
 295     return *this;
 296 }
 297
 298 // ---------------------------------------------------------------------------
 299 // Comparison
 300 // ---------------------------------------------------------------------------
 301
 302 bool wxURI::operator == (const wxURI& uri) const
 303 {
 304     if (HasScheme())
 305     {
 306         if(m_scheme != uri.m_scheme)
 307             return false;
 308     }
 309     else if (uri.HasScheme())
 310         return false;
 311
 312
 313     if (HasServer())
 314     {
 315         if (HasUserInfo())
 316         {
 317             if (m_userinfo != uri.m_userinfo)
 318                 return false;
 319         }
 320         else if (uri.HasUserInfo())
 321             return false;
 322
 323         if (m_server != uri.m_server ||
 324             m_hostType != uri.m_hostType)
 325             return false;
 326
 327         if (HasPort())
 328         {
 329             if(m_port != uri.m_port)
 330                 return false;
 331         }
 332         else if (uri.HasPort())
 333             return false;
 334     }
 335     else if (uri.HasServer())
 336         return false;
 337
 338
 339     if (HasPath())
 340     {
 341         if(m_path != uri.m_path)
 342             return false;
 343     }
 344     else if (uri.HasPath())
 345         return false;
 346
 347     if (HasQuery())
 348     {
 349         if (m_query != uri.m_query)
 350             return false;
 351     }
 352     else if (uri.HasQuery())
 353         return false;
 354
 355     if (HasFragment())
 356     {
 357         if (m_fragment != uri.m_fragment)
 358             return false;
 359     }
 360     else if (uri.HasFragment())
 361         return false;
 362
 363     return true;
 364 }
 365
 366 // ---------------------------------------------------------------------------
 367 // IsReference
 368 //
 369 // if there is no authority or scheme, it is a reference
 370 // ---------------------------------------------------------------------------
 371
 372 bool wxURI::IsReference() const
 373 {   return !HasScheme() || !HasServer();  }
 374
 375 // ---------------------------------------------------------------------------
 376 // Parse
 377 //
 378 // Master URI parsing method.  Just calls the individual parsing methods
 379 //
 380 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 381 // URI-reference = URI / relative
 382 // ---------------------------------------------------------------------------
 383
 384 const wxChar* wxURI::Parse(const wxChar *uri)
 385 {
 386     uri = ParseScheme(uri);
 387     uri = ParseAuthority(uri);
 388     uri = ParsePath(uri);
 389     uri = ParseQuery(uri);
 390     return ParseFragment(uri);
 391 }
 392
 393 // ---------------------------------------------------------------------------
 394 // ParseXXX
 395 //
 396 // Individual parsers for each URI component
 397 // ---------------------------------------------------------------------------
 398
 399 const wxChar* wxURI::ParseScheme(const wxChar *uri)
 400 {
 401     wxASSERT(uri != NULL);
 402
 403     //copy of the uri - used for figuring out
 404     //length of each component
 405     const wxChar* uricopy = uri;
 406
 407     //Does the uri have a scheme (first character alpha)?
 408     if (IsAlpha(*uri))
 409     {
 410         m_scheme += *uri++;
 411
 412         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 413         while (IsAlpha(*uri) || IsDigit(*uri) ||
 414                *uri == wxT('+')   ||
 415                *uri == wxT('-')   ||
 416                *uri == wxT('.'))
 417         {
 418             m_scheme += *uri++;
 419         }
 420
 421         //valid scheme?
 422         if (*uri == wxT(':'))
 423         {
 424             //mark the scheme as valid
 425             m_fields |= wxURI_SCHEME;
 426
 427             //move reference point up to input buffer
 428             uricopy = ++uri;
 429         }
 430         else
 431             //relative uri with relative path reference
 432             m_scheme = wxEmptyString;
 433     }
 434 //    else
 435         //relative uri with _possible_ relative path reference
 436
 437     return uricopy;
 438 }
 439
 440 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 441 {
 442     // authority     = [ userinfo "@" ] host [ ":" port ]
 443     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 444     {
 445         //skip past the two slashes
 446         uri += 2;
 447
 448         // ############# DEVIATION FROM RFC #########################
 449         // Don't parse the server component for file URIs
 450         if(m_scheme != wxT("file"))
 451         {
 452             //normal way
 453         uri = ParseUserInfo(uri);
 454         uri = ParseServer(uri);
 455         return ParsePort(uri);
 456         }
 457     }
 458
 459     return uri;
 460 }
 461
 462 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
 463 {
 464     wxASSERT(uri != NULL);
 465
 466     //copy of the uri - used for figuring out
 467     //length of each component
 468     const wxChar* uricopy = uri;
 469
 470     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 471     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 472     {
 473         if(IsUnreserved(*uri) ||
 474            IsSubDelim(*uri) || *uri == wxT(':'))
 475             m_userinfo += *uri++;
 476         else if (IsEscape(uri))
 477         {
 478             m_userinfo += *uri++;
 479             m_userinfo += *uri++;
 480             m_userinfo += *uri++;
 481         }
 482         else
 483             Escape(m_userinfo, *uri++);
 484     }
 485
 486     if(*uri == wxT('@'))
 487     {
 488         //valid userinfo
 489         m_fields |= wxURI_USERINFO;
 490
 491         uricopy = ++uri;
 492     }
 493     else
 494         m_userinfo = wxEmptyString;
 495
 496     return uricopy;
 497 }
 498
 499 const wxChar* wxURI::ParseServer(const wxChar* uri)
 500 {
 501     wxASSERT(uri != NULL);
 502
 503     //copy of the uri - used for figuring out
 504     //length of each component
 505     const wxChar* uricopy = uri;
 506
 507     // host          = IP-literal / IPv4address / reg-name
 508     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 509     if (*uri == wxT('['))
 510     {
 511         ++uri; //some compilers don't support *&ing a ++*
 512         if (ParseIPv6address(uri) && *uri == wxT(']'))
 513         {
 514             ++uri;
 515             m_hostType = wxURI_IPV6ADDRESS;
 516
 517             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 518             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 519             theBuffer.SetLength(uri-uricopy);
 520         }
 521         else
 522         {
 523             uri = uricopy;
 524
 525             ++uri; //some compilers don't support *&ing a ++*
 526             if (ParseIPvFuture(uri) && *uri == wxT(']'))
 527             {
 528                 ++uri;
 529                 m_hostType = wxURI_IPVFUTURE;
 530
 531                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 532                 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 533                 theBuffer.SetLength(uri-uricopy);
 534             }
 535             else
 536                 uri = uricopy;
 537         }
 538     }
 539     else
 540     {
 541         if (ParseIPv4address(uri))
 542         {
 543             m_hostType = wxURI_IPV4ADDRESS;
 544
 545             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 546             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 547             theBuffer.SetLength(uri-uricopy);
 548         }
 549         else
 550             uri = uricopy;
 551     }
 552
 553     if(m_hostType == wxURI_REGNAME)
 554     {
 555         uri = uricopy;
 556         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 557         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 558         {
 559             if(IsUnreserved(*uri) ||  IsSubDelim(*uri))
 560                 m_server += *uri++;
 561             else if (IsEscape(uri))
 562             {
 563                 m_server += *uri++;
 564                 m_server += *uri++;
 565                 m_server += *uri++;
 566             }
 567             else
 568                 Escape(m_server, *uri++);
 569         }
 570     }
 571
 572     //mark the server as valid
 573     m_fields |= wxURI_SERVER;
 574
 575     return uri;
 576 }
 577
 578
 579 const wxChar* wxURI::ParsePort(const wxChar* uri)
 580 {
 581     wxASSERT(uri != NULL);
 582
 583     // port          = *DIGIT
 584     if(*uri == wxT(':'))
 585     {
 586         ++uri;
 587         while(IsDigit(*uri))
 588         {
 589             m_port += *uri++;
 590         }
 591
 592         //mark the port as valid
 593         m_fields |= wxURI_PORT;
 594     }
 595
 596     return uri;
 597 }
 598
 599 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 600 {
 601     wxASSERT(uri != NULL);
 602
 603     //copy of the uri - used for figuring out
 604     //length of each component
 605     const wxChar* uricopy = uri;
 606
 607     /// hier-part     = "//" authority path-abempty
 608     ///               / path-absolute
 609     ///               / path-rootless
 610     ///               / path-empty
 611     ///
 612     /// relative-part = "//" authority path-abempty
 613     ///               / path-absolute
 614     ///               / path-noscheme
 615     ///               / path-empty
 616     ///
 617     /// path-abempty  = *( "/" segment )
 618     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 619     /// path-noscheme = segment-nz-nc *( "/" segment )
 620     /// path-rootless = segment-nz *( "/" segment )
 621     /// path-empty    = 0<pchar>
 622     ///
 623     /// segment       = *pchar
 624     /// segment-nz    = 1*pchar
 625     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 626     ///               ; non-zero-length segment without any colon ":"
 627     ///
 628     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 629     if (*uri == wxT('/'))
 630     {
 631         m_path += *uri++;
 632
 633         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 634         {
 635             if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 636                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 637                 m_path += *uri++;
 638             else if (IsEscape(uri))
 639             {
 640                 m_path += *uri++;
 641                 m_path += *uri++;
 642                 m_path += *uri++;
 643             }
 644             else
 645                 Escape(m_path, *uri++);
 646         }
 647
 648         if (bNormalize)
 649         {
 650             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 651             Normalize(theBuffer, true);
 652             theBuffer.SetLength(wxStrlen(theBuffer));
 653         }
 654         //mark the path as valid
 655         m_fields |= wxURI_PATH;
 656     }
 657     else if(*uri) //Relative path
 658     {
 659         if (bReference)
 660         {
 661             //no colon allowed
 662             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 663             {
 664                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 665                   *uri == wxT('@') || *uri == wxT('/'))
 666                     m_path += *uri++;
 667                 else if (IsEscape(uri))
 668                 {
 669                     m_path += *uri++;
 670                     m_path += *uri++;
 671                     m_path += *uri++;
 672                 }
 673                 else
 674                     Escape(m_path, *uri++);
 675             }
 676         }
 677         else
 678         {
 679             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 680             {
 681                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 682                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 683                     m_path += *uri++;
 684                 else if (IsEscape(uri))
 685                 {
 686                     m_path += *uri++;
 687                     m_path += *uri++;
 688                     m_path += *uri++;
 689                 }
 690                 else
 691                     Escape(m_path, *uri++);
 692             }
 693         }
 694
 695         if (uri != uricopy)
 696         {
 697             if (bNormalize)
 698             {
 699                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 700                 Normalize(theBuffer);
 701                 theBuffer.SetLength(wxStrlen(theBuffer));
 702             }
 703
 704             //mark the path as valid
 705             m_fields |= wxURI_PATH;
 706         }
 707     }
 708
 709     return uri;
 710 }
 711
 712
 713 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 714 {
 715     wxASSERT(uri != NULL);
 716
 717     // query         = *( pchar / "/" / "?" )
 718     if (*uri == wxT('?'))
 719     {
 720         ++uri;
 721         while(*uri && *uri != wxT('#'))
 722         {
 723             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 724                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 725                   m_query += *uri++;
 726             else if (IsEscape(uri))
 727             {
 728                   m_query += *uri++;
 729                   m_query += *uri++;
 730                   m_query += *uri++;
 731             }
 732             else
 733                   Escape(m_query, *uri++);
 734         }
 735
 736         //mark the server as valid
 737         m_fields |= wxURI_QUERY;
 738     }
 739
 740     return uri;
 741 }
 742
 743
 744 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 745 {
 746     wxASSERT(uri != NULL);
 747
 748     // fragment      = *( pchar / "/" / "?" )
 749     if (*uri == wxT('#'))
 750     {
 751         ++uri;
 752         while(*uri)
 753         {
 754             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 755                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 756                   m_fragment += *uri++;
 757             else if (IsEscape(uri))
 758             {
 759                   m_fragment += *uri++;
 760                   m_fragment += *uri++;
 761                   m_fragment += *uri++;
 762             }
 763             else
 764                   Escape(m_fragment, *uri++);
 765         }
 766
 767         //mark the server as valid
 768         m_fields |= wxURI_FRAGMENT;
 769     }
 770
 771     return uri;
 772 }
 773
 774 // ---------------------------------------------------------------------------
 775 // Resolve
 776 //
 777 // Builds missing components of this uri from a base uri
 778 //
 779 // A version of the algorithm outlined in the RFC is used here
 780 // (it is shown in comments)
 781 //
 782 // Note that an empty URI inherits all components
 783 // ---------------------------------------------------------------------------
 784
 785 void wxURI::Resolve(const wxURI& base, int flags)
 786 {
 787     wxASSERT_MSG(!base.IsReference(),
 788                 wxT("wxURI to inherit from must not be a reference!"));
 789
 790     // If we arn't being strict, enable the older (pre-RFC2396)
 791     // loophole that allows this uri to inherit other
 792     // properties from the base uri - even if the scheme
 793     // is defined
 794     if ( !(flags & wxURI_STRICT) &&
 795             HasScheme() && base.HasScheme() &&
 796                 m_scheme == base.m_scheme )
 797     {
 798         m_fields -= wxURI_SCHEME;
 799     }
 800
 801
 802     // Do nothing if this is an absolute wxURI
 803     //    if defined(R.scheme) then
 804     //       T.scheme    = R.scheme;
 805     //       T.authority = R.authority;
 806     //       T.path      = remove_dot_segments(R.path);
 807     //       T.query     = R.query;
 808     if (HasScheme())
 809     {
 810         return;
 811     }
 812
 813     //No scheme - inherit
 814     m_scheme = base.m_scheme;
 815     m_fields |= wxURI_SCHEME;
 816
 817     // All we need to do for relative URIs with an
 818     // authority component is just inherit the scheme
 819     //       if defined(R.authority) then
 820     //          T.authority = R.authority;
 821     //          T.path      = remove_dot_segments(R.path);
 822     //          T.query     = R.query;
 823     if (HasServer())
 824     {
 825         return;
 826     }
 827
 828     //No authority - inherit
 829     if (base.HasUserInfo())
 830     {
 831         m_userinfo = base.m_userinfo;
 832         m_fields |= wxURI_USERINFO;
 833     }
 834
 835     m_server = base.m_server;
 836     m_hostType = base.m_hostType;
 837     m_fields |= wxURI_SERVER;
 838
 839     if (base.HasPort())
 840     {
 841         m_port = base.m_port;
 842         m_fields |= wxURI_PORT;
 843     }
 844
 845
 846     // Simple path inheritance from base
 847     if (!HasPath())
 848     {
 849         //             T.path = Base.path;
 850         m_path = base.m_path;
 851         m_fields |= wxURI_PATH;
 852
 853
 854         //             if defined(R.query) then
 855         //                T.query = R.query;
 856         //             else
 857         //                T.query = Base.query;
 858         //             endif;
 859         if (!HasQuery())
 860         {
 861             m_query = base.m_query;
 862             m_fields |= wxURI_QUERY;
 863         }
 864     }
 865     else
 866     {
 867         //             if (R.path starts-with "/") then
 868         //                T.path = remove_dot_segments(R.path);
 869         //             else
 870         //                T.path = merge(Base.path, R.path);
 871         //                T.path = remove_dot_segments(T.path);
 872         //             endif;
 873         //             T.query = R.query;
 874         if (m_path[0u] != wxT('/'))
 875         {
 876             //Merge paths
 877             wxString::const_iterator op = m_path.begin();
 878             wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
 879
 880             //not a ending directory?  move up
 881             if (base.m_path[0] && *(bp-1) != wxT('/'))
 882                 UpTree(base.m_path.begin(), bp);
 883
 884             //normalize directories
 885             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 886                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 887             {
 888                 UpTree(base.m_path.begin(), bp);
 889
 890                 if (*(op+2) == '\0')
 891                     op += 2;
 892                 else
 893                     op += 3;
 894             }
 895
 896             m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
 897                      m_path.substr((op - m_path.begin()), m_path.length());
 898         }
 899     }
 900
 901     //T.fragment = R.fragment;
 902 }
 903
 904 // ---------------------------------------------------------------------------
 905 // UpTree
 906 //
 907 // Moves a URI path up a directory
 908 // ---------------------------------------------------------------------------
 909
 910 //static
 911 void wxURI::UpTree(wxString::const_iterator uristart,
 912                    wxString::const_iterator& uri)
 913 {
 914     if (uri != uristart && *(uri-1) == wxT('/'))
 915     {
 916         uri -= 2;
 917     }
 918
 919     for(;uri != uristart; --uri)
 920     {
 921         if (*uri == wxT('/'))
 922         {
 923             ++uri;
 924             break;
 925         }
 926     }
 927
 928     //!!!TODO:HACK!!!//
 929     if (uri == uristart && *uri == wxT('/'))
 930         ++uri;
 931     //!!!//
 932 }
 933
 934 // FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
 935 /*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 936 {
 937     if (uri != uristart && *(uri-1) == wxT('/'))
 938     {
 939         uri -= 2;
 940     }
 941
 942     for(;uri != uristart; --uri)
 943     {
 944         if (*uri == wxT('/'))
 945         {
 946             ++uri;
 947             break;
 948         }
 949     }
 950
 951     //!!!TODO:HACK!!!//
 952     if (uri == uristart && *uri == wxT('/'))
 953         ++uri;
 954     //!!!//
 955 }
 956 // end of FIXME-UTF8
 957
 958 // ---------------------------------------------------------------------------
 959 // Normalize
 960 //
 961 // Normalizes directories in-place
 962 //
 963 // I.E. ./ and . are ignored
 964 //
 965 // ../ and .. are removed if a directory is before it, along
 966 // with that directory (leading .. and ../ are kept)
 967 // ---------------------------------------------------------------------------
 968
 969 //static
 970 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 971 {
 972     wxChar* cp = s;
 973     wxChar* bp = s;
 974
 975     if(s[0] == wxT('/'))
 976         ++bp;
 977
 978     while(*cp)
 979     {
 980         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 981             && (bp == cp || *(cp-1) == wxT('/')))
 982         {
 983             //. _or_ ./  - ignore
 984             if (*(cp+1) == '\0')
 985                 cp += 1;
 986             else
 987                 cp += 2;
 988         }
 989         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 990                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 991                 && (bp == cp || *(cp-1) == wxT('/')))
 992         {
 993             //.. _or_ ../ - go up the tree
 994             if (s != bp)
 995             {
 996                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 997
 998                 if (*(cp+2) == '\0')
 999                     cp += 2;
1000                 else
1001                     cp += 3;
1002             }
1003             else if (!bIgnoreLeads)
1004
1005             {
1006                 *bp++ = *cp++;
1007                 *bp++ = *cp++;
1008                 if (*cp)
1009                     *bp++ = *cp++;
1010
1011                 s = bp;
1012             }
1013             else
1014             {
1015                 if (*(cp+2) == '\0')
1016                     cp += 2;
1017                 else
1018                     cp += 3;
1019             }
1020         }
1021         else
1022             *s++ = *cp++;
1023     }
1024
1025     *s = '\0';
1026 }
1027
1028 // ---------------------------------------------------------------------------
1029 // ParseH16
1030 //
1031 // Parses 1 to 4 hex values.  Returns true if the first character of the input
// string is a valid hex character.  It is the caller's responsibility to move
1033 // the input string back to its original position on failure.
1034 // ---------------------------------------------------------------------------
1035
1036 bool wxURI::ParseH16(const wxChar*& uri)
1037 {
1038     // h16           = 1*4HEXDIG
1039     if(!IsHex(*++uri))
1040         return false;
1041
1042     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1043         ++uri;
1044
1045     return true;
1046 }
1047
1048 // ---------------------------------------------------------------------------
1049 // ParseIPXXX
1050 //
1051 // Parses a certain version of an IP address and moves the input string past
1052 // it.  Returns true if the input  string contains the proper version of an ip
1053 // address.  It is the caller's responsability to move the input string back
1054 // to its original position on failure.
1055 // ---------------------------------------------------------------------------
1056
1057 bool wxURI::ParseIPv4address(const wxChar*& uri)
1058 {
1059     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
1060     //
1061     //dec-octet     =      DIGIT                    ; 0-9
1062     //                / %x31-39 DIGIT               ; 10-99
1063     //                / "1" 2DIGIT                  ; 100-199
1064     //                / "2" %x30-34 DIGIT           ; 200-249
1065     //                / "25" %x30-35                ; 250-255
1066     size_t iIPv4 = 0;
1067     if (IsDigit(*uri))
1068     {
1069         ++iIPv4;
1070
1071
1072         //each ip part must be between 0-255 (dupe of version in for loop)
1073         if( IsDigit(*++uri) && IsDigit(*++uri) &&
1074            //100 or less  (note !)
1075            !( (*(uri-2) < wxT('2')) ||
1076            //240 or less
1077              (*(uri-2) == wxT('2') &&
1078                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1079              )
1080             )
1081           )
1082         {
1083             return false;
1084         }
1085
1086         if(IsDigit(*uri))++uri;
1087
1088         //compilers should unroll this loop
1089         for(; iIPv4 < 4; ++iIPv4)
1090         {
1091             if (*uri != wxT('.') || !IsDigit(*++uri))
1092                 break;
1093
1094             //each ip part must be between 0-255
1095             if( IsDigit(*++uri) && IsDigit(*++uri) &&
1096                //100 or less  (note !)
1097                !( (*(uri-2) < wxT('2')) ||
1098                //240 or less
1099                  (*(uri-2) == wxT('2') &&
1100                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1101                  )
1102                 )
1103               )
1104             {
1105                 return false;
1106             }
1107             if(IsDigit(*uri))++uri;
1108         }
1109     }
1110     return iIPv4 == 4;
1111 }
1112
1113 bool wxURI::ParseIPv6address(const wxChar*& uri)
1114 {
1115     // IPv6address   =                            6( h16 ":" ) ls32
1116     //               /                       "::" 5( h16 ":" ) ls32
1117     //               / [               h16 ] "::" 4( h16 ":" ) ls32
1118     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1119     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1120     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1121     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
1122     //               / [ *5( h16 ":" ) h16 ] "::"              h16
1123     //               / [ *6( h16 ":" ) h16 ] "::"
1124
1125     size_t numPrefix = 0,
1126               maxPostfix;
1127
1128     bool bEndHex = false;
1129
1130     for( ; numPrefix < 6; ++numPrefix)
1131     {
1132         if(!ParseH16(uri))
1133         {
1134             --uri;
1135             bEndHex = true;
1136             break;
1137         }
1138
1139         if(*uri != wxT(':'))
1140         {
1141             break;
1142         }
1143     }
1144
1145     if(!bEndHex && !ParseH16(uri))
1146     {
1147         --uri;
1148
1149         if (numPrefix)
1150             return false;
1151
1152         if (*uri == wxT(':'))
1153         {
1154             if (*++uri != wxT(':'))
1155                 return false;
1156
1157             maxPostfix = 5;
1158         }
1159         else
1160             maxPostfix = 6;
1161     }
1162     else
1163     {
1164         if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1165         {
1166             if (numPrefix != 6)
1167                 return false;
1168
1169             while (*--uri != wxT(':')) {}
1170             ++uri;
1171
1172             const wxChar* uristart = uri;
1173             //parse ls32
1174             // ls32          = ( h16 ":" h16 ) / IPv4address
1175             if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1176                 return true;
1177
1178             uri = uristart;
1179
1180             if (ParseIPv4address(uri))
1181                 return true;
1182             else
1183                 return false;
1184         }
1185         else
1186         {
1187             uri += 2;
1188
1189             if (numPrefix > 3)
1190                 maxPostfix = 0;
1191             else
1192                 maxPostfix = 4 - numPrefix;
1193         }
1194     }
1195
1196     bool bAllowAltEnding = maxPostfix == 0;
1197
1198     for(; maxPostfix != 0; --maxPostfix)
1199     {
1200         if(!ParseH16(uri) || *uri != wxT(':'))
1201             return false;
1202     }
1203
1204     if(numPrefix <= 4)
1205     {
1206         const wxChar* uristart = uri;
1207         //parse ls32
1208         // ls32          = ( h16 ":" h16 ) / IPv4address
1209         if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1210             return true;
1211
1212         uri = uristart;
1213
1214         if (ParseIPv4address(uri))
1215             return true;
1216
1217         uri = uristart;
1218
1219         if (!bAllowAltEnding)
1220             return false;
1221     }
1222
1223     if(numPrefix <= 5 && ParseH16(uri))
1224         return true;
1225
1226     return true;
1227 }
1228
1229 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1230 {
1231     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1232     if (*++uri != wxT('v') || !IsHex(*++uri))
1233         return false;
1234
1235     while (IsHex(*++uri)) {}
1236
1237     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1238         return false;
1239
1240     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1241
1242     return true;
1243 }
1244
1245
1246 // ---------------------------------------------------------------------------
1247 // CharToHex
1248 //
1249 // Converts a character into a numeric hexidecimal value, or 0 if the
1250 // passed in character is not a valid hex character
1251 // ---------------------------------------------------------------------------
1252
1253 //static
1254 wxChar wxURI::CharToHex(const wxChar& c)
1255 {
1256     if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1257     if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1258     if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1259
1260     return 0;
1261 }
1262
1263 // ---------------------------------------------------------------------------
1264 // IsXXX
1265 //
1266 // Returns true if the passed in character meets the criteria of the method
1267 // ---------------------------------------------------------------------------
1268
1269 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1270 bool wxURI::IsUnreserved (const wxChar& c)
1271 {   return IsAlpha(c) || IsDigit(c) ||
1272            c == wxT('-') ||
1273            c == wxT('.') ||
1274            c == wxT('_') ||
1275            c == wxT('~') //tilde
1276            ;
1277 }
1278
1279 bool wxURI::IsReserved (const wxChar& c)
1280 {
1281     return IsGenDelim(c) || IsSubDelim(c);
1282 }
1283
1284 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1285 bool wxURI::IsGenDelim (const wxChar& c)
1286 {
1287     return c == wxT(':') ||
1288            c == wxT('/') ||
1289            c == wxT('?') ||
1290            c == wxT('#') ||
1291            c == wxT('[') ||
1292            c == wxT(']') ||
1293            c == wxT('@');
1294 }
1295
1296 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1297 //!               / "*" / "+" / "," / ";" / "="
1298 bool wxURI::IsSubDelim (const wxChar& c)
1299 {
1300     return c == wxT('!') ||
1301            c == wxT('$') ||
1302            c == wxT('&') ||
1303            c == wxT('\'') ||
1304            c == wxT('(') ||
1305            c == wxT(')') ||
1306            c == wxT('*') ||
1307            c == wxT('+') ||
1308            c == wxT(',') ||
1309            c == wxT(';') ||
1310            c == wxT('=')
1311            ;
1312 }
1313
1314 bool wxURI::IsHex(const wxChar& c)
1315 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1316
1317 bool wxURI::IsAlpha(const wxChar& c)
1318 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1319
1320 bool wxURI::IsDigit(const wxChar& c)
1321 {   return c >= wxT('0') && c <= wxT('9');        }
1322
1323
1324 //end of uri.cpp
1325
1326
1327