src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 // For compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/uri.h"
  27
  28 // ---------------------------------------------------------------------------
  29 // definitions
  30 // ---------------------------------------------------------------------------
  31
  32 IMPLEMENT_CLASS(wxURI, wxObject)
  33
  34 // ===========================================================================
  35 // implementation
  36 // ===========================================================================
  37
  38 // ---------------------------------------------------------------------------
  39 // utilities
  40 // ---------------------------------------------------------------------------
  41
  42 // ---------------------------------------------------------------------------
  43 //
  44 //                        wxURI
  45 //
  46 // ---------------------------------------------------------------------------
  47
  48 // ---------------------------------------------------------------------------
  49 //  Constructors
  50 // ---------------------------------------------------------------------------
  51
  52 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  53 {
  54 }
  55
  56 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58     Create(uri);
  59 }
  60
  61 wxURI::wxURI(const wxURI& uri)  : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
  62 {
  63     Assign(uri);
  64 }
  65
  66 // ---------------------------------------------------------------------------
  67 // Destructor and cleanup
  68 // ---------------------------------------------------------------------------
  69
  70 wxURI::~wxURI()
  71 {
  72     Clear();
  73 }
  74
  75 void wxURI::Clear()
  76 {
  77     m_scheme = m_userinfo = m_server = m_port = m_path =
  78     m_query = m_fragment = wxEmptyString;
  79
  80     m_hostType = wxURI_REGNAME;
  81
  82     m_fields = 0;
  83 }
  84
  85 // ---------------------------------------------------------------------------
  86 // Create
  87 //
  88 // This creates the URI - all we do here is call the main parsing method
  89 // ---------------------------------------------------------------------------
  90
  91 const wxChar* wxURI::Create(const wxString& uri)
  92 {
  93     if (m_fields)
  94         Clear();
  95
  96     return Parse(uri);
  97 }
  98
  99 // ---------------------------------------------------------------------------
 100 // Escape Methods
 101 //
 102 // TranslateEscape unencodes a 3 character URL escape sequence
 103 //
 104 // Escape encodes an invalid URI character into a 3 character sequence
 105 //
 106 // IsEscape determines if the input string contains an escape sequence,
 107 // if it does, then it moves the input string past the escape sequence
 108 //
 109 // Unescape unencodes all 3 character URL escape sequences in a wxString
 110 // ---------------------------------------------------------------------------
 111
 112 wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
 113 {
 114     wxChar c1(*s);
 115     wxChar c2(*(s + 1));
 116
 117     wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
 118
 119     return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
 120 }
 121
 122 wxString wxURI::Unescape(const wxString& uri)
 123 {
 124     wxString new_uri;
 125
 126     for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
 127     {
 128         if ( *i == wxT('%') )
 129         {
 130             new_uri += wxURI::TranslateEscape(i + 1);
 131             i += 2;
 132         }
 133         else
 134             new_uri += *i;
 135     }
 136
 137     return new_uri;
 138 }
 139
 140 void wxURI::Escape(wxString& s, const wxChar& c)
 141 {
 142     const wxChar* hdig = wxT("0123456789abcdef");
 143     s += wxT('%');
 144     s += hdig[(c >> 4) & 15];
 145     s += hdig[c & 15];
 146 }
 147
 148 bool wxURI::IsEscape(const wxChar*& uri)
 149 {
 150     // pct-encoded   = "%" HEXDIG HEXDIG
 151     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 152         return true;
 153     else
 154         return false;
 155 }
 156
 157 // ---------------------------------------------------------------------------
 158 // GetUser
 159 // GetPassword
 160 //
 161 // Gets the username and password via the old URL method.
 162 // ---------------------------------------------------------------------------
 163 wxString wxURI::GetUser() const
 164 {
 165       size_t dwPasswordPos = m_userinfo.find(':');
 166
 167       if (dwPasswordPos == wxString::npos)
 168           dwPasswordPos = 0;
 169
 170       return m_userinfo(0, dwPasswordPos);
 171 }
 172
 173 wxString wxURI::GetPassword() const
 174 {
 175       size_t dwPasswordPos = m_userinfo.find(':');
 176
 177       if (dwPasswordPos == wxString::npos)
 178           return wxT("");
 179       else
 180           return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
 181 }
 182
 183 // ---------------------------------------------------------------------------
 184 // BuildURI
 185 //
 186 // BuildURI() builds the entire URI into a useable
 187 // representation, including proper identification characters such as slashes
 188 //
 189 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 190 // the components that accept escape sequences
 191 // ---------------------------------------------------------------------------
 192
 193 wxString wxURI::BuildURI() const
 194 {
 195     wxString ret;
 196
 197     if (HasScheme())
 198         ret = ret + m_scheme + wxT(":");
 199
 200     if (HasServer())
 201     {
 202         ret += wxT("//");
 203
 204         if (HasUserInfo())
 205             ret = ret + m_userinfo + wxT("@");
 206
 207         ret += m_server;
 208
 209         if (HasPort())
 210             ret = ret + wxT(":") + m_port;
 211     }
 212
 213     ret += m_path;
 214
 215     if (HasQuery())
 216         ret = ret + wxT("?") + m_query;
 217
 218     if (HasFragment())
 219         ret = ret + wxT("#") + m_fragment;
 220
 221     return ret;
 222 }
 223
 224 wxString wxURI::BuildUnescapedURI() const
 225 {
 226     wxString ret;
 227
 228     if (HasScheme())
 229         ret = ret + m_scheme + wxT(":");
 230
 231     if (HasServer())
 232     {
 233         ret += wxT("//");
 234
 235         if (HasUserInfo())
 236             ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
 237
 238         if (m_hostType == wxURI_REGNAME)
 239             ret += wxURI::Unescape(m_server);
 240         else
 241             ret += m_server;
 242
 243         if (HasPort())
 244             ret = ret + wxT(":") + m_port;
 245     }
 246
 247     ret += wxURI::Unescape(m_path);
 248
 249     if (HasQuery())
 250         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 251
 252     if (HasFragment())
 253         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 254
 255     return ret;
 256 }
 257
 258 // ---------------------------------------------------------------------------
 259 // Assignment
 260 // ---------------------------------------------------------------------------
 261
 262 wxURI& wxURI::Assign(const wxURI& uri)
 263 {
 264     //assign fields
 265     m_fields = uri.m_fields;
 266
 267     //ref over components
 268     m_scheme = uri.m_scheme;
 269     m_userinfo = uri.m_userinfo;
 270     m_server = uri.m_server;
 271     m_hostType = uri.m_hostType;
 272     m_port = uri.m_port;
 273     m_path = uri.m_path;
 274     m_query = uri.m_query;
 275     m_fragment = uri.m_fragment;
 276
 277     return *this;
 278 }
 279
 280 wxURI& wxURI::operator = (const wxURI& uri)
 281 {
 282     return Assign(uri);
 283 }
 284
 285 wxURI& wxURI::operator = (const wxString& string)
 286 {
 287     Create(string);
 288     return *this;
 289 }
 290
 291 // ---------------------------------------------------------------------------
 292 // Comparison
 293 // ---------------------------------------------------------------------------
 294
 295 bool wxURI::operator == (const wxURI& uri) const
 296 {
 297     if (HasScheme())
 298     {
 299         if(m_scheme != uri.m_scheme)
 300             return false;
 301     }
 302     else if (uri.HasScheme())
 303         return false;
 304
 305
 306     if (HasServer())
 307     {
 308         if (HasUserInfo())
 309         {
 310             if (m_userinfo != uri.m_userinfo)
 311                 return false;
 312         }
 313         else if (uri.HasUserInfo())
 314             return false;
 315
 316         if (m_server != uri.m_server ||
 317             m_hostType != uri.m_hostType)
 318             return false;
 319
 320         if (HasPort())
 321         {
 322             if(m_port != uri.m_port)
 323                 return false;
 324         }
 325         else if (uri.HasPort())
 326             return false;
 327     }
 328     else if (uri.HasServer())
 329         return false;
 330
 331
 332     if (HasPath())
 333     {
 334         if(m_path != uri.m_path)
 335             return false;
 336     }
 337     else if (uri.HasPath())
 338         return false;
 339
 340     if (HasQuery())
 341     {
 342         if (m_query != uri.m_query)
 343             return false;
 344     }
 345     else if (uri.HasQuery())
 346         return false;
 347
 348     if (HasFragment())
 349     {
 350         if (m_fragment != uri.m_fragment)
 351             return false;
 352     }
 353     else if (uri.HasFragment())
 354         return false;
 355
 356     return true;
 357 }
 358
 359 // ---------------------------------------------------------------------------
 360 // IsReference
 361 //
 362 // if there is no authority or scheme, it is a reference
 363 // ---------------------------------------------------------------------------
 364
 365 bool wxURI::IsReference() const
 366 {   return !HasScheme() || !HasServer();  }
 367
 368 // ---------------------------------------------------------------------------
 369 // Parse
 370 //
 371 // Master URI parsing method.  Just calls the individual parsing methods
 372 //
 373 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 374 // URI-reference = URI / relative
 375 // ---------------------------------------------------------------------------
 376
 377 const wxChar* wxURI::Parse(const wxChar* uri)
 378 {
 379     uri = ParseScheme(uri);
 380     uri = ParseAuthority(uri);
 381     uri = ParsePath(uri);
 382     uri = ParseQuery(uri);
 383     return ParseFragment(uri);
 384 }
 385
 386 // ---------------------------------------------------------------------------
 387 // ParseXXX
 388 //
 389 // Individual parsers for each URI component
 390 // ---------------------------------------------------------------------------
 391
 392 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 393 {
 394     wxASSERT(uri != NULL);
 395
 396     //copy of the uri - used for figuring out
 397     //length of each component
 398     const wxChar* uricopy = uri;
 399
 400     //Does the uri have a scheme (first character alpha)?
 401     if (IsAlpha(*uri))
 402     {
 403         m_scheme += *uri++;
 404
 405         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 406         while (IsAlpha(*uri) || IsDigit(*uri) ||
 407                *uri == wxT('+')   ||
 408                *uri == wxT('-')   ||
 409                *uri == wxT('.'))
 410         {
 411             m_scheme += *uri++;
 412         }
 413
 414         //valid scheme?
 415         if (*uri == wxT(':'))
 416         {
 417             //mark the scheme as valid
 418             m_fields |= wxURI_SCHEME;
 419
 420             //move reference point up to input buffer
 421             uricopy = ++uri;
 422         }
 423         else
 424             //relative uri with relative path reference
 425             m_scheme = wxEmptyString;
 426     }
 427 //    else
 428         //relative uri with _possible_ relative path reference
 429
 430     return uricopy;
 431 }
 432
 433 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 434 {
 435     // authority     = [ userinfo "@" ] host [ ":" port ]
 436     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 437     {
 438         //skip past the two slashes
 439         uri += 2;
 440
 441         // ############# DEVIATION FROM RFC #########################
 442         // Don't parse the server component for file URIs
 443         if(m_scheme != wxT("file"))
 444         {
 445             //normal way
 446         uri = ParseUserInfo(uri);
 447         uri = ParseServer(uri);
 448         return ParsePort(uri);
 449         }
 450     }
 451
 452     return uri;
 453 }
 454
 455 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
 456 {
 457     wxASSERT(uri != NULL);
 458
 459     //copy of the uri - used for figuring out
 460     //length of each component
 461     const wxChar* uricopy = uri;
 462
 463     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 464     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 465     {
 466         if(IsUnreserved(*uri) ||
 467            IsSubDelim(*uri) || *uri == wxT(':'))
 468             m_userinfo += *uri++;
 469         else if (IsEscape(uri))
 470         {
 471             m_userinfo += *uri++;
 472             m_userinfo += *uri++;
 473             m_userinfo += *uri++;
 474         }
 475         else
 476             Escape(m_userinfo, *uri++);
 477     }
 478
 479     if(*uri == wxT('@'))
 480     {
 481         //valid userinfo
 482         m_fields |= wxURI_USERINFO;
 483
 484         uricopy = ++uri;
 485     }
 486     else
 487         m_userinfo = wxEmptyString;
 488
 489     return uricopy;
 490 }
 491
 492 const wxChar* wxURI::ParseServer(const wxChar* uri)
 493 {
 494     wxASSERT(uri != NULL);
 495
 496     //copy of the uri - used for figuring out
 497     //length of each component
 498     const wxChar* uricopy = uri;
 499
 500     // host          = IP-literal / IPv4address / reg-name
 501     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 502     if (*uri == wxT('['))
 503     {
 504         ++uri; //some compilers don't support *&ing a ++*
 505         if (ParseIPv6address(uri) && *uri == wxT(']'))
 506         {
 507             ++uri;
 508             m_hostType = wxURI_IPV6ADDRESS;
 509
 510             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 511             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 512             theBuffer.SetLength(uri-uricopy);
 513         }
 514         else
 515         {
 516             uri = uricopy;
 517
 518             ++uri; //some compilers don't support *&ing a ++*
 519             if (ParseIPvFuture(uri) && *uri == wxT(']'))
 520             {
 521                 ++uri;
 522                 m_hostType = wxURI_IPVFUTURE;
 523
 524                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 525                 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 526                 theBuffer.SetLength(uri-uricopy);
 527             }
 528             else
 529                 uri = uricopy;
 530         }
 531     }
 532     else
 533     {
 534         if (ParseIPv4address(uri))
 535         {
 536             m_hostType = wxURI_IPV4ADDRESS;
 537
 538             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 539             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 540             theBuffer.SetLength(uri-uricopy);
 541         }
 542         else
 543             uri = uricopy;
 544     }
 545
 546     if(m_hostType == wxURI_REGNAME)
 547     {
 548         uri = uricopy;
 549         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 550         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 551         {
 552             if(IsUnreserved(*uri) ||  IsSubDelim(*uri))
 553                 m_server += *uri++;
 554             else if (IsEscape(uri))
 555             {
 556                 m_server += *uri++;
 557                 m_server += *uri++;
 558                 m_server += *uri++;
 559             }
 560             else
 561                 Escape(m_server, *uri++);
 562         }
 563     }
 564
 565     //mark the server as valid
 566     m_fields |= wxURI_SERVER;
 567
 568     return uri;
 569 }
 570
 571
 572 const wxChar* wxURI::ParsePort(const wxChar* uri)
 573 {
 574     wxASSERT(uri != NULL);
 575
 576     // port          = *DIGIT
 577     if(*uri == wxT(':'))
 578     {
 579         ++uri;
 580         while(IsDigit(*uri))
 581         {
 582             m_port += *uri++;
 583         }
 584
 585         //mark the port as valid
 586         m_fields |= wxURI_PORT;
 587     }
 588
 589     return uri;
 590 }
 591
 592 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 593 {
 594     wxASSERT(uri != NULL);
 595
 596     //copy of the uri - used for figuring out
 597     //length of each component
 598     const wxChar* uricopy = uri;
 599
 600     /// hier-part     = "//" authority path-abempty
 601     ///               / path-absolute
 602     ///               / path-rootless
 603     ///               / path-empty
 604     ///
 605     /// relative-part = "//" authority path-abempty
 606     ///               / path-absolute
 607     ///               / path-noscheme
 608     ///               / path-empty
 609     ///
 610     /// path-abempty  = *( "/" segment )
 611     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 612     /// path-noscheme = segment-nz-nc *( "/" segment )
 613     /// path-rootless = segment-nz *( "/" segment )
 614     /// path-empty    = 0<pchar>
 615     ///
 616     /// segment       = *pchar
 617     /// segment-nz    = 1*pchar
 618     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 619     ///               ; non-zero-length segment without any colon ":"
 620     ///
 621     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 622     if (*uri == wxT('/'))
 623     {
 624         m_path += *uri++;
 625
 626         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 627         {
 628             if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 629                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 630                 m_path += *uri++;
 631             else if (IsEscape(uri))
 632             {
 633                 m_path += *uri++;
 634                 m_path += *uri++;
 635                 m_path += *uri++;
 636             }
 637             else
 638                 Escape(m_path, *uri++);
 639         }
 640
 641         if (bNormalize)
 642         {
 643             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 644 #if wxUSE_STL
 645             wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 646 #endif
 647             Normalize(theBuffer, true);
 648             theBuffer.SetLength(wxStrlen(theBuffer));
 649         }
 650         //mark the path as valid
 651         m_fields |= wxURI_PATH;
 652     }
 653     else if(*uri) //Relative path
 654     {
 655         if (bReference)
 656         {
 657             //no colon allowed
 658             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 659             {
 660                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 661                   *uri == wxT('@') || *uri == wxT('/'))
 662                     m_path += *uri++;
 663                 else if (IsEscape(uri))
 664                 {
 665                     m_path += *uri++;
 666                     m_path += *uri++;
 667                     m_path += *uri++;
 668                 }
 669                 else
 670                     Escape(m_path, *uri++);
 671             }
 672         }
 673         else
 674         {
 675             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 676             {
 677                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 678                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 679                     m_path += *uri++;
 680                 else if (IsEscape(uri))
 681                 {
 682                     m_path += *uri++;
 683                     m_path += *uri++;
 684                     m_path += *uri++;
 685                 }
 686                 else
 687                     Escape(m_path, *uri++);
 688             }
 689         }
 690
 691         if (uri != uricopy)
 692         {
 693             if (bNormalize)
 694             {
 695                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 696 #if wxUSE_STL
 697                 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 698 #endif
 699                 Normalize(theBuffer);
 700                 theBuffer.SetLength(wxStrlen(theBuffer));
 701             }
 702
 703             //mark the path as valid
 704             m_fields |= wxURI_PATH;
 705         }
 706     }
 707
 708     return uri;
 709 }
 710
 711
 712 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 713 {
 714     wxASSERT(uri != NULL);
 715
 716     // query         = *( pchar / "/" / "?" )
 717     if (*uri == wxT('?'))
 718     {
 719         ++uri;
 720         while(*uri && *uri != wxT('#'))
 721         {
 722             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 723                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 724                   m_query += *uri++;
 725             else if (IsEscape(uri))
 726             {
 727                   m_query += *uri++;
 728                   m_query += *uri++;
 729                   m_query += *uri++;
 730             }
 731             else
 732                   Escape(m_query, *uri++);
 733         }
 734
 735         //mark the server as valid
 736         m_fields |= wxURI_QUERY;
 737     }
 738
 739     return uri;
 740 }
 741
 742
 743 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 744 {
 745     wxASSERT(uri != NULL);
 746
 747     // fragment      = *( pchar / "/" / "?" )
 748     if (*uri == wxT('#'))
 749     {
 750         ++uri;
 751         while(*uri)
 752         {
 753             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 754                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 755                   m_fragment += *uri++;
 756             else if (IsEscape(uri))
 757             {
 758                   m_fragment += *uri++;
 759                   m_fragment += *uri++;
 760                   m_fragment += *uri++;
 761             }
 762             else
 763                   Escape(m_fragment, *uri++);
 764         }
 765
 766         //mark the server as valid
 767         m_fields |= wxURI_FRAGMENT;
 768     }
 769
 770     return uri;
 771 }
 772
 773 // ---------------------------------------------------------------------------
 774 // Resolve
 775 //
 776 // Builds missing components of this uri from a base uri
 777 //
 778 // A version of the algorithm outlined in the RFC is used here
 779 // (it is shown in comments)
 780 //
 781 // Note that an empty URI inherits all components
 782 // ---------------------------------------------------------------------------
 783
 784 void wxURI::Resolve(const wxURI& base, int flags)
 785 {
 786     wxASSERT_MSG(!base.IsReference(),
 787                 wxT("wxURI to inherit from must not be a reference!"));
 788
 789     // If we arn't being strict, enable the older (pre-RFC2396)
 790     // loophole that allows this uri to inherit other
 791     // properties from the base uri - even if the scheme
 792     // is defined
 793     if ( !(flags & wxURI_STRICT) &&
 794             HasScheme() && base.HasScheme() &&
 795                 m_scheme == base.m_scheme )
 796     {
 797         m_fields -= wxURI_SCHEME;
 798     }
 799
 800
 801     // Do nothing if this is an absolute wxURI
 802     //    if defined(R.scheme) then
 803     //       T.scheme    = R.scheme;
 804     //       T.authority = R.authority;
 805     //       T.path      = remove_dot_segments(R.path);
 806     //       T.query     = R.query;
 807     if (HasScheme())
 808     {
 809         return;
 810     }
 811
 812     //No scheme - inherit
 813     m_scheme = base.m_scheme;
 814     m_fields |= wxURI_SCHEME;
 815
 816     // All we need to do for relative URIs with an
 817     // authority component is just inherit the scheme
 818     //       if defined(R.authority) then
 819     //          T.authority = R.authority;
 820     //          T.path      = remove_dot_segments(R.path);
 821     //          T.query     = R.query;
 822     if (HasServer())
 823     {
 824         return;
 825     }
 826
 827     //No authority - inherit
 828     if (base.HasUserInfo())
 829     {
 830         m_userinfo = base.m_userinfo;
 831         m_fields |= wxURI_USERINFO;
 832     }
 833
 834     m_server = base.m_server;
 835     m_hostType = base.m_hostType;
 836     m_fields |= wxURI_SERVER;
 837
 838     if (base.HasPort())
 839     {
 840         m_port = base.m_port;
 841         m_fields |= wxURI_PORT;
 842     }
 843
 844
 845     // Simple path inheritance from base
 846     if (!HasPath())
 847     {
 848         //             T.path = Base.path;
 849         m_path = base.m_path;
 850         m_fields |= wxURI_PATH;
 851
 852
 853         //             if defined(R.query) then
 854         //                T.query = R.query;
 855         //             else
 856         //                T.query = Base.query;
 857         //             endif;
 858         if (!HasQuery())
 859         {
 860             m_query = base.m_query;
 861             m_fields |= wxURI_QUERY;
 862         }
 863     }
 864     else
 865     {
 866         //             if (R.path starts-with "/") then
 867         //                T.path = remove_dot_segments(R.path);
 868         //             else
 869         //                T.path = merge(Base.path, R.path);
 870         //                T.path = remove_dot_segments(T.path);
 871         //             endif;
 872         //             T.query = R.query;
 873         if (m_path[0u] != wxT('/'))
 874         {
 875             //Merge paths
 876             wxString::const_iterator op = m_path.begin();
 877             wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
 878
 879             //not a ending directory?  move up
 880             if (base.m_path[0] && *(bp-1) != wxT('/'))
 881                 UpTree(base.m_path.begin(), bp);
 882
 883             //normalize directories
 884             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 885                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 886             {
 887                 UpTree(base.m_path.begin(), bp);
 888
 889                 if (*(op+2) == '\0')
 890                     op += 2;
 891                 else
 892                     op += 3;
 893             }
 894
 895             m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
 896                      m_path.substr((op - m_path.begin()), m_path.length());
 897         }
 898     }
 899
 900     //T.fragment = R.fragment;
 901 }
 902
 903 // ---------------------------------------------------------------------------
 904 // UpTree
 905 //
 906 // Moves a URI path up a directory
 907 // ---------------------------------------------------------------------------
 908
 909 //static
 910 void wxURI::UpTree(wxString::const_iterator uristart,
 911                    wxString::const_iterator& uri)
 912 {
 913     if (uri != uristart && *(uri-1) == wxT('/'))
 914     {
 915         uri -= 2;
 916     }
 917
 918     for(;uri != uristart; --uri)
 919     {
 920         if (*uri == wxT('/'))
 921         {
 922             ++uri;
 923             break;
 924         }
 925     }
 926
 927     //!!!TODO:HACK!!!//
 928     if (uri == uristart && *uri == wxT('/'))
 929         ++uri;
 930     //!!!//
 931 }
 932
 933 // FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
 934 /*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 935 {
 936     if (uri != uristart && *(uri-1) == wxT('/'))
 937     {
 938         uri -= 2;
 939     }
 940
 941     for(;uri != uristart; --uri)
 942     {
 943         if (*uri == wxT('/'))
 944         {
 945             ++uri;
 946             break;
 947         }
 948     }
 949
 950     //!!!TODO:HACK!!!//
 951     if (uri == uristart && *uri == wxT('/'))
 952         ++uri;
 953     //!!!//
 954 }
 955 // end of FIXME-UTF8
 956
 957 // ---------------------------------------------------------------------------
 958 // Normalize
 959 //
 960 // Normalizes directories in-place
 961 //
 962 // I.E. ./ and . are ignored
 963 //
 964 // ../ and .. are removed if a directory is before it, along
 965 // with that directory (leading .. and ../ are kept)
 966 // ---------------------------------------------------------------------------
 967
 968 //static
 969 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 970 {
 971     wxChar* cp = s;
 972     wxChar* bp = s;
 973
 974     if(s[0] == wxT('/'))
 975         ++bp;
 976
 977     while(*cp)
 978     {
 979         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 980             && (bp == cp || *(cp-1) == wxT('/')))
 981         {
 982             //. _or_ ./  - ignore
 983             if (*(cp+1) == '\0')
 984                 cp += 1;
 985             else
 986                 cp += 2;
 987         }
 988         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 989                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 990                 && (bp == cp || *(cp-1) == wxT('/')))
 991         {
 992             //.. _or_ ../ - go up the tree
 993             if (s != bp)
 994             {
 995                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 996
 997                 if (*(cp+2) == '\0')
 998                     cp += 2;
 999                 else
1000                     cp += 3;
1001             }
1002             else if (!bIgnoreLeads)
1003
1004             {
1005                 *bp++ = *cp++;
1006                 *bp++ = *cp++;
1007                 if (*cp)
1008                     *bp++ = *cp++;
1009
1010                 s = bp;
1011             }
1012             else
1013             {
1014                 if (*(cp+2) == '\0')
1015                     cp += 2;
1016                 else
1017                     cp += 3;
1018             }
1019         }
1020         else
1021             *s++ = *cp++;
1022     }
1023
1024     *s = '\0';
1025 }
1026
1027 // ---------------------------------------------------------------------------
1028 // ParseH16
1029 //
1030 // Parses 1 to 4 hex values.  Returns true if the first character of the input
 1031 // string is a valid hex character.  It is the caller's responsibility to move
1032 // the input string back to its original position on failure.
1033 // ---------------------------------------------------------------------------
1034
1035 bool wxURI::ParseH16(const wxChar*& uri)
1036 {
1037     // h16           = 1*4HEXDIG
1038     if(!IsHex(*++uri))
1039         return false;
1040
1041     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1042         ++uri;
1043
1044     return true;
1045 }
1046
1047 // ---------------------------------------------------------------------------
1048 // ParseIPXXX
1049 //
1050 // Parses a certain version of an IP address and moves the input string past
1051 // it.  Returns true if the input  string contains the proper version of an ip
1052 // address.  It is the caller's responsability to move the input string back
1053 // to its original position on failure.
1054 // ---------------------------------------------------------------------------
1055
1056 bool wxURI::ParseIPv4address(const wxChar*& uri)
1057 {
1058     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
1059     //
1060     //dec-octet     =      DIGIT                    ; 0-9
1061     //                / %x31-39 DIGIT               ; 10-99
1062     //                / "1" 2DIGIT                  ; 100-199
1063     //                / "2" %x30-34 DIGIT           ; 200-249
1064     //                / "25" %x30-35                ; 250-255
1065     size_t iIPv4 = 0;
1066     if (IsDigit(*uri))
1067     {
1068         ++iIPv4;
1069
1070
1071         //each ip part must be between 0-255 (dupe of version in for loop)
1072         if( IsDigit(*++uri) && IsDigit(*++uri) &&
1073            //100 or less  (note !)
1074            !( (*(uri-2) < wxT('2')) ||
1075            //240 or less
1076              (*(uri-2) == wxT('2') &&
1077                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1078              )
1079             )
1080           )
1081         {
1082             return false;
1083         }
1084
1085         if(IsDigit(*uri))++uri;
1086
1087         //compilers should unroll this loop
1088         for(; iIPv4 < 4; ++iIPv4)
1089         {
1090             if (*uri != wxT('.') || !IsDigit(*++uri))
1091                 break;
1092
1093             //each ip part must be between 0-255
1094             if( IsDigit(*++uri) && IsDigit(*++uri) &&
1095                //100 or less  (note !)
1096                !( (*(uri-2) < wxT('2')) ||
1097                //240 or less
1098                  (*(uri-2) == wxT('2') &&
1099                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1100                  )
1101                 )
1102               )
1103             {
1104                 return false;
1105             }
1106             if(IsDigit(*uri))++uri;
1107         }
1108     }
1109     return iIPv4 == 4;
1110 }
1111
1112 bool wxURI::ParseIPv6address(const wxChar*& uri)
1113 {
1114     // IPv6address   =                            6( h16 ":" ) ls32
1115     //               /                       "::" 5( h16 ":" ) ls32
1116     //               / [               h16 ] "::" 4( h16 ":" ) ls32
1117     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1118     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1119     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1120     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
1121     //               / [ *5( h16 ":" ) h16 ] "::"              h16
1122     //               / [ *6( h16 ":" ) h16 ] "::"
1123
1124     size_t numPrefix = 0,
1125               maxPostfix;
1126
1127     bool bEndHex = false;
1128
1129     for( ; numPrefix < 6; ++numPrefix)
1130     {
1131         if(!ParseH16(uri))
1132         {
1133             --uri;
1134             bEndHex = true;
1135             break;
1136         }
1137
1138         if(*uri != wxT(':'))
1139         {
1140             break;
1141         }
1142     }
1143
1144     if(!bEndHex && !ParseH16(uri))
1145     {
1146         --uri;
1147
1148         if (numPrefix)
1149             return false;
1150
1151         if (*uri == wxT(':'))
1152         {
1153             if (*++uri != wxT(':'))
1154                 return false;
1155
1156             maxPostfix = 5;
1157         }
1158         else
1159             maxPostfix = 6;
1160     }
1161     else
1162     {
1163         if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1164         {
1165             if (numPrefix != 6)
1166                 return false;
1167
1168             while (*--uri != wxT(':')) {}
1169             ++uri;
1170
1171             const wxChar* uristart = uri;
1172             //parse ls32
1173             // ls32          = ( h16 ":" h16 ) / IPv4address
1174             if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1175                 return true;
1176
1177             uri = uristart;
1178
1179             if (ParseIPv4address(uri))
1180                 return true;
1181             else
1182                 return false;
1183         }
1184         else
1185         {
1186             uri += 2;
1187
1188             if (numPrefix > 3)
1189                 maxPostfix = 0;
1190             else
1191                 maxPostfix = 4 - numPrefix;
1192         }
1193     }
1194
1195     bool bAllowAltEnding = maxPostfix == 0;
1196
1197     for(; maxPostfix != 0; --maxPostfix)
1198     {
1199         if(!ParseH16(uri) || *uri != wxT(':'))
1200             return false;
1201     }
1202
1203     if(numPrefix <= 4)
1204     {
1205         const wxChar* uristart = uri;
1206         //parse ls32
1207         // ls32          = ( h16 ":" h16 ) / IPv4address
1208         if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1209             return true;
1210
1211         uri = uristart;
1212
1213         if (ParseIPv4address(uri))
1214             return true;
1215
1216         uri = uristart;
1217
1218         if (!bAllowAltEnding)
1219             return false;
1220     }
1221
1222     if(numPrefix <= 5 && ParseH16(uri))
1223         return true;
1224
1225     return true;
1226 }
1227
1228 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1229 {
1230     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1231     if (*++uri != wxT('v') || !IsHex(*++uri))
1232         return false;
1233
1234     while (IsHex(*++uri)) {}
1235
1236     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1237         return false;
1238
1239     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1240
1241     return true;
1242 }
1243
1244
1245 // ---------------------------------------------------------------------------
1246 // CharToHex
1247 //
1248 // Converts a character into a numeric hexidecimal value, or 0 if the
1249 // passed in character is not a valid hex character
1250 // ---------------------------------------------------------------------------
1251
1252 //static
1253 wxChar wxURI::CharToHex(const wxChar& c)
1254 {
1255     if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1256     if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1257     if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1258
1259     return 0;
1260 }
1261
1262 // ---------------------------------------------------------------------------
1263 // IsXXX
1264 //
1265 // Returns true if the passed in character meets the criteria of the method
1266 // ---------------------------------------------------------------------------
1267
1268 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1269 bool wxURI::IsUnreserved (const wxChar& c)
1270 {   return IsAlpha(c) || IsDigit(c) ||
1271            c == wxT('-') ||
1272            c == wxT('.') ||
1273            c == wxT('_') ||
1274            c == wxT('~') //tilde
1275            ;
1276 }
1277
1278 bool wxURI::IsReserved (const wxChar& c)
1279 {
1280     return IsGenDelim(c) || IsSubDelim(c);
1281 }
1282
1283 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1284 bool wxURI::IsGenDelim (const wxChar& c)
1285 {
1286     return c == wxT(':') ||
1287            c == wxT('/') ||
1288            c == wxT('?') ||
1289            c == wxT('#') ||
1290            c == wxT('[') ||
1291            c == wxT(']') ||
1292            c == wxT('@');
1293 }
1294
1295 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1296 //!               / "*" / "+" / "," / ";" / "="
1297 bool wxURI::IsSubDelim (const wxChar& c)
1298 {
1299     return c == wxT('!') ||
1300            c == wxT('$') ||
1301            c == wxT('&') ||
1302            c == wxT('\'') ||
1303            c == wxT('(') ||
1304            c == wxT(')') ||
1305            c == wxT('*') ||
1306            c == wxT('+') ||
1307            c == wxT(',') ||
1308            c == wxT(';') ||
1309            c == wxT('=')
1310            ;
1311 }
1312
1313 bool wxURI::IsHex(const wxChar& c)
1314 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1315
1316 bool wxURI::IsAlpha(const wxChar& c)
1317 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1318
1319 bool wxURI::IsDigit(const wxChar& c)
1320 {   return c >= wxT('0') && c <= wxT('9');        }
1321
1322
1323 //end of uri.cpp
1324
1325
1326