src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 // For compilers that support precompilation, includes "wx.h".
  20 #include "wx/wxprec.h"
  21
  22 #ifdef __BORLANDC__
  23     #pragma hdrstop
  24 #endif
  25
  26 #include "wx/uri.h"
  27
  28 // ---------------------------------------------------------------------------
  29 // definitions
  30 // ---------------------------------------------------------------------------
  31
// Class-information macro for wxURI, naming wxObject as its base class.
// NOTE(review): presumably this emits the wxWidgets run-time type information
// for the class -- confirm against the macro's definition in wx/object.h.
IMPLEMENT_CLASS(wxURI, wxObject)
  33
  34 // ===========================================================================
  35 // implementation
  36 // ===========================================================================
  37
  38 // ---------------------------------------------------------------------------
  39 // utilities
  40 // ---------------------------------------------------------------------------
  41
  42 // ---------------------------------------------------------------------------
  43 //
  44 //                        wxURI
  45 //
  46 // ---------------------------------------------------------------------------
  47
  48 // ---------------------------------------------------------------------------
  49 //  Constructors
  50 // ---------------------------------------------------------------------------
  51
  52 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  53 {
  54 }
  55
  56 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58     Create(uri);
  59 }
  60
  61 wxURI::wxURI(const wxURI& uri)  : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
  62 {
  63     Assign(uri);
  64 }
  65
  66 // ---------------------------------------------------------------------------
  67 // Destructor and cleanup
  68 // ---------------------------------------------------------------------------
  69
  70 wxURI::~wxURI()
  71 {
  72     Clear();
  73 }
  74
  75 void wxURI::Clear()
  76 {
  77     m_scheme = m_userinfo = m_server = m_port = m_path =
  78     m_query = m_fragment = wxEmptyString;
  79
  80     m_hostType = wxURI_REGNAME;
  81
  82     m_fields = 0;
  83 }
  84
  85 // ---------------------------------------------------------------------------
  86 // Create
  87 //
  88 // This creates the URI - all we do here is call the main parsing method
  89 // ---------------------------------------------------------------------------
  90
  91 const wxChar* wxURI::Create(const wxString& uri)
  92 {
  93     if (m_fields)
  94         Clear();
  95
  96     return Parse(uri);
  97 }
  98
  99 // ---------------------------------------------------------------------------
 100 // Escape Methods
 101 //
 102 // TranslateEscape unencodes a 3 character URL escape sequence
 103 //
 104 // Escape encodes an invalid URI character into a 3 character sequence
 105 //
// IsEscape determines if the input string starts with an escape sequence
// ("%" followed by two hex digits); it does not advance the input string
 108 //
 109 // Unescape unencodes all 3 character URL escape sequences in a wxString
 110 // ---------------------------------------------------------------------------
 111
 112 wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
 113 {
 114     wxChar c1(*s);
 115     wxChar c2(*(s + 1));
 116
 117     wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
 118
 119     return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
 120 }
 121
 122 wxString wxURI::Unescape(const wxString& uri)
 123 {
 124     wxString new_uri;
 125
 126     for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
 127     {
 128         if ( *i == wxT('%') )
 129         {
 130             new_uri += wxURI::TranslateEscape(i + 1);
 131             i += 2;
 132         }
 133         else
 134             new_uri += *i;
 135     }
 136
 137     return new_uri;
 138 }
 139
 140 void wxURI::Escape(wxString& s, const wxChar& c)
 141 {
 142     const wxChar* hdig = wxT("0123456789abcdef");
 143     s += wxT('%');
 144     s += hdig[(c >> 4) & 15];
 145     s += hdig[c & 15];
 146 }
 147
 148 bool wxURI::IsEscape(const wxChar*& uri)
 149 {
 150     // pct-encoded   = "%" HEXDIG HEXDIG
 151     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 152         return true;
 153     else
 154         return false;
 155 }
 156
 157 // ---------------------------------------------------------------------------
 158 // GetUser
 159 // GetPassword
 160 //
 161 // Gets the username and password via the old URL method.
 162 // ---------------------------------------------------------------------------
 163 wxString wxURI::GetUser() const
 164 {
 165       size_t dwPasswordPos = m_userinfo.find(':');
 166
 167       if (dwPasswordPos == wxString::npos)
 168           dwPasswordPos = 0;
 169
 170       return m_userinfo(0, dwPasswordPos);
 171 }
 172
 173 wxString wxURI::GetPassword() const
 174 {
 175       size_t dwPasswordPos = m_userinfo.find(':');
 176
 177       if (dwPasswordPos == wxString::npos)
 178           return wxT("");
 179       else
 180           return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
 181 }
 182
 183 // ---------------------------------------------------------------------------
 184 // BuildURI
 185 //
 186 // BuildURI() builds the entire URI into a useable
 187 // representation, including proper identification characters such as slashes
 188 //
 189 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 190 // the components that accept escape sequences
 191 // ---------------------------------------------------------------------------
 192
 193 wxString wxURI::BuildURI() const
 194 {
 195     wxString ret;
 196
 197     if (HasScheme())
 198         ret = ret + m_scheme + wxT(":");
 199
 200     if (HasServer())
 201     {
 202         ret += wxT("//");
 203
 204         if (HasUserInfo())
 205             ret = ret + m_userinfo + wxT("@");
 206
 207         ret += m_server;
 208
 209         if (HasPort())
 210             ret = ret + wxT(":") + m_port;
 211     }
 212
 213     ret += m_path;
 214
 215     if (HasQuery())
 216         ret = ret + wxT("?") + m_query;
 217
 218     if (HasFragment())
 219         ret = ret + wxT("#") + m_fragment;
 220
 221     return ret;
 222 }
 223
 224 wxString wxURI::BuildUnescapedURI() const
 225 {
 226     wxString ret;
 227
 228     if (HasScheme())
 229         ret = ret + m_scheme + wxT(":");
 230
 231     if (HasServer())
 232     {
 233         ret += wxT("//");
 234
 235         if (HasUserInfo())
 236             ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
 237
 238         if (m_hostType == wxURI_REGNAME)
 239             ret += wxURI::Unescape(m_server);
 240         else
 241             ret += m_server;
 242
 243         if (HasPort())
 244             ret = ret + wxT(":") + m_port;
 245     }
 246
 247     ret += wxURI::Unescape(m_path);
 248
 249     if (HasQuery())
 250         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 251
 252     if (HasFragment())
 253         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 254
 255     return ret;
 256 }
 257
 258 // ---------------------------------------------------------------------------
 259 // Assignment
 260 // ---------------------------------------------------------------------------
 261
 262 wxURI& wxURI::Assign(const wxURI& uri)
 263 {
 264     //assign fields
 265     m_fields = uri.m_fields;
 266
 267     //ref over components
 268     m_scheme = uri.m_scheme;
 269     m_userinfo = uri.m_userinfo;
 270     m_server = uri.m_server;
 271     m_hostType = uri.m_hostType;
 272     m_port = uri.m_port;
 273     m_path = uri.m_path;
 274     m_query = uri.m_query;
 275     m_fragment = uri.m_fragment;
 276
 277     return *this;
 278 }
 279
 280 wxURI& wxURI::operator = (const wxURI& uri)
 281 {
 282     return Assign(uri);
 283 }
 284
 285 wxURI& wxURI::operator = (const wxString& string)
 286 {
 287     Create(string);
 288     return *this;
 289 }
 290
 291 // ---------------------------------------------------------------------------
 292 // Comparison
 293 // ---------------------------------------------------------------------------
 294
 295 bool wxURI::operator == (const wxURI& uri) const
 296 {
 297     if (HasScheme())
 298     {
 299         if(m_scheme != uri.m_scheme)
 300             return false;
 301     }
 302     else if (uri.HasScheme())
 303         return false;
 304
 305
 306     if (HasServer())
 307     {
 308         if (HasUserInfo())
 309         {
 310             if (m_userinfo != uri.m_userinfo)
 311                 return false;
 312         }
 313         else if (uri.HasUserInfo())
 314             return false;
 315
 316         if (m_server != uri.m_server ||
 317             m_hostType != uri.m_hostType)
 318             return false;
 319
 320         if (HasPort())
 321         {
 322             if(m_port != uri.m_port)
 323                 return false;
 324         }
 325         else if (uri.HasPort())
 326             return false;
 327     }
 328     else if (uri.HasServer())
 329         return false;
 330
 331
 332     if (HasPath())
 333     {
 334         if(m_path != uri.m_path)
 335             return false;
 336     }
 337     else if (uri.HasPath())
 338         return false;
 339
 340     if (HasQuery())
 341     {
 342         if (m_query != uri.m_query)
 343             return false;
 344     }
 345     else if (uri.HasQuery())
 346         return false;
 347
 348     if (HasFragment())
 349     {
 350         if (m_fragment != uri.m_fragment)
 351             return false;
 352     }
 353     else if (uri.HasFragment())
 354         return false;
 355
 356     return true;
 357 }
 358
 359 // ---------------------------------------------------------------------------
 360 // IsReference
 361 //
 362 // if there is no authority or scheme, it is a reference
 363 // ---------------------------------------------------------------------------
 364
 365 bool wxURI::IsReference() const
 366 {   return !HasScheme() || !HasServer();  }
 367
 368 // ---------------------------------------------------------------------------
 369 // Parse
 370 //
 371 // Master URI parsing method.  Just calls the individual parsing methods
 372 //
 373 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 374 // URI-reference = URI / relative
 375 // ---------------------------------------------------------------------------
 376
 377 const wxChar* wxURI::Parse(const wxChar* uri)
 378 {
 379     uri = ParseScheme(uri);
 380     uri = ParseAuthority(uri);
 381     uri = ParsePath(uri);
 382     uri = ParseQuery(uri);
 383     return ParseFragment(uri);
 384 }
 385
 386 // ---------------------------------------------------------------------------
 387 // ParseXXX
 388 //
 389 // Individual parsers for each URI component
 390 // ---------------------------------------------------------------------------
 391
 392 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 393 {
 394     wxASSERT(uri != NULL);
 395
 396     //copy of the uri - used for figuring out
 397     //length of each component
 398     const wxChar* uricopy = uri;
 399
 400     //Does the uri have a scheme (first character alpha)?
 401     if (IsAlpha(*uri))
 402     {
 403         m_scheme += *uri++;
 404
 405         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 406         while (IsAlpha(*uri) || IsDigit(*uri) ||
 407                *uri == wxT('+')   ||
 408                *uri == wxT('-')   ||
 409                *uri == wxT('.'))
 410         {
 411             m_scheme += *uri++;
 412         }
 413
 414         //valid scheme?
 415         if (*uri == wxT(':'))
 416         {
 417             //mark the scheme as valid
 418             m_fields |= wxURI_SCHEME;
 419
 420             //move reference point up to input buffer
 421             uricopy = ++uri;
 422         }
 423         else
 424             //relative uri with relative path reference
 425             m_scheme = wxEmptyString;
 426     }
 427 //    else
 428         //relative uri with _possible_ relative path reference
 429
 430     return uricopy;
 431 }
 432
 433 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 434 {
 435     // authority     = [ userinfo "@" ] host [ ":" port ]
 436     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 437     {
 438         //skip past the two slashes
 439         uri += 2;
 440
 441         // ############# DEVIATION FROM RFC #########################
 442         // Don't parse the server component for file URIs
 443         if(m_scheme != wxT("file"))
 444         {
 445             //normal way
 446         uri = ParseUserInfo(uri);
 447         uri = ParseServer(uri);
 448         return ParsePort(uri);
 449         }
 450     }
 451
 452     return uri;
 453 }
 454
 455 const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
 456 {
 457     wxASSERT(uri != NULL);
 458
 459     //copy of the uri - used for figuring out
 460     //length of each component
 461     const wxChar* uricopy = uri;
 462
 463     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 464     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 465     {
 466         if(IsUnreserved(*uri) ||
 467            IsSubDelim(*uri) || *uri == wxT(':'))
 468             m_userinfo += *uri++;
 469         else if (IsEscape(uri))
 470         {
 471             m_userinfo += *uri++;
 472             m_userinfo += *uri++;
 473             m_userinfo += *uri++;
 474         }
 475         else
 476             Escape(m_userinfo, *uri++);
 477     }
 478
 479     if(*uri == wxT('@'))
 480     {
 481         //valid userinfo
 482         m_fields |= wxURI_USERINFO;
 483
 484         uricopy = ++uri;
 485     }
 486     else
 487         m_userinfo = wxEmptyString;
 488
 489     return uricopy;
 490 }
 491
 492 const wxChar* wxURI::ParseServer(const wxChar* uri)
 493 {
 494     wxASSERT(uri != NULL);
 495
 496     //copy of the uri - used for figuring out
 497     //length of each component
 498     const wxChar* uricopy = uri;
 499
 500     // host          = IP-literal / IPv4address / reg-name
 501     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 502     if (*uri == wxT('['))
 503     {
 504         ++uri; //some compilers don't support *&ing a ++*
 505         if (ParseIPv6address(uri) && *uri == wxT(']'))
 506         {
 507             ++uri;
 508             m_hostType = wxURI_IPV6ADDRESS;
 509
 510             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 511             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 512             theBuffer.SetLength(uri-uricopy);
 513         }
 514         else
 515         {
 516             uri = uricopy;
 517
 518             ++uri; //some compilers don't support *&ing a ++*
 519             if (ParseIPvFuture(uri) && *uri == wxT(']'))
 520             {
 521                 ++uri;
 522                 m_hostType = wxURI_IPVFUTURE;
 523
 524                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 525                 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 526                 theBuffer.SetLength(uri-uricopy);
 527             }
 528             else
 529                 uri = uricopy;
 530         }
 531     }
 532     else
 533     {
 534         if (ParseIPv4address(uri))
 535         {
 536             m_hostType = wxURI_IPV4ADDRESS;
 537
 538             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 539             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 540             theBuffer.SetLength(uri-uricopy);
 541         }
 542         else
 543             uri = uricopy;
 544     }
 545
 546     if(m_hostType == wxURI_REGNAME)
 547     {
 548         uri = uricopy;
 549         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 550         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 551         {
 552             if(IsUnreserved(*uri) ||  IsSubDelim(*uri))
 553                 m_server += *uri++;
 554             else if (IsEscape(uri))
 555             {
 556                 m_server += *uri++;
 557                 m_server += *uri++;
 558                 m_server += *uri++;
 559             }
 560             else
 561                 Escape(m_server, *uri++);
 562         }
 563     }
 564
 565     //mark the server as valid
 566     m_fields |= wxURI_SERVER;
 567
 568     return uri;
 569 }
 570
 571
 572 const wxChar* wxURI::ParsePort(const wxChar* uri)
 573 {
 574     wxASSERT(uri != NULL);
 575
 576     // port          = *DIGIT
 577     if(*uri == wxT(':'))
 578     {
 579         ++uri;
 580         while(IsDigit(*uri))
 581         {
 582             m_port += *uri++;
 583         }
 584
 585         //mark the port as valid
 586         m_fields |= wxURI_PORT;
 587     }
 588
 589     return uri;
 590 }
 591
 592 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 593 {
 594     wxASSERT(uri != NULL);
 595
 596     //copy of the uri - used for figuring out
 597     //length of each component
 598     const wxChar* uricopy = uri;
 599
 600     /// hier-part     = "//" authority path-abempty
 601     ///               / path-absolute
 602     ///               / path-rootless
 603     ///               / path-empty
 604     ///
 605     /// relative-part = "//" authority path-abempty
 606     ///               / path-absolute
 607     ///               / path-noscheme
 608     ///               / path-empty
 609     ///
 610     /// path-abempty  = *( "/" segment )
 611     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 612     /// path-noscheme = segment-nz-nc *( "/" segment )
 613     /// path-rootless = segment-nz *( "/" segment )
 614     /// path-empty    = 0<pchar>
 615     ///
 616     /// segment       = *pchar
 617     /// segment-nz    = 1*pchar
 618     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 619     ///               ; non-zero-length segment without any colon ":"
 620     ///
 621     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 622     if (*uri == wxT('/'))
 623     {
 624         m_path += *uri++;
 625
 626         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 627         {
 628             if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 629                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 630                 m_path += *uri++;
 631             else if (IsEscape(uri))
 632             {
 633                 m_path += *uri++;
 634                 m_path += *uri++;
 635                 m_path += *uri++;
 636             }
 637             else
 638                 Escape(m_path, *uri++);
 639         }
 640
 641         if (bNormalize)
 642         {
 643             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 644 #if wxUSE_STL || wxUSE_UNICODE_UTF8
 645             // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
 646             wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 647 #endif
 648             Normalize(theBuffer, true);
 649             theBuffer.SetLength(wxStrlen(theBuffer));
 650         }
 651         //mark the path as valid
 652         m_fields |= wxURI_PATH;
 653     }
 654     else if(*uri) //Relative path
 655     {
 656         if (bReference)
 657         {
 658             //no colon allowed
 659             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 660             {
 661                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 662                   *uri == wxT('@') || *uri == wxT('/'))
 663                     m_path += *uri++;
 664                 else if (IsEscape(uri))
 665                 {
 666                     m_path += *uri++;
 667                     m_path += *uri++;
 668                     m_path += *uri++;
 669                 }
 670                 else
 671                     Escape(m_path, *uri++);
 672             }
 673         }
 674         else
 675         {
 676             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 677             {
 678                 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
 679                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 680                     m_path += *uri++;
 681                 else if (IsEscape(uri))
 682                 {
 683                     m_path += *uri++;
 684                     m_path += *uri++;
 685                     m_path += *uri++;
 686                 }
 687                 else
 688                     Escape(m_path, *uri++);
 689             }
 690         }
 691
 692         if (uri != uricopy)
 693         {
 694             if (bNormalize)
 695             {
 696                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 697 #if wxUSE_STL || wxUSE_UNICODE_UTF8
 698                 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
 699                 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 700 #endif
 701                 Normalize(theBuffer);
 702                 theBuffer.SetLength(wxStrlen(theBuffer));
 703             }
 704
 705             //mark the path as valid
 706             m_fields |= wxURI_PATH;
 707         }
 708     }
 709
 710     return uri;
 711 }
 712
 713
 714 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 715 {
 716     wxASSERT(uri != NULL);
 717
 718     // query         = *( pchar / "/" / "?" )
 719     if (*uri == wxT('?'))
 720     {
 721         ++uri;
 722         while(*uri && *uri != wxT('#'))
 723         {
 724             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 725                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 726                   m_query += *uri++;
 727             else if (IsEscape(uri))
 728             {
 729                   m_query += *uri++;
 730                   m_query += *uri++;
 731                   m_query += *uri++;
 732             }
 733             else
 734                   Escape(m_query, *uri++);
 735         }
 736
 737         //mark the server as valid
 738         m_fields |= wxURI_QUERY;
 739     }
 740
 741     return uri;
 742 }
 743
 744
 745 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 746 {
 747     wxASSERT(uri != NULL);
 748
 749     // fragment      = *( pchar / "/" / "?" )
 750     if (*uri == wxT('#'))
 751     {
 752         ++uri;
 753         while(*uri)
 754         {
 755             if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
 756                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 757                   m_fragment += *uri++;
 758             else if (IsEscape(uri))
 759             {
 760                   m_fragment += *uri++;
 761                   m_fragment += *uri++;
 762                   m_fragment += *uri++;
 763             }
 764             else
 765                   Escape(m_fragment, *uri++);
 766         }
 767
 768         //mark the server as valid
 769         m_fields |= wxURI_FRAGMENT;
 770     }
 771
 772     return uri;
 773 }
 774
 775 // ---------------------------------------------------------------------------
 776 // Resolve
 777 //
 778 // Builds missing components of this uri from a base uri
 779 //
 780 // A version of the algorithm outlined in the RFC is used here
 781 // (it is shown in comments)
 782 //
 783 // Note that an empty URI inherits all components
 784 // ---------------------------------------------------------------------------
 785
 786 void wxURI::Resolve(const wxURI& base, int flags)
 787 {
 788     wxASSERT_MSG(!base.IsReference(),
 789                 wxT("wxURI to inherit from must not be a reference!"));
 790
 791     // If we arn't being strict, enable the older (pre-RFC2396)
 792     // loophole that allows this uri to inherit other
 793     // properties from the base uri - even if the scheme
 794     // is defined
 795     if ( !(flags & wxURI_STRICT) &&
 796             HasScheme() && base.HasScheme() &&
 797                 m_scheme == base.m_scheme )
 798     {
 799         m_fields -= wxURI_SCHEME;
 800     }
 801
 802
 803     // Do nothing if this is an absolute wxURI
 804     //    if defined(R.scheme) then
 805     //       T.scheme    = R.scheme;
 806     //       T.authority = R.authority;
 807     //       T.path      = remove_dot_segments(R.path);
 808     //       T.query     = R.query;
 809     if (HasScheme())
 810     {
 811         return;
 812     }
 813
 814     //No scheme - inherit
 815     m_scheme = base.m_scheme;
 816     m_fields |= wxURI_SCHEME;
 817
 818     // All we need to do for relative URIs with an
 819     // authority component is just inherit the scheme
 820     //       if defined(R.authority) then
 821     //          T.authority = R.authority;
 822     //          T.path      = remove_dot_segments(R.path);
 823     //          T.query     = R.query;
 824     if (HasServer())
 825     {
 826         return;
 827     }
 828
 829     //No authority - inherit
 830     if (base.HasUserInfo())
 831     {
 832         m_userinfo = base.m_userinfo;
 833         m_fields |= wxURI_USERINFO;
 834     }
 835
 836     m_server = base.m_server;
 837     m_hostType = base.m_hostType;
 838     m_fields |= wxURI_SERVER;
 839
 840     if (base.HasPort())
 841     {
 842         m_port = base.m_port;
 843         m_fields |= wxURI_PORT;
 844     }
 845
 846
 847     // Simple path inheritance from base
 848     if (!HasPath())
 849     {
 850         //             T.path = Base.path;
 851         m_path = base.m_path;
 852         m_fields |= wxURI_PATH;
 853
 854
 855         //             if defined(R.query) then
 856         //                T.query = R.query;
 857         //             else
 858         //                T.query = Base.query;
 859         //             endif;
 860         if (!HasQuery())
 861         {
 862             m_query = base.m_query;
 863             m_fields |= wxURI_QUERY;
 864         }
 865     }
 866     else
 867     {
 868         //             if (R.path starts-with "/") then
 869         //                T.path = remove_dot_segments(R.path);
 870         //             else
 871         //                T.path = merge(Base.path, R.path);
 872         //                T.path = remove_dot_segments(T.path);
 873         //             endif;
 874         //             T.query = R.query;
 875         if (m_path[0u] != wxT('/'))
 876         {
 877             //Merge paths
 878             wxString::const_iterator op = m_path.begin();
 879             wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
 880
 881             //not a ending directory?  move up
 882             if (base.m_path[0] && *(bp-1) != wxT('/'))
 883                 UpTree(base.m_path.begin(), bp);
 884
 885             //normalize directories
 886             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 887                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 888             {
 889                 UpTree(base.m_path.begin(), bp);
 890
 891                 if (*(op+2) == '\0')
 892                     op += 2;
 893                 else
 894                     op += 3;
 895             }
 896
 897             m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
 898                      m_path.substr((op - m_path.begin()), m_path.length());
 899         }
 900     }
 901
 902     //T.fragment = R.fragment;
 903 }
 904
 905 // ---------------------------------------------------------------------------
 906 // UpTree
 907 //
 908 // Moves a URI path up a directory
 909 // ---------------------------------------------------------------------------
 910
 911 //static
 912 void wxURI::UpTree(wxString::const_iterator uristart,
 913                    wxString::const_iterator& uri)
 914 {
 915     if (uri != uristart && *(uri-1) == wxT('/'))
 916     {
 917         uri -= 2;
 918     }
 919
 920     for(;uri != uristart; --uri)
 921     {
 922         if (*uri == wxT('/'))
 923         {
 924             ++uri;
 925             break;
 926         }
 927     }
 928
 929     //!!!TODO:HACK!!!//
 930     if (uri == uristart && *uri == wxT('/'))
 931         ++uri;
 932     //!!!//
 933 }
 934
 935 // FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
 936 /*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 937 {
 938     if (uri != uristart && *(uri-1) == wxT('/'))
 939     {
 940         uri -= 2;
 941     }
 942
 943     for(;uri != uristart; --uri)
 944     {
 945         if (*uri == wxT('/'))
 946         {
 947             ++uri;
 948             break;
 949         }
 950     }
 951
 952     //!!!TODO:HACK!!!//
 953     if (uri == uristart && *uri == wxT('/'))
 954         ++uri;
 955     //!!!//
 956 }
 957 // end of FIXME-UTF8
 958
 959 // ---------------------------------------------------------------------------
 960 // Normalize
 961 //
 962 // Normalizes directories in-place
 963 //
 964 // I.E. ./ and . are ignored
 965 //
 966 // ../ and .. are removed if a directory is before it, along
 967 // with that directory (leading .. and ../ are kept)
 968 // ---------------------------------------------------------------------------
 969
 970 //static
 971 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 972 {
 973     wxChar* cp = s;
 974     wxChar* bp = s;
 975
 976     if(s[0] == wxT('/'))
 977         ++bp;
 978
 979     while(*cp)
 980     {
 981         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 982             && (bp == cp || *(cp-1) == wxT('/')))
 983         {
 984             //. _or_ ./  - ignore
 985             if (*(cp+1) == '\0')
 986                 cp += 1;
 987             else
 988                 cp += 2;
 989         }
 990         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 991                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 992                 && (bp == cp || *(cp-1) == wxT('/')))
 993         {
 994             //.. _or_ ../ - go up the tree
 995             if (s != bp)
 996             {
 997                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 998
 999                 if (*(cp+2) == '\0')
1000                     cp += 2;
1001                 else
1002                     cp += 3;
1003             }
1004             else if (!bIgnoreLeads)
1005
1006             {
1007                 *bp++ = *cp++;
1008                 *bp++ = *cp++;
1009                 if (*cp)
1010                     *bp++ = *cp++;
1011
1012                 s = bp;
1013             }
1014             else
1015             {
1016                 if (*(cp+2) == '\0')
1017                     cp += 2;
1018                 else
1019                     cp += 3;
1020             }
1021         }
1022         else
1023             *s++ = *cp++;
1024     }
1025
1026     *s = '\0';
1027 }
1028
1029 // ---------------------------------------------------------------------------
1030 // ParseH16
1031 //
// Parses 1 to 4 hex values.  The input string is advanced before each
// character is tested, so this returns true if the character following the
// initial position is a valid hex character.  It is the caller's
// responsibility to move the input string back to its original position on
// failure.
1035 // ---------------------------------------------------------------------------
1036
1037 bool wxURI::ParseH16(const wxChar*& uri)
1038 {
1039     // h16           = 1*4HEXDIG
1040     if(!IsHex(*++uri))
1041         return false;
1042
1043     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1044         ++uri;
1045
1046     return true;
1047 }
1048
1049 // ---------------------------------------------------------------------------
1050 // ParseIPXXX
1051 //
// Parses a certain version of an IP address and moves the input string past
// it.  Returns true if the input string contains the proper version of an IP
// address.  It is the caller's responsibility to move the input string back
// to its original position on failure.
1056 // ---------------------------------------------------------------------------
1057
1058 bool wxURI::ParseIPv4address(const wxChar*& uri)
1059 {
1060     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
1061     //
1062     //dec-octet     =      DIGIT                    ; 0-9
1063     //                / %x31-39 DIGIT               ; 10-99
1064     //                / "1" 2DIGIT                  ; 100-199
1065     //                / "2" %x30-34 DIGIT           ; 200-249
1066     //                / "25" %x30-35                ; 250-255
1067     size_t iIPv4 = 0;
1068     if (IsDigit(*uri))
1069     {
1070         ++iIPv4;
1071
1072
1073         //each ip part must be between 0-255 (dupe of version in for loop)
1074         if( IsDigit(*++uri) && IsDigit(*++uri) &&
1075            //100 or less  (note !)
1076            !( (*(uri-2) < wxT('2')) ||
1077            //240 or less
1078              (*(uri-2) == wxT('2') &&
1079                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1080              )
1081             )
1082           )
1083         {
1084             return false;
1085         }
1086
1087         if(IsDigit(*uri))++uri;
1088
1089         //compilers should unroll this loop
1090         for(; iIPv4 < 4; ++iIPv4)
1091         {
1092             if (*uri != wxT('.') || !IsDigit(*++uri))
1093                 break;
1094
1095             //each ip part must be between 0-255
1096             if( IsDigit(*++uri) && IsDigit(*++uri) &&
1097                //100 or less  (note !)
1098                !( (*(uri-2) < wxT('2')) ||
1099                //240 or less
1100                  (*(uri-2) == wxT('2') &&
1101                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1102                  )
1103                 )
1104               )
1105             {
1106                 return false;
1107             }
1108             if(IsDigit(*uri))++uri;
1109         }
1110     }
1111     return iIPv4 == 4;
1112 }
1113
bool wxURI::ParseIPv6address(const wxChar*& uri)
{
    // Attempts to match the IPv6address production below, advancing uri as
    // it goes.  Returns false as soon as a mismatch is detected; uri is NOT
    // restored in that case.
    //
    // IPv6address   =                            6( h16 ":" ) ls32
    //               /                       "::" 5( h16 ":" ) ls32
    //               / [               h16 ] "::" 4( h16 ":" ) ls32
    //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    //               / [ *4( h16 ":" ) h16 ] "::"              ls32
    //               / [ *5( h16 ":" ) h16 ] "::"              h16
    //               / [ *6( h16 ":" ) h16 ] "::"

    // numPrefix  - number of "h16 ':'" groups counted by the first loop
    // maxPostfix - number of "h16 ':'" groups required by the second loop;
    //              assigned on every path that reaches that loop
    size_t numPrefix = 0,
              maxPostfix;

    // set when the prefix loop stopped because ParseH16() rejected the input
    bool bEndHex = false;

    // Count up to six leading groups.  Note that ParseH16() advances uri
    // before testing a character, so each call steps over the ':' that the
    // previous iteration stopped on.
    for( ; numPrefix < 6; ++numPrefix)
    {
        if(!ParseH16(uri))
        {
            // ParseH16() advanced exactly once before failing - undo it
            --uri;
            bEndHex = true;
            break;
        }

        // a group that is not followed by ':' ends the prefix and is not
        // counted in numPrefix
        if(*uri != wxT(':'))
        {
            break;
        }
    }

    // Taken when the prefix loop ended without a hex failure and one more
    // ParseH16() attempt is rejected.
    if(!bEndHex && !ParseH16(uri))
    {
        // undo the single advance made by the failed ParseH16()
        --uri;

        // no "::" handling here if any prefix group was counted
        if (numPrefix)
            return false;

        if (*uri == wxT(':'))
        {
            // a single ':' is not acceptable, it has to be "::"
            if (*++uri != wxT(':'))
                return false;

            maxPostfix = 5;
        }
        else
            maxPostfix = 6;
    }
    else
    {
        if (*uri != wxT(':') || *(uri+1) != wxT(':'))
        {
            // not positioned on "::" - only acceptable if all six prefix
            // groups were counted, i.e. the first alternative of the grammar
            if (numPrefix != 6)
                return false;

            // Scan back to the most recent ':' and step just past it.  This
            // terminates because numPrefix == 6 implies a ':' was seen after
            // each of the six groups.
            while (*--uri != wxT(':')) {}
            ++uri;

            const wxChar* uristart = uri;
            //parse ls32
            // ls32          = ( h16 ":" h16 ) / IPv4address
            // NOTE(review): ParseH16() tests the character *after* uri while
            // ParseIPv4address() tests *uri itself, so the two attempts do
            // not start on the same character - confirm this is intended.
            if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
                return true;

            // h16 ":" h16 did not match - retry from the same spot as IPv4
            uri = uristart;

            if (ParseIPv4address(uri))
                return true;
            else
                return false;
        }
        else
        {
            // skip the "::"
            uri += 2;

            // the more groups preceded "::", the fewer may follow it
            if (numPrefix > 3)
                maxPostfix = 0;
            else
                maxPostfix = 4 - numPrefix;
        }
    }

    // When no postfix groups are required, a failed ls32 match below is not
    // fatal and control falls through to the final h16 attempt instead.
    bool bAllowAltEnding = maxPostfix == 0;

    // every required postfix group must be an h16 followed by ':'
    for(; maxPostfix != 0; --maxPostfix)
    {
        if(!ParseH16(uri) || *uri != wxT(':'))
            return false;
    }

    if(numPrefix <= 4)
    {
        const wxChar* uristart = uri;
        //parse ls32
        // ls32          = ( h16 ":" h16 ) / IPv4address
        if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
            return true;

        uri = uristart;

        if (ParseIPv4address(uri))
            return true;

        // neither form of ls32 matched - rewind before deciding what to do
        uri = uristart;

        if (!bAllowAltEnding)
            return false;
    }

    // NOTE(review): both paths below return true, so the ParseH16() call only
    // matters for its effect on uri (on failure it leaves uri advanced by
    // one) - verify that callers cope with this.
    if(numPrefix <= 5 && ParseH16(uri))
        return true;

    return true;
}
1229
1230 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1231 {
1232     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1233     if (*++uri != wxT('v') || !IsHex(*++uri))
1234         return false;
1235
1236     while (IsHex(*++uri)) {}
1237
1238     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1239         return false;
1240
1241     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1242
1243     return true;
1244 }
1245
1246
1247 // ---------------------------------------------------------------------------
1248 // CharToHex
1249 //
// Converts a character into a numeric hexadecimal value, or 0 if the
// passed-in character is not a valid hex character
1252 // ---------------------------------------------------------------------------
1253
1254 //static
1255 wxChar wxURI::CharToHex(const wxChar& c)
1256 {
1257     if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1258     if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1259     if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1260
1261     return 0;
1262 }
1263
1264 // ---------------------------------------------------------------------------
1265 // IsXXX
1266 //
1267 // Returns true if the passed in character meets the criteria of the method
1268 // ---------------------------------------------------------------------------
1269
1270 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1271 bool wxURI::IsUnreserved (const wxChar& c)
1272 {   return IsAlpha(c) || IsDigit(c) ||
1273            c == wxT('-') ||
1274            c == wxT('.') ||
1275            c == wxT('_') ||
1276            c == wxT('~') //tilde
1277            ;
1278 }
1279
1280 bool wxURI::IsReserved (const wxChar& c)
1281 {
1282     return IsGenDelim(c) || IsSubDelim(c);
1283 }
1284
1285 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1286 bool wxURI::IsGenDelim (const wxChar& c)
1287 {
1288     return c == wxT(':') ||
1289            c == wxT('/') ||
1290            c == wxT('?') ||
1291            c == wxT('#') ||
1292            c == wxT('[') ||
1293            c == wxT(']') ||
1294            c == wxT('@');
1295 }
1296
1297 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1298 //!               / "*" / "+" / "," / ";" / "="
1299 bool wxURI::IsSubDelim (const wxChar& c)
1300 {
1301     return c == wxT('!') ||
1302            c == wxT('$') ||
1303            c == wxT('&') ||
1304            c == wxT('\'') ||
1305            c == wxT('(') ||
1306            c == wxT(')') ||
1307            c == wxT('*') ||
1308            c == wxT('+') ||
1309            c == wxT(',') ||
1310            c == wxT(';') ||
1311            c == wxT('=')
1312            ;
1313 }
1314
1315 bool wxURI::IsHex(const wxChar& c)
1316 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1317
1318 bool wxURI::IsAlpha(const wxChar& c)
1319 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1320
1321 bool wxURI::IsDigit(const wxChar& c)
1322 {   return c >= wxT('0') && c <= wxT('9');        }
1323
1324
1325 //end of uri.cpp
1326
1327
1328