src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  20     #pragma implementation "uri.h"
  21 #endif
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27     #pragma hdrstop
  28 #endif
  29
  30 #include "wx/uri.h"
  31
  32 // ---------------------------------------------------------------------------
  33 // definitions
  34 // ---------------------------------------------------------------------------
  35
// NOTE(review): presumably emits the wxWidgets run-time class information for
// wxURI with wxObject named as its base class - confirm against the
// IMPLEMENT_CLASS macro definition.
IMPLEMENT_CLASS(wxURI, wxObject);
  37
  38 // ===========================================================================
  39 // implementation
  40 // ===========================================================================
  41
  42 // ---------------------------------------------------------------------------
  43 // utilities
  44 // ---------------------------------------------------------------------------
  45
  46 // ---------------------------------------------------------------------------
  47 //
  48 //                        wxURI
  49 //
  50 // ---------------------------------------------------------------------------
  51
  52 // ---------------------------------------------------------------------------
  53 //  Constructors
  54 // ---------------------------------------------------------------------------
  55
  56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58 }
  59
  60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  61 {
  62     Create(uri);
  63 }
  64
  65 wxURI::wxURI(const wxURI& uri)  : m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     Assign(uri);
  68 }
  69
  70 // ---------------------------------------------------------------------------
  71 // Destructor and cleanup
  72 // ---------------------------------------------------------------------------
  73
  74 wxURI::~wxURI()
  75 {
  76     Clear();
  77 }
  78
  79 void wxURI::Clear()
  80 {
  81     m_scheme = m_user = m_server = m_port = m_path =
  82     m_query = m_fragment = wxT("");
  83
  84     m_hostType = wxURI_REGNAME;
  85
  86     m_fields = 0;
  87 }
  88
  89 // ---------------------------------------------------------------------------
  90 // Create
  91 //
  92 // This creates the URI - all we do here is call the main parsing method
  93 // ---------------------------------------------------------------------------
  94
  95 const wxChar* wxURI::Create(const wxString& uri)
  96 {
  97     if (m_fields)
  98         Clear();
  99
 100     return Parse(uri);
 101 }
 102
 103 // ---------------------------------------------------------------------------
 104 // Escape Methods
 105 //
 106 // TranslateEscape unencodes a 3 character URL escape sequence
 107 //
 108 // Escape encodes an invalid URI character into a 3 character sequence
 109 //
 110 // IsEscape determines if the input string contains an escape sequence,
 111 // if it does, then it moves the input string past the escape sequence
 112 //
 113 // Unescape unencodes all 3 character URL escape sequences in a wxString
 114 // ---------------------------------------------------------------------------
 115
 116 wxChar wxURI::TranslateEscape(const wxChar* s)
 117 {
 118     wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
 119
 120     //<<4 == 16
 121     return ( CharToHex(*s) << 4 ) | CharToHex(*++s);
 122 }
 123
 124 wxString wxURI::Unescape(const wxString& uri)
 125 {
 126     wxString new_uri;
 127
 128     for(size_t i = 0; i < uri.length(); ++i)
 129     {
 130         if (uri[i] == wxT('%'))
 131         {
 132             new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
 133             i += 2;
 134         }
 135         else
 136             new_uri += uri[i];
 137     }
 138
 139     return new_uri;
 140 }
 141
 142 void wxURI::Escape(wxString& s, const wxChar& c)
 143 {
 144     const wxChar* hdig = wxT("0123456789abcdef");
 145     s += wxT('%');
 146     s += hdig[(c >> 4) & 15];
 147     s += hdig[c & 15];
 148 }
 149
 150 bool wxURI::IsEscape(const wxChar*& uri)
 151 {
 152     // pct-encoded   = "%" HEXDIG HEXDIG
 153     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 154     {
 155         uri += 3;
 156         return true;
 157     }
 158     else
 159         return false;
 160 }
 161
 162 // ---------------------------------------------------------------------------
 163 // BuildURI
 164 //
 165 // BuildURI() builds the entire URI into a useable
 166 // representation, including proper identification characters such as slashes
 167 //
 168 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 169 // the components that accept escape sequences
 170 // ---------------------------------------------------------------------------
 171
 172 wxString wxURI::BuildURI() const
 173 {
 174     wxString ret;
 175
 176     if (HasScheme())
 177         ret = ret + m_scheme + wxT(":");
 178
 179     if (HasServer())
 180     {
 181         ret += wxT("//");
 182
 183         if (HasUser())
 184             ret = ret + m_user + wxT("@");
 185
 186         ret += m_server;
 187
 188         if (HasPort())
 189             ret = ret + wxT(":") + m_port;
 190     }
 191
 192     ret += m_path;
 193
 194     if (HasQuery())
 195         ret = ret + wxT("?") + m_query;
 196
 197     if (HasFragment())
 198         ret = ret + wxT("#") + m_fragment;
 199
 200     return ret;
 201 }
 202
 203 wxString wxURI::BuildUnescapedURI() const
 204 {
 205     wxString ret;
 206
 207     if (HasScheme())
 208         ret = ret + m_scheme + wxT(":");
 209
 210     if (HasServer())
 211     {
 212         ret += wxT("//");
 213
 214         if (HasUser())
 215             ret = ret + wxURI::Unescape(m_user) + wxT("@");
 216
 217         if (m_hostType == wxURI_REGNAME)
 218             ret += wxURI::Unescape(m_server);
 219         else
 220             ret += m_server;
 221
 222         if (HasPort())
 223             ret = ret + wxT(":") + m_port;
 224     }
 225
 226     ret += wxURI::Unescape(m_path);
 227
 228     if (HasQuery())
 229         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 230
 231     if (HasFragment())
 232         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 233
 234     return ret;
 235 }
 236
 237 // ---------------------------------------------------------------------------
 238 // Assignment
 239 // ---------------------------------------------------------------------------
 240
 241 wxURI& wxURI::Assign(const wxURI& uri)
 242 {
 243     //assign fields
 244     m_fields = uri.m_fields;
 245
 246     //ref over components
 247     m_scheme = uri.m_scheme;
 248     m_user = uri.m_user;
 249     m_server = uri.m_server;
 250     m_hostType = uri.m_hostType;
 251     m_port = uri.m_port;
 252     m_path = uri.m_path;
 253     m_query = uri.m_query;
 254     m_fragment = uri.m_fragment;
 255
 256     return *this;
 257 }
 258
 259 wxURI& wxURI::operator = (const wxURI& uri)
 260 {
 261     return Assign(uri);
 262 }
 263
 264 wxURI& wxURI::operator = (const wxString& string)
 265 {
 266     Create(string);
 267     return *this;
 268 }
 269
 270 // ---------------------------------------------------------------------------
 271 // Comparison
 272 // ---------------------------------------------------------------------------
 273
 274 bool wxURI::operator == (const wxURI& uri) const
 275 {
 276     if (HasScheme())
 277     {
 278         if(m_scheme != uri.m_scheme)
 279             return false;
 280     }
 281     else if (uri.HasScheme())
 282         return false;
 283
 284
 285     if (HasServer())
 286     {
 287         if (HasUser())
 288         {
 289             if (m_user != uri.m_user)
 290                 return false;
 291         }
 292         else if (uri.HasUser())
 293             return false;
 294
 295         if (m_server != uri.m_server ||
 296             m_hostType != uri.m_hostType)
 297             return false;
 298
 299         if (HasPort())
 300         {
 301             if(m_port != uri.m_port)
 302                 return false;
 303         }
 304         else if (uri.HasPort())
 305             return false;
 306     }
 307     else if (uri.HasServer())
 308         return false;
 309
 310
 311     if (HasPath())
 312     {
 313         if(m_path != uri.m_path)
 314             return false;
 315     }
 316     else if (uri.HasPath())
 317         return false;
 318
 319     if (HasQuery())
 320     {
 321         if (m_query != uri.m_query)
 322             return false;
 323     }
 324     else if (uri.HasQuery())
 325         return false;
 326
 327     if (HasFragment())
 328     {
 329         if (m_fragment != uri.m_fragment)
 330             return false;
 331     }
 332     else if (uri.HasFragment())
 333         return false;
 334
 335     return true;
 336 }
 337
 338 // ---------------------------------------------------------------------------
 339 // IsReference
 340 //
 341 // if there is no authority or scheme, it is a reference
 342 // ---------------------------------------------------------------------------
 343
 344 bool wxURI::IsReference() const
 345 {   return !HasScheme() || !HasServer();  }
 346
 347 // ---------------------------------------------------------------------------
 348 // Parse
 349 //
 350 // Master URI parsing method.  Just calls the individual parsing methods
 351 //
 352 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
// URI-reference = URI / relative-ref
 354 // ---------------------------------------------------------------------------
 355
 356 const wxChar* wxURI::Parse(const wxChar* uri)
 357 {
 358     uri = ParseScheme(uri);
 359     uri = ParseAuthority(uri);
 360     uri = ParsePath(uri);
 361     uri = ParseQuery(uri);
 362     return ParseFragment(uri);
 363 }
 364
 365 // ---------------------------------------------------------------------------
 366 // ParseXXX
 367 //
 368 // Individual parsers for each URI component
 369 // ---------------------------------------------------------------------------
 370
 371 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 372 {
 373     wxASSERT(uri != NULL);
 374
 375     //copy of the uri - used for figuring out
 376     //length of each component
 377     const wxChar* uricopy = uri;
 378
 379     //Does the uri have a scheme (first character alpha)?
 380     if (IsAlpha(*uri))
 381     {
 382         m_scheme += *uri++;
 383
 384         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 385         while (IsAlpha(*uri) || IsDigit(*uri) ||
 386                *uri == wxT('+')   ||
 387                *uri == wxT('-')   ||
 388                *uri == wxT('.'))
 389         {
 390             m_scheme += *uri++;
 391         }
 392
 393         //valid scheme?
 394         if (*uri == wxT(':'))
 395         {
 396             //mark the scheme as valid
 397             m_fields |= wxURI_SCHEME;
 398
 399             //move reference point up to input buffer
 400             uricopy = ++uri;
 401         }
 402         else
 403             //relative uri with relative path reference
 404             m_scheme = wxT("");
 405     }
 406 //    else
 407         //relative uri with _possible_ relative path reference
 408
 409     return uricopy;
 410 }
 411
 412 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 413 {
 414     // authority     = [ userinfo "@" ] host [ ":" port ]
 415     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 416     {
 417         uri += 2;
 418
 419         uri = ParseUser(uri);
 420         uri = ParseServer(uri);
 421         return ParsePort(uri);
 422     }
 423
 424     return uri;
 425 }
 426
 427 const wxChar* wxURI::ParseUser(const wxChar* uri)
 428 {
 429     wxASSERT(uri != NULL);
 430
 431     //copy of the uri - used for figuring out
 432     //length of each component
 433     const wxChar* uricopy = uri;
 434
 435     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 436     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 437     {
 438         if(IsUnreserved(*uri) || IsEscape(uri) ||
 439            IsSubDelim(*uri) || *uri == wxT(':'))
 440             m_user += *uri++;
 441         else
 442             Escape(m_user, *uri++);
 443     }
 444
 445     if(*uri == wxT('@'))
 446     {
 447         //valid userinfo
 448         m_fields |= wxURI_USER;
 449
 450         uricopy = ++uri;
 451     }
 452     else
 453         m_user = wxT("");
 454
 455     return uricopy;
 456 }
 457
 458 const wxChar* wxURI::ParseServer(const wxChar* uri)
 459 {
 460     wxASSERT(uri != NULL);
 461
 462     //copy of the uri - used for figuring out
 463     //length of each component
 464     const wxChar* uricopy = uri;
 465
 466     // host          = IP-literal / IPv4address / reg-name
 467     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 468     if (*uri == wxT('['))
 469     {
 470         if (ParseIPv6address(++uri) && *uri == wxT(']'))
 471         {
 472             ++uri;
 473             m_hostType = wxURI_IPV6ADDRESS;
 474
 475             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 476             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 477             theBuffer.SetLength(uri-uricopy);
 478         }
 479         else
 480         {
 481             uri = uricopy;
 482
 483             if (ParseIPvFuture(++uri) && *uri == wxT(']'))
 484             {
 485                 ++uri;
 486                 m_hostType = wxURI_IPVFUTURE;
 487
 488                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 489                 wxMemcpy(theBuffer, uricopy, uri-uricopy);
 490                 theBuffer.SetLength(uri-uricopy);
 491             }
 492             else
 493                 uri = uricopy;
 494         }
 495     }
 496     else
 497     {
 498         if (ParseIPv4address(uri))
 499         {
 500             m_hostType = wxURI_IPV4ADDRESS;
 501
 502             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 503             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 504             theBuffer.SetLength(uri-uricopy);
 505         }
 506         else
 507             uri = uricopy;
 508     }
 509
 510     if(m_hostType == wxURI_REGNAME)
 511     {
 512         uri = uricopy;
 513         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 514         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 515         {
 516             if(IsUnreserved(*uri) || IsEscape(uri) ||  IsSubDelim(*uri))
 517                 m_server += *uri++;
 518             else
 519                 Escape(m_server, *uri++);
 520         }
 521     }
 522
 523     //mark the server as valid
 524     m_fields |= wxURI_SERVER;
 525
 526     return uri;
 527 }
 528
 529
 530 const wxChar* wxURI::ParsePort(const wxChar* uri)
 531 {
 532     wxASSERT(uri != NULL);
 533
 534     // port          = *DIGIT
 535     if(*uri == wxT(':'))
 536     {
 537         ++uri;
 538         while(IsDigit(*uri))
 539         {
 540             m_port += *uri++;
 541         }
 542
 543         //mark the port as valid
 544         m_fields |= wxURI_PORT;
 545     }
 546
 547     return uri;
 548 }
 549
 550 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 551 {
 552     wxASSERT(uri != NULL);
 553
 554     //copy of the uri - used for figuring out
 555     //length of each component
 556     const wxChar* uricopy = uri;
 557
 558     /// hier-part     = "//" authority path-abempty
 559     ///               / path-absolute
 560     ///               / path-rootless
 561     ///               / path-empty
 562     ///
 563     /// relative-part = "//" authority path-abempty
 564     ///               / path-absolute
 565     ///               / path-noscheme
 566     ///               / path-empty
 567     ///
 568     /// path-abempty  = *( "/" segment )
 569     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 570     /// path-noscheme = segment-nz-nc *( "/" segment )
 571     /// path-rootless = segment-nz *( "/" segment )
 572     /// path-empty    = 0<pchar>
 573     ///
 574     /// segment       = *pchar
 575     /// segment-nz    = 1*pchar
 576     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 577     ///               ; non-zero-length segment without any colon ":"
 578     ///
 579     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 580     if (*uri == wxT('/'))
 581     {
 582         m_path += *uri++;
 583
 584         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 585         {
 586             if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 587                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 588                 m_path += *uri++;
 589             else
 590                 Escape(m_path, *uri++);
 591         }
 592
 593         if (bNormalize)
 594         {
 595             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 596 #if wxUSE_STL
 597             wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 598 #endif
 599             Normalize(theBuffer, true);
 600             theBuffer.SetLength(wxStrlen(theBuffer));
 601         }
 602         //mark the path as valid
 603         m_fields |= wxURI_PATH;
 604     }
 605     else if(*uri) //Relative path
 606     {
 607         if (bReference)
 608         {
 609             //no colon allowed
 610             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 611             {
 612                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 613                   *uri == wxT('@') || *uri == wxT('/'))
 614                     m_path += *uri++;
 615                 else
 616                     Escape(m_path, *uri++);
 617             }
 618         }
 619         else
 620         {
 621             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 622             {
 623                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 624                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 625                     m_path += *uri++;
 626                 else
 627                     Escape(m_path, *uri++);
 628             }
 629         }
 630
 631         if (uri != uricopy)
 632         {
 633             if (bNormalize)
 634             {
 635                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 636 #if wxUSE_STL
 637                 wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 638 #endif
 639                 Normalize(theBuffer);
 640                 theBuffer.SetLength(wxStrlen(theBuffer));
 641             }
 642
 643             //mark the path as valid
 644             m_fields |= wxURI_PATH;
 645         }
 646     }
 647
 648     return uri;
 649 }
 650
 651
 652 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 653 {
 654     wxASSERT(uri != NULL);
 655
 656     // query         = *( pchar / "/" / "?" )
 657     if (*uri == wxT('?'))
 658     {
 659         ++uri;
 660         while(*uri && *uri != wxT('#'))
 661         {
 662             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 663                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 664                   m_query += *uri++;
 665             else
 666                   Escape(m_query, *uri++);
 667         }
 668
 669         //mark the server as valid
 670         m_fields |= wxURI_QUERY;
 671     }
 672
 673     return uri;
 674 }
 675
 676
 677 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 678 {
 679     wxASSERT(uri != NULL);
 680
 681     // fragment      = *( pchar / "/" / "?" )
 682     if (*uri == wxT('#'))
 683     {
 684         ++uri;
 685         while(*uri)
 686         {
 687             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 688                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 689                   m_fragment += *uri++;
 690             else
 691                   Escape(m_fragment, *uri++);
 692         }
 693
 694         //mark the server as valid
 695         m_fields |= wxURI_FRAGMENT;
 696     }
 697
 698     return uri;
 699 }
 700
 701 // ---------------------------------------------------------------------------
 702 // Resolve
 703 //
 704 // Builds missing components of this uri from a base uri
 705 //
 706 // A version of the algorithm outlined in the RFC is used here
 707 // (it is shown in comments)
 708 //
 709 // Note that an empty URI inherits all components
 710 // ---------------------------------------------------------------------------
 711
 712 void wxURI::Resolve(const wxURI& base, int flags)
 713 {
 714     wxASSERT_MSG(!base.IsReference(),
 715                 wxT("wxURI to inherit from must not be a reference!"));
 716
 717     // If we arn't being strict, enable the older (pre-RFC2396)
 718     // loophole that allows this uri to inherit other
 719     // properties from the base uri - even if the scheme
 720     // is defined
 721     if ( !(flags & wxURI_STRICT) &&
 722             HasScheme() && base.HasScheme() &&
 723                 m_scheme == base.m_scheme )
 724     {
 725         m_fields -= wxURI_SCHEME;
 726     }
 727
 728
 729     // Do nothing if this is an absolute wxURI
 730     //    if defined(R.scheme) then
 731     //       T.scheme    = R.scheme;
 732     //       T.authority = R.authority;
 733     //       T.path      = remove_dot_segments(R.path);
 734     //       T.query     = R.query;
 735     if (HasScheme())
 736     {
 737         return;
 738     }
 739
 740     //No sheme - inherit
 741     m_scheme = base.m_scheme;
 742     m_fields |= wxURI_SCHEME;
 743
 744     // All we need to do for relative URIs with an
 745     // authority component is just inherit the scheme
 746     //       if defined(R.authority) then
 747     //          T.authority = R.authority;
 748     //          T.path      = remove_dot_segments(R.path);
 749     //          T.query     = R.query;
 750     if (HasServer())
 751     {
 752         return;
 753     }
 754
 755     //No authority - inherit
 756     if (base.HasUser())
 757     {
 758         m_user = base.m_user;
 759         m_fields |= wxURI_USER;
 760     }
 761
 762     m_server = base.m_server;
 763     m_hostType = base.m_hostType;
 764     m_fields |= wxURI_SERVER;
 765
 766     if (base.HasPort())
 767     {
 768         m_port = base.m_port;
 769         m_fields |= wxURI_PORT;
 770     }
 771
 772
 773     // Simple path inheritance from base
 774     if (!HasPath())
 775     {
 776         //             T.path = Base.path;
 777         m_path = base.m_path;
 778         m_fields |= wxURI_PATH;
 779
 780
 781         //             if defined(R.query) then
 782         //                T.query = R.query;
 783         //             else
 784         //                T.query = Base.query;
 785         //             endif;
 786         if (!HasQuery())
 787         {
 788             m_query = base.m_query;
 789             m_fields |= wxURI_QUERY;
 790         }
 791     }
 792     else
 793     {
 794         //             if (R.path starts-with "/") then
 795         //                T.path = remove_dot_segments(R.path);
 796         //             else
 797         //                T.path = merge(Base.path, R.path);
 798         //                T.path = remove_dot_segments(T.path);
 799         //             endif;
 800         //             T.query = R.query;
 801         if (m_path[0u] != wxT('/'))
 802         {
 803             //Marge paths
 804             const wxChar* op = m_path.c_str();
 805             const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
 806
 807             //not a ending directory?  move up
 808             if (base.m_path[0] && *(bp-1) != wxT('/'))
 809                 UpTree(base.m_path, bp);
 810
 811             //normalize directories
 812             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 813                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 814             {
 815                 UpTree(base.m_path, bp);
 816
 817                 if (*(op+2) == '\0')
 818                     op += 2;
 819                 else
 820                     op += 3;
 821             }
 822
 823             m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
 824                     m_path.substr((op - m_path.c_str()), m_path.Length());
 825         }
 826     }
 827
 828     //T.fragment = R.fragment;
 829 }
 830
 831 // ---------------------------------------------------------------------------
 832 // UpTree
 833 //
 834 // Moves a URI path up a directory
 835 // ---------------------------------------------------------------------------
 836
 837 //static
 838 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 839 {
 840     if (uri != uristart && *(uri-1) == wxT('/'))
 841     {
 842         uri -= 2;
 843     }
 844
 845     for(;uri != uristart; --uri)
 846     {
 847         if (*uri == wxT('/'))
 848         {
 849             ++uri;
 850             break;
 851         }
 852     }
 853
 854     //!!!TODO:HACK!!!//
 855     if (uri == uristart && *uri == wxT('/'))
 856         ++uri;
 857     //!!!//
 858 }
 859
 860 // ---------------------------------------------------------------------------
 861 // Normalize
 862 //
 863 // Normalizes directories in-place
 864 //
 865 // I.E. ./ and . are ignored
 866 //
 867 // ../ and .. are removed if a directory is before it, along
 868 // with that directory (leading .. and ../ are kept)
 869 // ---------------------------------------------------------------------------
 870
 871 //static
 872 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 873 {
 874     wxChar* cp = s;
 875     wxChar* bp = s;
 876
 877     if(s[0] == wxT('/'))
 878         ++bp;
 879
 880     while(*cp)
 881     {
 882         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 883             && (bp == cp || *(cp-1) == wxT('/')))
 884         {
 885             //. _or_ ./  - ignore
 886             if (*(cp+1) == '\0')
 887                 cp += 1;
 888             else
 889                 cp += 2;
 890         }
 891         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 892                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 893                 && (bp == cp || *(cp-1) == wxT('/')))
 894         {
 895             //.. _or_ ../ - go up the tree
 896             if (s != bp)
 897             {
 898                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 899
 900                 if (*(cp+2) == '\0')
 901                     cp += 2;
 902                 else
 903                     cp += 3;
 904             }
 905             else if (!bIgnoreLeads)
 906
 907             {
 908                 *bp++ = *cp++;
 909                 *bp++ = *cp++;
 910                 if (*cp)
 911                     *bp++ = *cp++;
 912
 913                 s = bp;
 914             }
 915             else
 916             {
 917                 if (*(cp+2) == '\0')
 918                     cp += 2;
 919                 else
 920                     cp += 3;
 921             }
 922         }
 923         else
 924             *s++ = *cp++;
 925     }
 926
 927     *s = '\0';
 928 }
 929
 930 // ---------------------------------------------------------------------------
 931 // ParseH16
 932 //
// Parses 1 to 4 hex values.  Returns true if the first character of the input
// string is a valid hex character.  It is the caller's responsibility to move
// the input string back to its original position on failure.
 936 // ---------------------------------------------------------------------------
 937
 938 bool wxURI::ParseH16(const wxChar*& uri)
 939 {
 940     // h16           = 1*4HEXDIG
 941     if(!IsHex(*++uri))
 942         return false;
 943
 944     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
 945         ++uri;
 946
 947     return true;
 948 }
 949
 950 // ---------------------------------------------------------------------------
 951 // ParseIPXXX
 952 //
// Parses a certain version of an IP address and moves the input string past
// it.  Returns true if the input string contains the proper version of an IP
// address.  It is the caller's responsibility to move the input string back
// to its original position on failure.
 957 // ---------------------------------------------------------------------------
 958
 959 bool wxURI::ParseIPv4address(const wxChar*& uri)
 960 {
 961     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 962     //
 963     //dec-octet     =      DIGIT                    ; 0-9
 964     //                / %x31-39 DIGIT               ; 10-99
 965     //                / "1" 2DIGIT                  ; 100-199
 966     //                / "2" %x30-34 DIGIT           ; 200-249
 967     //                / "25" %x30-35                ; 250-255
 968     size_t iIPv4 = 0;
 969     if (IsDigit(*uri))
 970     {
 971         ++iIPv4;
 972
 973
 974         //each ip part must be between 0-255 (dupe of version in for loop)
 975         if( IsDigit(*++uri) && IsDigit(*++uri) &&
 976            //100 or less  (note !)
 977            !( (*(uri-2) < wxT('2')) ||
 978            //240 or less
 979              (*(uri-2) == wxT('2') &&
 980                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
 981              )
 982             )
 983           )
 984         {
 985             return false;
 986         }
 987
 988         if(IsDigit(*uri))++uri;
 989
 990         //compilers should unroll this loop
 991         for(; iIPv4 < 4; ++iIPv4)
 992         {
 993             if (*uri != wxT('.') || !IsDigit(*++uri))
 994                 break;
 995
 996             //each ip part must be between 0-255
 997             if( IsDigit(*++uri) && IsDigit(*++uri) &&
 998                //100 or less  (note !)
 999                !( (*(uri-2) < wxT('2')) ||
1000                //240 or less
1001                  (*(uri-2) == wxT('2') &&
1002                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1003                  )
1004                 )
1005               )
1006             {
1007                 return false;
1008             }
1009             if(IsDigit(*uri))++uri;
1010         }
1011     }
1012     return iIPv4 == 4;
1013 }
1014
// Attempts to match the IPv6address production (grammar reproduced below)
// starting from the position designated by uri.
//
// uri is a reference to the caller's cursor: it is moved forwards and
// backwards while matching and is left wherever parsing stopped, including
// on the failure paths.
//
// Returns false on the explicit rejection paths below.
// NOTE(review): the last two statements of the function both return true,
// so once execution reaches the end the result no longer depends on the
// final ParseH16() call - confirm this is intended.
//
// NOTE(review): ParseH16() is defined outside this section; the "--uri"
// adjustments after a failed call presumably undo an advance made by it -
// verify against its implementation before touching the pointer arithmetic.
bool wxURI::ParseIPv6address(const wxChar*& uri)
{
    // IPv6address   =                            6( h16 ":" ) ls32
    //               /                       "::" 5( h16 ":" ) ls32
    //               / [               h16 ] "::" 4( h16 ":" ) ls32
    //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    //               / [ *4( h16 ":" ) h16 ] "::"              ls32
    //               / [ *5( h16 ":" ) h16 ] "::"              h16
    //               / [ *6( h16 ":" ) h16 ] "::"

    // numPrefix:  number of leading h16 groups that were followed by ':'
    // maxPostfix: number of ( h16 ":" ) groups required after the prefix;
    //             assigned on every path that reaches the postfix loop
    size_t numPrefix = 0,
              maxPostfix;

    // set when the prefix loop stopped because ParseH16() failed
    bool bEndHex = false;

    // consume up to six "h16 :" groups
    for( ; numPrefix < 6; ++numPrefix)
    {
        if(!ParseH16(uri))
        {
            //no h16 here: step back one character and remember the failure
            --uri;
            bEndHex = true;
            break;
        }

        if(*uri != wxT(':'))
        {
            //h16 not followed by ':' - this group is not counted in numPrefix
            break;
        }
    }

    // if the loop did not end on a failed h16, try to read one more h16
    if(!bEndHex && !ParseH16(uri))
    {
        --uri;

        //a failure here is only tolerated when no prefix group was counted
        if (numPrefix)
            return false;

        if (*uri == wxT(':'))
        {
            //a single ':' is rejected; it must be the start of "::"
            if (*++uri != wxT(':'))
                return false;

            maxPostfix = 5;
        }
        else
            maxPostfix = 6;
    }
    else
    {
        //not positioned on "::"
        if (*uri != wxT(':') || *(uri+1) != wxT(':'))
        {
            //without "::" only the full 6( h16 ":" ) ls32 form is accepted
            if (numPrefix != 6)
                return false;

            //rewind to just after the most recent ':'
            //NOTE(review): no lower bound for this backward scan is visible here
            while (*--uri != wxT(':')) {}
            ++uri;

            const wxChar* uristart = uri;
            //parse ls32
            // ls32          = ( h16 ":" h16 ) / IPv4address
            if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
                return true;

            //h16 ":" h16 did not match: restore the cursor and try IPv4address
            uri = uristart;

            if (ParseIPv4address(uri))
                return true;
            else
                return false;
        }
        else
        {
            //skip the two characters of "::"
            uri += 2;

            //the more groups precede "::", the fewer "h16 :" groups must follow
            if (numPrefix > 3)
                maxPostfix = 0;
            else
                maxPostfix = 4 - numPrefix;
        }
    }

    //the alternative endings handled at the bottom are only considered when
    //no "h16 :" group is required after this point
    bool bAllowAltEnding = maxPostfix == 0;

    //every required postfix group must be an h16 followed by ':'
    for(; maxPostfix != 0; --maxPostfix)
    {
        if(!ParseH16(uri) || *uri != wxT(':'))
            return false;
    }

    if(numPrefix <= 4)
    {
        const wxChar* uristart = uri;
        //parse ls32
        // ls32          = ( h16 ":" h16 ) / IPv4address
        if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
            return true;

        uri = uristart;

        if (ParseIPv4address(uri))
            return true;

        //neither ls32 form matched: restore the cursor before the fallbacks
        uri = uristart;

        if (!bAllowAltEnding)
            return false;
    }

    //optional trailing h16; see the NOTE(review) in the header comment - the
    //outcome of this call does not change the returned value
    if(numPrefix <= 5 && ParseH16(uri))
        return true;

    return true;
}
1130
1131 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1132 {
1133     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1134     if (*++uri != wxT('v') || !IsHex(*++uri))
1135         return false;
1136
1137     while (IsHex(*++uri)) {}
1138
1139     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1140         return false;
1141
1142     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1143
1144     return true;
1145 }
1146
1147
1148 // ---------------------------------------------------------------------------
1149 // CharToHex
1150 //
1151 // Converts a character into a numeric hexidecimal value, or 0 if the
1152 // passed in character is not a valid hex character
1153 // ---------------------------------------------------------------------------
1154
1155 //static
1156 wxChar wxURI::CharToHex(const wxChar& c)
1157 {
1158     if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1159     if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1160     if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1161
1162     return 0;
1163 }
1164
1165 // ---------------------------------------------------------------------------
1166 // IsXXX
1167 //
1168 // Returns true if the passed in character meets the criteria of the method
1169 // ---------------------------------------------------------------------------
1170
1171 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1172 bool wxURI::IsUnreserved (const wxChar& c)
1173 {   return IsAlpha(c) || IsDigit(c) ||
1174            c == wxT('-') ||
1175            c == wxT('.') ||
1176            c == wxT('_') ||
1177            c == wxT('~') //tilde
1178            ;
1179 }
1180
1181 bool wxURI::IsReserved (const wxChar& c)
1182 {
1183     return IsGenDelim(c) || IsSubDelim(c);
1184 }
1185
1186 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1187 bool wxURI::IsGenDelim (const wxChar& c)
1188 {
1189     return c == wxT(':') ||
1190            c == wxT('/') ||
1191            c == wxT('?') ||
1192            c == wxT('#') ||
1193            c == wxT('[') ||
1194            c == wxT(']') ||
1195            c == wxT('@');
1196 }
1197
1198 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1199 //!               / "*" / "+" / "," / ";" / "="
1200 bool wxURI::IsSubDelim (const wxChar& c)
1201 {
1202     return c == wxT('!') ||
1203            c == wxT('$') ||
1204            c == wxT('&') ||
1205            c == wxT('\'') ||
1206            c == wxT('(') ||
1207            c == wxT(')') ||
1208            c == wxT('*') ||
1209            c == wxT('+') ||
1210            c == wxT(',') ||
1211            c == wxT(';') ||
1212            c == wxT('=')
1213            ;
1214 }
1215
1216 bool wxURI::IsHex(const wxChar& c)
1217 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1218
1219 bool wxURI::IsAlpha(const wxChar& c)
1220 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1221
1222 bool wxURI::IsDigit(const wxChar& c)
1223 {   return c >= wxT('0') && c <= wxT('9');        }
1224
1225
1226 // ---------------------------------------------------------------------------
1227 //
//                        wxURL Compatibility
1229 //
1230 // ---------------------------------------------------------------------------
1231
1232 #if wxUSE_URL
1233
1234 #if WXWIN_COMPATIBILITY_2_4
1235
1236 #include "wx/url.h"
1237
1238 //Note that this old code really doesn't convert to a URI that well and looks
1239 //more like a dirty hack than anything else...
1240
1241 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1242 {
1243   wxString out_str;
1244   wxString hexa_code;
1245   size_t i;
1246
1247   for (i = 0; i < uri.Len(); i++)
1248   {
1249     wxChar c = uri.GetChar(i);
1250
1251     if (c == wxT(' '))
1252     {
1253       // GRG, Apr/2000: changed to "%20" instead of '+'
1254
1255       out_str += wxT("%20");
1256     }
1257     else
1258     {
1259       // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1260       //
1261       // - Alphanumeric characters are never escaped
1262       // - Unreserved marks are never escaped
1263       // - Delimiters must be escaped if they appear within a component
1264       //     but not if they are used to separate components. Here we have
1265       //     no clear way to distinguish between these two cases, so they
1266       //     are escaped unless they are passed in the 'delims' parameter
1267       //     (allowed delimiters).
1268
1269       static const wxChar marks[] = wxT("-_.!~*()'");
1270
1271       if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1272       {
1273         hexa_code.Printf(wxT("%%%02X"), c);
1274         out_str += hexa_code;
1275       }
1276       else
1277       {
1278         out_str += c;
1279       }
1280     }
1281   }
1282
1283   return out_str;
1284 }
1285
1286 wxString wxURL::ConvertFromURI(const wxString& uri)
1287 {
1288     return wxURI::Unescape(uri);
1289 }
1290
1291 #endif //WXWIN_COMPATIBILITY_2_4
1292
1293 #endif //wxUSE_URL
1294
1295 //end of uri.cpp
1296
1297
1298