src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  20     #pragma implementation "uri.h"
  21 #endif
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27     #pragma hdrstop
  28 #endif
  29
  30 #include "wx/uri.h"
  31
  32 // ---------------------------------------------------------------------------
  33 // definitions
  34 // ---------------------------------------------------------------------------
  35
// Invokes the IMPLEMENT_CLASS macro for wxURI with wxObject named as its base.
// NOTE(review): presumably this emits the wxWidgets run-time class information
// for wxURI -- the macro is defined outside this file, confirm in the wx headers.
  36 IMPLEMENT_CLASS(wxURI, wxObject);
  37
  38 // ===========================================================================
  39 // implementation
  40 // ===========================================================================
  41
  42 // ---------------------------------------------------------------------------
  43 // utilities
  44 // ---------------------------------------------------------------------------
  45
  46 // ---------------------------------------------------------------------------
  47 //
  48 //                        wxURI
  49 //
  50 // ---------------------------------------------------------------------------
  51
  52 // ---------------------------------------------------------------------------
  53 //  Constructors
  54 // ---------------------------------------------------------------------------
  55
  56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58 }
  59
  60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  61 {
  62     Create(uri);
  63 }
  64
  65 wxURI::wxURI(const wxURI& uri)  : m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     Assign(uri);
  68 }
  69
  70 // ---------------------------------------------------------------------------
  71 // Destructor and cleanup
  72 // ---------------------------------------------------------------------------
  73
  74 wxURI::~wxURI()
  75 {
  76     Clear();
  77 }
  78
  79 void wxURI::Clear()
  80 {
  81     m_scheme = m_user = m_server = m_port = m_path =
  82     m_query = m_fragment = wxT("");
  83
  84     m_hostType = wxURI_REGNAME;
  85
  86     m_fields = 0;
  87 }
  88
  89 // ---------------------------------------------------------------------------
  90 // Create
  91 //
  92 // This creates the URI - all we do here is call the main parsing method
  93 // ---------------------------------------------------------------------------
  94
  95 const wxChar* wxURI::Create(const wxString& uri)
  96 {
  97     if (m_fields)
  98         Clear();
  99
 100     return Parse(uri);
 101 }
 102
 103 // ---------------------------------------------------------------------------
 104 // Escape Methods
 105 //
 106 // TranslateEscape unencodes a 3 character URL escape sequence
 107 //
 108 // Escape encodes an invalid URI character into a 3 character sequence
 109 //
 110 // IsEscape determines if the input string contains an escape sequence,
 111 // if it does, then it moves the input string past the escape sequence
 112 //
 113 // Unescape unencodes all 3 character URL escape sequences in a wxString
 114 // ---------------------------------------------------------------------------
 115
 116 wxChar wxURI::TranslateEscape(const wxChar* s)
 117 {
 118     wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
 119
 120     //<<4 == 16
 121     return (wxChar)( CharToHex(*s) << 4 ) | CharToHex(*++s);
 122 }
 123
 124 wxString wxURI::Unescape(const wxString& uri)
 125 {
 126     wxString new_uri;
 127
 128     for(size_t i = 0; i < uri.length(); ++i)
 129     {
 130         if (uri[i] == wxT('%'))
 131         {
 132             new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
 133             i += 2;
 134         }
 135         else
 136             new_uri += uri[i];
 137     }
 138
 139     return new_uri;
 140 }
 141
 142 void wxURI::Escape(wxString& s, const wxChar& c)
 143 {
 144     const wxChar* hdig = wxT("0123456789abcdef");
 145     s += wxT('%');
 146     s += hdig[(c >> 4) & 15];
 147     s += hdig[c & 15];
 148 }
 149
 150 bool wxURI::IsEscape(const wxChar*& uri)
 151 {
 152     // pct-encoded   = "%" HEXDIG HEXDIG
 153     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 154     {
 155         uri += 3;
 156         return true;
 157     }
 158     else
 159         return false;
 160 }
 161
 162 // ---------------------------------------------------------------------------
 163 // BuildURI
 164 //
 165 // BuildURI() builds the entire URI into a useable
 166 // representation, including proper identification characters such as slashes
 167 //
 168 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 169 // the components that accept escape sequences
 170 // ---------------------------------------------------------------------------
 171
 172 wxString wxURI::BuildURI() const
 173 {
 174     wxString ret;
 175
 176     if (HasScheme())
 177         ret = ret + m_scheme + wxT(":");
 178
 179     if (HasServer())
 180     {
 181         ret += wxT("//");
 182
 183         if (HasUser())
 184             ret = ret + m_user + wxT("@");
 185
 186         ret += m_server;
 187
 188         if (HasPort())
 189             ret = ret + wxT(":") + m_port;
 190     }
 191
 192     ret += m_path;
 193
 194     if (HasQuery())
 195         ret = ret + wxT("?") + m_query;
 196
 197     if (HasFragment())
 198         ret = ret + wxT("#") + m_fragment;
 199
 200     return ret;
 201 }
 202
 203 wxString wxURI::BuildUnescapedURI() const
 204 {
 205     wxString ret;
 206
 207     if (HasScheme())
 208         ret = ret + m_scheme + wxT(":");
 209
 210     if (HasServer())
 211     {
 212         ret += wxT("//");
 213
 214         if (HasUser())
 215             ret = ret + wxURI::Unescape(m_user) + wxT("@");
 216
 217         if (m_hostType == wxURI_REGNAME)
 218             ret += wxURI::Unescape(m_server);
 219         else
 220             ret += m_server;
 221
 222         if (HasPort())
 223             ret = ret + wxT(":") + m_port;
 224     }
 225
 226     ret += wxURI::Unescape(m_path);
 227
 228     if (HasQuery())
 229         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 230
 231     if (HasFragment())
 232         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 233
 234     return ret;
 235 }
 236
 237 // ---------------------------------------------------------------------------
 238 // Assignment
 239 // ---------------------------------------------------------------------------
 240
 241 wxURI& wxURI::Assign(const wxURI& uri)
 242 {
 243     //assign fields
 244     m_fields = uri.m_fields;
 245
 246     //ref over components
 247     m_scheme = uri.m_scheme;
 248     m_user = uri.m_user;
 249     m_server = uri.m_server;
 250     m_hostType = uri.m_hostType;
 251     m_port = uri.m_port;
 252     m_path = uri.m_path;
 253     m_query = uri.m_query;
 254     m_fragment = uri.m_fragment;
 255
 256     return *this;
 257 }
 258
 259 wxURI& wxURI::operator = (const wxURI& uri)
 260 {
 261     return Assign(uri);
 262 }
 263
 264 wxURI& wxURI::operator = (const wxString& string)
 265 {
 266     Create(string);
 267     return *this;
 268 }
 269
 270 // ---------------------------------------------------------------------------
 271 // Comparison
 272 // ---------------------------------------------------------------------------
 273
 274 bool wxURI::operator == (const wxURI& uri) const
 275 {
 276     if (HasScheme())
 277     {
 278         if(m_scheme != uri.m_scheme)
 279             return false;
 280     }
 281     else if (uri.HasScheme())
 282         return false;
 283
 284
 285     if (HasServer())
 286     {
 287         if (HasUser())
 288         {
 289             if (m_user != uri.m_user)
 290                 return false;
 291         }
 292         else if (uri.HasUser())
 293             return false;
 294
 295         if (m_server != uri.m_server ||
 296             m_hostType != uri.m_hostType)
 297             return false;
 298
 299         if (HasPort())
 300         {
 301             if(m_port != uri.m_port)
 302                 return false;
 303         }
 304         else if (uri.HasPort())
 305             return false;
 306     }
 307     else if (uri.HasServer())
 308         return false;
 309
 310
 311     if (HasPath())
 312     {
 313         if(m_path != uri.m_path)
 314             return false;
 315     }
 316     else if (uri.HasPath())
 317         return false;
 318
 319     if (HasQuery())
 320     {
 321         if (m_query != uri.m_query)
 322             return false;
 323     }
 324     else if (uri.HasQuery())
 325         return false;
 326
 327     if (HasFragment())
 328     {
 329         if (m_fragment != uri.m_fragment)
 330             return false;
 331     }
 332     else if (uri.HasFragment())
 333         return false;
 334
 335     return true;
 336 }
 337
 338 // ---------------------------------------------------------------------------
 339 // IsReference
 340 //
 341 // if there is no authority or scheme, it is a reference
 342 // ---------------------------------------------------------------------------
 343
 344 bool wxURI::IsReference() const
 345 {   return !HasScheme() || !HasServer();  }
 346
 347 // ---------------------------------------------------------------------------
 348 // Parse
 349 //
 350 // Master URI parsing method.  Just calls the individual parsing methods
 351 //
 352 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 353 // URI-reference = URI / relative-ref
 354 // ---------------------------------------------------------------------------
 355
 356 const wxChar* wxURI::Parse(const wxChar* uri)
 357 {
 358     uri = ParseScheme(uri);
 359     uri = ParseAuthority(uri);
 360     uri = ParsePath(uri);
 361     uri = ParseQuery(uri);
 362     return ParseFragment(uri);
 363 }
 364
 365 // ---------------------------------------------------------------------------
 366 // ParseXXX
 367 //
 368 // Individual parsers for each URI component
 369 // ---------------------------------------------------------------------------
 370
 371 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 372 {
 373     wxASSERT(uri != NULL);
 374
 375     //copy of the uri - used for figuring out
 376     //length of each component
 377     const wxChar* uricopy = uri;
 378
 379     //Does the uri have a scheme (first character alpha)?
 380     if (IsAlpha(*uri))
 381     {
 382         m_scheme += *uri++;
 383
 384         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 385         while (IsAlpha(*uri) || IsDigit(*uri) ||
 386                *uri == wxT('+')   ||
 387                *uri == wxT('-')   ||
 388                *uri == wxT('.'))
 389         {
 390             m_scheme += *uri++;
 391         }
 392
 393         //valid scheme?
 394         if (*uri == wxT(':'))
 395         {
 396             //mark the scheme as valid
 397             m_fields |= wxURI_SCHEME;
 398
 399             //move reference point up to input buffer
 400             uricopy = ++uri;
 401         }
 402         else
 403             //relative uri with relative path reference
 404             m_scheme = wxT("");
 405     }
 406 //    else
 407         //relative uri with _possible_ relative path reference
 408
 409     return uricopy;
 410 }
 411
 412 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 413 {
 414     // authority     = [ userinfo "@" ] host [ ":" port ]
 415     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 416     {
 417         uri += 2;
 418
 419         uri = ParseUser(uri);
 420         uri = ParseServer(uri);
 421         return ParsePort(uri);
 422     }
 423
 424     return uri;
 425 }
 426
 427 const wxChar* wxURI::ParseUser(const wxChar* uri)
 428 {
 429     wxASSERT(uri != NULL);
 430
 431     //copy of the uri - used for figuring out
 432     //length of each component
 433     const wxChar* uricopy = uri;
 434
 435     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 436     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 437     {
 438         if(IsUnreserved(*uri) || IsEscape(uri) ||
 439            IsSubDelim(*uri) || *uri == wxT(':'))
 440             m_user += *uri++;
 441         else
 442             Escape(m_user, *uri++);
 443     }
 444
 445     if(*uri == wxT('@'))
 446     {
 447         //valid userinfo
 448         m_fields |= wxURI_USER;
 449
 450         uricopy = ++uri;
 451     }
 452     else
 453         m_user = wxT("");
 454
 455     return uricopy;
 456 }
 457
 458 const wxChar* wxURI::ParseServer(const wxChar* uri)
 459 {
 460     wxASSERT(uri != NULL);
 461
 462     //copy of the uri - used for figuring out
 463     //length of each component
 464     const wxChar* uricopy = uri;
 465
 466     // host          = IP-literal / IPv4address / reg-name
 467     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 468     if (*uri == wxT('['))
 469     {
 470         ++uri; //some compilers don't support *&ing a ++*
 471         if (ParseIPv6address(uri) && *uri == wxT(']'))
 472         {
 473             ++uri;
 474             m_hostType = wxURI_IPV6ADDRESS;
 475
 476             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 477             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 478             theBuffer.SetLength(uri-uricopy);
 479         }
 480         else
 481         {
 482             uri = uricopy;
 483
 484             ++uri; //some compilers don't support *&ing a ++*
 485             if (ParseIPvFuture(uri) && *uri == wxT(']'))
 486             {
 487                 ++uri;
 488                 m_hostType = wxURI_IPVFUTURE;
 489
 490                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 491                 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 492                 theBuffer.SetLength(uri-uricopy);
 493             }
 494             else
 495                 uri = uricopy;
 496         }
 497     }
 498     else
 499     {
 500         if (ParseIPv4address(uri))
 501         {
 502             m_hostType = wxURI_IPV4ADDRESS;
 503
 504             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 505             wxTmemcpy(theBuffer, uricopy, uri-uricopy);
 506             theBuffer.SetLength(uri-uricopy);
 507         }
 508         else
 509             uri = uricopy;
 510     }
 511
 512     if(m_hostType == wxURI_REGNAME)
 513     {
 514         uri = uricopy;
 515         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 516         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 517         {
 518             if(IsUnreserved(*uri) || IsEscape(uri) ||  IsSubDelim(*uri))
 519                 m_server += *uri++;
 520             else
 521                 Escape(m_server, *uri++);
 522         }
 523     }
 524
 525     //mark the server as valid
 526     m_fields |= wxURI_SERVER;
 527
 528     return uri;
 529 }
 530
 531
 532 const wxChar* wxURI::ParsePort(const wxChar* uri)
 533 {
 534     wxASSERT(uri != NULL);
 535
 536     // port          = *DIGIT
 537     if(*uri == wxT(':'))
 538     {
 539         ++uri;
 540         while(IsDigit(*uri))
 541         {
 542             m_port += *uri++;
 543         }
 544
 545         //mark the port as valid
 546         m_fields |= wxURI_PORT;
 547     }
 548
 549     return uri;
 550 }
 551
 552 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 553 {
 554     wxASSERT(uri != NULL);
 555
 556     //copy of the uri - used for figuring out
 557     //length of each component
 558     const wxChar* uricopy = uri;
 559
 560     /// hier-part     = "//" authority path-abempty
 561     ///               / path-absolute
 562     ///               / path-rootless
 563     ///               / path-empty
 564     ///
 565     /// relative-part = "//" authority path-abempty
 566     ///               / path-absolute
 567     ///               / path-noscheme
 568     ///               / path-empty
 569     ///
 570     /// path-abempty  = *( "/" segment )
 571     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 572     /// path-noscheme = segment-nz-nc *( "/" segment )
 573     /// path-rootless = segment-nz *( "/" segment )
 574     /// path-empty    = 0<pchar>
 575     ///
 576     /// segment       = *pchar
 577     /// segment-nz    = 1*pchar
 578     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 579     ///               ; non-zero-length segment without any colon ":"
 580     ///
 581     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 582     if (*uri == wxT('/'))
 583     {
 584         m_path += *uri++;
 585
 586         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 587         {
 588             if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 589                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 590                 m_path += *uri++;
 591             else
 592                 Escape(m_path, *uri++);
 593         }
 594
 595         if (bNormalize)
 596         {
 597             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 598 #if wxUSE_STL
 599             wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 600 #endif
 601             Normalize(theBuffer, true);
 602             theBuffer.SetLength(wxStrlen(theBuffer));
 603         }
 604         //mark the path as valid
 605         m_fields |= wxURI_PATH;
 606     }
 607     else if(*uri) //Relative path
 608     {
 609         if (bReference)
 610         {
 611             //no colon allowed
 612             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 613             {
 614                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 615                   *uri == wxT('@') || *uri == wxT('/'))
 616                     m_path += *uri++;
 617                 else
 618                     Escape(m_path, *uri++);
 619             }
 620         }
 621         else
 622         {
 623             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 624             {
 625                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 626                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 627                     m_path += *uri++;
 628                 else
 629                     Escape(m_path, *uri++);
 630             }
 631         }
 632
 633         if (uri != uricopy)
 634         {
 635             if (bNormalize)
 636             {
 637                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 638 #if wxUSE_STL
 639                 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 640 #endif
 641                 Normalize(theBuffer);
 642                 theBuffer.SetLength(wxStrlen(theBuffer));
 643             }
 644
 645             //mark the path as valid
 646             m_fields |= wxURI_PATH;
 647         }
 648     }
 649
 650     return uri;
 651 }
 652
 653
 654 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 655 {
 656     wxASSERT(uri != NULL);
 657
 658     // query         = *( pchar / "/" / "?" )
 659     if (*uri == wxT('?'))
 660     {
 661         ++uri;
 662         while(*uri && *uri != wxT('#'))
 663         {
 664             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 665                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 666                   m_query += *uri++;
 667             else
 668                   Escape(m_query, *uri++);
 669         }
 670
 671         //mark the server as valid
 672         m_fields |= wxURI_QUERY;
 673     }
 674
 675     return uri;
 676 }
 677
 678
 679 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 680 {
 681     wxASSERT(uri != NULL);
 682
 683     // fragment      = *( pchar / "/" / "?" )
 684     if (*uri == wxT('#'))
 685     {
 686         ++uri;
 687         while(*uri)
 688         {
 689             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 690                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 691                   m_fragment += *uri++;
 692             else
 693                   Escape(m_fragment, *uri++);
 694         }
 695
 696         //mark the server as valid
 697         m_fields |= wxURI_FRAGMENT;
 698     }
 699
 700     return uri;
 701 }
 702
 703 // ---------------------------------------------------------------------------
 704 // Resolve
 705 //
 706 // Builds missing components of this uri from a base uri
 707 //
 708 // A version of the algorithm outlined in the RFC is used here
 709 // (it is shown in comments)
 710 //
 711 // Note that an empty URI inherits all components
 712 // ---------------------------------------------------------------------------
 713
 714 void wxURI::Resolve(const wxURI& base, int flags)
 715 {
 716     wxASSERT_MSG(!base.IsReference(),
 717                 wxT("wxURI to inherit from must not be a reference!"));
 718
 719     // If we arn't being strict, enable the older (pre-RFC2396)
 720     // loophole that allows this uri to inherit other
 721     // properties from the base uri - even if the scheme
 722     // is defined
 723     if ( !(flags & wxURI_STRICT) &&
 724             HasScheme() && base.HasScheme() &&
 725                 m_scheme == base.m_scheme )
 726     {
 727         m_fields -= wxURI_SCHEME;
 728     }
 729
 730
 731     // Do nothing if this is an absolute wxURI
 732     //    if defined(R.scheme) then
 733     //       T.scheme    = R.scheme;
 734     //       T.authority = R.authority;
 735     //       T.path      = remove_dot_segments(R.path);
 736     //       T.query     = R.query;
 737     if (HasScheme())
 738     {
 739         return;
 740     }
 741
 742     //No sheme - inherit
 743     m_scheme = base.m_scheme;
 744     m_fields |= wxURI_SCHEME;
 745
 746     // All we need to do for relative URIs with an
 747     // authority component is just inherit the scheme
 748     //       if defined(R.authority) then
 749     //          T.authority = R.authority;
 750     //          T.path      = remove_dot_segments(R.path);
 751     //          T.query     = R.query;
 752     if (HasServer())
 753     {
 754         return;
 755     }
 756
 757     //No authority - inherit
 758     if (base.HasUser())
 759     {
 760         m_user = base.m_user;
 761         m_fields |= wxURI_USER;
 762     }
 763
 764     m_server = base.m_server;
 765     m_hostType = base.m_hostType;
 766     m_fields |= wxURI_SERVER;
 767
 768     if (base.HasPort())
 769     {
 770         m_port = base.m_port;
 771         m_fields |= wxURI_PORT;
 772     }
 773
 774
 775     // Simple path inheritance from base
 776     if (!HasPath())
 777     {
 778         //             T.path = Base.path;
 779         m_path = base.m_path;
 780         m_fields |= wxURI_PATH;
 781
 782
 783         //             if defined(R.query) then
 784         //                T.query = R.query;
 785         //             else
 786         //                T.query = Base.query;
 787         //             endif;
 788         if (!HasQuery())
 789         {
 790             m_query = base.m_query;
 791             m_fields |= wxURI_QUERY;
 792         }
 793     }
 794     else
 795     {
 796         //             if (R.path starts-with "/") then
 797         //                T.path = remove_dot_segments(R.path);
 798         //             else
 799         //                T.path = merge(Base.path, R.path);
 800         //                T.path = remove_dot_segments(T.path);
 801         //             endif;
 802         //             T.query = R.query;
 803         if (m_path[0u] != wxT('/'))
 804         {
 805             //Marge paths
 806             const wxChar* op = m_path.c_str();
 807             const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
 808
 809             //not a ending directory?  move up
 810             if (base.m_path[0] && *(bp-1) != wxT('/'))
 811                 UpTree(base.m_path, bp);
 812
 813             //normalize directories
 814             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 815                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 816             {
 817                 UpTree(base.m_path, bp);
 818
 819                 if (*(op+2) == '\0')
 820                     op += 2;
 821                 else
 822                     op += 3;
 823             }
 824
 825             m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
 826                     m_path.substr((op - m_path.c_str()), m_path.Length());
 827         }
 828     }
 829
 830     //T.fragment = R.fragment;
 831 }
 832
 833 // ---------------------------------------------------------------------------
 834 // UpTree
 835 //
 836 // Moves a URI path up a directory
 837 // ---------------------------------------------------------------------------
 838
 839 //static
 840 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 841 {
 842     if (uri != uristart && *(uri-1) == wxT('/'))
 843     {
 844         uri -= 2;
 845     }
 846
 847     for(;uri != uristart; --uri)
 848     {
 849         if (*uri == wxT('/'))
 850         {
 851             ++uri;
 852             break;
 853         }
 854     }
 855
 856     //!!!TODO:HACK!!!//
 857     if (uri == uristart && *uri == wxT('/'))
 858         ++uri;
 859     //!!!//
 860 }
 861
 862 // ---------------------------------------------------------------------------
 863 // Normalize
 864 //
 865 // Normalizes directories in-place
 866 //
 867 // I.E. ./ and . are ignored
 868 //
 869 // ../ and .. are removed if a directory is before it, along
 870 // with that directory (leading .. and ../ are kept)
 871 // ---------------------------------------------------------------------------
 872
 873 //static
 874 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 875 {
 876     wxChar* cp = s;
 877     wxChar* bp = s;
 878
 879     if(s[0] == wxT('/'))
 880         ++bp;
 881
 882     while(*cp)
 883     {
 884         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 885             && (bp == cp || *(cp-1) == wxT('/')))
 886         {
 887             //. _or_ ./  - ignore
 888             if (*(cp+1) == '\0')
 889                 cp += 1;
 890             else
 891                 cp += 2;
 892         }
 893         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 894                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 895                 && (bp == cp || *(cp-1) == wxT('/')))
 896         {
 897             //.. _or_ ../ - go up the tree
 898             if (s != bp)
 899             {
 900                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 901
 902                 if (*(cp+2) == '\0')
 903                     cp += 2;
 904                 else
 905                     cp += 3;
 906             }
 907             else if (!bIgnoreLeads)
 908
 909             {
 910                 *bp++ = *cp++;
 911                 *bp++ = *cp++;
 912                 if (*cp)
 913                     *bp++ = *cp++;
 914
 915                 s = bp;
 916             }
 917             else
 918             {
 919                 if (*(cp+2) == '\0')
 920                     cp += 2;
 921                 else
 922                     cp += 3;
 923             }
 924         }
 925         else
 926             *s++ = *cp++;
 927     }
 928
 929     *s = '\0';
 930 }
 931
 932 // ---------------------------------------------------------------------------
 933 // ParseH16
 934 //
 935 // Parses 1 to 4 hex digits, beginning with the character that follows the
 936 // current input position.  Returns true if that character is a hex digit.  It
 937 // is the caller's responsibility to restore the input position on failure.
 938 // ---------------------------------------------------------------------------
 939
 940 bool wxURI::ParseH16(const wxChar*& uri)
 941 {
 942     // h16           = 1*4HEXDIG
 943     if(!IsHex(*++uri))
 944         return false;
 945
 946     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
 947         ++uri;
 948
 949     return true;
 950 }
 951
 952 // ---------------------------------------------------------------------------
 953 // ParseIPXXX
 954 //
 955 // Parses a certain version of an IP address and moves the input string past
 956 // it.  Returns true if the input string contains the proper version of an IP
 957 // address.  It is the caller's responsibility to move the input string back
 958 // to its original position on failure.
 959 // ---------------------------------------------------------------------------
 960
 961 bool wxURI::ParseIPv4address(const wxChar*& uri)
 962 {
 963     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 964     //
 965     //dec-octet     =      DIGIT                    ; 0-9
 966     //                / %x31-39 DIGIT               ; 10-99
 967     //                / "1" 2DIGIT                  ; 100-199
 968     //                / "2" %x30-34 DIGIT           ; 200-249
 969     //                / "25" %x30-35                ; 250-255
 970     size_t iIPv4 = 0;
 971     if (IsDigit(*uri))
 972     {
 973         ++iIPv4;
 974
 975
 976         //each ip part must be between 0-255 (dupe of version in for loop)
 977         if( IsDigit(*++uri) && IsDigit(*++uri) &&
 978            //100 or less  (note !)
 979            !( (*(uri-2) < wxT('2')) ||
 980            //240 or less
 981              (*(uri-2) == wxT('2') &&
 982                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
 983              )
 984             )
 985           )
 986         {
 987             return false;
 988         }
 989
 990         if(IsDigit(*uri))++uri;
 991
 992         //compilers should unroll this loop
 993         for(; iIPv4 < 4; ++iIPv4)
 994         {
 995             if (*uri != wxT('.') || !IsDigit(*++uri))
 996                 break;
 997
 998             //each ip part must be between 0-255
 999             if( IsDigit(*++uri) && IsDigit(*++uri) &&
1000                //100 or less  (note !)
1001                !( (*(uri-2) < wxT('2')) ||
1002                //240 or less
1003                  (*(uri-2) == wxT('2') &&
1004                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1005                  )
1006                 )
1007               )
1008             {
1009                 return false;
1010             }
1011             if(IsDigit(*uri))++uri;
1012         }
1013     }
1014     return iIPv4 == 4;
1015 }
1016
1017 bool wxURI::ParseIPv6address(const wxChar*& uri)
1018 {
1019     // IPv6address   =                            6( h16 ":" ) ls32
1020     //               /                       "::" 5( h16 ":" ) ls32
1021     //               / [               h16 ] "::" 4( h16 ":" ) ls32
1022     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1023     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1024     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1025     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
1026     //               / [ *5( h16 ":" ) h16 ] "::"              h16
1027     //               / [ *6( h16 ":" ) h16 ] "::"
1028
1029     size_t numPrefix = 0,
1030               maxPostfix;
1031
1032     bool bEndHex = false;
1033
1034     for( ; numPrefix < 6; ++numPrefix)
1035     {
1036         if(!ParseH16(uri))
1037         {
1038             --uri;
1039             bEndHex = true;
1040             break;
1041         }
1042
1043         if(*uri != wxT(':'))
1044         {
1045             break;
1046         }
1047     }
1048
1049     if(!bEndHex && !ParseH16(uri))
1050     {
1051         --uri;
1052
1053         if (numPrefix)
1054             return false;
1055
1056         if (*uri == wxT(':'))
1057         {
1058             if (*++uri != wxT(':'))
1059                 return false;
1060
1061             maxPostfix = 5;
1062         }
1063         else
1064             maxPostfix = 6;
1065     }
1066     else
1067     {
1068         if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1069         {
1070             if (numPrefix != 6)
1071                 return false;
1072
1073             while (*--uri != wxT(':')) {}
1074             ++uri;
1075
1076             const wxChar* uristart = uri;
1077             //parse ls32
1078             // ls32          = ( h16 ":" h16 ) / IPv4address
1079             if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1080                 return true;
1081
1082             uri = uristart;
1083
1084             if (ParseIPv4address(uri))
1085                 return true;
1086             else
1087                 return false;
1088         }
1089         else
1090         {
1091             uri += 2;
1092
1093             if (numPrefix > 3)
1094                 maxPostfix = 0;
1095             else
1096                 maxPostfix = 4 - numPrefix;
1097         }
1098     }
1099
1100     bool bAllowAltEnding = maxPostfix == 0;
1101
1102     for(; maxPostfix != 0; --maxPostfix)
1103     {
1104         if(!ParseH16(uri) || *uri != wxT(':'))
1105             return false;
1106     }
1107
1108     if(numPrefix <= 4)
1109     {
1110         const wxChar* uristart = uri;
1111         //parse ls32
1112         // ls32          = ( h16 ":" h16 ) / IPv4address
1113         if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1114             return true;
1115
1116         uri = uristart;
1117
1118         if (ParseIPv4address(uri))
1119             return true;
1120
1121         uri = uristart;
1122
1123         if (!bAllowAltEnding)
1124             return false;
1125     }
1126
1127     if(numPrefix <= 5 && ParseH16(uri))
1128         return true;
1129
1130     return true;
1131 }
1132
1133 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1134 {
1135     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1136     if (*++uri != wxT('v') || !IsHex(*++uri))
1137         return false;
1138
1139     while (IsHex(*++uri)) {}
1140
1141     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1142         return false;
1143
1144     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1145
1146     return true;
1147 }
1148
1149
1150 // ---------------------------------------------------------------------------
1151 // CharToHex
1152 //
1153 // Converts a character into a numeric hexidecimal value, or 0 if the
1154 // passed in character is not a valid hex character
1155 // ---------------------------------------------------------------------------
1156
1157 //static
1158 wxChar wxURI::CharToHex(const wxChar& c)
1159 {
1160     if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1161     if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1162     if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
1163
1164     return 0;
1165 }
1166
1167 // ---------------------------------------------------------------------------
1168 // IsXXX
1169 //
1170 // Returns true if the passed in character meets the criteria of the method
1171 // ---------------------------------------------------------------------------
1172
1173 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1174 bool wxURI::IsUnreserved (const wxChar& c)
1175 {   return IsAlpha(c) || IsDigit(c) ||
1176            c == wxT('-') ||
1177            c == wxT('.') ||
1178            c == wxT('_') ||
1179            c == wxT('~') //tilde
1180            ;
1181 }
1182
1183 bool wxURI::IsReserved (const wxChar& c)
1184 {
1185     return IsGenDelim(c) || IsSubDelim(c);
1186 }
1187
1188 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1189 bool wxURI::IsGenDelim (const wxChar& c)
1190 {
1191     return c == wxT(':') ||
1192            c == wxT('/') ||
1193            c == wxT('?') ||
1194            c == wxT('#') ||
1195            c == wxT('[') ||
1196            c == wxT(']') ||
1197            c == wxT('@');
1198 }
1199
1200 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1201 //!               / "*" / "+" / "," / ";" / "="
1202 bool wxURI::IsSubDelim (const wxChar& c)
1203 {
1204     return c == wxT('!') ||
1205            c == wxT('$') ||
1206            c == wxT('&') ||
1207            c == wxT('\'') ||
1208            c == wxT('(') ||
1209            c == wxT(')') ||
1210            c == wxT('*') ||
1211            c == wxT('+') ||
1212            c == wxT(',') ||
1213            c == wxT(';') ||
1214            c == wxT('=')
1215            ;
1216 }
1217
1218 bool wxURI::IsHex(const wxChar& c)
1219 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1220
1221 bool wxURI::IsAlpha(const wxChar& c)
1222 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1223
1224 bool wxURI::IsDigit(const wxChar& c)
1225 {   return c >= wxT('0') && c <= wxT('9');        }
1226
1227
1228 // ---------------------------------------------------------------------------
1229 //
 1230 //                        wxURL Compatibility
1231 //
1232 // ---------------------------------------------------------------------------
1233
1234 #if wxUSE_URL
1235
1236 #if WXWIN_COMPATIBILITY_2_4
1237
1238 #include "wx/url.h"
1239
1240 //Note that this old code really doesn't convert to a URI that well and looks
1241 //more like a dirty hack than anything else...
1242
1243 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1244 {
1245   wxString out_str;
1246   wxString hexa_code;
1247   size_t i;
1248
1249   for (i = 0; i < uri.Len(); i++)
1250   {
1251     wxChar c = uri.GetChar(i);
1252
1253     if (c == wxT(' '))
1254     {
1255       // GRG, Apr/2000: changed to "%20" instead of '+'
1256
1257       out_str += wxT("%20");
1258     }
1259     else
1260     {
1261       // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1262       //
1263       // - Alphanumeric characters are never escaped
1264       // - Unreserved marks are never escaped
1265       // - Delimiters must be escaped if they appear within a component
1266       //     but not if they are used to separate components. Here we have
1267       //     no clear way to distinguish between these two cases, so they
1268       //     are escaped unless they are passed in the 'delims' parameter
1269       //     (allowed delimiters).
1270
1271       static const wxChar marks[] = wxT("-_.!~*()'");
1272
1273       if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1274       {
1275         hexa_code.Printf(wxT("%%%02X"), c);
1276         out_str += hexa_code;
1277       }
1278       else
1279       {
1280         out_str += c;
1281       }
1282     }
1283   }
1284
1285   return out_str;
1286 }
1287
1288 wxString wxURL::ConvertFromURI(const wxString& uri)
1289 {
1290     return wxURI::Unescape(uri);
1291 }
1292
1293 #endif //WXWIN_COMPATIBILITY_2_4
1294
1295 #endif //wxUSE_URL
1296
1297 //end of uri.cpp
1298
1299
1300