src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  20     #pragma implementation "uri.h"
  21 #endif
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27     #pragma hdrstop
  28 #endif
  29
  30 #include "wx/uri.h"
  31
  32 // ---------------------------------------------------------------------------
  33 // definitions
  34 // ---------------------------------------------------------------------------
  35
   36 IMPLEMENT_CLASS(wxURI, wxObject); // wxWidgets RTTI macro: names wxURI and its base class wxObject
  37
  38 // ===========================================================================
  39 // implementation
  40 // ===========================================================================
  41
  42 // ---------------------------------------------------------------------------
  43 // utilities
  44 // ---------------------------------------------------------------------------
  45
  46 // ---------------------------------------------------------------------------
  47 //
  48 //                        wxURI
  49 //
  50 // ---------------------------------------------------------------------------
  51
  52 // ---------------------------------------------------------------------------
  53 //  Constructors
  54 // ---------------------------------------------------------------------------
  55
  56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58 }
  59
  60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  61 {
  62     Create(uri);
  63 }
  64
  65 wxURI::wxURI(const wxURI& uri)  : m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     Assign(uri);
  68 }
  69
  70 // ---------------------------------------------------------------------------
  71 // Destructor and cleanup
  72 // ---------------------------------------------------------------------------
  73
  74 wxURI::~wxURI()
  75 {
  76     Clear();
  77 }
  78
  79 void wxURI::Clear()
  80 {
  81     m_scheme = m_user = m_server = m_port = m_path =
  82     m_query = m_fragment = wxT("");
  83
  84     m_hostType = wxURI_REGNAME;
  85
  86     m_fields = 0;
  87 }
  88
  89 // ---------------------------------------------------------------------------
  90 // Create
  91 //
  92 // This creates the URI - all we do here is call the main parsing method
  93 // ---------------------------------------------------------------------------
  94
  95 const wxChar* wxURI::Create(const wxString& uri)
  96 {
  97     if (m_fields)
  98         Clear();
  99
 100     return Parse(uri);
 101 }
 102
 103 // ---------------------------------------------------------------------------
 104 // Escape Methods
 105 //
 106 // TranslateEscape unencodes a 3 character URL escape sequence
 107 //
 108 // Escape encodes an invalid URI character into a 3 character sequence
 109 //
 110 // IsEscape determines if the input string contains an escape sequence,
 111 // if it does, then it moves the input string past the escape sequence
 112 //
 113 // Unescape unencodes all 3 character URL escape sequences in a wxString
 114 // ---------------------------------------------------------------------------
 115
 116 wxChar wxURI::TranslateEscape(const wxChar* s)
 117 {
 118     wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
 119
 120     return (wxChar)( CharToHex(*s) * 0x10 + CharToHex(*++s) );
 121 }
 122
 123 wxString wxURI::Unescape(const wxString& uri)
 124 {
 125     wxString new_uri;
 126
 127     for(size_t i = 0; i < uri.length(); ++i)
 128     {
 129         if (uri[i] == wxT('%'))
 130         {
 131             new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
 132             i += 2;
 133         }
 134     }
 135
 136     return new_uri;
 137 }
 138
 139 void wxURI::Escape(wxString& s, const wxChar& c)
 140 {
 141     const wxChar* hdig = wxT("0123456789abcdef");
 142     s += wxT('%');
 143     s += hdig[(c >> 4) & 15];
 144     s += hdig[c & 15];
 145 }
 146
 147 bool wxURI::IsEscape(const wxChar*& uri)
 148 {
 149     // pct-encoded   = "%" HEXDIG HEXDIG
 150     if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 151     {
 152         uri += 3;
 153         return true;
 154     }
 155     else
 156         return false;
 157 }
 158
 159 // ---------------------------------------------------------------------------
 160 // BuildURI
 161 //
 162 // BuildURI() builds the entire URI into a useable
 163 // representation, including proper identification characters such as slashes
 164 //
 165 // BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
 166 // the components that accept escape sequences
 167 // ---------------------------------------------------------------------------
 168
 169 wxString wxURI::BuildURI() const
 170 {
 171     wxString ret;
 172
 173     if (HasScheme())
 174         ret = ret + m_scheme + wxT(":");
 175
 176     if (HasServer())
 177     {
 178         ret += wxT("//");
 179
 180         if (HasUser())
 181             ret = ret + m_user + wxT("@");
 182
 183         ret += m_server;
 184
 185         if (HasPort())
 186             ret = ret + wxT(":") + m_port;
 187     }
 188
 189     ret += m_path;
 190
 191     if (HasQuery())
 192         ret = ret + wxT("?") + m_query;
 193
 194     if (HasFragment())
 195         ret = ret + wxT("#") + m_fragment;
 196
 197     return ret;
 198 }
 199
 200 wxString wxURI::BuildUnescapedURI() const
 201 {
 202     wxString ret;
 203
 204     if (HasScheme())
 205         ret = ret + m_scheme + wxT(":");
 206
 207     if (HasServer())
 208     {
 209         ret += wxT("//");
 210
 211         if (HasUser())
 212             ret = ret + wxURI::Unescape(m_user) + wxT("@");
 213
 214         if (m_hostType == wxURI_REGNAME)
 215             ret += wxURI::Unescape(m_server);
 216         else
 217             ret += m_server;
 218
 219         if (HasPort())
 220             ret = ret + wxT(":") + m_port;
 221     }
 222
 223     ret += wxURI::Unescape(m_path);
 224
 225     if (HasQuery())
 226         ret = ret + wxT("?") + wxURI::Unescape(m_query);
 227
 228     if (HasFragment())
 229         ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
 230
 231     return ret;
 232 }
 233
 234 // ---------------------------------------------------------------------------
 235 // Assignment
 236 // ---------------------------------------------------------------------------
 237
 238 wxURI& wxURI::Assign(const wxURI& uri)
 239 {
 240     //assign fields
 241     m_fields = uri.m_fields;
 242
 243     //ref over components
 244     m_scheme = uri.m_scheme;
 245     m_user = uri.m_user;
 246     m_server = uri.m_server;
 247     m_hostType = uri.m_hostType;
 248     m_port = uri.m_port;
 249     m_path = uri.m_path;
 250     m_query = uri.m_query;
 251     m_fragment = uri.m_fragment;
 252
 253     return *this;
 254 }
 255
 256 wxURI& wxURI::operator = (const wxURI& uri)
 257 {
 258     return Assign(uri);
 259 }
 260
 261 wxURI& wxURI::operator = (const wxString& string)
 262 {
 263     Create(string);
 264     return *this;
 265 }
 266
 267 // ---------------------------------------------------------------------------
 268 // Comparison
 269 // ---------------------------------------------------------------------------
 270
 271 bool wxURI::operator == (const wxURI& uri) const
 272 {
 273     if (HasScheme())
 274     {
 275         if(m_scheme != uri.m_scheme)
 276             return false;
 277     }
 278     else if (uri.HasScheme())
 279         return false;
 280
 281
 282     if (HasServer())
 283     {
 284         if (HasUser())
 285         {
 286             if (m_user != uri.m_user)
 287                 return false;
 288         }
 289         else if (uri.HasUser())
 290             return false;
 291
 292         if (m_server != uri.m_server ||
 293             m_hostType != uri.m_hostType)
 294             return false;
 295
 296         if (HasPort())
 297         {
 298             if(m_port != uri.m_port)
 299                 return false;
 300         }
 301         else if (uri.HasPort())
 302             return false;
 303     }
 304     else if (uri.HasServer())
 305         return false;
 306
 307
 308     if (HasPath())
 309     {
 310         if(m_path != uri.m_path)
 311             return false;
 312     }
 313     else if (uri.HasPath())
 314         return false;
 315
 316     if (HasQuery())
 317     {
 318         if (m_query != uri.m_query)
 319             return false;
 320     }
 321     else if (uri.HasQuery())
 322         return false;
 323
 324     if (HasFragment())
 325     {
 326         if (m_fragment != uri.m_fragment)
 327             return false;
 328     }
 329     else if (uri.HasFragment())
 330         return false;
 331
 332     return true;
 333 }
 334
 335 // ---------------------------------------------------------------------------
 336 // IsReference
 337 //
 338 // if there is no authority or scheme, it is a reference
 339 // ---------------------------------------------------------------------------
 340
 341 bool wxURI::IsReference() const
 342 {   return !HasScheme() || !HasServer();  }
 343
 344 // ---------------------------------------------------------------------------
 345 // Parse
 346 //
 347 // Master URI parsing method.  Just calls the individual parsing methods
 348 //
 349 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
  350 // URI-reference = URI / relative-ref
 351 // ---------------------------------------------------------------------------
 352
 353 const wxChar* wxURI::Parse(const wxChar* uri)
 354 {
 355     uri = ParseScheme(uri);
 356     uri = ParseAuthority(uri);
 357     uri = ParsePath(uri);
 358     uri = ParseQuery(uri);
 359     return ParseFragment(uri);
 360 }
 361
 362 // ---------------------------------------------------------------------------
 363 // ParseXXX
 364 //
 365 // Individual parsers for each URI component
 366 // ---------------------------------------------------------------------------
 367
 368 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 369 {
 370     wxASSERT(uri != NULL);
 371
 372     //copy of the uri - used for figuring out
 373     //length of each component
 374     const wxChar* uricopy = uri;
 375
 376     //Does the uri have a scheme (first character alpha)?
 377     if (IsAlpha(*uri))
 378     {
 379         m_scheme += *uri++;
 380
 381         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 382         while (IsAlpha(*uri) || IsDigit(*uri) ||
 383                *uri == wxT('+')   ||
 384                *uri == wxT('-')   ||
 385                *uri == wxT('.'))
 386         {
 387             m_scheme += *uri++;
 388         }
 389
 390         //valid scheme?
 391         if (*uri == wxT(':'))
 392         {
 393             //mark the scheme as valid
 394             m_fields |= wxURI_SCHEME;
 395
 396             //move reference point up to input buffer
 397             uricopy = ++uri;
 398         }
 399         else
 400             //relative uri with relative path reference
 401             m_scheme = wxT("");
 402     }
 403 //    else
 404         //relative uri with _possible_ relative path reference
 405
 406     return uricopy;
 407 }
 408
 409 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 410 {
 411     // authority     = [ userinfo "@" ] host [ ":" port ]
 412     if (*uri == wxT('/') && *(uri+1) == wxT('/'))
 413     {
 414         uri += 2;
 415
 416         uri = ParseUser(uri);
 417         uri = ParseServer(uri);
 418         return ParsePort(uri);
 419     }
 420
 421     return uri;
 422 }
 423
 424 const wxChar* wxURI::ParseUser(const wxChar* uri)
 425 {
 426     wxASSERT(uri != NULL);
 427
 428     //copy of the uri - used for figuring out
 429     //length of each component
 430     const wxChar* uricopy = uri;
 431
 432     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 433     while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
 434     {
 435         if(IsUnreserved(*uri) || IsEscape(uri) ||
 436            IsSubDelim(*uri) || *uri == wxT(':'))
 437             m_user += *uri++;
 438         else
 439             Escape(m_user, *uri++);
 440     }
 441
 442     if(*uri == wxT('@'))
 443     {
 444         //valid userinfo
 445         m_fields |= wxURI_USER;
 446
 447         uricopy = ++uri;
 448     }
 449     else
 450         m_user = wxT("");
 451
 452     return uricopy;
 453 }
 454
 455 const wxChar* wxURI::ParseServer(const wxChar* uri)
 456 {
 457     wxASSERT(uri != NULL);
 458
 459     //copy of the uri - used for figuring out
 460     //length of each component
 461     const wxChar* uricopy = uri;
 462
 463     // host          = IP-literal / IPv4address / reg-name
 464     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 465     if (*uri == wxT('['))
 466     {
 467         if (ParseIPv6address(++uri) && *uri == wxT(']'))
 468         {
 469             ++uri;
 470             m_hostType = wxURI_IPV6ADDRESS;
 471
 472             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 473             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 474             theBuffer.SetLength(uri-uricopy);
 475         }
 476         else
 477         {
 478             uri = uricopy;
 479
 480             if (ParseIPvFuture(++uri) && *uri == wxT(']'))
 481             {
 482                 ++uri;
 483                 m_hostType = wxURI_IPVFUTURE;
 484
 485                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 486                 wxMemcpy(theBuffer, uricopy, uri-uricopy);
 487                 theBuffer.SetLength(uri-uricopy);
 488             }
 489             else
 490                 uri = uricopy;
 491         }
 492     }
 493     else
 494     {
 495         if (ParseIPv4address(uri))
 496         {
 497             m_hostType = wxURI_IPV4ADDRESS;
 498
 499             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 500             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 501             theBuffer.SetLength(uri-uricopy);
 502         }
 503         else
 504             uri = uricopy;
 505     }
 506
 507     if(m_hostType == wxURI_REGNAME)
 508     {
 509         uri = uricopy;
 510         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 511         while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
 512         {
 513             if(IsUnreserved(*uri) || IsEscape(uri) ||  IsSubDelim(*uri))
 514                 m_server += *uri++;
 515             else
 516                 Escape(m_server, *uri++);
 517         }
 518     }
 519
 520     //mark the server as valid
 521     m_fields |= wxURI_SERVER;
 522
 523     return uri;
 524 }
 525
 526
 527 const wxChar* wxURI::ParsePort(const wxChar* uri)
 528 {
 529     wxASSERT(uri != NULL);
 530
 531     // port          = *DIGIT
 532     if(*uri == wxT(':'))
 533     {
 534         ++uri;
 535         while(IsDigit(*uri))
 536         {
 537             m_port += *uri++;
 538         }
 539
 540         //mark the port as valid
 541         m_fields |= wxURI_PORT;
 542     }
 543
 544     return uri;
 545 }
 546
 547 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 548 {
 549     wxASSERT(uri != NULL);
 550
 551     //copy of the uri - used for figuring out
 552     //length of each component
 553     const wxChar* uricopy = uri;
 554
 555     /// hier-part     = "//" authority path-abempty
 556     ///               / path-absolute
 557     ///               / path-rootless
 558     ///               / path-empty
 559     ///
 560     /// relative-part = "//" authority path-abempty
 561     ///               / path-absolute
 562     ///               / path-noscheme
 563     ///               / path-empty
 564     ///
 565     /// path-abempty  = *( "/" segment )
 566     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 567     /// path-noscheme = segment-nz-nc *( "/" segment )
 568     /// path-rootless = segment-nz *( "/" segment )
 569     /// path-empty    = 0<pchar>
 570     ///
 571     /// segment       = *pchar
 572     /// segment-nz    = 1*pchar
 573     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 574     ///               ; non-zero-length segment without any colon ":"
 575     ///
 576     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 577     if (*uri == wxT('/'))
 578     {
 579         m_path += *uri++;
 580
 581         while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 582         {
 583             if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 584                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 585                 m_path += *uri++;
 586             else
 587                 Escape(m_path, *uri++);
 588         }
 589
 590         if (bNormalize)
 591         {
 592             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 593 #if wxUSE_STL
 594             wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 595 #endif
 596             Normalize(theBuffer, true);
 597             theBuffer.SetLength(wxStrlen(theBuffer));
 598         }
 599         //mark the path as valid
 600         m_fields |= wxURI_PATH;
 601     }
 602     else if(*uri) //Relative path
 603     {
 604         if (bReference)
 605         {
 606             //no colon allowed
 607             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 608             {
 609                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 610                   *uri == wxT('@') || *uri == wxT('/'))
 611                     m_path += *uri++;
 612                 else
 613                     Escape(m_path, *uri++);
 614             }
 615         }
 616         else
 617         {
 618             while(*uri && *uri != wxT('#') && *uri != wxT('?'))
 619             {
 620                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 621                    *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
 622                     m_path += *uri++;
 623                 else
 624                     Escape(m_path, *uri++);
 625             }
 626         }
 627
 628         if (uri != uricopy)
 629         {
 630             if (bNormalize)
 631             {
 632                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 633 #if wxUSE_STL
 634                 wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
 635 #endif
 636                 Normalize(theBuffer);
 637                 theBuffer.SetLength(wxStrlen(theBuffer));
 638             }
 639
 640             //mark the path as valid
 641             m_fields |= wxURI_PATH;
 642         }
 643     }
 644
 645     return uri;
 646 }
 647
 648
 649 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 650 {
 651     wxASSERT(uri != NULL);
 652
 653     // query         = *( pchar / "/" / "?" )
 654     if (*uri == wxT('?'))
 655     {
 656         ++uri;
 657         while(*uri && *uri != wxT('#'))
 658         {
 659             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 660                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 661                   m_query += *uri++;
 662             else
 663                   Escape(m_query, *uri++);
 664         }
 665
 666         //mark the server as valid
 667         m_fields |= wxURI_QUERY;
 668     }
 669
 670     return uri;
 671 }
 672
 673
 674 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 675 {
 676     wxASSERT(uri != NULL);
 677
 678     // fragment      = *( pchar / "/" / "?" )
 679     if (*uri == wxT('#'))
 680     {
 681         ++uri;
 682         while(*uri)
 683         {
 684             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 685                 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
 686                   m_fragment += *uri++;
 687             else
 688                   Escape(m_fragment, *uri++);
 689         }
 690
 691         //mark the server as valid
 692         m_fields |= wxURI_FRAGMENT;
 693     }
 694
 695     return uri;
 696 }
 697
 698 // ---------------------------------------------------------------------------
 699 // Resolve
 700 //
 701 // Builds missing components of this uri from a base uri
 702 //
 703 // A version of the algorithm outlined in the RFC is used here
 704 // (it is shown in comments)
 705 //
 706 // Note that an empty URI inherits all components
 707 // ---------------------------------------------------------------------------
 708
 709 void wxURI::Resolve(const wxURI& base, int flags)
 710 {
 711     wxASSERT_MSG(!base.IsReference(),
 712                 wxT("wxURI to inherit from must not be a reference!"));
 713
 714     // If we arn't being strict, enable the older (pre-RFC2396)
 715     // loophole that allows this uri to inherit other
 716     // properties from the base uri - even if the scheme
 717     // is defined
 718     if ( !(flags & wxURI_STRICT) &&
 719             HasScheme() && base.HasScheme() &&
 720                 m_scheme == base.m_scheme )
 721     {
 722         m_fields -= wxURI_SCHEME;
 723     }
 724
 725
 726     // Do nothing if this is an absolute wxURI
 727     //    if defined(R.scheme) then
 728     //       T.scheme    = R.scheme;
 729     //       T.authority = R.authority;
 730     //       T.path      = remove_dot_segments(R.path);
 731     //       T.query     = R.query;
 732     if (HasScheme())
 733     {
 734         return;
 735     }
 736
 737     //No sheme - inherit
 738     m_scheme = base.m_scheme;
 739     m_fields |= wxURI_SCHEME;
 740
 741     // All we need to do for relative URIs with an
 742     // authority component is just inherit the scheme
 743     //       if defined(R.authority) then
 744     //          T.authority = R.authority;
 745     //          T.path      = remove_dot_segments(R.path);
 746     //          T.query     = R.query;
 747     if (HasServer())
 748     {
 749         return;
 750     }
 751
 752     //No authority - inherit
 753     if (base.HasUser())
 754     {
 755         m_user = base.m_user;
 756         m_fields |= wxURI_USER;
 757     }
 758
 759     m_server = base.m_server;
 760     m_hostType = base.m_hostType;
 761     m_fields |= wxURI_SERVER;
 762
 763     if (base.HasPort())
 764     {
 765         m_port = base.m_port;
 766         m_fields |= wxURI_PORT;
 767     }
 768
 769
 770     // Simple path inheritance from base
 771     if (!HasPath())
 772     {
 773         //             T.path = Base.path;
 774         m_path = base.m_path;
 775         m_fields |= wxURI_PATH;
 776
 777
 778         //             if defined(R.query) then
 779         //                T.query = R.query;
 780         //             else
 781         //                T.query = Base.query;
 782         //             endif;
 783         if (!HasQuery())
 784         {
 785             m_query = base.m_query;
 786             m_fields |= wxURI_QUERY;
 787         }
 788     }
 789     else
 790     {
 791         //             if (R.path starts-with "/") then
 792         //                T.path = remove_dot_segments(R.path);
 793         //             else
 794         //                T.path = merge(Base.path, R.path);
 795         //                T.path = remove_dot_segments(T.path);
 796         //             endif;
 797         //             T.query = R.query;
 798         if (m_path[0u] != wxT('/'))
 799         {
 800             //Marge paths
 801             const wxChar* op = m_path.c_str();
 802             const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
 803
 804             //not a ending directory?  move up
 805             if (base.m_path[0] && *(bp-1) != wxT('/'))
 806                 UpTree(base.m_path, bp);
 807
 808             //normalize directories
 809             while(*op == wxT('.') && *(op+1) == wxT('.') &&
 810                        (*(op+2) == '\0' || *(op+2) == wxT('/')) )
 811             {
 812                 UpTree(base.m_path, bp);
 813
 814                 if (*(op+2) == '\0')
 815                     op += 2;
 816                 else
 817                     op += 3;
 818             }
 819
 820             m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
 821                     m_path.substr((op - m_path.c_str()), m_path.Length());
 822         }
 823     }
 824
 825     //T.fragment = R.fragment;
 826 }
 827
 828 // ---------------------------------------------------------------------------
 829 // UpTree
 830 //
 831 // Moves a URI path up a directory
 832 // ---------------------------------------------------------------------------
 833
 834 //static
 835 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 836 {
 837     if (uri != uristart && *(uri-1) == wxT('/'))
 838     {
 839         uri -= 2;
 840     }
 841
 842     for(;uri != uristart; --uri)
 843     {
 844         if (*uri == wxT('/'))
 845         {
 846             ++uri;
 847             break;
 848         }
 849     }
 850
 851     //!!!TODO:HACK!!!//
 852     if (uri == uristart && *uri == wxT('/'))
 853         ++uri;
 854     //!!!//
 855 }
 856
 857 // ---------------------------------------------------------------------------
 858 // Normalize
 859 //
 860 // Normalizes directories in-place
 861 //
 862 // I.E. ./ and . are ignored
 863 //
 864 // ../ and .. are removed if a directory is before it, along
 865 // with that directory (leading .. and ../ are kept)
 866 // ---------------------------------------------------------------------------
 867
 868 //static
 869 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 870 {
 871     wxChar* cp = s;
 872     wxChar* bp = s;
 873
 874     if(s[0] == wxT('/'))
 875         ++bp;
 876
 877     while(*cp)
 878     {
 879         if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
 880             && (bp == cp || *(cp-1) == wxT('/')))
 881         {
 882             //. _or_ ./  - ignore
 883             if (*(cp+1) == '\0')
 884                 cp += 1;
 885             else
 886                 cp += 2;
 887         }
 888         else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
 889                 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
 890                 && (bp == cp || *(cp-1) == wxT('/')))
 891         {
 892             //.. _or_ ../ - go up the tree
 893             if (s != bp)
 894             {
 895                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 896
 897                 if (*(cp+2) == '\0')
 898                     cp += 2;
 899                 else
 900                     cp += 3;
 901             }
 902             else if (!bIgnoreLeads)
 903
 904             {
 905                 *bp++ = *cp++;
 906                 *bp++ = *cp++;
 907                 if (*cp)
 908                     *bp++ = *cp++;
 909
 910                 s = bp;
 911             }
 912             else
 913             {
 914                 if (*(cp+2) == '\0')
 915                     cp += 2;
 916                 else
 917                     cp += 3;
 918             }
 919         }
 920         else
 921             *s++ = *cp++;
 922     }
 923
 924     *s = '\0';
 925 }
 926
 927 // ---------------------------------------------------------------------------
 928 // ParseH16
 929 //
 930 // Parses 1 to 4 hex values.  Returns true if the first character of the input
  931 // string is a valid hex character.  It is the caller's responsibility to move
 932 // the input string back to its original position on failure.
 933 // ---------------------------------------------------------------------------
 934
 935 bool wxURI::ParseH16(const wxChar*& uri)
 936 {
 937     // h16           = 1*4HEXDIG
 938     if(!IsHex(*++uri))
 939         return false;
 940
 941     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
 942         ++uri;
 943
 944     return true;
 945 }
 946
 947 // ---------------------------------------------------------------------------
 948 // ParseIPXXX
 949 //
 950 // Parses a certain version of an IP address and moves the input string past
 951 // it.  Returns true if the input  string contains the proper version of an ip
  952 // address.  It is the caller's responsibility to move the input string back
 953 // to its original position on failure.
 954 // ---------------------------------------------------------------------------
 955
 956 bool wxURI::ParseIPv4address(const wxChar*& uri)
 957 {
 958     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 959     //
 960     //dec-octet     =      DIGIT                    ; 0-9
 961     //                / %x31-39 DIGIT               ; 10-99
 962     //                / "1" 2DIGIT                  ; 100-199
 963     //                / "2" %x30-34 DIGIT           ; 200-249
 964     //                / "25" %x30-35                ; 250-255
 965     size_t iIPv4 = 0;
 966     if (IsDigit(*uri))
 967     {
 968         ++iIPv4;
 969
 970
 971         //each ip part must be between 0-255 (dupe of version in for loop)
 972         if( IsDigit(*++uri) && IsDigit(*++uri) &&
 973            //100 or less  (note !)
 974            !( (*(uri-2) < wxT('2')) ||
 975            //240 or less
 976              (*(uri-2) == wxT('2') &&
 977                (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
 978              )
 979             )
 980           )
 981         {
 982             return false;
 983         }
 984
 985         if(IsDigit(*uri))++uri;
 986
 987         //compilers should unroll this loop
 988         for(; iIPv4 < 4; ++iIPv4)
 989         {
 990             if (*uri != wxT('.') || !IsDigit(*++uri))
 991                 break;
 992
 993             //each ip part must be between 0-255
 994             if( IsDigit(*++uri) && IsDigit(*++uri) &&
 995                //100 or less  (note !)
 996                !( (*(uri-2) < wxT('2')) ||
 997                //240 or less
 998                  (*(uri-2) == wxT('2') &&
 999                    (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
1000                  )
1001                 )
1002               )
1003             {
1004                 return false;
1005             }
1006             if(IsDigit(*uri))++uri;
1007         }
1008     }
1009     return iIPv4 == 4;
1010 }
1011
1012 bool wxURI::ParseIPv6address(const wxChar*& uri)
1013 {
1014     // IPv6address   =                            6( h16 ":" ) ls32
1015     //               /                       "::" 5( h16 ":" ) ls32
1016     //               / [               h16 ] "::" 4( h16 ":" ) ls32
1017     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1018     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1019     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
1020     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
1021     //               / [ *5( h16 ":" ) h16 ] "::"              h16
1022     //               / [ *6( h16 ":" ) h16 ] "::"
1023
1024     size_t numPrefix = 0,
1025               maxPostfix;
1026
1027     bool bEndHex = false;
1028
1029     for( ; numPrefix < 6; ++numPrefix)
1030     {
1031         if(!ParseH16(uri))
1032         {
1033             --uri;
1034             bEndHex = true;
1035             break;
1036         }
1037
1038         if(*uri != wxT(':'))
1039         {
1040             break;
1041         }
1042     }
1043
1044     if(!bEndHex && !ParseH16(uri))
1045     {
1046         --uri;
1047
1048         if (numPrefix)
1049             return false;
1050
1051         if (*uri == wxT(':'))
1052         {
1053             if (*++uri != wxT(':'))
1054                 return false;
1055
1056             maxPostfix = 5;
1057         }
1058         else
1059             maxPostfix = 6;
1060     }
1061     else
1062     {
1063         if (*uri != wxT(':') || *(uri+1) != wxT(':'))
1064         {
1065             if (numPrefix != 6)
1066                 return false;
1067
1068             while (*--uri != wxT(':')) {}
1069             ++uri;
1070
1071             const wxChar* uristart = uri;
1072             //parse ls32
1073             // ls32          = ( h16 ":" h16 ) / IPv4address
1074             if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1075                 return true;
1076
1077             uri = uristart;
1078
1079             if (ParseIPv4address(uri))
1080                 return true;
1081             else
1082                 return false;
1083         }
1084         else
1085         {
1086             uri += 2;
1087
1088             if (numPrefix > 3)
1089                 maxPostfix = 0;
1090             else
1091                 maxPostfix = 4 - numPrefix;
1092         }
1093     }
1094
1095     bool bAllowAltEnding = maxPostfix == 0;
1096
1097     for(; maxPostfix != 0; --maxPostfix)
1098     {
1099         if(!ParseH16(uri) || *uri != wxT(':'))
1100             return false;
1101     }
1102
1103     if(numPrefix <= 4)
1104     {
1105         const wxChar* uristart = uri;
1106         //parse ls32
1107         // ls32          = ( h16 ":" h16 ) / IPv4address
1108         if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
1109             return true;
1110
1111         uri = uristart;
1112
1113         if (ParseIPv4address(uri))
1114             return true;
1115
1116         uri = uristart;
1117
1118         if (!bAllowAltEnding)
1119             return false;
1120     }
1121
1122     if(numPrefix <= 5 && ParseH16(uri))
1123         return true;
1124
1125     return true;
1126 }
1127
1128 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1129 {
1130     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1131     if (*++uri != wxT('v') || !IsHex(*++uri))
1132         return false;
1133
1134     while (IsHex(*++uri)) {}
1135
1136     if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
1137         return false;
1138
1139     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
1140
1141     return true;
1142 }
1143
1144
1145 // ---------------------------------------------------------------------------
1146 // CharToHex
1147 //
// Converts a character into a numeric hexadecimal value, or 0 if the
1149 // passed in character is not a valid hex character
1150 // ---------------------------------------------------------------------------
1151
1152 //static
1153 wxInt32 wxURI::CharToHex(const wxChar& c)
1154 {
1155     if ((c >= wxT('A')) && (c <= wxT('Z')))    return c - wxT('A') + 0x0A;
1156     if ((c >= wxT('a')) && (c <= wxT('z')))    return c - wxT('a') + 0x0a;
1157     if ((c >= wxT('0')) && (c <= wxT('9')))    return c - wxT('0') + 0x00;
1158
1159     return 0;
1160 }
1161
1162 // ---------------------------------------------------------------------------
1163 // IsXXX
1164 //
1165 // Returns true if the passed in character meets the criteria of the method
1166 // ---------------------------------------------------------------------------
1167
1168 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1169 bool wxURI::IsUnreserved (const wxChar& c)
1170 {   return IsAlpha(c) || IsDigit(c) ||
1171            c == wxT('-') ||
1172            c == wxT('.') ||
1173            c == wxT('_') ||
1174            c == wxT('~') //tilde
1175            ;
1176 }
1177
1178 bool wxURI::IsReserved (const wxChar& c)
1179 {
1180     return IsGenDelim(c) || IsSubDelim(c);
1181 }
1182
1183 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1184 bool wxURI::IsGenDelim (const wxChar& c)
1185 {
1186     return c == wxT(':') ||
1187            c == wxT('/') ||
1188            c == wxT('?') ||
1189            c == wxT('#') ||
1190            c == wxT('[') ||
1191            c == wxT(']') ||
1192            c == wxT('@');
1193 }
1194
1195 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1196 //!               / "*" / "+" / "," / ";" / "="
1197 bool wxURI::IsSubDelim (const wxChar& c)
1198 {
1199     return c == wxT('!') ||
1200            c == wxT('$') ||
1201            c == wxT('&') ||
1202            c == wxT('\'') ||
1203            c == wxT('(') ||
1204            c == wxT(')') ||
1205            c == wxT('*') ||
1206            c == wxT('+') ||
1207            c == wxT(',') ||
1208            c == wxT(';') ||
1209            c == wxT('=')
1210            ;
1211 }
1212
1213 bool wxURI::IsHex(const wxChar& c)
1214 {   return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
1215
1216 bool wxURI::IsAlpha(const wxChar& c)
1217 {   return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z'));  }
1218
1219 bool wxURI::IsDigit(const wxChar& c)
1220 {   return c >= wxT('0') && c <= wxT('9');        }
1221
1222
1223 // ---------------------------------------------------------------------------
1224 //
//                        wxURL Compatibility
1226 //
1227 // ---------------------------------------------------------------------------
1228
1229 #if wxUSE_URL
1230
1231 #if WXWIN_COMPATIBILITY_2_4
1232
1233 #include "wx/url.h"
1234
1235 //Note that this old code really doesn't convert to a URI that well and looks
1236 //more like a dirty hack than anything else...
1237
1238 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1239 {
1240   wxString out_str;
1241   wxString hexa_code;
1242   size_t i;
1243
1244   for (i = 0; i < uri.Len(); i++)
1245   {
1246     wxChar c = uri.GetChar(i);
1247
1248     if (c == wxT(' '))
1249     {
1250       // GRG, Apr/2000: changed to "%20" instead of '+'
1251
1252       out_str += wxT("%20");
1253     }
1254     else
1255     {
1256       // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1257       //
1258       // - Alphanumeric characters are never escaped
1259       // - Unreserved marks are never escaped
1260       // - Delimiters must be escaped if they appear within a component
1261       //     but not if they are used to separate components. Here we have
1262       //     no clear way to distinguish between these two cases, so they
1263       //     are escaped unless they are passed in the 'delims' parameter
1264       //     (allowed delimiters).
1265
1266       static const wxChar marks[] = wxT("-_.!~*()'");
1267
1268       if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1269       {
1270         hexa_code.Printf(wxT("%%%02X"), c);
1271         out_str += hexa_code;
1272       }
1273       else
1274       {
1275         out_str += c;
1276       }
1277     }
1278   }
1279
1280   return out_str;
1281 }
1282
1283 wxString wxURL::ConvertFromURI(const wxString& uri)
1284 {
1285     return wxURI::Unescape(uri);
1286 }
1287
1288 #endif //WXWIN_COMPATIBILITY_2_4
1289
1290 #endif //wxUSE_URL
1291
1292 //end of uri.cpp
1293
1294
1295