src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 //
  12 //TODO:  RN:  I had some massive doxygen docs, I need to move these
  13 //in a presentable form in these sources
  14 //
  15
  16 // ===========================================================================
  17 // declarations
  18 // ===========================================================================
  19
  20 // ---------------------------------------------------------------------------
  21 // headers
  22 // ---------------------------------------------------------------------------
  23
  24 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  25     #pragma implementation "uri.h"
  26 #endif
  27
  28 // For compilers that support precompilation, includes "wx.h".
  29 #include "wx/wxprec.h"
  30
  31 #ifdef __BORLANDC__
  32     #pragma hdrstop
  33 #endif
  34
  35 #include "wx/uri.h"
  36
  37 // ---------------------------------------------------------------------------
  38 // definitions
  39 // ---------------------------------------------------------------------------
  40
// wxWidgets run-time class information for wxURI, naming wxObject as its
// base class.
IMPLEMENT_CLASS(wxURI, wxObject);
  42
  43 // ===========================================================================
  44 // implementation
  45 // ===========================================================================
  46
  47 // ---------------------------------------------------------------------------
  48 // utilities
  49 // ---------------------------------------------------------------------------
  50
  51 // ---------------------------------------------------------------------------
  52 //
  53 //                        wxURI
  54 //
  55 // ---------------------------------------------------------------------------
  56
  57 // ---------------------------------------------------------------------------
  58 //  Constructors
  59 // ---------------------------------------------------------------------------
  60
  61 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  62 {
  63 }
  64
  65 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     Create(uri);
  68 }
  69
  70 wxURI::wxURI(const wxURI& uri)  : m_hostType(wxURI_REGNAME), m_fields(0)
  71 {
  72     Assign(uri);
  73 }
  74
  75 // ---------------------------------------------------------------------------
  76 // Destructor and cleanup
  77 // ---------------------------------------------------------------------------
  78
  79 wxURI::~wxURI()
  80 {
  81     Clear();
  82 }
  83
  84 void wxURI::Clear()
  85 {
  86     m_scheme = m_user = m_server = m_port = m_path =
  87     m_query = m_fragment = wxT("");
  88
  89     m_hostType = wxURI_REGNAME;
  90
  91     m_fields = 0;
  92 }
  93
  94 // ---------------------------------------------------------------------------
  95 // Create
  96 //
  97 // This creates the URI - all we do here is call the main parsing method
  98 // ---------------------------------------------------------------------------
  99
 100 void wxURI::Create(const wxString& uri)
 101 {
 102     if (m_fields)
 103         Clear();
 104
 105     Parse(uri);
 106 }
 107
 108 // ---------------------------------------------------------------------------
 109 // Escape/Unescape/IsEscape
 110 //
 111 // Unescape unencodes a 3 character URL escape sequence
 112 // Escape encodes an invalid URI character into a 3 character sequence
 113 // IsEscape determines if the input string contains an escape sequence,
 114 // if it does, then it moves the input string past the escape sequence
 115 // ---------------------------------------------------------------------------
 116
 117 wxChar wxURI::Unescape(const wxChar* s)
 118 {
 119     wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
 120
 121     return CharToHex(*s) * 0x10 + CharToHex(*++s);
 122 }
 123
 124 void wxURI::Escape(wxString& s, const wxChar& c)
 125 {
 126     const wxChar* hdig = wxT("0123456789abcdef");
 127     s += '%';
 128     s += hdig[(c >> 4) & 15];
 129         s += hdig[c & 15];
 130 }
 131
 132 bool wxURI::IsEscape(const wxChar*& uri)
 133 {
 134     if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 135     {
 136         uri += 3;
 137         return true;
 138     }
 139     else
 140         return false;
 141 }
 142
 143 // ---------------------------------------------------------------------------
 144 // Get
 145 //
 146 // Get() actually builds the entire URI into a useable
 147 // representation, including proper identification characters such as slashes
 148 // ---------------------------------------------------------------------------
 149
 150 wxString wxURI::Get() const
 151 {
 152     wxString ret;
 153
 154     if (HasScheme())
 155         ret = ret + m_scheme + wxT(":");
 156
 157     if (HasServer())
 158     {
 159         ret += wxT("//");
 160
 161         if (HasUser())
 162             ret = ret + m_user + wxT("@");
 163
 164         ret += m_server;
 165
 166         if (HasPort())
 167             ret = ret + wxT(":") + m_port;
 168     }
 169
 170     ret += m_path;
 171
 172     if (HasQuery())
 173         ret = ret + wxT("?") + m_query;
 174
 175     if (HasFragment())
 176         ret = ret + wxT("#") + m_fragment;
 177
 178     return ret;
 179 }
 180
 181 // ---------------------------------------------------------------------------
 182 // operator = and ==
 183 // ---------------------------------------------------------------------------
 184
 185 wxURI& wxURI::operator = (const wxURI& uri)
 186 {
 187     return Assign(uri);
 188 }
 189
 190 wxURI& wxURI::Assign(const wxURI& uri)
 191 {
 192     //assign fields
 193     m_fields = uri.m_fields;
 194
 195     //ref over components
 196     m_scheme = uri.m_scheme;
 197     m_user = uri.m_user;
 198     m_server = uri.m_server;
 199     m_hostType = uri.m_hostType;
 200     m_port = uri.m_port;
 201     m_path = uri.m_path;
 202     m_query = uri.m_query;
 203     m_fragment = uri.m_fragment;
 204
 205     return *this;
 206 }
 207
 208 wxURI& wxURI::operator = (const wxString& string)
 209 {
 210     Create(string);
 211     return *this;
 212 }
 213
 214 bool wxURI::operator == (const wxURI& uri) const
 215 {
 216     if (HasScheme())
 217     {
 218         if(m_scheme != uri.m_scheme)
 219             return false;
 220     }
 221     else if (uri.HasScheme())
 222         return false;
 223
 224
 225     if (HasServer())
 226     {
 227         if (HasUser())
 228         {
 229             if (m_user != uri.m_user)
 230                 return false;
 231         }
 232         else if (uri.HasUser())
 233             return false;
 234
 235         if (m_server != uri.m_server ||
 236             m_hostType != uri.m_hostType)
 237             return false;
 238
 239         if (HasPort())
 240         {
 241             if(m_port != uri.m_port)
 242                 return false;
 243         }
 244         else if (uri.HasPort())
 245             return false;
 246     }
 247     else if (uri.HasServer())
 248         return false;
 249
 250
 251     if (HasPath())
 252     {
 253         if(m_path != uri.m_path)
 254             return false;
 255     }
 256     else if (uri.HasPath())
 257         return false;
 258
 259     if (HasQuery())
 260     {
 261         if (m_query != uri.m_query)
 262             return false;
 263     }
 264     else if (uri.HasQuery())
 265         return false;
 266
 267     if (HasFragment())
 268     {
 269         if (m_fragment != uri.m_fragment)
 270             return false;
 271     }
 272     else if (uri.HasFragment())
 273         return false;
 274
 275     return true;
 276 }
 277
 278 // ---------------------------------------------------------------------------
 279 // IsReference
 280 //
 281 // if there is no authority or scheme, it is a reference
 282 // ---------------------------------------------------------------------------
 283
 284 bool wxURI::IsReference() const
 285 {   return !HasScheme() || !HasServer();  }
 286
 287 // ---------------------------------------------------------------------------
 288 // Parse
 289 //
 290 // Master URI parsing method.  Just calls the individual parsing methods
 291 //
 292 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
// URI-reference = URI / relative-ref
 294 // ---------------------------------------------------------------------------
 295
 296 const wxChar* wxURI::Parse(const wxChar* uri)
 297 {
 298     uri = ParseScheme(uri);
 299     uri = ParseAuthority(uri);
 300     uri = ParsePath(uri);
 301     uri = ParseQuery(uri);
 302     return ParseFragment(uri);
 303 }
 304
 305 // ---------------------------------------------------------------------------
 306 // ParseXXX
 307 //
 308 // Individual parsers for each URI component
 309 // ---------------------------------------------------------------------------
 310
 311 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 312 {
 313     wxASSERT(uri != NULL);
 314
 315     //copy of the uri - used for figuring out
 316     //length of each component
 317     const wxChar* uricopy = uri;
 318
 319     //Does the uri have a scheme (first character alpha)?
 320     if (IsAlpha(*uri))
 321     {
 322         m_scheme += *uri++;
 323
 324         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 325         //RN: Scheme can not be escaped
 326         while (IsAlpha(*uri) || IsDigit(*uri) ||
 327                *uri == '+'   ||
 328                *uri == '-'   ||
 329                *uri == '.')
 330         {
 331             m_scheme += *uri++;
 332         }
 333
 334         //valid scheme?
 335         if (*uri == ':')
 336         {
 337             //mark the scheme as valid
 338             m_fields |= wxURI_SCHEME;
 339
 340             //move reference point up to input buffer
 341             uricopy = ++uri;
 342         }
 343         else
 344             //relative uri with relative path reference
 345             m_scheme = wxT("");
 346     }
 347 //    else
 348         //relative uri with _possible_ relative path reference
 349
 350     return uricopy;
 351 }
 352
 353 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 354 {
 355     // authority     = [ userinfo "@" ] host [ ":" port ]
 356     if (*uri == '/' && *(uri+1) == '/')
 357     {
 358         uri += 2;
 359
 360         uri = ParseUser(uri);
 361         uri = ParseServer(uri);
 362         return ParsePort(uri);
 363     }
 364
 365     return uri;
 366 }
 367
 368 const wxChar* wxURI::ParseUser(const wxChar* uri)
 369 {
 370     wxASSERT(uri != NULL);
 371
 372     //copy of the uri - used for figuring out
 373     //length of each component
 374     const wxChar* uricopy = uri;
 375
 376     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 377     while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?')
 378     {
 379         if(IsUnreserved(*uri) || IsEscape(uri) ||
 380            IsSubDelim(*uri) || *uri == ':')
 381             m_user += *uri++;
 382         else
 383             Escape(m_user, *uri++);
 384     }
 385
 386     if(*uri == '@')
 387     {
 388         //valid userinfo
 389         m_fields |= wxURI_USER;
 390
 391         uricopy = ++uri;
 392     }
 393     else
 394         m_user = wxT("");
 395
 396     return uricopy;
 397 }
 398
 399 const wxChar* wxURI::ParseServer(const wxChar* uri)
 400 {
 401     wxASSERT(uri != NULL);
 402
 403     //copy of the uri - used for figuring out
 404     //length of each component
 405     const wxChar* uricopy = uri;
 406
 407     // host          = IP-literal / IPv4address / reg-name
 408     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 409     if (*uri == '[')
 410     {
 411         if (ParseIPv6address(++uri) && *uri == ']')
 412         {
 413             ++uri;
 414             m_hostType = wxURI_IPV6ADDRESS;
 415
 416             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 417             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 418             theBuffer.SetLength(uri-uricopy);
 419         }
 420         else
 421         {
 422             uri = uricopy;
 423
 424             if (ParseIPvFuture(++uri) && *uri == ']')
 425             {
 426                 ++uri;
 427                 m_hostType = wxURI_IPVFUTURE;
 428
 429                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 430                 wxMemcpy(theBuffer, uricopy, uri-uricopy);
 431                 theBuffer.SetLength(uri-uricopy);
 432             }
 433             else
 434                 uri = uricopy;
 435         }
 436     }
 437     else
 438     {
 439         if (ParseIPv4address(uri))
 440         {
 441             m_hostType = wxURI_IPV4ADDRESS;
 442
 443             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 444             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 445             theBuffer.SetLength(uri-uricopy);
 446         }
 447         else
 448             uri = uricopy;
 449     }
 450
 451     if(m_hostType == wxURI_REGNAME)
 452     {
 453         uri = uricopy;
 454         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 455         while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?')
 456         {
 457             if(IsUnreserved(*uri) || IsEscape(uri) ||  IsSubDelim(*uri))
 458                 m_server += *uri++;
 459             else
 460                 Escape(m_server, *uri++);
 461         }
 462     }
 463
 464     //mark the server as valid
 465     m_fields |= wxURI_SERVER;
 466
 467     return uri;
 468 }
 469
 470
 471 const wxChar* wxURI::ParsePort(const wxChar* uri)
 472 {
 473     wxASSERT(uri != NULL);
 474
 475     // port          = *DIGIT
 476     if(*uri == ':')
 477     {
 478         ++uri;
 479         while(IsDigit(*uri))
 480         {
 481             m_port += *uri++;
 482         }
 483
 484         //mark the port as valid
 485         m_fields |= wxURI_PORT;
 486     }
 487
 488     return uri;
 489 }
 490
 491 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 492 {
 493     wxASSERT(uri != NULL);
 494
 495     //copy of the uri - used for figuring out
 496     //length of each component
 497     const wxChar* uricopy = uri;
 498
 499     /// hier-part     = "//" authority path-abempty
 500     ///               / path-absolute
 501     ///               / path-rootless
 502     ///               / path-empty
 503     ///
 504     /// relative-part = "//" authority path-abempty
 505     ///               / path-absolute
 506     ///               / path-noscheme
 507     ///               / path-empty
 508     ///
 509     /// path-abempty  = *( "/" segment )
 510     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 511     /// path-noscheme = segment-nz-nc *( "/" segment )
 512     /// path-rootless = segment-nz *( "/" segment )
 513     /// path-empty    = 0<pchar>
 514     ///
 515     /// segment       = *pchar
 516     /// segment-nz    = 1*pchar
 517     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 518     ///               ; non-zero-length segment without any colon ":"
 519     ///
 520     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 521     if (*uri == '/')
 522     {
 523         m_path += *uri++;
 524
 525         while(*uri && *uri != '#' && *uri != '?')
 526         {
 527             if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 528                 *uri == ':' || *uri == '@' || *uri == '/')
 529                 m_path += *uri++;
 530             else
 531                 Escape(m_path, *uri++);
 532         }
 533
 534         if (bNormalize)
 535         {
 536             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 537             Normalize(theBuffer, true);
 538             theBuffer.SetLength(wxStrlen(theBuffer));
 539         }
 540         //mark the path as valid
 541         m_fields |= wxURI_PATH;
 542     }
 543     else if(*uri) //Relative path
 544     {
 545         if (bReference)
 546         {
 547             //no colon allowed
 548             while(*uri && *uri != '#' && *uri != '?')
 549             {
 550                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 551                   *uri == '@' || *uri == '/')
 552                     m_path += *uri++;
 553                 else
 554                     Escape(m_path, *uri++);
 555             }
 556         }
 557         else
 558         {
 559             while(*uri && *uri != '#' && *uri != '?')
 560             {
 561                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 562                    *uri == ':' || *uri == '@' || *uri == '/')
 563                     m_path += *uri++;
 564                 else
 565                     Escape(m_path, *uri++);
 566             }
 567         }
 568
 569         if (uri != uricopy)
 570         {
 571             if (bNormalize)
 572             {
 573                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 574                 Normalize(theBuffer);
 575                 theBuffer.SetLength(wxStrlen(theBuffer));
 576             }
 577
 578             //mark the path as valid
 579             m_fields |= wxURI_PATH;
 580         }
 581     }
 582
 583     return uri;
 584 }
 585
 586
 587 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 588 {
 589     wxASSERT(uri != NULL);
 590
 591     // query         = *( pchar / "/" / "?" )
 592     if (*uri == '?')
 593     {
 594         ++uri;
 595         while(*uri && *uri != '#')
 596         {
 597             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 598                 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
 599                   m_query += *uri++;
 600             else
 601                   Escape(m_query, *uri++);
 602         }
 603
 604         //mark the server as valid
 605         m_fields |= wxURI_QUERY;
 606     }
 607
 608     return uri;
 609 }
 610
 611
 612 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 613 {
 614     wxASSERT(uri != NULL);
 615
 616     // fragment      = *( pchar / "/" / "?" )
 617     if (*uri == '#')
 618     {
 619         ++uri;
 620         while(*uri)
 621         {
 622             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 623                 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
 624                   m_fragment += *uri++;
 625             else
 626                   Escape(m_fragment, *uri++);
 627         }
 628
 629         //mark the server as valid
 630         m_fields |= wxURI_FRAGMENT;
 631     }
 632
 633     return uri;
 634 }
 635
 636 // ---------------------------------------------------------------------------
 637 //  Resolve URI
 638 //
 639 //  Builds missing components of this uri from a base uri
 640 //
 641 //  A version of the algorithm outlined in the RFC is used here
 642 //  (it is shown in comments)
 643 // ---------------------------------------------------------------------------
 644
 645 void wxURI::Resolve(const wxURI& base, int flags)
 646 {
 647     wxASSERT_MSG(!base.IsReference(),
 648                 wxT("wxURI to inherit from must not be a reference!"));
 649
 650     // If we arn't being strict, enable the older
 651     // loophole that allows this uri to inherit other
 652     // properties from the base uri - even if the scheme
 653     // is defined
 654     if ( !(flags & wxURI_STRICT) &&
 655             HasScheme() && base.HasScheme() &&
 656                 m_scheme == base.m_scheme )
 657     {
 658         m_fields -= wxURI_SCHEME;
 659     }
 660
 661
 662     // Do nothing if this is an absolute wxURI
 663     //    if defined(R.scheme) then
 664     //       T.scheme    = R.scheme;
 665     //       T.authority = R.authority;
 666     //       T.path      = remove_dot_segments(R.path);
 667     //       T.query     = R.query;
 668     if (HasScheme())
 669     {
 670         return;
 671     }
 672
 673     //No sheme - inherit
 674     m_scheme = base.m_scheme;
 675     m_fields |= wxURI_SCHEME;
 676
 677     // All we need to do for relative URIs with an
 678     // authority component is just inherit the scheme
 679     //       if defined(R.authority) then
 680     //          T.authority = R.authority;
 681     //          T.path      = remove_dot_segments(R.path);
 682     //          T.query     = R.query;
 683     if (HasServer())
 684     {
 685         return;
 686     }
 687
 688     //No authority - inherit
 689     if (base.HasUser())
 690     {
 691         m_user = base.m_user;
 692         m_fields |= wxURI_USER;
 693     }
 694
 695     m_server = base.m_server;
 696     m_hostType = base.m_hostType;
 697     m_fields |= wxURI_SERVER;
 698
 699     if (base.HasPort())
 700     {
 701         m_port = base.m_port;
 702         m_fields |= wxURI_PORT;
 703     }
 704
 705
 706     // Simple path inheritance from base
 707     if (!HasPath())
 708     {
 709         //             T.path = Base.path;
 710         m_path = base.m_path;
 711         m_fields |= wxURI_PATH;
 712
 713
 714         //             if defined(R.query) then
 715         //                T.query = R.query;
 716         //             else
 717         //                T.query = Base.query;
 718         //             endif;
 719         if (!HasQuery())
 720         {
 721             m_query = base.m_query;
 722             m_fields |= wxURI_QUERY;
 723         }
 724     }
 725     else
 726     {
 727         //             if (R.path starts-with "/") then
 728         //                T.path = remove_dot_segments(R.path);
 729         //             else
 730         //                T.path = merge(Base.path, R.path);
 731         //                T.path = remove_dot_segments(T.path);
 732         //             endif;
 733         //             T.query = R.query;
 734         if (m_path[(const size_t&)0] != '/')
 735         {
 736             //Marge paths
 737             const wxChar* op = m_path.c_str();
 738             const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
 739
 740             //not a ending directory?  move up
 741             if (base.m_path[0] && *(bp-1) != '/')
 742                 UpTree(base.m_path, bp);
 743
 744             //normalize directories
 745             while(*op == '.' && *(op+1) == '.' &&
 746                        (*(op+2) == '\0' || *(op+2) == '/') )
 747             {
 748                 UpTree(base.m_path, bp);
 749
 750                 if (*(op+2) == '\0')
 751                     op += 2;
 752                 else
 753                     op += 3;
 754             }
 755
 756             m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
 757                     m_path.Mid((op - m_path.c_str()), m_path.Length());
 758         }
 759     }
 760 }
 761
 762 // ---------------------------------------------------------------------------
 763 // Directory Normalization (static)
 764 //
// UpTree moves a pointer into a path string up by one directory, while
// Normalize removes the "." and ".." segments of a path in place. It
// follows RFC 2396, modified quite a bit to pass the unit tests based on
// the examples given in the RFC.
 769 // ---------------------------------------------------------------------------
 770
 771 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 772 {
 773     if (uri != uristart && *(uri-1) == '/')
 774     {
 775         uri -= 2;
 776     }
 777
 778     for(;uri != uristart; --uri)
 779     {
 780         if (*uri == '/')
 781         {
 782             ++uri;
 783             break;
 784         }
 785     }
 786
 787     //!!!TODO:HACK!!!//
 788     if (uri == uristart && *uri == '/')
 789         ++uri;
 790     //!!!//
 791 }
 792
// Removes "." and ".." segments from the NUL terminated path s, in place.
//
// s            - the path; it is overwritten with the result, which is
//                never longer than the input
// bIgnoreLeads - what to do with a ".." that has no directory left to
//                remove: if true it is dropped, if false it is kept in the
//                result
void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
{
    //cp is the read position and s itself is used as the write position;
    //bp is the lower limit for ".." - nothing in front of it is removed
    wxChar* cp = s;
    wxChar* bp = s;

    //the leading slash of an absolute path is never removed
    if(s[0] == '/')
        ++bp;

    while(*cp)
    {
        //a "." only counts as a segment if it is followed by '/' or the
        //end of the string and is at the start of a segment
        if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0')
            && (bp == cp || *(cp-1) == '/'))
        {
            //. _or_ ./  - ignore
            if (*(cp+1) == '\0')
                cp += 1;
            else
                cp += 2;
        }
        else if (*cp == '.' && *(cp+1) == '.' &&
                (*(cp+2) == '/' || *(cp+2) == '\0')
                && (bp == cp || *(cp-1) == '/'))
        {
            //.. _or_ ../ - go up the tree
            if (s != bp)
            {
                //something has been written after bp: move the write
                //position back by one directory
                UpTree((const wxChar*)bp, (const wxChar*&)s);

                if (*(cp+2) == '\0')
                    cp += 2;
                else
                    cp += 3;
            }
            else if (!bIgnoreLeads)

            {
                //nothing to remove: copy the ".." (and the '/' following
                //it, if any) to the output and move bp behind it so that
                //a later ".." can not remove it
                *bp++ = *cp++;
                *bp++ = *cp++;
                if (*cp)
                    *bp++ = *cp++;

                s = bp;
            }
            else
            {
                //nothing to remove and leading ".." are not wanted: skip
                if (*(cp+2) == '\0')
                    cp += 2;
                else
                    cp += 3;
            }
        }
        else
            //ordinary character: copy it
            *s++ = *cp++;
    }

    //terminate the result at the final write position
    *s = '\0';
}
 850
 851 // ---------------------------------------------------------------------------
 852 // Misc. Parsing Methods
 853 // ---------------------------------------------------------------------------
 854
// Tries to match a dotted IPv4 address at uri.
//
// Returns true if four octets were read. uri is advanced while parsing and
// is NOT put back on failure - the caller has to restore it.
bool wxURI::ParseIPv4address(const wxChar*& uri)
{
    //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
    //
    //dec-octet     =      DIGIT                    ; 0-9
    //                / %x31-39 DIGIT               ; 10-99
    //                / "1" 2DIGIT                  ; 100-199
    //                / "2" %x30-34 DIGIT           ; 200-249
    //                / "25" %x30-35                ; 250-255

    //number of octets read so far
    size_t iIPv4 = 0;
    if (IsDigit(*uri))
    {
        ++iIPv4;


        //each ip part must be between 0-255 (dupe of version in for loop)
        //
        //The two IsDigit() calls advance uri by one character each and stop
        //at the first non-digit. Only if both succeed does uri point at the
        //third digit of the octet and the range check that follows is made:
        //the octet is rejected unless (note the !) ...
        if( IsDigit(*++uri) && IsDigit(*++uri) &&
           //... its first digit is 0 or 1 ...
           !( (*(uri-2) < '2') ||
           //... or it is 2 followed by a number up to 55
             (*(uri-2) == '2' &&
               (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
             )
            )
          )
        {
            return false;
        }

        //move past the last digit of an octet that has three of them
        if(IsDigit(*uri))++uri;

        //compilers should unroll this loop
        for(; iIPv4 < 4; ++iIPv4)
        {
            //the next octet needs a '.' followed by a digit
            if (*uri != '.' || !IsDigit(*++uri))
                break;

            //each ip part must be between 0-255
            //(the same check as for the first octet above)
            if( IsDigit(*++uri) && IsDigit(*++uri) &&
               //first digit is 0 or 1  (note !)
               !( (*(uri-2) < '2') ||
               //or 2 followed by a number up to 55
                 (*(uri-2) == '2' &&
                   (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
                 )
                )
              )
            {
                return false;
            }
            if(IsDigit(*uri))++uri;
        }
    }

    //only a complete address of four octets is accepted
    return iIPv4 == 4;
}
 910
 911 bool wxURI::ParseH16(const wxChar*& uri)
 912 {
 913     // h16           = 1*4HEXDIG
 914     if(!IsHex(*++uri))
 915         return false;
 916
 917     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
 918         ++uri;
 919
 920     return true;
 921 }
 922
// Tries to match an IPv6 address at uri.
//
// uri is advanced while parsing and is not generally put back on failure -
// the caller has to restore it.
//
// NOTE(review): ParseH16() advances the pointer before it looks at the
// first digit, so the first character uri points at on entry is never
// examined by the first ParseH16() call - confirm that this is intended.
bool wxURI::ParseIPv6address(const wxChar*& uri)
{
    // IPv6address   =                            6( h16 ":" ) ls32
    //               /                       "::" 5( h16 ":" ) ls32
    //               / [               h16 ] "::" 4( h16 ":" ) ls32
    //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    //               / [ *4( h16 ":" ) h16 ] "::"              ls32
    //               / [ *5( h16 ":" ) h16 ] "::"              h16
    //               / [ *6( h16 ":" ) h16 ] "::"

    //numPrefix counts the groups read in front of a possible "::",
    //maxPostfix is the number of "h16 :" groups required after it
    size_t numPrefix = 0,
              maxPostfix;

    //set when the prefix loop was ended by something that is not a h16
    bool bEndHex = false;

    //read up to 6 groups, each one followed by a ':'
    for( ; numPrefix < 6; ++numPrefix)
    {
        if(!ParseH16(uri))
        {
            //undo the step ParseH16() made before it failed
            --uri;
            bEndHex = true;
            break;
        }

        if(*uri != ':')
        {
            break;
        }
    }

    //the second condition is only evaluated, and so only moves uri, if the
    //loop above was not ended by a failed h16
    if(!bEndHex && !ParseH16(uri))
    {
        --uri;

        if (numPrefix)
            return false;

        if (*uri == ':')
        {
            //a single ':' is not valid here, it has to be a "::"
            if (*++uri != ':')
                return false;

            maxPostfix = 5;
        }
        else
            maxPostfix = 6;
    }
    else
    {
        if (*uri != ':' || *(uri+1) != ':')
        {
            //no "::" follows: only the form made of 6 groups and a ls32 is
            //left
            if (numPrefix != 6)
                return false;

            //go back to the character after the previous ':'
            while (*--uri != ':') {}
            ++uri;

            const wxChar* uristart = uri;
            //parse ls32
            // ls32          = ( h16 ":" h16 ) / IPv4address
            if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
                return true;

            uri = uristart;

            if (ParseIPv4address(uri))
                return true;
            else
                return false;
        }
        else
        {
            //skip the "::"
            uri += 2;

            //the more groups there are in front of the "::", the fewer are
            //needed behind it
            if (numPrefix > 3)
                maxPostfix = 0;
            else
                maxPostfix = 4 - numPrefix;
        }
    }

    //with no group required behind the "::" the address may also end in a
    //single h16 or in nothing at all (see the end of the function)
    bool bAllowAltEnding = maxPostfix == 0;

    //read the required number of "h16 :" groups
    for(; maxPostfix != 0; --maxPostfix)
    {
        if(!ParseH16(uri) || *uri != ':')
            return false;
    }

    if(numPrefix <= 4)
    {
        const wxChar* uristart = uri;
        //parse ls32
        // ls32          = ( h16 ":" h16 ) / IPv4address
        if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
            return true;

        uri = uristart;

        if (ParseIPv4address(uri))
            return true;

        uri = uristart;

        if (!bAllowAltEnding)
            return false;
    }

    //NOTE(review): true is returned whether or not this last h16 matches,
    //so the call only matters for where it leaves uri; after a failed match
    //uri has been advanced by one character
    if(numPrefix <= 5 && ParseH16(uri))
        return true;

    return true;
}
1038
1039 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1040 {
1041     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1042     if (*++uri != 'v' || !IsHex(*++uri))
1043         return false;
1044
1045     while (IsHex(*++uri)) {}
1046
1047     if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1048         return false;
1049
1050     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1051
1052     return true;
1053 }
1054
1055
1056 // ---------------------------------------------------------------------------
1057 // Misc methods - IsXXX and CharToHex
1058 // ---------------------------------------------------------------------------
1059
1060 int wxURI::CharToHex(const wxChar& c)
1061 {
1062         if ((c >= 'A') && (c <= 'Z'))   return c - 'A' + 0x0A;
1063         if ((c >= 'a') && (c <= 'z'))   return c - 'a' + 0x0a;
1064         if ((c >= '0') && (c <= '9'))   return c - '0' + 0x00;
1065
1066         return 0;
1067 }
1068
1069 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1070 bool wxURI::IsUnreserved (const wxChar& c)
1071 {   return IsAlpha(c) || IsDigit(c) ||
1072            c == '-' ||
1073            c == '.' ||
1074            c == '_' ||
1075            c == '~' //tilde
1076            ;
1077 }
1078
1079 bool wxURI::IsReserved (const wxChar& c)
1080 {
1081     return IsGenDelim(c) || IsSubDelim(c);
1082 }
1083
1084 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1085 bool wxURI::IsGenDelim (const wxChar& c)
1086 {
1087     return c == ':' ||
1088            c == '/' ||
1089            c == '?' ||
1090            c == '#' ||
1091            c == '[' ||
1092            c == ']' ||
1093            c == '@';
1094 }
1095
1096 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1097 //!               / "*" / "+" / "," / ";" / "="
1098 bool wxURI::IsSubDelim (const wxChar& c)
1099 {
1100     return c == '!' ||
1101            c == '$' ||
1102            c == '&' ||
1103            c == '\'' ||
1104            c == '(' ||
1105            c == ')' ||
1106            c == '*' ||
1107            c == '+' ||
1108            c == ',' ||
1109            c == ';' ||
1110            c == '='
1111            ;
1112 }
1113
1114 bool wxURI::IsHex(const wxChar& c)
1115 {   return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
1116
1117 bool wxURI::IsAlpha(const wxChar& c)
1118 {   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');  }
1119
1120 bool wxURI::IsDigit(const wxChar& c)
1121 {   return c >= '0' && c <= '9';        }
1122
1123
1124 // ---------------------------------------------------------------------------
1125 //
//                        wxURL Compatibility
1127 //
1128 // TODO:  Use wxURI instead here...
1129 // ---------------------------------------------------------------------------
1130
1131 #if wxUSE_URL
1132
1133 #include "wx/url.h"
1134
1135 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1136 {
1137   wxString out_str;
1138   wxString hexa_code;
1139   size_t i;
1140
1141   for (i = 0; i < uri.Len(); i++)
1142   {
1143     wxChar c = uri.GetChar(i);
1144
1145     if (c == wxT(' '))
1146     {
1147       // GRG, Apr/2000: changed to "%20" instead of '+'
1148
1149       out_str += wxT("%20");
1150     }
1151     else
1152     {
1153       // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1154       //
1155       // - Alphanumeric characters are never escaped
1156       // - Unreserved marks are never escaped
1157       // - Delimiters must be escaped if they appear within a component
1158       //     but not if they are used to separate components. Here we have
1159       //     no clear way to distinguish between these two cases, so they
1160       //     are escaped unless they are passed in the 'delims' parameter
1161       //     (allowed delimiters).
1162
1163       static const wxChar marks[] = wxT("-_.!~*()'");
1164
1165       if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1166       {
1167         hexa_code.Printf(wxT("%%%02X"), c);
1168         out_str += hexa_code;
1169       }
1170       else
1171       {
1172         out_str += c;
1173       }
1174     }
1175   }
1176
1177   return out_str;
1178 }
1179
1180 wxString wxURL::ConvertFromURI(const wxString& uri)
1181 {
1182   wxString new_uri;
1183
1184   size_t i = 0;
1185   while (i < uri.Len())
1186   {
1187     int code;
1188     if (uri[i] == wxT('%'))
1189     {
1190       i++;
1191       if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1192         code = (uri[i] - wxT('A') + 10) * 16;
1193       else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1194         code = (uri[i] - wxT('a') + 10) * 16;
1195       else
1196         code = (uri[i] - wxT('0')) * 16;
1197
1198       i++;
1199       if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1200         code += (uri[i] - wxT('A')) + 10;
1201       else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1202         code += (uri[i] - wxT('a')) + 10;
1203       else
1204         code += (uri[i] - wxT('0'));
1205
1206       i++;
1207       new_uri += (wxChar)code;
1208       continue;
1209     }
1210     new_uri += uri[i];
1211     i++;
1212   }
1213   return new_uri;
1214 }
1215
1216 #endif //wxUSE_URL
1217
1218 //end of uri.cpp
1219
1220
1221