src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/uri.cpp
   3 // Purpose:     Implementation of a URI parser
   4 // Author:      Ryan Norton,
   5 //              Vadim Zeitlin (UTF-8 URI support, many other changes)
   6 // Created:     10/26/04
   7 // Copyright:   (c) 2004 Ryan Norton,
   8 //                  2008 Vadim Zeitlin
   9 // Licence:     wxWindows licence
  10 /////////////////////////////////////////////////////////////////////////////
  11
  12 // ===========================================================================
  13 // declarations
  14 // ===========================================================================
  15
  16 // ---------------------------------------------------------------------------
  17 // headers
  18 // ---------------------------------------------------------------------------
  19
  20 // For compilers that support precompilation, includes "wx.h".
  21 #include "wx/wxprec.h"
  22
  23 #ifdef __BORLANDC__
  24     #pragma hdrstop
  25 #endif
  26
  27 #ifndef WX_PRECOMP
  28     #include "wx/crt.h"
  29 #endif
  30
  31 #include "wx/uri.h"
  32
  33 // ---------------------------------------------------------------------------
  34 // definitions
  35 // ---------------------------------------------------------------------------
  36
// Class information boilerplate for wxURI, naming wxObject as its base class
// (presumably the counterpart of a DECLARE_* macro in wx/uri.h -- confirm)
IMPLEMENT_CLASS(wxURI, wxObject)
  38
  39 // ===========================================================================
  40 // wxURI implementation
  41 // ===========================================================================
  42
  43 // ---------------------------------------------------------------------------
  44 // Constructors and cleanup
  45 // ---------------------------------------------------------------------------
  46
  47 wxURI::wxURI()
  48      : m_hostType(wxURI_REGNAME),
  49        m_fields(0)
  50 {
  51 }
  52
  53 wxURI::wxURI(const wxString& uri)
  54      : m_hostType(wxURI_REGNAME),
  55        m_fields(0)
  56 {
  57     Create(uri);
  58 }
  59
  60 bool wxURI::Create(const wxString& uri)
  61 {
  62     if (m_fields)
  63         Clear();
  64
  65     return Parse(uri.utf8_str());
  66 }
  67
  68 void wxURI::Clear()
  69 {
  70     m_scheme =
  71     m_userinfo =
  72     m_server =
  73     m_port =
  74     m_path =
  75     m_query =
  76     m_fragment = wxEmptyString;
  77
  78     m_hostType = wxURI_REGNAME;
  79
  80     m_fields = 0;
  81 }
  82
  83 // ---------------------------------------------------------------------------
  84 // Escaped characters handling
  85 // ---------------------------------------------------------------------------
  86
  87 // Converts a character into a numeric hexadecimal value, or -1 if the passed
  88 // in character is not a valid hex character
  89
  90 /* static */
  91 int wxURI::CharToHex(char c)
  92 {
  93     if ((c >= 'A') && (c <= 'Z'))
  94         return c - 'A' + 10;
  95     if ((c >= 'a') && (c <= 'z'))
  96         return c - 'a' + 10;
  97     if ((c >= '0') && (c <= '9'))
  98         return c - '0';
  99
 100     return -1;
 101 }
 102
 103 int wxURI::DecodeEscape(wxString::const_iterator& i)
 104 {
 105     int hi = CharToHex(*++i);
 106     if ( hi == -1 )
 107         return -1;
 108
 109     int lo = CharToHex(*++i);
 110     if ( lo == -1 )
 111         return -1;
 112
 113     return (hi << 4) | lo;
 114 }
 115
 116 /* static */
 117 wxString wxURI::Unescape(const wxString& uri)
 118 {
 119     // the unescaped version can't be longer than the original one
 120     wxCharBuffer buf(uri.length());
 121     char *p = buf.data();
 122
 123     for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p )
 124     {
 125         char c = *i;
 126         if ( c == '%' )
 127         {
 128             int n = wxURI::DecodeEscape(i);
 129             if ( n == -1 )
 130                 return wxString();
 131
 132             wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" );
 133
 134             c = static_cast<char>(n);
 135         }
 136
 137         *p = c;
 138     }
 139
 140     *p = '\0';
 141
 142     // by default assume that the URI is in UTF-8, this is the most common
 143     // practice
 144     wxString s = wxString::FromUTF8(buf);
 145     if ( s.empty() )
 146     {
 147         // if it isn't, use latin-1 as a fallback -- at least this always
 148         // succeeds
 149         s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf);
 150     }
 151
 152     return s;
 153 }
 154
 155 void wxURI::AppendNextEscaped(wxString& s, const char *& p)
 156 {
 157     // check for an already encoded character:
 158     //
 159     // pct-encoded   = "%" HEXDIG HEXDIG
 160     if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
 161     {
 162         s += *p++;
 163         s += *p++;
 164         s += *p++;
 165     }
 166     else // really needs escaping
 167     {
 168         static const char* hexDigits = "0123456789abcdef";
 169
 170         const char c = *p++;
 171
 172         s += '%';
 173         s += hexDigits[(c >> 4) & 15];
 174         s += hexDigits[c & 15];
 175     }
 176 }
 177
 178 // ---------------------------------------------------------------------------
 179 // GetUser
 180 // GetPassword
 181 //
 182 // Gets the username and password via the old URL method.
 183 // ---------------------------------------------------------------------------
 184 wxString wxURI::GetUser() const
 185 {
 186     // if there is no colon at all, find() returns npos and this method returns
 187     // the entire string which is correct as it means that password was omitted
 188     return m_userinfo(0, m_userinfo.find(':'));
 189 }
 190
 191 wxString wxURI::GetPassword() const
 192 {
 193       size_t posColon = m_userinfo.find(':');
 194
 195       if ( posColon == wxString::npos )
 196           return "";
 197
 198       return m_userinfo(posColon + 1, wxString::npos);
 199 }
 200
 201 // combine all URI fields in a single string, applying funcDecode to each
 202 // component which it may make sense to decode (i.e. "unescape")
 203 wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
 204 {
 205     wxString ret;
 206
 207     if (HasScheme())
 208         ret += m_scheme + ":";
 209
 210     if (HasServer())
 211     {
 212         ret += "//";
 213
 214         if (HasUserInfo())
 215             ret += funcDecode(m_userinfo) + "@";
 216
 217         if (m_hostType == wxURI_REGNAME)
 218             ret += funcDecode(m_server);
 219         else
 220             ret += m_server;
 221
 222         if (HasPort())
 223             ret += ":" + m_port;
 224     }
 225
 226     ret += funcDecode(m_path);
 227
 228     if (HasQuery())
 229         ret += "?" + funcDecode(m_query);
 230
 231     if (HasFragment())
 232         ret += "#" + funcDecode(m_fragment);
 233
 234     return ret;
 235 }
 236
 237 // ---------------------------------------------------------------------------
 238 // Comparison
 239 // ---------------------------------------------------------------------------
 240
 241 bool wxURI::operator==(const wxURI& uri) const
 242 {
 243     if (HasScheme())
 244     {
 245         if(m_scheme != uri.m_scheme)
 246             return false;
 247     }
 248     else if (uri.HasScheme())
 249         return false;
 250
 251
 252     if (HasServer())
 253     {
 254         if (HasUserInfo())
 255         {
 256             if (m_userinfo != uri.m_userinfo)
 257                 return false;
 258         }
 259         else if (uri.HasUserInfo())
 260             return false;
 261
 262         if (m_server != uri.m_server ||
 263             m_hostType != uri.m_hostType)
 264             return false;
 265
 266         if (HasPort())
 267         {
 268             if(m_port != uri.m_port)
 269                 return false;
 270         }
 271         else if (uri.HasPort())
 272             return false;
 273     }
 274     else if (uri.HasServer())
 275         return false;
 276
 277
 278     if (HasPath())
 279     {
 280         if(m_path != uri.m_path)
 281             return false;
 282     }
 283     else if (uri.HasPath())
 284         return false;
 285
 286     if (HasQuery())
 287     {
 288         if (m_query != uri.m_query)
 289             return false;
 290     }
 291     else if (uri.HasQuery())
 292         return false;
 293
 294     if (HasFragment())
 295     {
 296         if (m_fragment != uri.m_fragment)
 297             return false;
 298     }
 299     else if (uri.HasFragment())
 300         return false;
 301
 302     return true;
 303 }
 304
 305 // ---------------------------------------------------------------------------
 306 // IsReference
 307 //
 308 // if there is no authority or scheme, it is a reference
 309 // ---------------------------------------------------------------------------
 310
 311 bool wxURI::IsReference() const
 312 {
 313     return !HasScheme() || !HasServer();
 314 }
 315
 316 // ---------------------------------------------------------------------------
 317 // Parse
 318 //
 319 // Master URI parsing method.  Just calls the individual parsing methods
 320 //
 321 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 322 // URI-reference = URI / relative
 323 // ---------------------------------------------------------------------------
 324
 325 bool wxURI::Parse(const char *uri)
 326 {
 327     uri = ParseScheme(uri);
 328     if ( uri )
 329         uri = ParseAuthority(uri);
 330     if ( uri )
 331         uri = ParsePath(uri);
 332     if ( uri )
 333         uri = ParseQuery(uri);
 334     if ( uri )
 335         uri = ParseFragment(uri);
 336
 337     // we only succeed if we parsed the entire string
 338     return uri && *uri == '\0';
 339 }
 340
 341 const char* wxURI::ParseScheme(const char *uri)
 342 {
 343     const char * const start = uri;
 344
 345     // assume that we have a scheme if we have the valid start of it
 346     if ( IsAlpha(*uri) )
 347     {
 348         m_scheme += *uri++;
 349
 350         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 351         while (IsAlpha(*uri) || IsDigit(*uri) ||
 352                *uri == '+'   ||
 353                *uri == '-'   ||
 354                *uri == '.')
 355         {
 356             m_scheme += *uri++;
 357         }
 358
 359         //valid scheme?
 360         if (*uri == ':')
 361         {
 362             //mark the scheme as valid
 363             m_fields |= wxURI_SCHEME;
 364
 365             //move reference point up to input buffer
 366             ++uri;
 367         }
 368         else // no valid scheme finally
 369         {
 370             uri = start; // rewind
 371             m_scheme.clear();
 372         }
 373     }
 374     //else: can't have schema, possible a relative URI
 375
 376     return uri;
 377 }
 378
 379 const char* wxURI::ParseAuthority(const char* uri)
 380 {
 381     // authority     = [ userinfo "@" ] host [ ":" port ]
 382     if ( uri[0] == '/' && uri[1] == '/' )
 383     {
 384         //skip past the two slashes
 385         uri += 2;
 386
 387         // ############# DEVIATION FROM RFC #########################
 388         // Don't parse the server component for file URIs
 389         if(m_scheme != "file")
 390         {
 391             //normal way
 392             uri = ParseUserInfo(uri);
 393             uri = ParseServer(uri);
 394             return ParsePort(uri);
 395         }
 396     }
 397
 398     return uri;
 399 }
 400
 401 const char* wxURI::ParseUserInfo(const char* uri)
 402 {
 403     const char * const start = uri;
 404
 405     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 406     while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
 407     {
 408         if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
 409             m_userinfo += *uri++;
 410         else
 411             AppendNextEscaped(m_userinfo, uri);
 412     }
 413
 414     if ( *uri++ == '@' )
 415     {
 416         // valid userinfo
 417         m_fields |= wxURI_USERINFO;
 418     }
 419     else
 420     {
 421         uri = start; // rewind
 422         m_userinfo.clear();
 423     }
 424
 425     return uri;
 426 }
 427
 428 const char* wxURI::ParseServer(const char* uri)
 429 {
 430     const char * const start = uri;
 431
 432     // host          = IP-literal / IPv4address / reg-name
 433     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 434     if (*uri == '[')
 435     {
 436         ++uri;
 437         if (ParseIPv6address(uri) && *uri == ']')
 438         {
 439             m_hostType = wxURI_IPV6ADDRESS;
 440
 441             m_server.assign(start + 1, uri - start - 1);
 442             ++uri;
 443         }
 444         else
 445         {
 446             uri = start + 1; // skip the leading '[' again
 447
 448             if (ParseIPvFuture(uri) && *uri == ']')
 449             {
 450                 m_hostType = wxURI_IPVFUTURE;
 451
 452                 m_server.assign(start + 1, uri - start - 1);
 453                 ++uri;
 454             }
 455             else // unrecognized IP literal
 456             {
 457                 uri = start;
 458             }
 459         }
 460     }
 461     else // IPv4 or a reg-name
 462     {
 463         if (ParseIPv4address(uri))
 464         {
 465             m_hostType = wxURI_IPV4ADDRESS;
 466
 467             m_server.assign(start, uri - start);
 468         }
 469         else
 470         {
 471             uri = start;
 472         }
 473     }
 474
 475     if ( m_hostType == wxURI_REGNAME )
 476     {
 477         uri = start;
 478         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 479         while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
 480         {
 481             if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
 482                 m_server += *uri++;
 483             else
 484                 AppendNextEscaped(m_server, uri);
 485         }
 486     }
 487
 488     m_fields |= wxURI_SERVER;
 489
 490     return uri;
 491 }
 492
 493
 494 const char* wxURI::ParsePort(const char* uri)
 495 {
 496     // port          = *DIGIT
 497     if( *uri == ':' )
 498     {
 499         ++uri;
 500         while ( IsDigit(*uri) )
 501         {
 502             m_port += *uri++;
 503         }
 504
 505         m_fields |= wxURI_PORT;
 506     }
 507
 508     return uri;
 509 }
 510
 511 const char* wxURI::ParsePath(const char* uri)
 512 {
 513     /// hier-part     = "//" authority path-abempty
 514     ///               / path-absolute
 515     ///               / path-rootless
 516     ///               / path-empty
 517     ///
 518     /// relative-part = "//" authority path-abempty
 519     ///               / path-absolute
 520     ///               / path-noscheme
 521     ///               / path-empty
 522     ///
 523     /// path-abempty  = *( "/" segment )
 524     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 525     /// path-noscheme = segment-nz-nc *( "/" segment )
 526     /// path-rootless = segment-nz *( "/" segment )
 527     /// path-empty    = 0<pchar>
 528     ///
 529     /// segment       = *pchar
 530     /// segment-nz    = 1*pchar
 531     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 532     ///               ; non-zero-length segment without any colon ":"
 533     ///
 534     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 535
 536     if ( IsEndPath(*uri) )
 537         return uri;
 538
 539     const bool isAbs = *uri == '/';
 540     if ( isAbs )
 541         m_path += *uri++;
 542
 543     wxArrayString segments;
 544     wxString segment;
 545     for ( ;; )
 546     {
 547         const bool endPath = IsEndPath(*uri);
 548         if ( endPath || *uri == '/' )
 549         {
 550             // end of a segment, look at what we got
 551             if ( segment == ".." )
 552             {
 553                 if ( !segments.empty() && *segments.rbegin() != ".." )
 554                     segments.pop_back();
 555                 else if ( !isAbs )
 556                     segments.push_back("..");
 557             }
 558             else if ( segment == "." )
 559             {
 560                 // normally we ignore "." but the last one should be taken into
 561                 // account as "path/." is the same as "path/" and not just "path"
 562                 if ( endPath )
 563                     segments.push_back("");
 564             }
 565             else // normal segment
 566             {
 567                 segments.push_back(segment);
 568             }
 569
 570             if ( endPath )
 571                 break;
 572
 573             segment.clear();
 574             ++uri;
 575             continue;
 576         }
 577
 578         if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
 579             segment += *uri++;
 580         else
 581             AppendNextEscaped(segment, uri);
 582     }
 583
 584     m_path += wxJoin(segments, '/', '\0');
 585     m_fields |= wxURI_PATH;
 586
 587     return uri;
 588 }
 589
 590
 591 const char* wxURI::ParseQuery(const char* uri)
 592 {
 593     // query         = *( pchar / "/" / "?" )
 594     if ( *uri == '?' )
 595     {
 596         ++uri;
 597         while ( *uri && *uri != '#' )
 598         {
 599             if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 600                     *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
 601                 m_query += *uri++;
 602             else
 603                 AppendNextEscaped(m_query, uri);
 604         }
 605
 606         m_fields |= wxURI_QUERY;
 607     }
 608
 609     return uri;
 610 }
 611
 612
 613 const char* wxURI::ParseFragment(const char* uri)
 614 {
 615     // fragment      = *( pchar / "/" / "?" )
 616     if ( *uri == '#' )
 617     {
 618         ++uri;
 619         while ( *uri )
 620         {
 621             if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 622                     *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
 623                 m_fragment += *uri++;
 624             else
 625                 AppendNextEscaped(m_fragment, uri);
 626         }
 627
 628         m_fields |= wxURI_FRAGMENT;
 629     }
 630
 631     return uri;
 632 }
 633
 634 // ---------------------------------------------------------------------------
 635 // Resolve
 636 //
 637 // Builds missing components of this uri from a base uri
 638 //
 639 // A version of the algorithm outlined in the RFC is used here
 640 // (it is shown in comments)
 641 //
 642 // Note that an empty URI inherits all components
 643 // ---------------------------------------------------------------------------
 644
 645 /* static */
 646 wxArrayString wxURI::SplitInSegments(const wxString& path)
 647 {
 648     return wxSplit(path, '/', '\0' /* no escape character */);
 649 }
 650
 651 void wxURI::Resolve(const wxURI& base, int flags)
 652 {
 653     wxASSERT_MSG(!base.IsReference(),
 654                 "wxURI to inherit from must not be a reference!");
 655
 656     // If we aren't being strict, enable the older (pre-RFC2396) loophole that
 657     // allows this uri to inherit other properties from the base uri - even if
 658     // the scheme is defined
 659     if ( !(flags & wxURI_STRICT) &&
 660             HasScheme() && base.HasScheme() &&
 661                 m_scheme == base.m_scheme )
 662     {
 663         m_fields -= wxURI_SCHEME;
 664     }
 665
 666
 667     // Do nothing if this is an absolute wxURI
 668     //    if defined(R.scheme) then
 669     //       T.scheme    = R.scheme;
 670     //       T.authority = R.authority;
 671     //       T.path      = remove_dot_segments(R.path);
 672     //       T.query     = R.query;
 673     if (HasScheme())
 674         return;
 675
 676     //No scheme - inherit
 677     m_scheme = base.m_scheme;
 678     m_fields |= wxURI_SCHEME;
 679
 680     // All we need to do for relative URIs with an
 681     // authority component is just inherit the scheme
 682     //       if defined(R.authority) then
 683     //          T.authority = R.authority;
 684     //          T.path      = remove_dot_segments(R.path);
 685     //          T.query     = R.query;
 686     if (HasServer())
 687         return;
 688
 689     //No authority - inherit
 690     if (base.HasUserInfo())
 691     {
 692         m_userinfo = base.m_userinfo;
 693         m_fields |= wxURI_USERINFO;
 694     }
 695
 696     m_server = base.m_server;
 697     m_hostType = base.m_hostType;
 698     m_fields |= wxURI_SERVER;
 699
 700     if (base.HasPort())
 701     {
 702         m_port = base.m_port;
 703         m_fields |= wxURI_PORT;
 704     }
 705
 706
 707     // Simple path inheritance from base
 708     if (!HasPath())
 709     {
 710         //             T.path = Base.path;
 711         m_path = base.m_path;
 712         m_fields |= wxURI_PATH;
 713
 714
 715         //             if defined(R.query) then
 716         //                T.query = R.query;
 717         //             else
 718         //                T.query = Base.query;
 719         //             endif;
 720         if (!HasQuery())
 721         {
 722             m_query = base.m_query;
 723             m_fields |= wxURI_QUERY;
 724         }
 725     }
 726     else if ( m_path.empty() || m_path[0u] != '/' )
 727     {
 728         //             if (R.path starts-with "/") then
 729         //                T.path = remove_dot_segments(R.path);
 730         //             else
 731         //                T.path = merge(Base.path, R.path);
 732         //                T.path = remove_dot_segments(T.path);
 733         //             endif;
 734         //             T.query = R.query;
 735         //
 736         // So we don't do anything for absolute paths and implement merge for
 737         // the relative ones
 738
 739         wxArrayString our(SplitInSegments(m_path)),
 740                       result(SplitInSegments(base.m_path));
 741
 742         if ( !result.empty() )
 743             result.pop_back();
 744
 745         if ( our.empty() )
 746         {
 747             // if we have an empty path it means we were constructed from a "."
 748             // string or something similar (e.g. "././././"), it should count
 749             // as (empty) segment
 750             our.push_back("");
 751         }
 752
 753         const wxArrayString::const_iterator end = our.end();
 754         for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
 755         {
 756             if ( i->empty() || *i == "." )
 757             {
 758                 // as in ParsePath(), while normally we ignore the empty
 759                 // segments, we need to take account of them at the end
 760                 if ( i == end - 1 )
 761                     result.push_back("");
 762                 continue;
 763             }
 764
 765             if ( *i == ".." )
 766             {
 767                 if ( !result.empty() )
 768                 {
 769                     result.pop_back();
 770
 771                     if ( i == end - 1 )
 772                         result.push_back("");
 773                 }
 774                 //else: just ignore, extra ".." don't accumulate
 775             }
 776             else
 777             {
 778                 if ( result.empty() )
 779                 {
 780                     // ensure that the resulting path will always be absolute
 781                     result.push_back("");
 782                 }
 783
 784                 result.push_back(*i);
 785             }
 786         }
 787
 788         m_path = wxJoin(result, '/', '\0');
 789     }
 790
 791     //T.fragment = R.fragment;
 792 }
 793
 794 // ---------------------------------------------------------------------------
// ParseH16
//
// Parses 1 to 4 hex digits.  Note that the input pointer is advanced before
// the first character is tested, i.e. this function returns true if the
// character following the current position is a valid hex character.  It is
// the caller's responsibility to move the input string back to its original
// position on failure.
 800 // ---------------------------------------------------------------------------
 801
// Parses a group of 1 to 4 hex digits following the current position.
//
// The pointer is advanced before the first digit is tested, so the digits are
// read starting from uri[1] and not from uri[0]. On success uri points to the
// character following the last digit consumed, on failure it has been
// advanced by one and it is up to the caller to restore it.
bool wxURI::ParseH16(const char*& uri)
{
    // h16           = 1*4HEXDIG
    if(!IsHex(*++uri))
        return false;

    // consume up to 3 more digits: the pointer stops at the first character
    // which is not a hex digit and needs to be moved past the last digit
    // explicitly only if all 4 of them were present
    if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
        ++uri;

    return true;
}
 813
 814 // ---------------------------------------------------------------------------
 815 // ParseIPXXX
 816 //
 817 // Parses a certain version of an IP address and moves the input string past
 818 // it.  Returns true if the input  string contains the proper version of an ip
 819 // address.  It is the caller's responsibility to move the input string back
 820 // to its original position on failure.
 821 // ---------------------------------------------------------------------------
 822
// Parses an IPv4 address in dotted decimal notation starting at uri.
//
// Returns true if exactly 4 octets were parsed, in which case uri points to
// the character following the last digit consumed. Otherwise returns false,
// leaving uri at an unspecified position inside the input: it is up to the
// caller to restore it.
bool wxURI::ParseIPv4address(const char*& uri)
{
    //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
    //
    //dec-octet     =      DIGIT                    ; 0-9
    //                / %x31-39 DIGIT               ; 10-99
    //                / "1" 2DIGIT                  ; 100-199
    //                / "2" %x30-34 DIGIT           ; 200-249
    //                / "25" %x30-35                ; 250-255

    // number of octets successfully parsed so far
    size_t iIPv4 = 0;
    if (IsDigit(*uri))
    {
        ++iIPv4;


        //each ip part must be between 0-255 (dupe of version in for loop)
        //
        //the range check below is only evaluated if the octet has 3 digits,
        //in which case uri points to the last one of them
        if( IsDigit(*++uri) && IsDigit(*++uri) &&
           //first digit is 0 or 1: the value can't exceed 199  (note !)
           !( (*(uri-2) < '2') ||
           //first digit is 2: only 200-249 and 250-255 are allowed
             (*(uri-2) == '2' &&
               (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
             )
            )
          )
        {
            return false;
        }

        //skip the third digit if there was one: for the shorter octets uri
        //already points to the first non-digit character
        if(IsDigit(*uri))++uri;

        //compilers should unroll this loop
        for(; iIPv4 < 4; ++iIPv4)
        {
            //each of the remaining octets must be preceded by a dot
            if (*uri != '.' || !IsDigit(*++uri))
                break;

            //each ip part must be between 0-255
            if( IsDigit(*++uri) && IsDigit(*++uri) &&
               //first digit is 0 or 1: the value can't exceed 199  (note !)
               !( (*(uri-2) < '2') ||
               //first digit is 2: only 200-249 and 250-255 are allowed
                 (*(uri-2) == '2' &&
                   (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
                 )
                )
              )
            {
                return false;
            }
            if(IsDigit(*uri))++uri;
        }
    }
    return iIPv4 == 4;
}
 878
// Tries to match an IPv6 address starting at uri, advancing uri while doing
// it. It is up to the caller to restore the pointer on failure.
//
// NOTE(review): ParseH16() advances the pointer before testing the first
// digit. This fits the calls made while uri points to a ':' separator, but
// the very first call below is made with uri pointing to wherever the caller
// left it (ParseServer() passes the character following '['), so the first
// character of the address looks like it is skipped -- confirm.
bool wxURI::ParseIPv6address(const char*& uri)
{
    // IPv6address   =                            6( h16 ":" ) ls32
    //               /                       "::" 5( h16 ":" ) ls32
    //               / [               h16 ] "::" 4( h16 ":" ) ls32
    //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    //               / [ *4( h16 ":" ) h16 ] "::"              ls32
    //               / [ *5( h16 ":" ) h16 ] "::"              h16
    //               / [ *6( h16 ":" ) h16 ] "::"

    // numPrefix counts the "h16 :" groups seen before "::" (or the end of
    // the address), maxPostfix is the number of such groups to parse after
    // "::" and before the final part
    size_t numPrefix = 0,
              maxPostfix;

    // set if the loop below stopped because no h16 could be parsed
    bool bEndHex = false;

    for( ; numPrefix < 6; ++numPrefix)
    {
        if(!ParseH16(uri))
        {
            // ParseH16() advanced the pointer by one before failing, undo it
            --uri;
            bEndHex = true;
            break;
        }

        if(*uri != ':')
        {
            break;
        }
    }

    if(!bEndHex && !ParseH16(uri))
    {
        // as above, undo the increment done by the failed ParseH16() call
        --uri;

        if (numPrefix)
            return false;

        if (*uri == ':')
        {
            // a single colon is not enough, "::" is required here
            if (*++uri != ':')
                return false;

            maxPostfix = 5;
        }
        else
            maxPostfix = 6;
    }
    else
    {
        if (*uri != ':' || *(uri+1) != ':')
        {
            // no "::" in the address: all 6 leading groups must be present
            if (numPrefix != 6)
                return false;

            // go back to just after the previous colon
            while (*--uri != ':') {}
            ++uri;

            const char * const start = uri;
            //parse ls32
            // ls32          = ( h16 ":" h16 ) / IPv4address
            if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
                return true;

            uri = start;

            if (ParseIPv4address(uri))
                return true;
            else
                return false;
        }
        else
        {
            // skip the "::"
            uri += 2;

            if (numPrefix > 3)
                maxPostfix = 0;
            else
                maxPostfix = 4 - numPrefix;
        }
    }

    // must be computed before maxPostfix is modified by the loop below
    bool bAllowAltEnding = maxPostfix == 0;

    for(; maxPostfix != 0; --maxPostfix)
    {
        if(!ParseH16(uri) || *uri != ':')
            return false;
    }

    if(numPrefix <= 4)
    {
        const char * const start = uri;
        //parse ls32
        // ls32          = ( h16 ":" h16 ) / IPv4address
        if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
            return true;

        uri = start;

        if (ParseIPv4address(uri))
            return true;

        uri = start;

        if (!bAllowAltEnding)
            return false;
    }

    // NOTE(review): true is returned whether this last ParseH16() call
    // succeeds or not, so its result only affects the position of uri (which
    // is advanced by one even on failure) and not the return value -- confirm
    // that this is intended.
    if(numPrefix <= 5 && ParseH16(uri))
        return true;

    return true;
}
 994
 995 bool wxURI::ParseIPvFuture(const char*& uri)
 996 {
 997     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 998     if (*++uri != 'v' || !IsHex(*++uri))
 999         return false;
1000
1001     while (IsHex(*++uri))
1002         ;
1003
1004     if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1005         return false;
1006
1007     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1008
1009     return true;
1010 }
1011
1012
1013 // ---------------------------------------------------------------------------
1014 // IsXXX
1015 //
1016 // Returns true if the passed in character meets the criteria of the method
1017 // ---------------------------------------------------------------------------
1018
1019 // unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1020 bool wxURI::IsUnreserved(char c)
1021 {
1022     return IsAlpha(c) ||
1023            IsDigit(c) ||
1024            c == '-' ||
1025            c == '.' ||
1026            c == '_' ||
1027            c == '~'
1028            ;
1029 }
1030
1031 bool wxURI::IsReserved(char c)
1032 {
1033     return IsGenDelim(c) || IsSubDelim(c);
1034 }
1035
1036 // gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1037 bool wxURI::IsGenDelim(char c)
1038 {
1039     return c == ':' ||
1040            c == '/' ||
1041            c == '?' ||
1042            c == '#' ||
1043            c == '[' ||
1044            c == ']' ||
1045            c == '@';
1046 }
1047
1048 // sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1049 //               / "*" / "+" / "," / ";" / "="
1050 bool wxURI::IsSubDelim(char c)
1051 {
1052     return c == '!' ||
1053            c == '$' ||
1054            c == '&' ||
1055            c == '\'' ||
1056            c == '(' ||
1057            c == ')' ||
1058            c == '*' ||
1059            c == '+' ||
1060            c == ',' ||
1061            c == ';' ||
1062            c == '='
1063            ;
1064 }
1065
1066 bool wxURI::IsHex(char c)
1067 {
1068     return IsDigit(c) ||
1069            (c >= 'a' && c <= 'f') ||
1070            (c >= 'A' && c <= 'F');
1071 }
1072
1073 bool wxURI::IsAlpha(char c)
1074 {
1075     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
1076 }
1077
1078 bool wxURI::IsDigit(char c)
1079 {
1080     return c >= '0' && c <= '9';
1081 }
1082
1083 bool wxURI::IsEndPath(char c)
1084 {
1085     return c == '\0' || c == '#' || c == '?';
1086 }
1087