src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        src/common/uri.cpp
   3 // Purpose:     Implementation of a URI parser
   4 // Author:      Ryan Norton,
   5 //              Vadim Zeitlin (UTF-8 URI support, many other changes)
   6 // Created:     10/26/04
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 2004 Ryan Norton,
   9 //                  2008 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // declarations
  15 // ===========================================================================
  16
  17 // ---------------------------------------------------------------------------
  18 // headers
  19 // ---------------------------------------------------------------------------
  20
  21 // For compilers that support precompilation, includes "wx.h".
  22 #include "wx/wxprec.h"
  23
  24 #ifdef __BORLANDC__
  25     #pragma hdrstop
  26 #endif
  27
  28 #ifndef WX_PRECOMP
  29     #include "wx/crt.h"
  30 #endif
  31
  32 #include "wx/uri.h"
  33
  34 // ---------------------------------------------------------------------------
  35 // definitions
  36 // ---------------------------------------------------------------------------
  37
// NOTE(review): presumably provides the wxWidgets run-time class information
// for wxURI with wxObject as its base class -- confirm against wx/object.h
IMPLEMENT_CLASS(wxURI, wxObject)
  39
  40 // ===========================================================================
  41 // wxURI implementation
  42 // ===========================================================================
  43
  44 // ---------------------------------------------------------------------------
  45 // Constructors and cleanup
  46 // ---------------------------------------------------------------------------
  47
  48 wxURI::wxURI()
  49      : m_hostType(wxURI_REGNAME),
  50        m_fields(0)
  51 {
  52 }
  53
  54 wxURI::wxURI(const wxString& uri)
  55      : m_hostType(wxURI_REGNAME),
  56        m_fields(0)
  57 {
  58     Create(uri);
  59 }
  60
  61 bool wxURI::Create(const wxString& uri)
  62 {
  63     if (m_fields)
  64         Clear();
  65
  66     return Parse(uri.utf8_str());
  67 }
  68
  69 void wxURI::Clear()
  70 {
  71     m_scheme =
  72     m_userinfo =
  73     m_server =
  74     m_port =
  75     m_path =
  76     m_query =
  77     m_fragment = wxEmptyString;
  78
  79     m_hostType = wxURI_REGNAME;
  80
  81     m_fields = 0;
  82 }
  83
  84 // ---------------------------------------------------------------------------
  85 // Escaped characters handling
  86 // ---------------------------------------------------------------------------
  87
  88 // Converts a character into a numeric hexadecimal value, or -1 if the passed
  89 // in character is not a valid hex character
  90
  91 /* static */
  92 int wxURI::CharToHex(char c)
  93 {
  94     if ((c >= 'A') && (c <= 'Z'))
  95         return c - 'A' + 10;
  96     if ((c >= 'a') && (c <= 'z'))
  97         return c - 'a' + 10;
  98     if ((c >= '0') && (c <= '9'))
  99         return c - '0';
 100
 101     return -1;
 102 }
 103
 104 int wxURI::DecodeEscape(wxString::const_iterator& i)
 105 {
 106     int hi = CharToHex(*++i);
 107     if ( hi == -1 )
 108         return -1;
 109
 110     int lo = CharToHex(*++i);
 111     if ( lo == -1 )
 112         return -1;
 113
 114     return (hi << 4) | lo;
 115 }
 116
 117 /* static */
 118 wxString wxURI::Unescape(const wxString& uri)
 119 {
 120     // the unescaped version can't be longer than the original one
 121     wxCharBuffer buf(uri.length());
 122     char *p = buf.data();
 123
 124     for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p )
 125     {
 126         char c = *i;
 127         if ( c == '%' )
 128         {
 129             int n = wxURI::DecodeEscape(i);
 130             if ( n == -1 )
 131                 return wxString();
 132
 133             wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" );
 134
 135             c = static_cast<char>(n);
 136         }
 137
 138         *p = c;
 139     }
 140
 141     *p = '\0';
 142
 143     // by default assume that the URI is in UTF-8, this is the most common
 144     // practice
 145     wxString s = wxString::FromUTF8(buf);
 146     if ( s.empty() )
 147     {
 148         // if it isn't, use latin-1 as a fallback -- at least this always
 149         // succeeds
 150         s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf);
 151     }
 152
 153     return s;
 154 }
 155
 156 void wxURI::AppendNextEscaped(wxString& s, const char *& p)
 157 {
 158     // check for an already encoded character:
 159     //
 160     // pct-encoded   = "%" HEXDIG HEXDIG
 161     if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
 162     {
 163         s += *p++;
 164         s += *p++;
 165         s += *p++;
 166     }
 167     else // really needs escaping
 168     {
 169         static const char* hexDigits = "0123456789abcdef";
 170
 171         const char c = *p++;
 172
 173         s += '%';
 174         s += hexDigits[(c >> 4) & 15];
 175         s += hexDigits[c & 15];
 176     }
 177 }
 178
 179 // ---------------------------------------------------------------------------
 180 // GetUser
 181 // GetPassword
 182 //
 183 // Gets the username and password via the old URL method.
 184 // ---------------------------------------------------------------------------
 185 wxString wxURI::GetUser() const
 186 {
 187     // if there is no colon at all, find() returns npos and this method returns
 188     // the entire string which is correct as it means that password was omitted
 189     return m_userinfo(0, m_userinfo.find(':'));
 190 }
 191
 192 wxString wxURI::GetPassword() const
 193 {
 194       size_t posColon = m_userinfo.find(':');
 195
 196       if ( posColon == wxString::npos )
 197           return "";
 198
 199       return m_userinfo(posColon + 1, wxString::npos);
 200 }
 201
 202 // combine all URI fields in a single string, applying funcDecode to each
 203 // component which it may make sense to decode (i.e. "unescape")
 204 wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
 205 {
 206     wxString ret;
 207
 208     if (HasScheme())
 209         ret += m_scheme + ":";
 210
 211     if (HasServer())
 212     {
 213         ret += "//";
 214
 215         if (HasUserInfo())
 216             ret += funcDecode(m_userinfo) + "@";
 217
 218         if (m_hostType == wxURI_REGNAME)
 219             ret += funcDecode(m_server);
 220         else
 221             ret += m_server;
 222
 223         if (HasPort())
 224             ret += ":" + m_port;
 225     }
 226
 227     ret += funcDecode(m_path);
 228
 229     if (HasQuery())
 230         ret += "?" + funcDecode(m_query);
 231
 232     if (HasFragment())
 233         ret += "#" + funcDecode(m_fragment);
 234
 235     return ret;
 236 }
 237
 238 // ---------------------------------------------------------------------------
 239 // Comparison
 240 // ---------------------------------------------------------------------------
 241
 242 bool wxURI::operator==(const wxURI& uri) const
 243 {
 244     if (HasScheme())
 245     {
 246         if(m_scheme != uri.m_scheme)
 247             return false;
 248     }
 249     else if (uri.HasScheme())
 250         return false;
 251
 252
 253     if (HasServer())
 254     {
 255         if (HasUserInfo())
 256         {
 257             if (m_userinfo != uri.m_userinfo)
 258                 return false;
 259         }
 260         else if (uri.HasUserInfo())
 261             return false;
 262
 263         if (m_server != uri.m_server ||
 264             m_hostType != uri.m_hostType)
 265             return false;
 266
 267         if (HasPort())
 268         {
 269             if(m_port != uri.m_port)
 270                 return false;
 271         }
 272         else if (uri.HasPort())
 273             return false;
 274     }
 275     else if (uri.HasServer())
 276         return false;
 277
 278
 279     if (HasPath())
 280     {
 281         if(m_path != uri.m_path)
 282             return false;
 283     }
 284     else if (uri.HasPath())
 285         return false;
 286
 287     if (HasQuery())
 288     {
 289         if (m_query != uri.m_query)
 290             return false;
 291     }
 292     else if (uri.HasQuery())
 293         return false;
 294
 295     if (HasFragment())
 296     {
 297         if (m_fragment != uri.m_fragment)
 298             return false;
 299     }
 300     else if (uri.HasFragment())
 301         return false;
 302
 303     return true;
 304 }
 305
 306 // ---------------------------------------------------------------------------
 307 // IsReference
 308 //
 309 // if there is no authority or scheme, it is a reference
 310 // ---------------------------------------------------------------------------
 311
 312 bool wxURI::IsReference() const
 313 {
 314     return !HasScheme() || !HasServer();
 315 }
 316
 317 // ---------------------------------------------------------------------------
 318 // Parse
 319 //
 320 // Master URI parsing method.  Just calls the individual parsing methods
 321 //
 322 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 323 // URI-reference = URI / relative
 324 // ---------------------------------------------------------------------------
 325
 326 bool wxURI::Parse(const char *uri)
 327 {
 328     uri = ParseScheme(uri);
 329     if ( uri )
 330         uri = ParseAuthority(uri);
 331     if ( uri )
 332         uri = ParsePath(uri);
 333     if ( uri )
 334         uri = ParseQuery(uri);
 335     if ( uri )
 336         uri = ParseFragment(uri);
 337
 338     // we only succeed if we parsed the entire string
 339     return uri && *uri == '\0';
 340 }
 341
 342 const char* wxURI::ParseScheme(const char *uri)
 343 {
 344     const char * const start = uri;
 345
 346     // assume that we have a scheme if we have the valid start of it
 347     if ( IsAlpha(*uri) )
 348     {
 349         m_scheme += *uri++;
 350
 351         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 352         while (IsAlpha(*uri) || IsDigit(*uri) ||
 353                *uri == '+'   ||
 354                *uri == '-'   ||
 355                *uri == '.')
 356         {
 357             m_scheme += *uri++;
 358         }
 359
 360         //valid scheme?
 361         if (*uri == ':')
 362         {
 363             //mark the scheme as valid
 364             m_fields |= wxURI_SCHEME;
 365
 366             //move reference point up to input buffer
 367             ++uri;
 368         }
 369         else // no valid scheme finally
 370         {
 371             uri = start; // rewind
 372             m_scheme.clear();
 373         }
 374     }
 375     //else: can't have schema, possible a relative URI
 376
 377     return uri;
 378 }
 379
 380 const char* wxURI::ParseAuthority(const char* uri)
 381 {
 382     // authority     = [ userinfo "@" ] host [ ":" port ]
 383     if ( uri[0] == '/' && uri[1] == '/' )
 384     {
 385         //skip past the two slashes
 386         uri += 2;
 387
 388         // ############# DEVIATION FROM RFC #########################
 389         // Don't parse the server component for file URIs
 390         if(m_scheme != "file")
 391         {
 392             //normal way
 393             uri = ParseUserInfo(uri);
 394             uri = ParseServer(uri);
 395             return ParsePort(uri);
 396         }
 397     }
 398
 399     return uri;
 400 }
 401
 402 const char* wxURI::ParseUserInfo(const char* uri)
 403 {
 404     const char * const start = uri;
 405
 406     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 407     while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
 408     {
 409         if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
 410             m_userinfo += *uri++;
 411         else
 412             AppendNextEscaped(m_userinfo, uri);
 413     }
 414
 415     if ( *uri++ == '@' )
 416     {
 417         // valid userinfo
 418         m_fields |= wxURI_USERINFO;
 419     }
 420     else
 421     {
 422         uri = start; // rewind
 423         m_userinfo.clear();
 424     }
 425
 426     return uri;
 427 }
 428
 429 const char* wxURI::ParseServer(const char* uri)
 430 {
 431     const char * const start = uri;
 432
 433     // host          = IP-literal / IPv4address / reg-name
 434     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 435     if (*uri == '[')
 436     {
 437         ++uri;
 438         if (ParseIPv6address(uri) && *uri == ']')
 439         {
 440             m_hostType = wxURI_IPV6ADDRESS;
 441
 442             m_server.assign(start + 1, uri - start - 1);
 443             ++uri;
 444         }
 445         else
 446         {
 447             uri = start + 1; // skip the leading '[' again
 448
 449             if (ParseIPvFuture(uri) && *uri == ']')
 450             {
 451                 m_hostType = wxURI_IPVFUTURE;
 452
 453                 m_server.assign(start + 1, uri - start - 1);
 454                 ++uri;
 455             }
 456             else // unrecognized IP literal
 457             {
 458                 uri = start;
 459             }
 460         }
 461     }
 462     else // IPv4 or a reg-name
 463     {
 464         if (ParseIPv4address(uri))
 465         {
 466             m_hostType = wxURI_IPV4ADDRESS;
 467
 468             m_server.assign(start, uri - start);
 469         }
 470         else
 471         {
 472             uri = start;
 473         }
 474     }
 475
 476     if ( m_hostType == wxURI_REGNAME )
 477     {
 478         uri = start;
 479         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 480         while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
 481         {
 482             if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
 483                 m_server += *uri++;
 484             else
 485                 AppendNextEscaped(m_server, uri);
 486         }
 487     }
 488
 489     m_fields |= wxURI_SERVER;
 490
 491     return uri;
 492 }
 493
 494
 495 const char* wxURI::ParsePort(const char* uri)
 496 {
 497     // port          = *DIGIT
 498     if( *uri == ':' )
 499     {
 500         ++uri;
 501         while ( IsDigit(*uri) )
 502         {
 503             m_port += *uri++;
 504         }
 505
 506         m_fields |= wxURI_PORT;
 507     }
 508
 509     return uri;
 510 }
 511
 512 const char* wxURI::ParsePath(const char* uri)
 513 {
 514     /// hier-part     = "//" authority path-abempty
 515     ///               / path-absolute
 516     ///               / path-rootless
 517     ///               / path-empty
 518     ///
 519     /// relative-part = "//" authority path-abempty
 520     ///               / path-absolute
 521     ///               / path-noscheme
 522     ///               / path-empty
 523     ///
 524     /// path-abempty  = *( "/" segment )
 525     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 526     /// path-noscheme = segment-nz-nc *( "/" segment )
 527     /// path-rootless = segment-nz *( "/" segment )
 528     /// path-empty    = 0<pchar>
 529     ///
 530     /// segment       = *pchar
 531     /// segment-nz    = 1*pchar
 532     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 533     ///               ; non-zero-length segment without any colon ":"
 534     ///
 535     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 536
 537     if ( IsEndPath(*uri) )
 538         return uri;
 539
 540     const bool isAbs = *uri == '/';
 541     if ( isAbs )
 542         m_path += *uri++;
 543
 544     wxArrayString segments;
 545     wxString segment;
 546     for ( ;; )
 547     {
 548         const bool endPath = IsEndPath(*uri);
 549         if ( endPath || *uri == '/' )
 550         {
 551             // end of a segment, look at what we got
 552             if ( segment == ".." )
 553             {
 554                 if ( !segments.empty() && *segments.rbegin() != ".." )
 555                     segments.pop_back();
 556                 else if ( !isAbs )
 557                     segments.push_back("..");
 558             }
 559             else if ( segment == "." )
 560             {
 561                 // normally we ignore "." but the last one should be taken into
 562                 // account as "path/." is the same as "path/" and not just "path"
 563                 if ( endPath )
 564                     segments.push_back("");
 565             }
 566             else // normal segment
 567             {
 568                 segments.push_back(segment);
 569             }
 570
 571             if ( endPath )
 572                 break;
 573
 574             segment.clear();
 575             ++uri;
 576             continue;
 577         }
 578
 579         if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
 580             segment += *uri++;
 581         else
 582             AppendNextEscaped(segment, uri);
 583     }
 584
 585     m_path += wxJoin(segments, '/', '\0');
 586     m_fields |= wxURI_PATH;
 587
 588     return uri;
 589 }
 590
 591
 592 const char* wxURI::ParseQuery(const char* uri)
 593 {
 594     // query         = *( pchar / "/" / "?" )
 595     if ( *uri == '?' )
 596     {
 597         ++uri;
 598         while ( *uri && *uri != '#' )
 599         {
 600             if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 601                     *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
 602                 m_query += *uri++;
 603             else
 604                 AppendNextEscaped(m_query, uri);
 605         }
 606
 607         m_fields |= wxURI_QUERY;
 608     }
 609
 610     return uri;
 611 }
 612
 613
 614 const char* wxURI::ParseFragment(const char* uri)
 615 {
 616     // fragment      = *( pchar / "/" / "?" )
 617     if ( *uri == '#' )
 618     {
 619         ++uri;
 620         while ( *uri )
 621         {
 622             if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 623                     *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
 624                 m_fragment += *uri++;
 625             else
 626                 AppendNextEscaped(m_fragment, uri);
 627         }
 628
 629         m_fields |= wxURI_FRAGMENT;
 630     }
 631
 632     return uri;
 633 }
 634
 635 // ---------------------------------------------------------------------------
 636 // Resolve
 637 //
 638 // Builds missing components of this uri from a base uri
 639 //
 640 // A version of the algorithm outlined in the RFC is used here
 641 // (it is shown in comments)
 642 //
 643 // Note that an empty URI inherits all components
 644 // ---------------------------------------------------------------------------
 645
 646 /* static */
 647 wxArrayString wxURI::SplitInSegments(const wxString& path)
 648 {
 649     return wxSplit(path, '/', '\0' /* no escape character */);
 650 }
 651
 652 void wxURI::Resolve(const wxURI& base, int flags)
 653 {
 654     wxASSERT_MSG(!base.IsReference(),
 655                 "wxURI to inherit from must not be a reference!");
 656
 657     // If we aren't being strict, enable the older (pre-RFC2396) loophole that
 658     // allows this uri to inherit other properties from the base uri - even if
 659     // the scheme is defined
 660     if ( !(flags & wxURI_STRICT) &&
 661             HasScheme() && base.HasScheme() &&
 662                 m_scheme == base.m_scheme )
 663     {
 664         m_fields -= wxURI_SCHEME;
 665     }
 666
 667
 668     // Do nothing if this is an absolute wxURI
 669     //    if defined(R.scheme) then
 670     //       T.scheme    = R.scheme;
 671     //       T.authority = R.authority;
 672     //       T.path      = remove_dot_segments(R.path);
 673     //       T.query     = R.query;
 674     if (HasScheme())
 675         return;
 676
 677     //No scheme - inherit
 678     m_scheme = base.m_scheme;
 679     m_fields |= wxURI_SCHEME;
 680
 681     // All we need to do for relative URIs with an
 682     // authority component is just inherit the scheme
 683     //       if defined(R.authority) then
 684     //          T.authority = R.authority;
 685     //          T.path      = remove_dot_segments(R.path);
 686     //          T.query     = R.query;
 687     if (HasServer())
 688         return;
 689
 690     //No authority - inherit
 691     if (base.HasUserInfo())
 692     {
 693         m_userinfo = base.m_userinfo;
 694         m_fields |= wxURI_USERINFO;
 695     }
 696
 697     m_server = base.m_server;
 698     m_hostType = base.m_hostType;
 699     m_fields |= wxURI_SERVER;
 700
 701     if (base.HasPort())
 702     {
 703         m_port = base.m_port;
 704         m_fields |= wxURI_PORT;
 705     }
 706
 707
 708     // Simple path inheritance from base
 709     if (!HasPath())
 710     {
 711         //             T.path = Base.path;
 712         m_path = base.m_path;
 713         m_fields |= wxURI_PATH;
 714
 715
 716         //             if defined(R.query) then
 717         //                T.query = R.query;
 718         //             else
 719         //                T.query = Base.query;
 720         //             endif;
 721         if (!HasQuery())
 722         {
 723             m_query = base.m_query;
 724             m_fields |= wxURI_QUERY;
 725         }
 726     }
 727     else if ( m_path.empty() || m_path[0u] != '/' )
 728     {
 729         //             if (R.path starts-with "/") then
 730         //                T.path = remove_dot_segments(R.path);
 731         //             else
 732         //                T.path = merge(Base.path, R.path);
 733         //                T.path = remove_dot_segments(T.path);
 734         //             endif;
 735         //             T.query = R.query;
 736         //
 737         // So we don't do anything for absolute paths and implement merge for
 738         // the relative ones
 739
 740         wxArrayString our(SplitInSegments(m_path)),
 741                       result(SplitInSegments(base.m_path));
 742
 743         if ( !result.empty() )
 744             result.pop_back();
 745
 746         if ( our.empty() )
 747         {
 748             // if we have an empty path it means we were constructed from a "."
 749             // string or something similar (e.g. "././././"), it should count
 750             // as (empty) segment
 751             our.push_back("");
 752         }
 753
 754         const wxArrayString::const_iterator end = our.end();
 755         for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
 756         {
 757             if ( i->empty() || *i == "." )
 758             {
 759                 // as in ParsePath(), while normally we ignore the empty
 760                 // segments, we need to take account of them at the end
 761                 if ( i == end - 1 )
 762                     result.push_back("");
 763                 continue;
 764             }
 765
 766             if ( *i == ".." )
 767             {
 768                 if ( !result.empty() )
 769                 {
 770                     result.pop_back();
 771
 772                     if ( i == end - 1 )
 773                         result.push_back("");
 774                 }
 775                 //else: just ignore, extra ".." don't accumulate
 776             }
 777             else
 778             {
 779                 if ( result.empty() )
 780                 {
 781                     // ensure that the resulting path will always be absolute
 782                     result.push_back("");
 783                 }
 784
 785                 result.push_back(*i);
 786             }
 787         }
 788
 789         m_path = wxJoin(result, '/', '\0');
 790     }
 791
 792     //T.fragment = R.fragment;
 793 }
 794
 795 // ---------------------------------------------------------------------------
 796 // ParseH16
 797 //
 798 // Parses 1 to 4 hex values.  Returns true if the first character of the input
 799 // string is a valid hex character.  It is the caller's responsibility to move
 800 // the input string back to its original position on failure.
 801 // ---------------------------------------------------------------------------
 802
 803 bool wxURI::ParseH16(const char*& uri)
 804 {
 805     // h16           = 1*4HEXDIG
 806     if(!IsHex(*++uri))
 807         return false;
 808
 809     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
 810         ++uri;
 811
 812     return true;
 813 }
 814
 815 // ---------------------------------------------------------------------------
 816 // ParseIPXXX
 817 //
 818 // Parses a certain version of an IP address and moves the input string past
 819 // it.  Returns true if the input  string contains the proper version of an ip
 820 // address.  It is the caller's responsability to move the input string back
 821 // to its original position on failure.
 822 // ---------------------------------------------------------------------------
 823
 824 bool wxURI::ParseIPv4address(const char*& uri)
 825 {
 826     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 827     //
 828     //dec-octet     =      DIGIT                    ; 0-9
 829     //                / %x31-39 DIGIT               ; 10-99
 830     //                / "1" 2DIGIT                  ; 100-199
 831     //                / "2" %x30-34 DIGIT           ; 200-249
 832     //                / "25" %x30-35                ; 250-255
 833     size_t iIPv4 = 0;
 834     if (IsDigit(*uri))
 835     {
 836         ++iIPv4;
 837
 838
 839         //each ip part must be between 0-255 (dupe of version in for loop)
 840         if( IsDigit(*++uri) && IsDigit(*++uri) &&
 841            //100 or less  (note !)
 842            !( (*(uri-2) < '2') ||
 843            //240 or less
 844              (*(uri-2) == '2' &&
 845                (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
 846              )
 847             )
 848           )
 849         {
 850             return false;
 851         }
 852
 853         if(IsDigit(*uri))++uri;
 854
 855         //compilers should unroll this loop
 856         for(; iIPv4 < 4; ++iIPv4)
 857         {
 858             if (*uri != '.' || !IsDigit(*++uri))
 859                 break;
 860
 861             //each ip part must be between 0-255
 862             if( IsDigit(*++uri) && IsDigit(*++uri) &&
 863                //100 or less  (note !)
 864                !( (*(uri-2) < '2') ||
 865                //240 or less
 866                  (*(uri-2) == '2' &&
 867                    (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
 868                  )
 869                 )
 870               )
 871             {
 872                 return false;
 873             }
 874             if(IsDigit(*uri))++uri;
 875         }
 876     }
 877     return iIPv4 == 4;
 878 }
 879
 880 bool wxURI::ParseIPv6address(const char*& uri)
 881 {
 882     // IPv6address   =                            6( h16 ":" ) ls32
 883     //               /                       "::" 5( h16 ":" ) ls32
 884     //               / [               h16 ] "::" 4( h16 ":" ) ls32
 885     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
 886     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
 887     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
 888     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
 889     //               / [ *5( h16 ":" ) h16 ] "::"              h16
 890     //               / [ *6( h16 ":" ) h16 ] "::"
 891
 892     size_t numPrefix = 0,
 893               maxPostfix;
 894
 895     bool bEndHex = false;
 896
 897     for( ; numPrefix < 6; ++numPrefix)
 898     {
 899         if(!ParseH16(uri))
 900         {
 901             --uri;
 902             bEndHex = true;
 903             break;
 904         }
 905
 906         if(*uri != ':')
 907         {
 908             break;
 909         }
 910     }
 911
 912     if(!bEndHex && !ParseH16(uri))
 913     {
 914         --uri;
 915
 916         if (numPrefix)
 917             return false;
 918
 919         if (*uri == ':')
 920         {
 921             if (*++uri != ':')
 922                 return false;
 923
 924             maxPostfix = 5;
 925         }
 926         else
 927             maxPostfix = 6;
 928     }
 929     else
 930     {
 931         if (*uri != ':' || *(uri+1) != ':')
 932         {
 933             if (numPrefix != 6)
 934                 return false;
 935
 936             while (*--uri != ':') {}
 937             ++uri;
 938
 939             const char * const start = uri;
 940             //parse ls32
 941             // ls32          = ( h16 ":" h16 ) / IPv4address
 942             if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
 943                 return true;
 944
 945             uri = start;
 946
 947             if (ParseIPv4address(uri))
 948                 return true;
 949             else
 950                 return false;
 951         }
 952         else
 953         {
 954             uri += 2;
 955
 956             if (numPrefix > 3)
 957                 maxPostfix = 0;
 958             else
 959                 maxPostfix = 4 - numPrefix;
 960         }
 961     }
 962
 963     bool bAllowAltEnding = maxPostfix == 0;
 964
 965     for(; maxPostfix != 0; --maxPostfix)
 966     {
 967         if(!ParseH16(uri) || *uri != ':')
 968             return false;
 969     }
 970
 971     if(numPrefix <= 4)
 972     {
 973         const char * const start = uri;
 974         //parse ls32
 975         // ls32          = ( h16 ":" h16 ) / IPv4address
 976         if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
 977             return true;
 978
 979         uri = start;
 980
 981         if (ParseIPv4address(uri))
 982             return true;
 983
 984         uri = start;
 985
 986         if (!bAllowAltEnding)
 987             return false;
 988     }
 989
 990     if(numPrefix <= 5 && ParseH16(uri))
 991         return true;
 992
 993     return true;
 994 }
 995
 996 bool wxURI::ParseIPvFuture(const char*& uri)
 997 {
 998     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
 999     if (*++uri != 'v' || !IsHex(*++uri))
1000         return false;
1001
1002     while (IsHex(*++uri))
1003         ;
1004
1005     if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1006         return false;
1007
1008     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1009
1010     return true;
1011 }
1012
1013
1014 // ---------------------------------------------------------------------------
1015 // IsXXX
1016 //
1017 // Returns true if the passed in character meets the criteria of the method
1018 // ---------------------------------------------------------------------------
1019
1020 // unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1021 bool wxURI::IsUnreserved(char c)
1022 {
1023     return IsAlpha(c) ||
1024            IsDigit(c) ||
1025            c == '-' ||
1026            c == '.' ||
1027            c == '_' ||
1028            c == '~'
1029            ;
1030 }
1031
1032 bool wxURI::IsReserved(char c)
1033 {
1034     return IsGenDelim(c) || IsSubDelim(c);
1035 }
1036
1037 // gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1038 bool wxURI::IsGenDelim(char c)
1039 {
1040     return c == ':' ||
1041            c == '/' ||
1042            c == '?' ||
1043            c == '#' ||
1044            c == '[' ||
1045            c == ']' ||
1046            c == '@';
1047 }
1048
1049 // sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1050 //               / "*" / "+" / "," / ";" / "="
1051 bool wxURI::IsSubDelim(char c)
1052 {
1053     return c == '!' ||
1054            c == '$' ||
1055            c == '&' ||
1056            c == '\'' ||
1057            c == '(' ||
1058            c == ')' ||
1059            c == '*' ||
1060            c == '+' ||
1061            c == ',' ||
1062            c == ';' ||
1063            c == '='
1064            ;
1065 }
1066
1067 bool wxURI::IsHex(char c)
1068 {
1069     return IsDigit(c) ||
1070            (c >= 'a' && c <= 'f') ||
1071            (c >= 'A' && c <= 'F');
1072 }
1073
1074 bool wxURI::IsAlpha(char c)
1075 {
1076     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
1077 }
1078
1079 bool wxURI::IsDigit(char c)
1080 {
1081     return c >= '0' && c <= '9';
1082 }
1083
1084 bool wxURI::IsEndPath(char c)
1085 {
1086     return c == '\0' || c == '#' || c == '?';
1087 }
1088