src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a URI parser
   4 // Author:      Ryan Norton,
   5 //              Vadim Zeitlin (UTF-8 URI support, many other changes)
   6 // Created:     10/26/04
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 2004 Ryan Norton,
   9 //                  2008 Vadim Zeitlin
  10 // Licence:     wxWindows licence
  11 /////////////////////////////////////////////////////////////////////////////
  12
  13 // ===========================================================================
  14 // declarations
  15 // ===========================================================================
  16
  17 // ---------------------------------------------------------------------------
  18 // headers
  19 // ---------------------------------------------------------------------------
  20
  21 // For compilers that support precompilation, includes "wx.h".
  22 #include "wx/wxprec.h"
  23
  24 #ifdef __BORLANDC__
  25     #pragma hdrstop
  26 #endif
  27
  28 #ifndef WX_PRECOMP
  29     #include "wx/crt.h"
  30 #endif
  31
  32 #include "wx/uri.h"
  33
  34 // ---------------------------------------------------------------------------
  35 // definitions
  36 // ---------------------------------------------------------------------------
  37
// Emit the wxWidgets run-time class information for wxURI, naming wxObject
// as its base class.
IMPLEMENT_CLASS(wxURI, wxObject)
  39
  40 // ===========================================================================
  41 // wxURI implementation
  42 // ===========================================================================
  43
  44 // ---------------------------------------------------------------------------
  45 // Constructors and cleanup
  46 // ---------------------------------------------------------------------------
  47
  48 wxURI::wxURI()
  49      : m_hostType(wxURI_REGNAME),
  50        m_fields(0)
  51 {
  52 }
  53
  54 wxURI::wxURI(const wxString& uri)
  55      : m_hostType(wxURI_REGNAME),
  56        m_fields(0)
  57 {
  58     Create(uri);
  59 }
  60
  61 bool wxURI::Create(const wxString& uri)
  62 {
  63     if (m_fields)
  64         Clear();
  65
  66     return Parse(uri.utf8_str());
  67 }
  68
  69 void wxURI::Clear()
  70 {
  71     m_scheme =
  72     m_userinfo =
  73     m_server =
  74     m_port =
  75     m_path =
  76     m_query =
  77     m_fragment = wxEmptyString;
  78
  79     m_hostType = wxURI_REGNAME;
  80
  81     m_fields = 0;
  82 }
  83
  84 // ---------------------------------------------------------------------------
  85 // Escaped characters handling
  86 // ---------------------------------------------------------------------------
  87
  88 // Converts a character into a numeric hexadecimal value, or -1 if the passed
  89 // in character is not a valid hex character
  90
  91 /* static */
  92 int wxURI::CharToHex(char c)
  93 {
  94     if ((c >= 'A') && (c <= 'Z'))
  95         return c - 'A' + 10;
  96     if ((c >= 'a') && (c <= 'z'))
  97         return c - 'a' + 10;
  98     if ((c >= '0') && (c <= '9'))
  99         return c - '0';
 100
 101     return -1;
 102 }
 103
 104 int wxURI::DecodeEscape(wxString::const_iterator& i)
 105 {
 106     int hi = CharToHex(*++i);
 107     if ( hi == -1 )
 108         return -1;
 109
 110     int lo = CharToHex(*++i);
 111     if ( lo == -1 )
 112         return -1;
 113
 114     return (hi << 4) | lo;
 115 }
 116
 117 /* static */
 118 wxString wxURI::Unescape(const wxString& uri)
 119 {
 120     // the unescaped version can't be longer than the original one
 121     wxCharBuffer buf(uri.length());
 122     char *p = buf.data();
 123
 124     for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p )
 125     {
 126         char c = *i;
 127         if ( c == '%' )
 128         {
 129             int n = wxURI::DecodeEscape(i);
 130             if ( n == -1 )
 131                 return wxString();
 132
 133             wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" );
 134
 135             c = wx_static_cast(char, n);
 136         }
 137
 138         *p = c;
 139     }
 140
 141     *p = '\0';
 142
 143     // by default assume that the URI is in UTF-8, this is the most common
 144     // practice
 145     wxString s = wxString::FromUTF8(buf);
 146     if ( s.empty() )
 147     {
 148         // if it isn't, use latin-1 as a fallback -- at least this always
 149         // succeeds
 150         s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf);
 151     }
 152
 153     return s;
 154 }
 155
 156 void wxURI::AppendNextEscaped(wxString& s, const char *& p)
 157 {
 158     // check for an already encoded character:
 159     //
 160     // pct-encoded   = "%" HEXDIG HEXDIG
 161     if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
 162     {
 163         s += *p++;
 164         s += *p++;
 165         s += *p++;
 166     }
 167     else // really needs escaping
 168     {
 169         static const char* hexDigits = "0123456789abcdef";
 170
 171         const char c = *p++;
 172
 173         s += '%';
 174         s += hexDigits[(c >> 4) & 15];
 175         s += hexDigits[c & 15];
 176     }
 177 }
 178
 179 // ---------------------------------------------------------------------------
 180 // GetUser
 181 // GetPassword
 182 //
 183 // Gets the username and password via the old URL method.
 184 // ---------------------------------------------------------------------------
 185 wxString wxURI::GetUser() const
 186 {
 187       size_t dwPasswordPos = m_userinfo.find(':');
 188
 189       if (dwPasswordPos == wxString::npos)
 190           dwPasswordPos = 0;
 191
 192       return m_userinfo(0, dwPasswordPos);
 193 }
 194
 195 wxString wxURI::GetPassword() const
 196 {
 197       size_t dwPasswordPos = m_userinfo.find(':');
 198
 199       if (dwPasswordPos == wxString::npos)
 200           return "";
 201       else
 202           return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
 203 }
 204
 205 // combine all URI fields in a single string, applying funcDecode to each
 206 // component which it may make sense to decode (i.e. "unescape")
 207 wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
 208 {
 209     wxString ret;
 210
 211     if (HasScheme())
 212         ret += m_scheme + ":";
 213
 214     if (HasServer())
 215     {
 216         ret += "//";
 217
 218         if (HasUserInfo())
 219             ret += funcDecode(m_userinfo) + "@";
 220
 221         if (m_hostType == wxURI_REGNAME)
 222             ret += funcDecode(m_server);
 223         else
 224             ret += m_server;
 225
 226         if (HasPort())
 227             ret += ":" + m_port;
 228     }
 229
 230     ret += funcDecode(m_path);
 231
 232     if (HasQuery())
 233         ret += "?" + funcDecode(m_query);
 234
 235     if (HasFragment())
 236         ret += "#" + funcDecode(m_fragment);
 237
 238     return ret;
 239 }
 240
 241 // ---------------------------------------------------------------------------
 242 // Comparison
 243 // ---------------------------------------------------------------------------
 244
 245 bool wxURI::operator==(const wxURI& uri) const
 246 {
 247     if (HasScheme())
 248     {
 249         if(m_scheme != uri.m_scheme)
 250             return false;
 251     }
 252     else if (uri.HasScheme())
 253         return false;
 254
 255
 256     if (HasServer())
 257     {
 258         if (HasUserInfo())
 259         {
 260             if (m_userinfo != uri.m_userinfo)
 261                 return false;
 262         }
 263         else if (uri.HasUserInfo())
 264             return false;
 265
 266         if (m_server != uri.m_server ||
 267             m_hostType != uri.m_hostType)
 268             return false;
 269
 270         if (HasPort())
 271         {
 272             if(m_port != uri.m_port)
 273                 return false;
 274         }
 275         else if (uri.HasPort())
 276             return false;
 277     }
 278     else if (uri.HasServer())
 279         return false;
 280
 281
 282     if (HasPath())
 283     {
 284         if(m_path != uri.m_path)
 285             return false;
 286     }
 287     else if (uri.HasPath())
 288         return false;
 289
 290     if (HasQuery())
 291     {
 292         if (m_query != uri.m_query)
 293             return false;
 294     }
 295     else if (uri.HasQuery())
 296         return false;
 297
 298     if (HasFragment())
 299     {
 300         if (m_fragment != uri.m_fragment)
 301             return false;
 302     }
 303     else if (uri.HasFragment())
 304         return false;
 305
 306     return true;
 307 }
 308
 309 // ---------------------------------------------------------------------------
 310 // IsReference
 311 //
 312 // if there is no authority or scheme, it is a reference
 313 // ---------------------------------------------------------------------------
 314
 315 bool wxURI::IsReference() const
 316 {
 317     return !HasScheme() || !HasServer();
 318 }
 319
 320 // ---------------------------------------------------------------------------
 321 // Parse
 322 //
 323 // Master URI parsing method.  Just calls the individual parsing methods
 324 //
 325 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 326 // URI-reference = URI / relative
 327 // ---------------------------------------------------------------------------
 328
 329 bool wxURI::Parse(const char *uri)
 330 {
 331     uri = ParseScheme(uri);
 332     if ( uri )
 333         uri = ParseAuthority(uri);
 334     if ( uri )
 335         uri = ParsePath(uri);
 336     if ( uri )
 337         uri = ParseQuery(uri);
 338     if ( uri )
 339         uri = ParseFragment(uri);
 340
 341     // we only succeed if we parsed the entire string
 342     return uri && *uri == '\0';
 343 }
 344
 345 const char* wxURI::ParseScheme(const char *uri)
 346 {
 347     const char * const start = uri;
 348
 349     // assume that we have a scheme if we have the valid start of it
 350     if ( IsAlpha(*uri) )
 351     {
 352         m_scheme += *uri++;
 353
 354         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 355         while (IsAlpha(*uri) || IsDigit(*uri) ||
 356                *uri == '+'   ||
 357                *uri == '-'   ||
 358                *uri == '.')
 359         {
 360             m_scheme += *uri++;
 361         }
 362
 363         //valid scheme?
 364         if (*uri == ':')
 365         {
 366             //mark the scheme as valid
 367             m_fields |= wxURI_SCHEME;
 368
 369             //move reference point up to input buffer
 370             ++uri;
 371         }
 372         else // no valid scheme finally
 373         {
 374             uri = start; // rewind
 375             m_scheme.clear();
 376         }
 377     }
 378     //else: can't have schema, possible a relative URI
 379
 380     return uri;
 381 }
 382
 383 const char* wxURI::ParseAuthority(const char* uri)
 384 {
 385     // authority     = [ userinfo "@" ] host [ ":" port ]
 386     if ( uri[0] == '/' && uri[1] == '/' )
 387     {
 388         //skip past the two slashes
 389         uri += 2;
 390
 391         // ############# DEVIATION FROM RFC #########################
 392         // Don't parse the server component for file URIs
 393         if(m_scheme != "file")
 394         {
 395             //normal way
 396             uri = ParseUserInfo(uri);
 397             uri = ParseServer(uri);
 398             return ParsePort(uri);
 399         }
 400     }
 401
 402     return uri;
 403 }
 404
 405 const char* wxURI::ParseUserInfo(const char* uri)
 406 {
 407     const char * const start = uri;
 408
 409     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 410     while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
 411     {
 412         if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
 413             m_userinfo += *uri++;
 414         else
 415             AppendNextEscaped(m_userinfo, uri);
 416     }
 417
 418     if ( *uri++ == '@' )
 419     {
 420         // valid userinfo
 421         m_fields |= wxURI_USERINFO;
 422     }
 423     else
 424     {
 425         uri = start; // rewind
 426         m_userinfo.clear();
 427     }
 428
 429     return uri;
 430 }
 431
 432 const char* wxURI::ParseServer(const char* uri)
 433 {
 434     const char * const start = uri;
 435
 436     // host          = IP-literal / IPv4address / reg-name
 437     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 438     if (*uri == '[')
 439     {
 440         ++uri;
 441         if (ParseIPv6address(uri) && *uri == ']')
 442         {
 443             m_hostType = wxURI_IPV6ADDRESS;
 444
 445             m_server.assign(start + 1, uri - start - 1);
 446             ++uri;
 447         }
 448         else
 449         {
 450             uri = start + 1; // skip the leading '[' again
 451
 452             if (ParseIPvFuture(uri) && *uri == ']')
 453             {
 454                 m_hostType = wxURI_IPVFUTURE;
 455
 456                 m_server.assign(start + 1, uri - start - 1);
 457                 ++uri;
 458             }
 459             else // unrecognized IP literal
 460             {
 461                 uri = start;
 462             }
 463         }
 464     }
 465     else // IPv4 or a reg-name
 466     {
 467         if (ParseIPv4address(uri))
 468         {
 469             m_hostType = wxURI_IPV4ADDRESS;
 470
 471             m_server.assign(start, uri - start);
 472         }
 473         else
 474         {
 475             uri = start;
 476         }
 477     }
 478
 479     if ( m_hostType == wxURI_REGNAME )
 480     {
 481         uri = start;
 482         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 483         while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
 484         {
 485             if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
 486                 m_server += *uri++;
 487             else
 488                 AppendNextEscaped(m_server, uri);
 489         }
 490     }
 491
 492     m_fields |= wxURI_SERVER;
 493
 494     return uri;
 495 }
 496
 497
 498 const char* wxURI::ParsePort(const char* uri)
 499 {
 500     // port          = *DIGIT
 501     if( *uri == ':' )
 502     {
 503         ++uri;
 504         while ( IsDigit(*uri) )
 505         {
 506             m_port += *uri++;
 507         }
 508
 509         m_fields |= wxURI_PORT;
 510     }
 511
 512     return uri;
 513 }
 514
 515 const char* wxURI::ParsePath(const char* uri)
 516 {
 517     /// hier-part     = "//" authority path-abempty
 518     ///               / path-absolute
 519     ///               / path-rootless
 520     ///               / path-empty
 521     ///
 522     /// relative-part = "//" authority path-abempty
 523     ///               / path-absolute
 524     ///               / path-noscheme
 525     ///               / path-empty
 526     ///
 527     /// path-abempty  = *( "/" segment )
 528     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 529     /// path-noscheme = segment-nz-nc *( "/" segment )
 530     /// path-rootless = segment-nz *( "/" segment )
 531     /// path-empty    = 0<pchar>
 532     ///
 533     /// segment       = *pchar
 534     /// segment-nz    = 1*pchar
 535     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 536     ///               ; non-zero-length segment without any colon ":"
 537     ///
 538     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 539
 540     if ( IsEndPath(*uri) )
 541         return uri;
 542
 543     const bool isAbs = *uri == '/';
 544     if ( isAbs )
 545         m_path += *uri++;
 546
 547     wxArrayString segments;
 548     wxString segment;
 549     for ( ;; )
 550     {
 551         const bool endPath = IsEndPath(*uri);
 552         if ( endPath || *uri == '/' )
 553         {
 554             // end of a segment, look at what we got
 555             if ( segment == ".." )
 556             {
 557                 if ( !segments.empty() && *segments.rbegin() != ".." )
 558                     segments.pop_back();
 559                 else if ( !isAbs )
 560                     segments.push_back("..");
 561             }
 562             else if ( segment == "." )
 563             {
 564                 // normally we ignore "." but the last one should be taken into
 565                 // account as "path/." is the same as "path/" and not just "path"
 566                 if ( endPath )
 567                     segments.push_back("");
 568             }
 569             else // normal segment
 570             {
 571                 segments.push_back(segment);
 572             }
 573
 574             if ( endPath )
 575                 break;
 576
 577             segment.clear();
 578             ++uri;
 579             continue;
 580         }
 581
 582         if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
 583             segment += *uri++;
 584         else
 585             AppendNextEscaped(segment, uri);
 586     }
 587
 588     m_path += wxJoin(segments, '/', '\0');
 589     m_fields |= wxURI_PATH;
 590
 591     return uri;
 592 }
 593
 594
 595 const char* wxURI::ParseQuery(const char* uri)
 596 {
 597     // query         = *( pchar / "/" / "?" )
 598     if ( *uri == '?' )
 599     {
 600         ++uri;
 601         while ( *uri && *uri != '#' )
 602         {
 603             if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 604                     *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
 605                 m_query += *uri++;
 606             else
 607                 AppendNextEscaped(m_query, uri);
 608         }
 609
 610         m_fields |= wxURI_QUERY;
 611     }
 612
 613     return uri;
 614 }
 615
 616
 617 const char* wxURI::ParseFragment(const char* uri)
 618 {
 619     // fragment      = *( pchar / "/" / "?" )
 620     if ( *uri == '#' )
 621     {
 622         ++uri;
 623         while ( *uri )
 624         {
 625             if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
 626                     *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
 627                 m_fragment += *uri++;
 628             else
 629                 AppendNextEscaped(m_fragment, uri);
 630         }
 631
 632         m_fields |= wxURI_FRAGMENT;
 633     }
 634
 635     return uri;
 636 }
 637
 638 // ---------------------------------------------------------------------------
 639 // Resolve
 640 //
 641 // Builds missing components of this uri from a base uri
 642 //
 643 // A version of the algorithm outlined in the RFC is used here
 644 // (it is shown in comments)
 645 //
 646 // Note that an empty URI inherits all components
 647 // ---------------------------------------------------------------------------
 648
 649 /* static */
 650 wxArrayString wxURI::SplitInSegments(const wxString& path)
 651 {
 652     return wxSplit(path, '/', '\0' /* no escape character */);
 653 }
 654
 655 void wxURI::Resolve(const wxURI& base, int flags)
 656 {
 657     wxASSERT_MSG(!base.IsReference(),
 658                 "wxURI to inherit from must not be a reference!");
 659
 660     // If we aren't being strict, enable the older (pre-RFC2396) loophole that
 661     // allows this uri to inherit other properties from the base uri - even if
 662     // the scheme is defined
 663     if ( !(flags & wxURI_STRICT) &&
 664             HasScheme() && base.HasScheme() &&
 665                 m_scheme == base.m_scheme )
 666     {
 667         m_fields -= wxURI_SCHEME;
 668     }
 669
 670
 671     // Do nothing if this is an absolute wxURI
 672     //    if defined(R.scheme) then
 673     //       T.scheme    = R.scheme;
 674     //       T.authority = R.authority;
 675     //       T.path      = remove_dot_segments(R.path);
 676     //       T.query     = R.query;
 677     if (HasScheme())
 678         return;
 679
 680     //No scheme - inherit
 681     m_scheme = base.m_scheme;
 682     m_fields |= wxURI_SCHEME;
 683
 684     // All we need to do for relative URIs with an
 685     // authority component is just inherit the scheme
 686     //       if defined(R.authority) then
 687     //          T.authority = R.authority;
 688     //          T.path      = remove_dot_segments(R.path);
 689     //          T.query     = R.query;
 690     if (HasServer())
 691         return;
 692
 693     //No authority - inherit
 694     if (base.HasUserInfo())
 695     {
 696         m_userinfo = base.m_userinfo;
 697         m_fields |= wxURI_USERINFO;
 698     }
 699
 700     m_server = base.m_server;
 701     m_hostType = base.m_hostType;
 702     m_fields |= wxURI_SERVER;
 703
 704     if (base.HasPort())
 705     {
 706         m_port = base.m_port;
 707         m_fields |= wxURI_PORT;
 708     }
 709
 710
 711     // Simple path inheritance from base
 712     if (!HasPath())
 713     {
 714         //             T.path = Base.path;
 715         m_path = base.m_path;
 716         m_fields |= wxURI_PATH;
 717
 718
 719         //             if defined(R.query) then
 720         //                T.query = R.query;
 721         //             else
 722         //                T.query = Base.query;
 723         //             endif;
 724         if (!HasQuery())
 725         {
 726             m_query = base.m_query;
 727             m_fields |= wxURI_QUERY;
 728         }
 729     }
 730     else if ( m_path.empty() || m_path[0u] != '/' )
 731     {
 732         //             if (R.path starts-with "/") then
 733         //                T.path = remove_dot_segments(R.path);
 734         //             else
 735         //                T.path = merge(Base.path, R.path);
 736         //                T.path = remove_dot_segments(T.path);
 737         //             endif;
 738         //             T.query = R.query;
 739         //
 740         // So we don't do anything for absolute paths and implement merge for
 741         // the relative ones
 742
 743         wxArrayString our(SplitInSegments(m_path)),
 744                       result(SplitInSegments(base.m_path));
 745
 746         if ( !result.empty() )
 747             result.pop_back();
 748
 749         if ( our.empty() )
 750         {
 751             // if we have an empty path it means we were constructed from a "."
 752             // string or something similar (e.g. "././././"), it should count
 753             // as (empty) segment
 754             our.push_back("");
 755         }
 756
 757         const wxArrayString::const_iterator end = our.end();
 758         for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
 759         {
 760             if ( i->empty() || *i == "." )
 761             {
 762                 // as in ParsePath(), while normally we ignore the empty
 763                 // segments, we need to take account of them at the end
 764                 if ( i == end - 1 )
 765                     result.push_back("");
 766                 continue;
 767             }
 768
 769             if ( *i == ".." )
 770             {
 771                 if ( !result.empty() )
 772                 {
 773                     result.pop_back();
 774
 775                     if ( i == end - 1 )
 776                         result.push_back("");
 777                 }
 778                 //else: just ignore, extra ".." don't accumulate
 779             }
 780             else
 781             {
 782                 if ( result.empty() )
 783                 {
 784                     // ensure that the resulting path will always be absolute
 785                     result.push_back("");
 786                 }
 787
 788                 result.push_back(*i);
 789             }
 790         }
 791
 792         m_path = wxJoin(result, '/', '\0');
 793     }
 794
 795     //T.fragment = R.fragment;
 796 }
 797
 798 // ---------------------------------------------------------------------------
 799 // ParseH16
 800 //
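// Parses 1 to 4 hex digits.  Note that the input pointer is advanced by one
// before the first character is examined, i.e. the digits are expected to
// start at the character following the current position.  Returns true if
// that character is a valid hex digit.  It is the caller's responsibility to
// move the input string back to its original position on failure.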
 804 // ---------------------------------------------------------------------------
 805
 806 bool wxURI::ParseH16(const char*& uri)
 807 {
 808     // h16           = 1*4HEXDIG
 809     if(!IsHex(*++uri))
 810         return false;
 811
 812     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
 813         ++uri;
 814
 815     return true;
 816 }
 817
 818 // ---------------------------------------------------------------------------
 819 // ParseIPXXX
 820 //
 821 // Parses a certain version of an IP address and moves the input string past
 822 // it.  Returns true if the input  string contains the proper version of an ip
// address.  It is the caller's responsibility to move the input string back
 824 // to its original position on failure.
 825 // ---------------------------------------------------------------------------
 826
 827 bool wxURI::ParseIPv4address(const char*& uri)
 828 {
 829     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 830     //
 831     //dec-octet     =      DIGIT                    ; 0-9
 832     //                / %x31-39 DIGIT               ; 10-99
 833     //                / "1" 2DIGIT                  ; 100-199
 834     //                / "2" %x30-34 DIGIT           ; 200-249
 835     //                / "25" %x30-35                ; 250-255
 836     size_t iIPv4 = 0;
 837     if (IsDigit(*uri))
 838     {
 839         ++iIPv4;
 840
 841
 842         //each ip part must be between 0-255 (dupe of version in for loop)
 843         if( IsDigit(*++uri) && IsDigit(*++uri) &&
 844            //100 or less  (note !)
 845            !( (*(uri-2) < '2') ||
 846            //240 or less
 847              (*(uri-2) == '2' &&
 848                (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
 849              )
 850             )
 851           )
 852         {
 853             return false;
 854         }
 855
 856         if(IsDigit(*uri))++uri;
 857
 858         //compilers should unroll this loop
 859         for(; iIPv4 < 4; ++iIPv4)
 860         {
 861             if (*uri != '.' || !IsDigit(*++uri))
 862                 break;
 863
 864             //each ip part must be between 0-255
 865             if( IsDigit(*++uri) && IsDigit(*++uri) &&
 866                //100 or less  (note !)
 867                !( (*(uri-2) < '2') ||
 868                //240 or less
 869                  (*(uri-2) == '2' &&
 870                    (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
 871                  )
 872                 )
 873               )
 874             {
 875                 return false;
 876             }
 877             if(IsDigit(*uri))++uri;
 878         }
 879     }
 880     return iIPv4 == 4;
 881 }
 882
 883 bool wxURI::ParseIPv6address(const char*& uri)
 884 {
 885     // IPv6address   =                            6( h16 ":" ) ls32
 886     //               /                       "::" 5( h16 ":" ) ls32
 887     //               / [               h16 ] "::" 4( h16 ":" ) ls32
 888     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
 889     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
 890     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
 891     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
 892     //               / [ *5( h16 ":" ) h16 ] "::"              h16
 893     //               / [ *6( h16 ":" ) h16 ] "::"
 894
 895     size_t numPrefix = 0,
 896               maxPostfix;
 897
 898     bool bEndHex = false;
 899
 900     for( ; numPrefix < 6; ++numPrefix)
 901     {
 902         if(!ParseH16(uri))
 903         {
 904             --uri;
 905             bEndHex = true;
 906             break;
 907         }
 908
 909         if(*uri != ':')
 910         {
 911             break;
 912         }
 913     }
 914
 915     if(!bEndHex && !ParseH16(uri))
 916     {
 917         --uri;
 918
 919         if (numPrefix)
 920             return false;
 921
 922         if (*uri == ':')
 923         {
 924             if (*++uri != ':')
 925                 return false;
 926
 927             maxPostfix = 5;
 928         }
 929         else
 930             maxPostfix = 6;
 931     }
 932     else
 933     {
 934         if (*uri != ':' || *(uri+1) != ':')
 935         {
 936             if (numPrefix != 6)
 937                 return false;
 938
 939             while (*--uri != ':') {}
 940             ++uri;
 941
 942             const char * const start = uri;
 943             //parse ls32
 944             // ls32          = ( h16 ":" h16 ) / IPv4address
 945             if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
 946                 return true;
 947
 948             uri = start;
 949
 950             if (ParseIPv4address(uri))
 951                 return true;
 952             else
 953                 return false;
 954         }
 955         else
 956         {
 957             uri += 2;
 958
 959             if (numPrefix > 3)
 960                 maxPostfix = 0;
 961             else
 962                 maxPostfix = 4 - numPrefix;
 963         }
 964     }
 965
 966     bool bAllowAltEnding = maxPostfix == 0;
 967
 968     for(; maxPostfix != 0; --maxPostfix)
 969     {
 970         if(!ParseH16(uri) || *uri != ':')
 971             return false;
 972     }
 973
 974     if(numPrefix <= 4)
 975     {
 976         const char * const start = uri;
 977         //parse ls32
 978         // ls32          = ( h16 ":" h16 ) / IPv4address
 979         if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
 980             return true;
 981
 982         uri = start;
 983
 984         if (ParseIPv4address(uri))
 985             return true;
 986
 987         uri = start;
 988
 989         if (!bAllowAltEnding)
 990             return false;
 991     }
 992
 993     if(numPrefix <= 5 && ParseH16(uri))
 994         return true;
 995
 996     return true;
 997 }
 998
 999 bool wxURI::ParseIPvFuture(const char*& uri)
1000 {
1001     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1002     if (*++uri != 'v' || !IsHex(*++uri))
1003         return false;
1004
1005     while (IsHex(*++uri))
1006         ;
1007
1008     if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1009         return false;
1010
1011     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1012
1013     return true;
1014 }
1015
1016
1017 // ---------------------------------------------------------------------------
1018 // IsXXX
1019 //
1020 // Returns true if the passed in character meets the criteria of the method
1021 // ---------------------------------------------------------------------------
1022
1023 // unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1024 bool wxURI::IsUnreserved(char c)
1025 {
1026     return IsAlpha(c) ||
1027            IsDigit(c) ||
1028            c == '-' ||
1029            c == '.' ||
1030            c == '_' ||
1031            c == '~'
1032            ;
1033 }
1034
1035 bool wxURI::IsReserved(char c)
1036 {
1037     return IsGenDelim(c) || IsSubDelim(c);
1038 }
1039
1040 // gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1041 bool wxURI::IsGenDelim(char c)
1042 {
1043     return c == ':' ||
1044            c == '/' ||
1045            c == '?' ||
1046            c == '#' ||
1047            c == '[' ||
1048            c == ']' ||
1049            c == '@';
1050 }
1051
1052 // sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1053 //               / "*" / "+" / "," / ";" / "="
1054 bool wxURI::IsSubDelim(char c)
1055 {
1056     return c == '!' ||
1057            c == '$' ||
1058            c == '&' ||
1059            c == '\'' ||
1060            c == '(' ||
1061            c == ')' ||
1062            c == '*' ||
1063            c == '+' ||
1064            c == ',' ||
1065            c == ';' ||
1066            c == '='
1067            ;
1068 }
1069
1070 bool wxURI::IsHex(char c)
1071 {
1072     return IsDigit(c) ||
1073            (c >= 'a' && c <= 'f') ||
1074            (c >= 'A' && c <= 'F');
1075 }
1076
1077 bool wxURI::IsAlpha(char c)
1078 {
1079     return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
1080 }
1081
1082 bool wxURI::IsDigit(char c)
1083 {
1084     return c >= '0' && c <= '9';
1085 }
1086
1087 bool wxURI::IsEndPath(char c)
1088 {
1089     return c == '\0' || c == '#' || c == '?';
1090 }
1091