src/common/uri.cpp

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        uri.cpp
   3 // Purpose:     Implementation of a uri parser
   4 // Author:      Ryan Norton
   5 // Created:     10/26/04
   6 // RCS-ID:      $Id$
   7 // Copyright:   (c) 2004 Ryan Norton
   8 // Licence:     wxWindows
   9 /////////////////////////////////////////////////////////////////////////////
  10
  11 // ===========================================================================
  12 // declarations
  13 // ===========================================================================
  14
  15 // ---------------------------------------------------------------------------
  16 // headers
  17 // ---------------------------------------------------------------------------
  18
  19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
  20     #pragma implementation "uri.h"
  21 #endif
  22
  23 // For compilers that support precompilation, includes "wx.h".
  24 #include "wx/wxprec.h"
  25
  26 #ifdef __BORLANDC__
  27     #pragma hdrstop
  28 #endif
  29
  30 #include "wx/uri.h"
  31
  32 // ---------------------------------------------------------------------------
  33 // definitions
  34 // ---------------------------------------------------------------------------
  35
  36 IMPLEMENT_CLASS(wxURI, wxObject);
  37
  38 // ===========================================================================
  39 // implementation
  40 // ===========================================================================
  41
  42 // ---------------------------------------------------------------------------
  43 // utilities
  44 // ---------------------------------------------------------------------------
  45
  46 // ---------------------------------------------------------------------------
  47 //
  48 //                        wxURI
  49 //
  50 // ---------------------------------------------------------------------------
  51
  52 // ---------------------------------------------------------------------------
  53 //  Constructors
  54 // ---------------------------------------------------------------------------
  55
  56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
  57 {
  58 }
  59
  60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
  61 {
  62     Create(uri);
  63 }
  64
  65 wxURI::wxURI(const wxURI& uri)  : m_hostType(wxURI_REGNAME), m_fields(0)
  66 {
  67     *this = uri;
  68 }
  69
  70 // ---------------------------------------------------------------------------
  71 // Destructor and cleanup
  72 // ---------------------------------------------------------------------------
  73
  74 wxURI::~wxURI()
  75 {
  76     Clear();
  77 }
  78
  79 void wxURI::Clear()
  80 {
  81     m_scheme = m_user = m_server = m_port = m_path =
  82     m_query = m_fragment = wxT("");
  83
  84     m_hostType = wxURI_REGNAME;
  85
  86     m_fields = 0;
  87 }
  88
  89 // ---------------------------------------------------------------------------
  90 // Create
  91 //
  92 // This creates the URI - all we do here is call the main parsing method
  93 // ---------------------------------------------------------------------------
  94
  95 void wxURI::Create(const wxString& uri)
  96 {
  97     if (m_fields)
  98         Clear();
  99
 100     Parse(uri);
 101 }
 102
 103 // ---------------------------------------------------------------------------
 104 // Escape/Unescape/IsEscape
 105 //
 106 // Unescape unencodes a 3 character URL escape sequence
 107 // Escape encodes an invalid URI character into a 3 character sequence
 108 // IsEscape determines if the input string contains an escape sequence,
 109 // if it does, then it moves the input string past the escape sequence
 110 // ---------------------------------------------------------------------------
 111
 112 wxChar wxURI::Unescape(const wxChar* s)
 113 {
 114     wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
 115
 116     return CharToHex(*s) * 0x10 + CharToHex(*++s);
 117 }
 118
 119 void wxURI::Escape(wxString& s, const wxChar& c)
 120 {
 121     const wxChar* hdig = wxT("0123456789abcdef");
 122     s += '%';
 123     s += hdig[(c >> 4) & 15];
 124         s += hdig[c & 15];
 125 }
 126
 127 bool wxURI::IsEscape(const wxChar*& uri)
 128 {
 129     if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2)))
 130     {
 131         uri += 3;
 132         return true;
 133     }
 134     else
 135         return false;
 136 }
 137
 138 // ---------------------------------------------------------------------------
 139 // HasXXX
 140 // ---------------------------------------------------------------------------
 141
 142 bool wxURI::HasScheme() const
 143 {   return (m_fields & wxURI_SCHEME) == wxURI_SCHEME;   }
 144
 145 bool wxURI::HasUser() const
 146 {   return (m_fields & wxURI_USER) == wxURI_USER;   }
 147
 148 bool wxURI::HasServer() const
 149 {   return (m_fields & wxURI_SERVER) == wxURI_SERVER;   }
 150
 151 bool wxURI::HasPort() const
 152 {   return (m_fields & wxURI_PORT) == wxURI_PORT;   }
 153
 154 bool wxURI::HasPath() const
 155 {   return (m_fields & wxURI_PATH) == wxURI_PATH;   }
 156
 157 bool wxURI::HasQuery() const
 158 {   return (m_fields & wxURI_QUERY) == wxURI_QUERY;   }
 159
 160 bool wxURI::HasFragment() const
 161 {   return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT;   }
 162
 163 // ---------------------------------------------------------------------------
 164 // GetXXX
 165 //
 166 // The normal Get() actually builds the entire URI into a useable
 167 // representation, including proper identification characters such as slashes
 168 // ---------------------------------------------------------------------------
 169
 170 const wxString& wxURI::GetScheme() const
 171 {   return m_scheme;               }
 172
 173 const wxString& wxURI::GetPath() const
 174 {   return m_path;               }
 175
 176 const wxString& wxURI::GetQuery() const
 177 {   return m_query;               }
 178
 179 const wxString& wxURI::GetFragment() const
 180 {   return m_fragment;               }
 181
 182 const wxString& wxURI::GetPort() const
 183 {   return m_port;               }
 184
 185 const wxString& wxURI::GetUser() const
 186 {   return m_user;               }
 187
 188 const wxString& wxURI::GetServer() const
 189 {   return m_server;               }
 190
 191 const wxURIHostType& wxURI::GetHostType() const
 192 {   return m_hostType;             }
 193
 194 wxString wxURI::Get() const
 195 {
 196     wxString ret;
 197
 198     if (HasScheme())
 199         ret = ret + m_scheme + wxT(":");
 200
 201     if (HasServer())
 202     {
 203         ret += wxT("//");
 204
 205         if (HasUser())
 206             ret = ret + m_user + wxT("@");
 207
 208         ret += m_server;
 209
 210         if (HasPort())
 211             ret = ret + wxT(":") + m_port;
 212     }
 213
 214     ret += m_path;
 215
 216     if (HasQuery())
 217         ret = ret + wxT("?") + m_query;
 218
 219     if (HasFragment())
 220         ret = ret + wxT("#") + m_fragment;
 221
 222     return ret;
 223 }
 224
 225 // ---------------------------------------------------------------------------
 226 // operator = and ==
 227 // ---------------------------------------------------------------------------
 228
 229 wxURI& wxURI::operator = (const wxURI& uri)
 230 {
 231     if (HasScheme())
 232         m_scheme = uri.m_scheme;
 233
 234
 235     if (HasServer())
 236     {
 237         if (HasUser())
 238             m_user = uri.m_user;
 239
 240         m_server = uri.m_server;
 241         m_hostType = uri.m_hostType;
 242
 243         if (HasPort())
 244             m_port = uri.m_port;
 245     }
 246
 247
 248     if (HasPath())
 249         m_path = uri.m_path;
 250
 251     if (HasQuery())
 252         m_query = uri.m_query;
 253
 254     if (HasFragment())
 255         m_fragment = uri.m_fragment;
 256
 257     return *this;
 258 }
 259
 260 wxURI& wxURI::operator = (const wxChar* string)
 261 {
 262     Create(string);
 263     return *this;
 264 }
 265
 266 bool wxURI::operator == (const wxURI& uri) const
 267 {
 268     if (HasScheme())
 269     {
 270         if(m_scheme != uri.m_scheme)
 271             return false;
 272     }
 273     else if (uri.HasScheme())
 274         return false;
 275
 276
 277     if (HasServer())
 278     {
 279         if (HasUser())
 280         {
 281             if (m_user != uri.m_user)
 282                 return false;
 283         }
 284         else if (uri.HasUser())
 285             return false;
 286
 287         if (m_server != uri.m_server ||
 288             m_hostType != uri.m_hostType)
 289             return false;
 290
 291         if (HasPort())
 292         {
 293             if(m_port != uri.m_port)
 294                 return false;
 295         }
 296         else if (uri.HasPort())
 297             return false;
 298     }
 299     else if (uri.HasServer())
 300         return false;
 301
 302
 303     if (HasPath())
 304     {
 305         if(m_path != uri.m_path)
 306             return false;
 307     }
 308     else if (uri.HasPath())
 309         return false;
 310
 311     if (HasQuery())
 312     {
 313         if (m_query != uri.m_query)
 314             return false;
 315     }
 316     else if (uri.HasQuery())
 317         return false;
 318
 319     if (HasFragment())
 320     {
 321         if (m_fragment != uri.m_fragment)
 322             return false;
 323     }
 324     else if (uri.HasFragment())
 325         return false;
 326
 327     return true;
 328 }
 329
 330 // ---------------------------------------------------------------------------
 331 // IsReference
 332 //
 333 // if there is no authority or scheme, it is a reference
 334 // ---------------------------------------------------------------------------
 335
 336 bool wxURI::IsReference() const
 337 {   return !HasScheme() || !HasServer();  }
 338
 339 // ---------------------------------------------------------------------------
 340 // Parse
 341 //
 342 // Master URI parsing method.  Just calls the individual parsing methods
 343 //
 344 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
  345 // URI-reference = URI / relative-ref
 346 // ---------------------------------------------------------------------------
 347
 348 const wxChar* wxURI::Parse(const wxChar* uri)
 349 {
 350     uri = ParseScheme(uri);
 351     uri = ParseAuthority(uri);
 352     uri = ParsePath(uri);
 353     uri = ParseQuery(uri);
 354     return ParseFragment(uri);
 355 }
 356
 357 // ---------------------------------------------------------------------------
 358 // ParseXXX
 359 //
 360 // Individual parsers for each URI component
 361 // ---------------------------------------------------------------------------
 362
 363 const wxChar* wxURI::ParseScheme(const wxChar* uri)
 364 {
 365     wxASSERT(uri != NULL);
 366
 367     //copy of the uri - used for figuring out
 368     //length of each component
 369     const wxChar* uricopy = uri;
 370
 371     //Does the uri have a scheme (first character alpha)?
 372     if (IsAlpha(*uri))
 373     {
 374         m_scheme += *uri++;
 375
 376         //scheme        = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 377         //RN: Scheme can not be escaped
 378         while (IsAlpha(*uri) || IsDigit(*uri) ||
 379                *uri == '+'   ||
 380                *uri == '-'   ||
 381                *uri == '.')
 382         {
 383             m_scheme += *uri++;
 384         }
 385
 386         //valid scheme?
 387         if (*uri == ':')
 388         {
 389             //mark the scheme as valid
 390             m_fields |= wxURI_SCHEME;
 391
 392             //move reference point up to input buffer
 393             uricopy = ++uri;
 394         }
 395         else
 396             //relative uri with relative path reference
 397             m_scheme = wxT("");
 398     }
 399 //    else
 400         //relative uri with _possible_ relative path reference
 401
 402     return uricopy;
 403 }
 404
 405 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
 406 {
 407     // authority     = [ userinfo "@" ] host [ ":" port ]
 408     if (*uri == '/' && *(uri+1) == '/')
 409     {
 410         uri += 2;
 411
 412         uri = ParseUser(uri);
 413         uri = ParseServer(uri);
 414         return ParsePort(uri);
 415     }
 416
 417     return uri;
 418 }
 419
 420 const wxChar* wxURI::ParseUser(const wxChar* uri)
 421 {
 422     wxASSERT(uri != NULL);
 423
 424     //copy of the uri - used for figuring out
 425     //length of each component
 426     const wxChar* uricopy = uri;
 427
 428     // userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
 429     while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?')
 430     {
 431         if(IsUnreserved(*uri) || IsEscape(uri) ||
 432            IsSubDelim(*uri) || *uri == ':')
 433             m_user += *uri++;
 434         else
 435             Escape(m_user, *uri++);
 436     }
 437
 438     if(*uri == '@')
 439     {
 440         //valid userinfo
 441         m_fields |= wxURI_USER;
 442
 443         uricopy = ++uri;
 444     }
 445     else
 446         m_user = wxT("");
 447
 448     return uricopy;
 449 }
 450
 451 const wxChar* wxURI::ParseServer(const wxChar* uri)
 452 {
 453     wxASSERT(uri != NULL);
 454
 455     //copy of the uri - used for figuring out
 456     //length of each component
 457     const wxChar* uricopy = uri;
 458
 459     // host          = IP-literal / IPv4address / reg-name
 460     // IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 461     if (*uri == '[')
 462     {
 463         if (ParseIPv6address(++uri) && *uri == ']')
 464         {
 465             ++uri;
 466             m_hostType = wxURI_IPV6ADDRESS;
 467
 468             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 469             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 470             theBuffer.SetLength(uri-uricopy);
 471         }
 472         else
 473         {
 474             uri = uricopy;
 475
 476             if (ParseIPvFuture(++uri) && *uri == ']')
 477             {
 478                 ++uri;
 479                 m_hostType = wxURI_IPVFUTURE;
 480
 481                 wxStringBufferLength theBuffer(m_server, uri - uricopy);
 482                 wxMemcpy(theBuffer, uricopy, uri-uricopy);
 483                 theBuffer.SetLength(uri-uricopy);
 484             }
 485             else
 486                 uri = uricopy;
 487         }
 488     }
 489     else
 490     {
 491         if (ParseIPv4address(uri))
 492         {
 493             m_hostType = wxURI_IPV4ADDRESS;
 494
 495             wxStringBufferLength theBuffer(m_server, uri - uricopy);
 496             wxMemcpy(theBuffer, uricopy, uri-uricopy);
 497             theBuffer.SetLength(uri-uricopy);
 498         }
 499         else
 500             uri = uricopy;
 501     }
 502
 503     if(m_hostType == wxURI_REGNAME)
 504     {
 505         uri = uricopy;
 506         // reg-name      = *( unreserved / pct-encoded / sub-delims )
 507         while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?')
 508         {
 509             if(IsUnreserved(*uri) || IsEscape(uri) ||  IsSubDelim(*uri))
 510                 m_server += *uri++;
 511             else
 512                 Escape(m_server, *uri++);
 513         }
 514     }
 515
 516     //mark the server as valid
 517     m_fields |= wxURI_SERVER;
 518
 519     return uri;
 520 }
 521
 522
 523 const wxChar* wxURI::ParsePort(const wxChar* uri)
 524 {
 525     wxASSERT(uri != NULL);
 526
 527     // port          = *DIGIT
 528     if(*uri == ':')
 529     {
 530         ++uri;
 531         while(IsDigit(*uri))
 532         {
 533             m_port += *uri++;
 534         }
 535
 536         //mark the port as valid
 537         m_fields |= wxURI_PORT;
 538     }
 539
 540     return uri;
 541 }
 542
 543 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
 544 {
 545     wxASSERT(uri != NULL);
 546
 547     //copy of the uri - used for figuring out
 548     //length of each component
 549     const wxChar* uricopy = uri;
 550
 551     /// hier-part     = "//" authority path-abempty
 552     ///               / path-absolute
 553     ///               / path-rootless
 554     ///               / path-empty
 555     ///
 556     /// relative-part = "//" authority path-abempty
 557     ///               / path-absolute
 558     ///               / path-noscheme
 559     ///               / path-empty
 560     ///
 561     /// path-abempty  = *( "/" segment )
 562     /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
 563     /// path-noscheme = segment-nz-nc *( "/" segment )
 564     /// path-rootless = segment-nz *( "/" segment )
 565     /// path-empty    = 0<pchar>
 566     ///
 567     /// segment       = *pchar
 568     /// segment-nz    = 1*pchar
 569     /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 570     ///               ; non-zero-length segment without any colon ":"
 571     ///
 572     /// pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 573     if (*uri == '/')
 574     {
 575         m_path += *uri++;
 576
 577         while(*uri && *uri != '#' && *uri != '?')
 578         {
 579             if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 580                 *uri == ':' || *uri == '@' || *uri == '/')
 581                 m_path += *uri++;
 582             else
 583                 Escape(m_path, *uri++);
 584         }
 585
 586         if (bNormalize)
 587         {
 588             wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 589             Normalize(theBuffer, true);
 590             theBuffer.SetLength(wxStrlen(theBuffer));
 591         }
 592         //mark the path as valid
 593         m_fields |= wxURI_PATH;
 594     }
 595     else if(*uri) //Relative path
 596     {
 597         if (bReference)
 598         {
 599             //no colon allowed
 600             while(*uri && *uri != '#' && *uri != '?')
 601             {
 602                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 603                   *uri == '@' || *uri == '/')
 604                     m_path += *uri++;
 605                 else
 606                     Escape(m_path, *uri++);
 607             }
 608         }
 609         else
 610         {
 611             while(*uri && *uri != '#' && *uri != '?')
 612             {
 613                 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 614                    *uri == ':' || *uri == '@' || *uri == '/')
 615                     m_path += *uri++;
 616                 else
 617                     Escape(m_path, *uri++);
 618             }
 619         }
 620
 621         if (uri != uricopy)
 622         {
 623             if (bNormalize)
 624             {
 625                 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
 626                 Normalize(theBuffer);
 627                 theBuffer.SetLength(wxStrlen(theBuffer));
 628             }
 629
 630             //mark the path as valid
 631             m_fields |= wxURI_PATH;
 632         }
 633     }
 634
 635     return uri;
 636 }
 637
 638
 639 const wxChar* wxURI::ParseQuery(const wxChar* uri)
 640 {
 641     wxASSERT(uri != NULL);
 642
 643     // query         = *( pchar / "/" / "?" )
 644     if (*uri == '?')
 645     {
 646         ++uri;
 647         while(*uri && *uri != '#')
 648         {
 649             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 650                 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
 651                   m_query += *uri++;
 652             else
 653                   Escape(m_query, *uri++);
 654         }
 655
 656         //mark the server as valid
 657         m_fields |= wxURI_QUERY;
 658     }
 659
 660     return uri;
 661 }
 662
 663
 664 const wxChar* wxURI::ParseFragment(const wxChar* uri)
 665 {
 666     wxASSERT(uri != NULL);
 667
 668     // fragment      = *( pchar / "/" / "?" )
 669     if (*uri == '#')
 670     {
 671         ++uri;
 672         while(*uri)
 673         {
 674             if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
 675                 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
 676                   m_fragment += *uri++;
 677             else
 678                   Escape(m_fragment, *uri++);
 679         }
 680
 681         //mark the server as valid
 682         m_fields |= wxURI_FRAGMENT;
 683     }
 684
 685     return uri;
 686 }
 687
 688 // ---------------------------------------------------------------------------
 689 //  Resolve URI
 690 //
 691 //  Builds missing components of this uri from a base uri
 692 //
 693 //  A version of the algorithm outlined in the RFC is used here
 694 //  (it is shown in comments)
 695 // ---------------------------------------------------------------------------
 696
 697 void wxURI::Resolve(const wxURI& base, int flags)
 698 {
 699     wxASSERT_MSG(!base.IsReference(),
 700                 wxT("wxURI to inherit from must not be a reference!"));
 701
 702     // If we arn't being strict, enable the older
 703     // loophole that allows this uri to inherit other
 704     // properties from the base uri - even if the scheme
 705     // is defined
 706     if ( !(flags & wxURI_STRICT) &&
 707             HasScheme() && base.HasScheme() &&
 708                 m_scheme == base.m_scheme )
 709     {
 710         m_fields -= wxURI_SCHEME;
 711     }
 712
 713
 714     // Do nothing if this is an absolute wxURI
 715     //    if defined(R.scheme) then
 716     //       T.scheme    = R.scheme;
 717     //       T.authority = R.authority;
 718     //       T.path      = remove_dot_segments(R.path);
 719     //       T.query     = R.query;
 720     if (HasScheme())
 721     {
 722         return;
 723     }
 724
 725     //No sheme - inherit
 726     m_scheme = base.m_scheme;
 727     m_fields |= wxURI_SCHEME;
 728
 729     // All we need to do for relative URIs with an
 730     // authority component is just inherit the scheme
 731     //       if defined(R.authority) then
 732     //          T.authority = R.authority;
 733     //          T.path      = remove_dot_segments(R.path);
 734     //          T.query     = R.query;
 735     if (HasServer())
 736     {
 737         return;
 738     }
 739
 740     //No authority - inherit
 741     if (base.HasUser())
 742     {
 743         m_user = base.m_user;
 744         m_fields |= wxURI_USER;
 745     }
 746
 747     m_server = base.m_server;
 748     m_hostType = base.m_hostType;
 749     m_fields |= wxURI_SERVER;
 750
 751     if (base.HasPort())
 752     {
 753         m_port = base.m_port;
 754         m_fields |= wxURI_PORT;
 755     }
 756
 757
 758     // Simple path inheritance from base
 759     if (!HasPath())
 760     {
 761         //             T.path = Base.path;
 762         m_path = base.m_path;
 763         m_fields |= wxURI_PATH;
 764
 765
 766         //             if defined(R.query) then
 767         //                T.query = R.query;
 768         //             else
 769         //                T.query = Base.query;
 770         //             endif;
 771         if (!HasQuery())
 772         {
 773             m_query = base.m_query;
 774             m_fields |= wxURI_QUERY;
 775         }
 776     }
 777     else
 778     {
 779         //             if (R.path starts-with "/") then
 780         //                T.path = remove_dot_segments(R.path);
 781         //             else
 782         //                T.path = merge(Base.path, R.path);
 783         //                T.path = remove_dot_segments(T.path);
 784         //             endif;
 785         //             T.query = R.query;
 786         if (m_path[(const size_t&)0] != '/')
 787         {
 788             //Marge paths
 789             const wxChar* op = m_path.c_str();
 790             const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
 791
 792             //not a ending directory?  move up
 793             if (base.m_path[0] && *(bp-1) != '/')
 794                 UpTree(base.m_path, bp);
 795
 796             //normalize directories
 797             while(*op == '.' && *(op+1) == '.' &&
 798                        (*(op+2) == '\0' || *(op+2) == '/') )
 799             {
 800                 UpTree(base.m_path, bp);
 801
 802                 if (*(op+2) == '\0')
 803                     op += 2;
 804                 else
 805                     op += 3;
 806             }
 807
 808             m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
 809                     m_path.Mid((op - m_path.c_str()), m_path.Length());
 810         }
 811     }
 812 }
 813
 814 // ---------------------------------------------------------------------------
 815 // Directory Normalization (static)
 816 //
 817 // UpTree goes up a directory in a string and moves the pointer as such,
  818 // while Normalize gets rid of duplicate/erroneous directories in a URI
 819 // according to RFC 2396 and modified quite a bit to meet the unit tests
 820 // in it.
 821 // ---------------------------------------------------------------------------
 822
 823 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
 824 {
 825     if (uri != uristart && *(uri-1) == '/')
 826     {
 827         uri -= 2;
 828     }
 829
 830     for(;uri != uristart; --uri)
 831     {
 832         if (*uri == '/')
 833         {
 834             ++uri;
 835             break;
 836         }
 837     }
 838
 839     //!!!TODO:HACK!!!//
 840     if (uri == uristart && *uri == '/')
 841         ++uri;
 842     //!!!//
 843 }
 844
 845 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
 846 {
 847     wxChar* cp = s;
 848     wxChar* bp = s;
 849
 850     if(s[0] == '/')
 851         ++bp;
 852
 853     while(*cp)
 854     {
 855         if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0')
 856             && (bp == cp || *(cp-1) == '/'))
 857         {
 858             //. _or_ ./  - ignore
 859             if (*(cp+1) == '\0')
 860                 cp += 1;
 861             else
 862                 cp += 2;
 863         }
 864         else if (*cp == '.' && *(cp+1) == '.' &&
 865                 (*(cp+2) == '/' || *(cp+2) == '\0')
 866                 && (bp == cp || *(cp-1) == '/'))
 867         {
 868             //.. _or_ ../ - go up the tree
 869             if (s != bp)
 870             {
 871                 UpTree((const wxChar*)bp, (const wxChar*&)s);
 872
 873                 if (*(cp+2) == '\0')
 874                     cp += 2;
 875                 else
 876                     cp += 3;
 877             }
 878             else if (!bIgnoreLeads)
 879
 880             {
 881                 *bp++ = *cp++;
 882                 *bp++ = *cp++;
 883                 if (*cp)
 884                     *bp++ = *cp++;
 885
 886                 s = bp;
 887             }
 888             else
 889             {
 890                 if (*(cp+2) == '\0')
 891                     cp += 2;
 892                 else
 893                     cp += 3;
 894             }
 895         }
 896         else
 897             *s++ = *cp++;
 898     }
 899
 900     *s = '\0';
 901 }
 902
 903 // ---------------------------------------------------------------------------
 904 // Misc. Parsing Methods
 905 // ---------------------------------------------------------------------------
 906
 907 bool wxURI::ParseIPv4address(const wxChar*& uri)
 908 {
 909     //IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 910     //
 911     //dec-octet     =      DIGIT                    ; 0-9
 912     //                / %x31-39 DIGIT               ; 10-99
 913     //                / "1" 2DIGIT                  ; 100-199
 914     //                / "2" %x30-34 DIGIT           ; 200-249
 915     //                / "25" %x30-35                ; 250-255
 916     size_t iIPv4 = 0;
 917     if (IsDigit(*uri))
 918     {
 919         ++iIPv4;
 920
 921
 922         //each ip part must be between 0-255 (dupe of version in for loop)
 923         if( IsDigit(*++uri) && IsDigit(*++uri) &&
 924            //100 or less  (note !)
 925            !( (*(uri-2) < '2') ||
 926            //240 or less
 927              (*(uri-2) == '2' &&
 928                (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
 929              )
 930             )
 931           )
 932         {
 933             return false;
 934         }
 935
 936         if(IsDigit(*uri))++uri;
 937
 938         //compilers should unroll this loop
 939         for(; iIPv4 < 4; ++iIPv4)
 940         {
 941             if (*uri != '.' || !IsDigit(*++uri))
 942                 break;
 943
 944             //each ip part must be between 0-255
 945             if( IsDigit(*++uri) && IsDigit(*++uri) &&
 946                //100 or less  (note !)
 947                !( (*(uri-2) < '2') ||
 948                //240 or less
 949                  (*(uri-2) == '2' &&
 950                    (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
 951                  )
 952                 )
 953               )
 954             {
 955                 return false;
 956             }
 957             if(IsDigit(*uri))++uri;
 958         }
 959     }
 960     return iIPv4 == 4;
 961 }
 962
 963 bool wxURI::ParseH16(const wxChar*& uri)
 964 {
 965     // h16           = 1*4HEXDIG
 966     if(!IsHex(*++uri))
 967         return false;
 968
 969     if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
 970         ++uri;
 971
 972     return true;
 973 }
 974
 975 bool wxURI::ParseIPv6address(const wxChar*& uri)
 976 {
 977     // IPv6address   =                            6( h16 ":" ) ls32
 978     //               /                       "::" 5( h16 ":" ) ls32
 979     //               / [               h16 ] "::" 4( h16 ":" ) ls32
 980     //               / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
 981     //               / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
 982     //               / [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
 983     //               / [ *4( h16 ":" ) h16 ] "::"              ls32
 984     //               / [ *5( h16 ":" ) h16 ] "::"              h16
 985     //               / [ *6( h16 ":" ) h16 ] "::"
 986
 987     size_t numPrefix = 0,
 988               maxPostfix;
 989
 990     bool bEndHex = false;
 991
 992     for( ; numPrefix < 6; ++numPrefix)
 993     {
 994         if(!ParseH16(uri))
 995         {
 996             --uri;
 997             bEndHex = true;
 998             break;
 999         }
1000
1001         if(*uri != ':')
1002         {
1003             break;
1004         }
1005     }
1006
1007     if(!bEndHex && !ParseH16(uri))
1008     {
1009         --uri;
1010
1011         if (numPrefix)
1012             return false;
1013
1014         if (*uri == ':')
1015         {
1016             if (*++uri != ':')
1017                 return false;
1018
1019             maxPostfix = 5;
1020         }
1021         else
1022             maxPostfix = 6;
1023     }
1024     else
1025     {
1026         if (*uri != ':' || *(uri+1) != ':')
1027         {
1028             if (numPrefix != 6)
1029                 return false;
1030
1031             while (*--uri != ':') {}
1032             ++uri;
1033
1034             const wxChar* uristart = uri;
1035             //parse ls32
1036             // ls32          = ( h16 ":" h16 ) / IPv4address
1037             if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
1038                 return true;
1039
1040             uri = uristart;
1041
1042             if (ParseIPv4address(uri))
1043                 return true;
1044             else
1045                 return false;
1046         }
1047         else
1048         {
1049             uri += 2;
1050
1051             if (numPrefix > 3)
1052                 maxPostfix = 0;
1053             else
1054                 maxPostfix = 4 - numPrefix;
1055         }
1056     }
1057
1058     bool bAllowAltEnding = maxPostfix == 0;
1059
1060     for(; maxPostfix != 0; --maxPostfix)
1061     {
1062         if(!ParseH16(uri) || *uri != ':')
1063             return false;
1064     }
1065
1066     if(numPrefix <= 4)
1067     {
1068         const wxChar* uristart = uri;
1069         //parse ls32
1070         // ls32          = ( h16 ":" h16 ) / IPv4address
1071         if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
1072             return true;
1073
1074         uri = uristart;
1075
1076         if (ParseIPv4address(uri))
1077             return true;
1078
1079         uri = uristart;
1080
1081         if (!bAllowAltEnding)
1082             return false;
1083     }
1084
1085     if(numPrefix <= 5 && ParseH16(uri))
1086         return true;
1087
1088     return true;
1089 }
1090
1091 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1092 {
1093     // IPvFuture     = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1094     if (*++uri != 'v' || !IsHex(*++uri))
1095         return false;
1096
1097     while (IsHex(*++uri)) {}
1098
1099     if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1100         return false;
1101
1102     while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1103
1104     return true;
1105 }
1106
1107
1108 // ---------------------------------------------------------------------------
1109 // Misc methods - IsXXX and CharToHex
1110 // ---------------------------------------------------------------------------
1111
1112 int wxURI::CharToHex(const wxChar& c)
1113 {
1114         if ((c >= 'A') && (c <= 'Z'))   return c - 'A' + 0x0A;
1115         if ((c >= 'a') && (c <= 'z'))   return c - 'a' + 0x0a;
1116         if ((c >= '0') && (c <= '9'))   return c - '0' + 0x00;
1117
1118         return 0;
1119 }
1120
1121 //! unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
1122 bool wxURI::IsUnreserved (const wxChar& c)
1123 {   return IsAlpha(c) || IsDigit(c) ||
1124            c == '-' ||
1125            c == '.' ||
1126            c == '_' ||
1127            c == '~' //tilde
1128            ;
1129 }
1130
1131 bool wxURI::IsReserved (const wxChar& c)
1132 {
1133     return IsGenDelim(c) || IsSubDelim(c);
1134 }
1135
1136 //! gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1137 bool wxURI::IsGenDelim (const wxChar& c)
1138 {
1139     return c == ':' ||
1140            c == '/' ||
1141            c == '?' ||
1142            c == '#' ||
1143            c == '[' ||
1144            c == ']' ||
1145            c == '@';
1146 }
1147
1148 //! sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
1149 //!               / "*" / "+" / "," / ";" / "="
1150 bool wxURI::IsSubDelim (const wxChar& c)
1151 {
1152     return c == '!' ||
1153            c == '$' ||
1154            c == '&' ||
1155            c == '\'' ||
1156            c == '(' ||
1157            c == ')' ||
1158            c == '*' ||
1159            c == '+' ||
1160            c == ',' ||
1161            c == ';' ||
1162            c == '='
1163            ;
1164 }
1165
1166 bool wxURI::IsHex(const wxChar& c)
1167 {   return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
1168
1169 bool wxURI::IsAlpha(const wxChar& c)
1170 {   return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');  }
1171
1172 bool wxURI::IsDigit(const wxChar& c)
1173 {   return c >= '0' && c <= '9';        }
1174
1175
1176 // ---------------------------------------------------------------------------
1177 //
//                        wxURL Compatibility
1179 //
1180 // TODO:  Use wxURI instead here...
1181 // ---------------------------------------------------------------------------
1182
1183 #if wxUSE_URL
1184
1185 #include "wx/url.h"
1186
1187 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1188 {
1189   wxString out_str;
1190   wxString hexa_code;
1191   size_t i;
1192
1193   for (i = 0; i < uri.Len(); i++)
1194   {
1195     wxChar c = uri.GetChar(i);
1196
1197     if (c == wxT(' '))
1198     {
1199       // GRG, Apr/2000: changed to "%20" instead of '+'
1200
1201       out_str += wxT("%20");
1202     }
1203     else
1204     {
1205       // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1206       //
1207       // - Alphanumeric characters are never escaped
1208       // - Unreserved marks are never escaped
1209       // - Delimiters must be escaped if they appear within a component
1210       //     but not if they are used to separate components. Here we have
1211       //     no clear way to distinguish between these two cases, so they
1212       //     are escaped unless they are passed in the 'delims' parameter
1213       //     (allowed delimiters).
1214
1215       static const wxChar marks[] = wxT("-_.!~*()'");
1216
1217       if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1218       {
1219         hexa_code.Printf(wxT("%%%02X"), c);
1220         out_str += hexa_code;
1221       }
1222       else
1223       {
1224         out_str += c;
1225       }
1226     }
1227   }
1228
1229   return out_str;
1230 }
1231
1232 wxString wxURL::ConvertFromURI(const wxString& uri)
1233 {
1234   wxString new_uri;
1235
1236   size_t i = 0;
1237   while (i < uri.Len())
1238   {
1239     int code;
1240     if (uri[i] == wxT('%'))
1241     {
1242       i++;
1243       if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1244         code = (uri[i] - wxT('A') + 10) * 16;
1245       else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1246         code = (uri[i] - wxT('a') + 10) * 16;
1247       else
1248         code = (uri[i] - wxT('0')) * 16;
1249
1250       i++;
1251       if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1252         code += (uri[i] - wxT('A')) + 10;
1253       else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1254         code += (uri[i] - wxT('a')) + 10;
1255       else
1256         code += (uri[i] - wxT('0'));
1257
1258       i++;
1259       new_uri += (wxChar)code;
1260       continue;
1261     }
1262     new_uri += uri[i];
1263     i++;
1264   }
1265   return new_uri;
1266 }
1267
1268 #endif //wxUSE_URL
1269
1270 //end of uri.cpp
1271
1272
1273