]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
Add new modules to __all__
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
3// Purpose: Implementation of a uri parser
4// Author: Ryan Norton
5// Created: 10/26/04
6// RCS-ID: $Id$
7// Copyright: (c) 2004 Ryan Norton
8// Licence: wxWindows
9/////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// declarations
13// ===========================================================================
14
15// ---------------------------------------------------------------------------
16// headers
17// ---------------------------------------------------------------------------
18
19#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21#endif
22
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
30#include "wx/uri.h"
31
32// ---------------------------------------------------------------------------
33// definitions
34// ---------------------------------------------------------------------------
35
// wxWidgets run-time class information for wxURI, naming wxObject as its
// base class (see the wxWidgets IMPLEMENT_CLASS macro documentation).
36IMPLEMENT_CLASS(wxURI, wxObject);
37
38// ===========================================================================
39// implementation
40// ===========================================================================
41
42// ---------------------------------------------------------------------------
43// utilities
44// ---------------------------------------------------------------------------
45
46// ---------------------------------------------------------------------------
47//
48// wxURI
49//
50// ---------------------------------------------------------------------------
51
52// ---------------------------------------------------------------------------
53// Constructors
54// ---------------------------------------------------------------------------
55
56wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57{
58}
846978d7 59
dd65d8c8
RN
60wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61{
62 Create(uri);
63}
64
60431236 65wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
dd65d8c8 66{
b60b2ec8 67 Assign(uri);
dd65d8c8
RN
68}
69
70// ---------------------------------------------------------------------------
71// Destructor and cleanup
72// ---------------------------------------------------------------------------
73
74wxURI::~wxURI()
75{
76 Clear();
77}
78
79void wxURI::Clear()
80{
4860d40d 81 m_scheme = m_userinfo = m_server = m_port = m_path =
525d8583 82 m_query = m_fragment = wxEmptyString;
dd65d8c8
RN
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87}
88
89// ---------------------------------------------------------------------------
90// Create
91//
846978d7 92// This creates the URI - all we do here is call the main parsing method
dd65d8c8
RN
93// ---------------------------------------------------------------------------
94
86470d43 95const wxChar* wxURI::Create(const wxString& uri)
846978d7 96{
dd65d8c8
RN
97 if (m_fields)
98 Clear();
99
846978d7
WS
100 return Parse(uri);
101}
dd65d8c8
RN
102
103// ---------------------------------------------------------------------------
ce321570 104// Escape Methods
dd65d8c8 105//
846978d7 106// TranslateEscape unencodes a 3 character URL escape sequence
ce321570 107//
dd65d8c8 108// Escape encodes an invalid URI character into a 3 character sequence
ce321570 109//
dd65d8c8
RN
110// IsEscape determines if the input string contains an escape sequence,
111// if it does, then it moves the input string past the escape sequence
ce321570
RN
112//
113// Unescape unencodes all 3 character URL escape sequences in a wxString
dd65d8c8
RN
114// ---------------------------------------------------------------------------
115
86470d43 116wxChar wxURI::TranslateEscape(const wxChar* s)
dd65d8c8 117{
6f0344c7 118 wxASSERT_MSG( IsHex(s[0]) && IsHex(s[1]), wxT("Invalid escape sequence!"));
8404931e 119
6f0344c7 120 return (wxChar)( CharToHex(s[0]) << 4 ) | CharToHex(s[1]);
dd65d8c8
RN
121}
122
86470d43
RN
123wxString wxURI::Unescape(const wxString& uri)
124{
125 wxString new_uri;
126
127 for(size_t i = 0; i < uri.length(); ++i)
128 {
129 if (uri[i] == wxT('%'))
130 {
131 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
132 i += 2;
133 }
d8d7193d
RN
134 else
135 new_uri += uri[i];
86470d43
RN
136 }
137
138 return new_uri;
139}
140
dd65d8c8
RN
141void wxURI::Escape(wxString& s, const wxChar& c)
142{
143 const wxChar* hdig = wxT("0123456789abcdef");
ce321570 144 s += wxT('%');
dd65d8c8 145 s += hdig[(c >> 4) & 15];
846978d7 146 s += hdig[c & 15];
dd65d8c8
RN
147}
148
149bool wxURI::IsEscape(const wxChar*& uri)
150{
ce321570
RN
151 // pct-encoded = "%" HEXDIG HEXDIG
152 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
dd65d8c8
RN
153 {
154 uri += 3;
155 return true;
156 }
157 else
158 return false;
159}
160
4860d40d
RN
161// ---------------------------------------------------------------------------
162// GetUser
163// GetPassword
164//
165// Gets the username and password via the old URL method.
166// ---------------------------------------------------------------------------
167wxString wxURI::GetUser() const
168{
169 size_t dwPasswordPos = m_userinfo.find(':');
170
171 if (dwPasswordPos == wxString::npos)
172 dwPasswordPos = 0;
173
174 return m_userinfo(0, dwPasswordPos);
175}
176
177wxString wxURI::GetPassword() const
178{
179 size_t dwPasswordPos = m_userinfo.find(':');
180
181 if (dwPasswordPos == wxString::npos)
182 return wxT("");
183 else
184 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
185}
186
dd65d8c8 187// ---------------------------------------------------------------------------
86470d43 188// BuildURI
dd65d8c8 189//
846978d7 190// BuildURI() builds the entire URI into a useable
dd65d8c8 191// representation, including proper identification characters such as slashes
ce321570
RN
192//
193// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
194// the components that accept escape sequences
dd65d8c8
RN
195// ---------------------------------------------------------------------------
196
86470d43 197wxString wxURI::BuildURI() const
846978d7 198{
dd65d8c8
RN
199 wxString ret;
200
201 if (HasScheme())
202 ret = ret + m_scheme + wxT(":");
203
204 if (HasServer())
205 {
206 ret += wxT("//");
207
4860d40d
RN
208 if (HasUserInfo())
209 ret = ret + m_userinfo + wxT("@");
dd65d8c8
RN
210
211 ret += m_server;
212
213 if (HasPort())
214 ret = ret + wxT(":") + m_port;
215 }
216
217 ret += m_path;
218
219 if (HasQuery())
220 ret = ret + wxT("?") + m_query;
221
222 if (HasFragment())
223 ret = ret + wxT("#") + m_fragment;
224
225 return ret;
226}
227
86470d43
RN
228wxString wxURI::BuildUnescapedURI() const
229{
230 wxString ret;
231
232 if (HasScheme())
233 ret = ret + m_scheme + wxT(":");
234
235 if (HasServer())
236 {
237 ret += wxT("//");
238
4860d40d
RN
239 if (HasUserInfo())
240 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
86470d43
RN
241
242 if (m_hostType == wxURI_REGNAME)
243 ret += wxURI::Unescape(m_server);
244 else
245 ret += m_server;
246
247 if (HasPort())
248 ret = ret + wxT(":") + m_port;
249 }
250
251 ret += wxURI::Unescape(m_path);
252
253 if (HasQuery())
254 ret = ret + wxT("?") + wxURI::Unescape(m_query);
255
256 if (HasFragment())
257 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
258
259 return ret;
260}
261
dd65d8c8 262// ---------------------------------------------------------------------------
ce321570 263// Assignment
dd65d8c8
RN
264// ---------------------------------------------------------------------------
265
b60b2ec8
RN
266wxURI& wxURI::Assign(const wxURI& uri)
267{
268 //assign fields
269 m_fields = uri.m_fields;
270
271 //ref over components
272 m_scheme = uri.m_scheme;
4860d40d 273 m_userinfo = uri.m_userinfo;
b60b2ec8
RN
274 m_server = uri.m_server;
275 m_hostType = uri.m_hostType;
276 m_port = uri.m_port;
277 m_path = uri.m_path;
278 m_query = uri.m_query;
279 m_fragment = uri.m_fragment;
dd65d8c8
RN
280
281 return *this;
282}
283
ce321570
RN
284wxURI& wxURI::operator = (const wxURI& uri)
285{
286 return Assign(uri);
287}
288
b60b2ec8 289wxURI& wxURI::operator = (const wxString& string)
846978d7 290{
dd65d8c8
RN
291 Create(string);
292 return *this;
293}
294
ce321570
RN
295// ---------------------------------------------------------------------------
296// Comparison
297// ---------------------------------------------------------------------------
298
dd65d8c8 299bool wxURI::operator == (const wxURI& uri) const
846978d7 300{
dd65d8c8
RN
301 if (HasScheme())
302 {
303 if(m_scheme != uri.m_scheme)
304 return false;
305 }
306 else if (uri.HasScheme())
307 return false;
308
309
310 if (HasServer())
311 {
4860d40d 312 if (HasUserInfo())
dd65d8c8 313 {
4860d40d 314 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
315 return false;
316 }
4860d40d 317 else if (uri.HasUserInfo())
dd65d8c8
RN
318 return false;
319
320 if (m_server != uri.m_server ||
321 m_hostType != uri.m_hostType)
322 return false;
323
324 if (HasPort())
325 {
326 if(m_port != uri.m_port)
327 return false;
328 }
329 else if (uri.HasPort())
330 return false;
331 }
332 else if (uri.HasServer())
333 return false;
334
335
336 if (HasPath())
337 {
338 if(m_path != uri.m_path)
339 return false;
340 }
341 else if (uri.HasPath())
342 return false;
343
344 if (HasQuery())
345 {
346 if (m_query != uri.m_query)
347 return false;
348 }
349 else if (uri.HasQuery())
350 return false;
351
352 if (HasFragment())
353 {
354 if (m_fragment != uri.m_fragment)
355 return false;
356 }
357 else if (uri.HasFragment())
358 return false;
359
360 return true;
361}
362
363// ---------------------------------------------------------------------------
364// IsReference
365//
366// if there is no authority or scheme, it is a reference
367// ---------------------------------------------------------------------------
368
369bool wxURI::IsReference() const
370{ return !HasScheme() || !HasServer(); }
371
372// ---------------------------------------------------------------------------
373// Parse
374//
375// Master URI parsing method. Just calls the individual parsing methods
376//
377// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 378// URI-reference = URI / relative
dd65d8c8
RN
379// ---------------------------------------------------------------------------
380
381const wxChar* wxURI::Parse(const wxChar* uri)
382{
383 uri = ParseScheme(uri);
384 uri = ParseAuthority(uri);
385 uri = ParsePath(uri);
386 uri = ParseQuery(uri);
387 return ParseFragment(uri);
388}
389
390// ---------------------------------------------------------------------------
391// ParseXXX
392//
393// Individual parsers for each URI component
394// ---------------------------------------------------------------------------
395
396const wxChar* wxURI::ParseScheme(const wxChar* uri)
397{
398 wxASSERT(uri != NULL);
399
400 //copy of the uri - used for figuring out
401 //length of each component
402 const wxChar* uricopy = uri;
403
404 //Does the uri have a scheme (first character alpha)?
405 if (IsAlpha(*uri))
406 {
407 m_scheme += *uri++;
408
409 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 410 while (IsAlpha(*uri) || IsDigit(*uri) ||
ce321570
RN
411 *uri == wxT('+') ||
412 *uri == wxT('-') ||
846978d7
WS
413 *uri == wxT('.'))
414 {
415 m_scheme += *uri++;
dd65d8c8
RN
416 }
417
418 //valid scheme?
ce321570 419 if (*uri == wxT(':'))
846978d7 420 {
dd65d8c8
RN
421 //mark the scheme as valid
422 m_fields |= wxURI_SCHEME;
423
424 //move reference point up to input buffer
425 uricopy = ++uri;
426 }
846978d7 427 else
dd65d8c8 428 //relative uri with relative path reference
525d8583 429 m_scheme = wxEmptyString;
dd65d8c8 430 }
846978d7 431// else
dd65d8c8
RN
432 //relative uri with _possible_ relative path reference
433
434 return uricopy;
435}
436
437const wxChar* wxURI::ParseAuthority(const wxChar* uri)
438{
439 // authority = [ userinfo "@" ] host [ ":" port ]
846978d7 440 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
dd65d8c8
RN
441 {
442 uri += 2;
443
4860d40d 444 uri = ParseUserInfo(uri);
dd65d8c8
RN
445 uri = ParseServer(uri);
446 return ParsePort(uri);
447 }
448
449 return uri;
450}
451
4860d40d 452const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
dd65d8c8
RN
453{
454 wxASSERT(uri != NULL);
455
456 //copy of the uri - used for figuring out
457 //length of each component
458 const wxChar* uricopy = uri;
459
460 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
846978d7 461 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 462 {
846978d7 463 if(IsUnreserved(*uri) || IsEscape(uri) ||
ce321570 464 IsSubDelim(*uri) || *uri == wxT(':'))
4860d40d 465 m_userinfo += *uri++;
dd65d8c8 466 else
4860d40d 467 Escape(m_userinfo, *uri++);
dd65d8c8
RN
468 }
469
ce321570 470 if(*uri == wxT('@'))
dd65d8c8
RN
471 {
472 //valid userinfo
4860d40d 473 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
474
475 uricopy = ++uri;
476 }
477 else
4860d40d 478 m_userinfo = wxEmptyString;
dd65d8c8
RN
479
480 return uricopy;
481}
482
483const wxChar* wxURI::ParseServer(const wxChar* uri)
484{
485 wxASSERT(uri != NULL);
486
487 //copy of the uri - used for figuring out
488 //length of each component
489 const wxChar* uricopy = uri;
490
491 // host = IP-literal / IPv4address / reg-name
492 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
ce321570 493 if (*uri == wxT('['))
dd65d8c8 494 {
e854db32
RN
495 ++uri; //some compilers don't support *&ing a ++*
496 if (ParseIPv6address(uri) && *uri == wxT(']'))
dd65d8c8
RN
497 {
498 ++uri;
499 m_hostType = wxURI_IPV6ADDRESS;
846978d7 500
dd65d8c8 501 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 502 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
503 theBuffer.SetLength(uri-uricopy);
504 }
505 else
506 {
507 uri = uricopy;
508
e854db32
RN
509 ++uri; //some compilers don't support *&ing a ++*
510 if (ParseIPvFuture(uri) && *uri == wxT(']'))
dd65d8c8
RN
511 {
512 ++uri;
846978d7
WS
513 m_hostType = wxURI_IPVFUTURE;
514
dd65d8c8 515 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 516 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
517 theBuffer.SetLength(uri-uricopy);
518 }
846978d7 519 else
dd65d8c8
RN
520 uri = uricopy;
521 }
522 }
846978d7 523 else
dd65d8c8
RN
524 {
525 if (ParseIPv4address(uri))
526 {
527 m_hostType = wxURI_IPV4ADDRESS;
528
529 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 530 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
531 theBuffer.SetLength(uri-uricopy);
532 }
846978d7 533 else
dd65d8c8
RN
534 uri = uricopy;
535 }
536
537 if(m_hostType == wxURI_REGNAME)
538 {
539 uri = uricopy;
540 // reg-name = *( unreserved / pct-encoded / sub-delims )
846978d7 541 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
542 {
543 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
544 m_server += *uri++;
545 else
546 Escape(m_server, *uri++);
846978d7 547 }
dd65d8c8
RN
548 }
549
550 //mark the server as valid
551 m_fields |= wxURI_SERVER;
552
553 return uri;
554}
555
846978d7 556
dd65d8c8
RN
557const wxChar* wxURI::ParsePort(const wxChar* uri)
558{
559 wxASSERT(uri != NULL);
560
561 // port = *DIGIT
ce321570 562 if(*uri == wxT(':'))
dd65d8c8
RN
563 {
564 ++uri;
846978d7 565 while(IsDigit(*uri))
dd65d8c8
RN
566 {
567 m_port += *uri++;
846978d7 568 }
dd65d8c8
RN
569
570 //mark the port as valid
571 m_fields |= wxURI_PORT;
572 }
573
574 return uri;
575}
576
8404931e 577const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
dd65d8c8
RN
578{
579 wxASSERT(uri != NULL);
580
581 //copy of the uri - used for figuring out
582 //length of each component
583 const wxChar* uricopy = uri;
584
585 /// hier-part = "//" authority path-abempty
586 /// / path-absolute
587 /// / path-rootless
588 /// / path-empty
589 ///
590 /// relative-part = "//" authority path-abempty
591 /// / path-absolute
592 /// / path-noscheme
593 /// / path-empty
594 ///
595 /// path-abempty = *( "/" segment )
596 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
597 /// path-noscheme = segment-nz-nc *( "/" segment )
598 /// path-rootless = segment-nz *( "/" segment )
599 /// path-empty = 0<pchar>
600 ///
601 /// segment = *pchar
602 /// segment-nz = 1*pchar
603 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
604 /// ; non-zero-length segment without any colon ":"
605 ///
606 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
ce321570 607 if (*uri == wxT('/'))
dd65d8c8
RN
608 {
609 m_path += *uri++;
610
846978d7
WS
611 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
612 {
dd65d8c8 613 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 614 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
615 m_path += *uri++;
616 else
617 Escape(m_path, *uri++);
dd65d8c8
RN
618 }
619
620 if (bNormalize)
621 {
622 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 623#if wxUSE_STL
2c09fb3b 624 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 625#endif
dd65d8c8
RN
626 Normalize(theBuffer, true);
627 theBuffer.SetLength(wxStrlen(theBuffer));
628 }
629 //mark the path as valid
630 m_fields |= wxURI_PATH;
631 }
632 else if(*uri) //Relative path
633 {
634 if (bReference)
635 {
636 //no colon allowed
846978d7 637 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
638 {
639 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 640 *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
641 m_path += *uri++;
642 else
643 Escape(m_path, *uri++);
dd65d8c8 644 }
846978d7 645 }
dd65d8c8
RN
646 else
647 {
846978d7 648 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
649 {
650 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 651 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
652 m_path += *uri++;
653 else
654 Escape(m_path, *uri++);
dd65d8c8
RN
655 }
656 }
657
658 if (uri != uricopy)
846978d7 659 {
dd65d8c8
RN
660 if (bNormalize)
661 {
662 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 663#if wxUSE_STL
2c09fb3b 664 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 665#endif
dd65d8c8
RN
666 Normalize(theBuffer);
667 theBuffer.SetLength(wxStrlen(theBuffer));
668 }
669
670 //mark the path as valid
671 m_fields |= wxURI_PATH;
672 }
673 }
674
675 return uri;
676}
677
678
679const wxChar* wxURI::ParseQuery(const wxChar* uri)
680{
681 wxASSERT(uri != NULL);
682
683 // query = *( pchar / "/" / "?" )
ce321570 684 if (*uri == wxT('?'))
dd65d8c8
RN
685 {
686 ++uri;
ce321570 687 while(*uri && *uri != wxT('#'))
dd65d8c8
RN
688 {
689 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 690 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 691 m_query += *uri++;
dd65d8c8 692 else
846978d7 693 Escape(m_query, *uri++);
dd65d8c8
RN
694 }
695
696 //mark the server as valid
697 m_fields |= wxURI_QUERY;
698 }
699
700 return uri;
701}
702
703
704const wxChar* wxURI::ParseFragment(const wxChar* uri)
705{
706 wxASSERT(uri != NULL);
707
708 // fragment = *( pchar / "/" / "?" )
ce321570 709 if (*uri == wxT('#'))
dd65d8c8
RN
710 {
711 ++uri;
712 while(*uri)
713 {
714 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 715 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 716 m_fragment += *uri++;
dd65d8c8 717 else
846978d7 718 Escape(m_fragment, *uri++);
dd65d8c8
RN
719 }
720
721 //mark the server as valid
722 m_fields |= wxURI_FRAGMENT;
723 }
724
725 return uri;
726}
727
728// ---------------------------------------------------------------------------
ce321570 729// Resolve
dd65d8c8 730//
ce321570 731// Builds missing components of this uri from a base uri
dd65d8c8 732//
ce321570
RN
733// A version of the algorithm outlined in the RFC is used here
734// (it is shown in comments)
735//
846978d7 736// Note that an empty URI inherits all components
dd65d8c8
RN
737// ---------------------------------------------------------------------------
738
// Fills in the components this URI lacks from 'base'.
//
// base  - URI to inherit from; asserted not to be a reference
// flags - when wxURI_STRICT is not set and both URIs carry the same scheme,
//         this URI's scheme flag is dropped first so that it is treated as
//         relative
//
// Steps, in order: return if this URI (still) has a scheme; otherwise take
// the base scheme; return if this URI has a server; otherwise take the base
// userinfo/server/port; then either inherit the base path (and the base
// query when this URI has none) or, for a path not starting with '/', merge
// it onto the base path while consuming leading ".." segments.
8404931e 739void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 740{
846978d7 741 wxASSERT_MSG(!base.IsReference(),
dd65d8c8
RN
742 wxT("wxURI to inherit from must not be a reference!"));
743
ce321570 744 // If we aren't being strict, enable the older (pre-RFC2396)
dd65d8c8
RN
745 // loophole that allows this uri to inherit other
746 // properties from the base uri - even if the scheme
747 // is defined
8404931e
VZ
748 if ( !(flags & wxURI_STRICT) &&
749 HasScheme() && base.HasScheme() &&
750 m_scheme == base.m_scheme )
846978d7 751 {
dd65d8c8
RN
// HasScheme() is true here, so subtracting the flag clears exactly that bit
752 m_fields -= wxURI_SCHEME;
753 }
754
755
756 // Do nothing if this is an absolute wxURI
757 // if defined(R.scheme) then
758 // T.scheme = R.scheme;
759 // T.authority = R.authority;
760 // T.path = remove_dot_segments(R.path);
761 // T.query = R.query;
762 if (HasScheme())
763 {
764 return;
765 }
766
ea4daac4 767 //No scheme - inherit
dd65d8c8
RN
768 m_scheme = base.m_scheme;
769 m_fields |= wxURI_SCHEME;
770
771 // All we need to do for relative URIs with an
772 // authority component is just inherit the scheme
773 // if defined(R.authority) then
774 // T.authority = R.authority;
775 // T.path = remove_dot_segments(R.path);
776 // T.query = R.query;
777 if (HasServer())
778 {
779 return;
780 }
781
782 //No authority - inherit
4860d40d 783 if (base.HasUserInfo())
dd65d8c8 784 {
4860d40d
RN
785 m_userinfo = base.m_userinfo;
786 m_fields |= wxURI_USERINFO;
dd65d8c8 787 }
846978d7 788
dd65d8c8
RN
789 m_server = base.m_server;
790 m_hostType = base.m_hostType;
791 m_fields |= wxURI_SERVER;
846978d7 792
dd65d8c8
RN
793 if (base.HasPort())
794 {
795 m_port = base.m_port;
796 m_fields |= wxURI_PORT;
797 }
846978d7 798
dd65d8c8
RN
799
800 // Simple path inheritance from base
801 if (!HasPath())
802 {
803 // T.path = Base.path;
804 m_path = base.m_path;
805 m_fields |= wxURI_PATH;
846978d7 806
dd65d8c8
RN
807
808 // if defined(R.query) then
809 // T.query = R.query;
810 // else
811 // T.query = Base.query;
812 // endif;
813 if (!HasQuery())
814 {
// NOTE(review): the query flag is set even when base has no query of its
// own - confirm this is intended
815 m_query = base.m_query;
816 m_fields |= wxURI_QUERY;
817 }
818 }
819 else
820 {
821 // if (R.path starts-with "/") then
822 // T.path = remove_dot_segments(R.path);
823 // else
824 // T.path = merge(Base.path, R.path);
825 // T.path = remove_dot_segments(T.path);
826 // endif;
827 // T.query = R.query;
ce321570 828 if (m_path[0u] != wxT('/'))
dd65d8c8 829 {
ea4daac4 830 //Merge paths
dd65d8c8
RN
// op walks this URI's path, bp starts at the end of the base path and is
// moved backwards by UpTree()
831 const wxChar* op = m_path.c_str();
832 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
833
834 //not a ending directory? move up
ce321570 835 if (base.m_path[0] && *(bp-1) != wxT('/'))
dd65d8c8
RN
836 UpTree(base.m_path, bp);
837
838 //normalize directories
846978d7 839 while(*op == wxT('.') && *(op+1) == wxT('.') &&
ce321570 840 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
dd65d8c8
RN
841 {
842 UpTree(base.m_path, bp);
843
// skip ".." at the end of the path, or "../" otherwise
844 if (*(op+2) == '\0')
845 op += 2;
846 else
847 op += 3;
848 }
849
// result = base path up to bp + what is left of this URI's path from op
846978d7 850 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
ba5a47ae 851 m_path.substr((op - m_path.c_str()), m_path.Length());
dd65d8c8
RN
852 }
853 }
ce321570 854
846978d7 855 //T.fragment = R.fragment;
dd65d8c8
RN
856}
857
858// ---------------------------------------------------------------------------
846978d7 859// UpTree
dd65d8c8 860//
ce321570 861// Moves a URI path up a directory
dd65d8c8
RN
862// ---------------------------------------------------------------------------
863
ce321570 864//static
dd65d8c8
RN
865void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
866{
ce321570 867 if (uri != uristart && *(uri-1) == wxT('/'))
dd65d8c8
RN
868 {
869 uri -= 2;
870 }
846978d7 871
dd65d8c8
RN
872 for(;uri != uristart; --uri)
873 {
ce321570 874 if (*uri == wxT('/'))
dd65d8c8
RN
875 {
876 ++uri;
877 break;
878 }
879 }
880
881 //!!!TODO:HACK!!!//
ce321570 882 if (uri == uristart && *uri == wxT('/'))
dd65d8c8
RN
883 ++uri;
884 //!!!//
885}
886
ce321570
RN
887// ---------------------------------------------------------------------------
888// Normalize
889//
890// Normalizes directories in-place
891//
892// I.E. ./ and . are ignored
893//
894// ../ and .. are removed if a directory is before it, along
895// with that directory (leading .. and ../ are kept)
896// ---------------------------------------------------------------------------
897
898//static
// Removes "." and ".." segments from the NUL-terminated path 's', rewriting
// it in place and NUL-terminating the result.
//
// s            - buffer to normalize; also used as the write cursor
// bIgnoreLeads - what to do with a ".." when nothing has been written after
//                'bp' yet: true drops it, false copies it to the output
//
// cp is the read cursor. bp is the floor passed to UpTree() as the start of
// the path; it begins just past a leading '/' and is advanced past any
// leading ".." segments that are kept.
8404931e 899void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
dd65d8c8
RN
900{
901 wxChar* cp = s;
902 wxChar* bp = s;
903
ce321570 904 if(s[0] == wxT('/'))
dd65d8c8
RN
905 ++bp;
906
907 while(*cp)
908 {
ce321570
RN
// a "." segment: '.' followed by '/' or end of string, at the start of a
// segment
909 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
910 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
911 {
912 //. _or_ ./ - ignore
913 if (*(cp+1) == '\0')
914 cp += 1;
915 else
916 cp += 2;
917 }
// a ".." segment: ".." followed by '/' or end of string, at the start of a
// segment
846978d7 918 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
ce321570
RN
919 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
920 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
921 {
922 //.. _or_ ../ - go up the tree
923 if (s != bp)
924 {
// something has been written past the floor: rewind the write cursor
925 UpTree((const wxChar*)bp, (const wxChar*&)s);
926
927 if (*(cp+2) == '\0')
928 cp += 2;
929 else
930 cp += 3;
931 }
932 else if (!bIgnoreLeads)
933
934 {
// keep the leading ".." (and its '/', if any) and raise the floor past it
935 *bp++ = *cp++;
936 *bp++ = *cp++;
937 if (*cp)
938 *bp++ = *cp++;
939
940 s = bp;
941 }
942 else
943 {
// leading ".." is dropped
944 if (*(cp+2) == '\0')
945 cp += 2;
946 else
947 cp += 3;
948 }
949 }
950 else
846978d7 951 *s++ = *cp++;
dd65d8c8
RN
952 }
953
954 *s = '\0';
955}
956
957// ---------------------------------------------------------------------------
ce321570
RN
958// ParseH16
959//
960// Parses 1 to 4 hex values. Returns true if the first character of the input
846978d7 961// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
962// the input string back to its original position on failure.
963// ---------------------------------------------------------------------------
964
965bool wxURI::ParseH16(const wxChar*& uri)
966{
967 // h16 = 1*4HEXDIG
968 if(!IsHex(*++uri))
969 return false;
970
971 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
972 ++uri;
973
974 return true;
975}
976
977// ---------------------------------------------------------------------------
978// ParseIPXXX
979//
846978d7
WS
980// Parses a certain version of an IP address and moves the input string past
981// it. Returns true if the input string contains the proper version of an ip
982// address. It is the caller's responsibility to move the input string back
ce321570 983// to its original position on failure.
dd65d8c8
RN
984// ---------------------------------------------------------------------------
985
// Tries to read a dotted-quad IPv4 address starting at 'uri', advancing the
// pointer as it goes.
//
// Returns true only when four numeric parts were counted. Returns false
// immediately when a three digit part is rejected by the range test. The
// pointer is not restored on failure.
// NOTE(review): nothing here checks what follows the fourth part - confirm
// callers validate the terminator.
986bool wxURI::ParseIPv4address(const wxChar*& uri)
987{
988 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
989 //
990 //dec-octet = DIGIT ; 0-9
991 // / %x31-39 DIGIT ; 10-99
992 // / "1" 2DIGIT ; 100-199
993 // / "2" %x30-34 DIGIT ; 200-249
994 // / "25" %x30-35 ; 250-255
// number of parts read so far
995 size_t iIPv4 = 0;
996 if (IsDigit(*uri))
997 {
998 ++iIPv4;
999
846978d7 1000
dd65d8c8
RN
1001 //each ip part must be between 0-255 (dupe of version in for loop)
// the range test only runs when three consecutive digits were seen; uri then
// points at the third digit
1002 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1003 //100 or less (note !)
846978d7
WS
1004 !( (*(uri-2) < wxT('2')) ||
1005 //240 or less
1006 (*(uri-2) == wxT('2') &&
ce321570 1007 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1008 )
1009 )
1010 )
1011 {
1012 return false;
1013 }
1014
// step past the third digit, if there was one
1015 if(IsDigit(*uri))++uri;
1016
1017 //compilers should unroll this loop
1018 for(; iIPv4 < 4; ++iIPv4)
1019 {
// each further part needs a '.' followed by a digit
ce321570 1020 if (*uri != wxT('.') || !IsDigit(*++uri))
dd65d8c8
RN
1021 break;
1022
1023 //each ip part must be between 0-255
1024 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1025 //100 or less (note !)
846978d7
WS
1026 !( (*(uri-2) < wxT('2')) ||
1027 //240 or less
1028 (*(uri-2) == wxT('2') &&
ce321570 1029 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1030 )
1031 )
1032 )
1033 {
1034 return false;
1035 }
1036 if(IsDigit(*uri))++uri;
1037 }
1038 }
1039 return iIPv4 == 4;
1040}
1041
dd65d8c8
RN
// Tries to match the IPv6address grammar quoted below, advancing 'uri' as
// it goes. The pointer is not restored on failure.
//
// numPrefix counts the "h16 :" groups read before a "::" (or before the
// trailing ls32); maxPostfix is the number of "h16 :" groups that must
// follow the "::" before the final ls32 / h16.
// NOTE(review): ParseH16() advances the pointer before testing it, so every
// call here examines the character after the current position - confirm the
// positions passed in by the caller account for that.
1042bool wxURI::ParseIPv6address(const wxChar*& uri)
1043{
1044 // IPv6address = 6( h16 ":" ) ls32
1045 // / "::" 5( h16 ":" ) ls32
1046 // / [ h16 ] "::" 4( h16 ":" ) ls32
1047 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1048 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1049 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1050 // / [ *4( h16 ":" ) h16 ] "::" ls32
1051 // / [ *5( h16 ":" ) h16 ] "::" h16
1052 // / [ *6( h16 ":" ) h16 ] "::"
1053
1054 size_t numPrefix = 0,
1055 maxPostfix;
1056
// set when the prefix loop stopped because a group did not start with hex
1057 bool bEndHex = false;
1058
1059 for( ; numPrefix < 6; ++numPrefix)
1060 {
1061 if(!ParseH16(uri))
1062 {
// undo the single advance ParseH16() made before failing
1063 --uri;
1064 bEndHex = true;
1065 break;
1066 }
846978d7 1067
ce321570 1068 if(*uri != wxT(':'))
dd65d8c8
RN
1069 {
1070 break;
1071 }
1072 }
1073
1074 if(!bEndHex && !ParseH16(uri))
1075 {
1076 --uri;
1077
1078 if (numPrefix)
1079 return false;
1080
ce321570 1081 if (*uri == wxT(':'))
dd65d8c8 1082 {
ce321570 1083 if (*++uri != wxT(':'))
dd65d8c8
RN
1084 return false;
1085
1086 maxPostfix = 5;
1087 }
1088 else
1089 maxPostfix = 6;
1090 }
1091 else
1092 {
ce321570 1093 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
dd65d8c8
RN
1094 {
// no "::" here: only the full form with six leading groups is accepted
1095 if (numPrefix != 6)
1096 return false;
1097
// back up to just after the previous ':' and parse the ls32 from there
ce321570 1098 while (*--uri != wxT(':')) {}
dd65d8c8
RN
1099 ++uri;
1100
1101 const wxChar* uristart = uri;
1102 //parse ls32
1103 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1104 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1105 return true;
1106
1107 uri = uristart;
1108
1109 if (ParseIPv4address(uri))
1110 return true;
1111 else
1112 return false;
1113 }
1114 else
1115 {
// skip the "::"
1116 uri += 2;
846978d7 1117
dd65d8c8
RN
1118 if (numPrefix > 3)
1119 maxPostfix = 0;
1120 else
1121 maxPostfix = 4 - numPrefix;
1122 }
1123 }
1124
1125 bool bAllowAltEnding = maxPostfix == 0;
1126
1127 for(; maxPostfix != 0; --maxPostfix)
1128 {
ce321570 1129 if(!ParseH16(uri) || *uri != wxT(':'))
dd65d8c8
RN
1130 return false;
1131 }
1132
1133 if(numPrefix <= 4)
1134 {
1135 const wxChar* uristart = uri;
1136 //parse ls32
1137 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1138 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1139 return true;
1140
1141 uri = uristart;
1142
1143 if (ParseIPv4address(uri))
1144 return true;
1145
1146 uri = uristart;
846978d7 1147
dd65d8c8
RN
1148 if (!bAllowAltEnding)
1149 return false;
1150 }
1151
// NOTE(review): both outcomes below return true, so this ParseH16() call
// only affects how far uri is advanced
1152 if(numPrefix <= 5 && ParseH16(uri))
1153 return true;
1154
1155 return true;
1156}
1157
1158bool wxURI::ParseIPvFuture(const wxChar*& uri)
1159{
1160 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
ce321570 1161 if (*++uri != wxT('v') || !IsHex(*++uri))
dd65d8c8
RN
1162 return false;
1163
1164 while (IsHex(*++uri)) {}
1165
ce321570 1166 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
dd65d8c8
RN
1167 return false;
1168
ce321570 1169 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
dd65d8c8
RN
1170
1171 return true;
1172}
1173
1174
1175// ---------------------------------------------------------------------------
ce321570
RN
1176// CharToHex
1177//
846978d7 1178// Converts a character into a numeric hexadecimal value, or 0 if the
ce321570 1179// passed in character is not a valid hex character
dd65d8c8
RN
1180// ---------------------------------------------------------------------------
1181
ce321570 1182//static
409a7ba7 1183wxChar wxURI::CharToHex(const wxChar& c)
dd65d8c8 1184{
1676a194
WS
1185 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1186 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1187 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
dd65d8c8 1188
846978d7 1189 return 0;
dd65d8c8
RN
1190}
1191
ce321570
RN
1192// ---------------------------------------------------------------------------
1193// IsXXX
1194//
1195// Returns true if the passed in character meets the criteria of the method
1196// ---------------------------------------------------------------------------
1197
dd65d8c8
RN
1198//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1199bool wxURI::IsUnreserved (const wxChar& c)
846978d7 1200{ return IsAlpha(c) || IsDigit(c) ||
ce321570
RN
1201 c == wxT('-') ||
1202 c == wxT('.') ||
1203 c == wxT('_') ||
1204 c == wxT('~') //tilde
846978d7 1205 ;
dd65d8c8
RN
1206}
1207
1208bool wxURI::IsReserved (const wxChar& c)
846978d7 1209{
dd65d8c8
RN
1210 return IsGenDelim(c) || IsSubDelim(c);
1211}
1212
1213//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1214bool wxURI::IsGenDelim (const wxChar& c)
1215{
ce321570
RN
1216 return c == wxT(':') ||
1217 c == wxT('/') ||
1218 c == wxT('?') ||
1219 c == wxT('#') ||
1220 c == wxT('[') ||
1221 c == wxT(']') ||
1222 c == wxT('@');
dd65d8c8
RN
1223}
1224
1225//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1226//! / "*" / "+" / "," / ";" / "="
1227bool wxURI::IsSubDelim (const wxChar& c)
1228{
ce321570
RN
1229 return c == wxT('!') ||
1230 c == wxT('$') ||
1231 c == wxT('&') ||
1232 c == wxT('\'') ||
1233 c == wxT('(') ||
1234 c == wxT(')') ||
1235 c == wxT('*') ||
1236 c == wxT('+') ||
1237 c == wxT(',') ||
1238 c == wxT(';') ||
846978d7 1239 c == wxT('=')
dd65d8c8
RN
1240 ;
1241}
1242
1243bool wxURI::IsHex(const wxChar& c)
ce321570 1244{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
dd65d8c8
RN
1245
1246bool wxURI::IsAlpha(const wxChar& c)
ce321570 1247{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
dd65d8c8
RN
1248
1249bool wxURI::IsDigit(const wxChar& c)
ce321570 1250{ return c >= wxT('0') && c <= wxT('9'); }
dd65d8c8
RN
1251
1252
1253// ---------------------------------------------------------------------------
1254//
00a1d2e0 1255// wxURL Compatibility
dd65d8c8 1256//
dd65d8c8
RN
1257// ---------------------------------------------------------------------------
1258
1259#if wxUSE_URL
1260
86470d43
RN
1261#if WXWIN_COMPATIBILITY_2_4
1262
dd65d8c8
RN
1263#include "wx/url.h"
1264
a6fb8636
WS
1265wxString wxURL::GetProtocolName() const
1266{
1267 return m_scheme;
1268}
1269
1270wxString wxURL::GetHostName() const
1271{
1272 return m_server;
1273}
1274
1275wxString wxURL::GetPath() const
1276{
1277 return m_path;
1278}
1279
997ba01b
RN
1280//Note that this old code really doesn't convert to a URI that well and looks
1281//more like a dirty hack than anything else...
1282
1283wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
dd65d8c8 1284{
997ba01b
RN
1285 wxString out_str;
1286 wxString hexa_code;
1287 size_t i;
1288
1289 for (i = 0; i < uri.Len(); i++)
1290 {
1291 wxChar c = uri.GetChar(i);
1292
1293 if (c == wxT(' '))
1294 {
1295 // GRG, Apr/2000: changed to "%20" instead of '+'
1296
1297 out_str += wxT("%20");
1298 }
1299 else
1300 {
1301 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1302 //
1303 // - Alphanumeric characters are never escaped
1304 // - Unreserved marks are never escaped
1305 // - Delimiters must be escaped if they appear within a component
1306 // but not if they are used to separate components. Here we have
1307 // no clear way to distinguish between these two cases, so they
1308 // are escaped unless they are passed in the 'delims' parameter
1309 // (allowed delimiters).
1310
1311 static const wxChar marks[] = wxT("-_.!~*()'");
1312
1313 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1314 {
1315 hexa_code.Printf(wxT("%%%02X"), c);
1316 out_str += hexa_code;
1317 }
1318 else
1319 {
1320 out_str += c;
1321 }
1322 }
1323 }
1324
1325 return out_str;
dd65d8c8
RN
1326}
1327
1328wxString wxURL::ConvertFromURI(const wxString& uri)
1329{
86470d43 1330 return wxURI::Unescape(uri);
dd65d8c8
RN
1331}
1332
86470d43
RN
1333#endif //WXWIN_COMPATIBILITY_2_4
1334
dd65d8c8
RN
1335#endif //wxUSE_URL
1336
1337//end of uri.cpp
1338
1339
1340