wxXmlNode::GetAttribute's pointer argument must not be NULL, check for it
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
3// Purpose: Implementation of a uri parser
4// Author: Ryan Norton
5// Created: 10/26/04
6// RCS-ID: $Id$
7// Copyright: (c) 2004 Ryan Norton
8// Licence: wxWindows
9/////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// declarations
13// ===========================================================================
14
15// ---------------------------------------------------------------------------
16// headers
17// ---------------------------------------------------------------------------
18
dd65d8c8
RN
19// For compilers that support precompilation, includes "wx.h".
20#include "wx/wxprec.h"
21
22#ifdef __BORLANDC__
23 #pragma hdrstop
24#endif
25
0bf751e7
VS
26#ifndef WX_PRECOMP
27 #include "wx/crt.h"
28#endif
29
dd65d8c8
RN
30#include "wx/uri.h"
31
32// ---------------------------------------------------------------------------
33// definitions
34// ---------------------------------------------------------------------------
35
4115960d 36IMPLEMENT_CLASS(wxURI, wxObject)
dd65d8c8
RN
37
38// ===========================================================================
39// implementation
40// ===========================================================================
41
42// ---------------------------------------------------------------------------
43// utilities
44// ---------------------------------------------------------------------------
45
46// ---------------------------------------------------------------------------
47//
48// wxURI
49//
50// ---------------------------------------------------------------------------
51
52// ---------------------------------------------------------------------------
53// Constructors
54// ---------------------------------------------------------------------------
55
56wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57{
58}
846978d7 59
dd65d8c8
RN
60wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61{
62 Create(uri);
63}
64
60431236 65wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
dd65d8c8 66{
b60b2ec8 67 Assign(uri);
dd65d8c8
RN
68}
69
70// ---------------------------------------------------------------------------
71// Destructor and cleanup
72// ---------------------------------------------------------------------------
73
74wxURI::~wxURI()
75{
76 Clear();
77}
78
79void wxURI::Clear()
80{
4860d40d 81 m_scheme = m_userinfo = m_server = m_port = m_path =
525d8583 82 m_query = m_fragment = wxEmptyString;
dd65d8c8
RN
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87}
88
89// ---------------------------------------------------------------------------
90// Create
91//
846978d7 92// This creates the URI - all we do here is call the main parsing method
dd65d8c8
RN
93// ---------------------------------------------------------------------------
94
86470d43 95const wxChar* wxURI::Create(const wxString& uri)
846978d7 96{
dd65d8c8
RN
97 if (m_fields)
98 Clear();
99
86501081
VS
100 // FIXME-UTF8: rewrite ParseXXX() methods using iterators
101 // NB: using wxWxCharBuffer instead of just c_str() avoids keeping
102 // converted string in memory for longer than needed
103 return Parse(wxWxCharBuffer(uri.c_str()));
846978d7 104}
dd65d8c8
RN
105
106// ---------------------------------------------------------------------------
ce321570 107// Escape Methods
dd65d8c8 108//
846978d7 109// TranslateEscape unencodes a 3 character URL escape sequence
ce321570 110//
dd65d8c8 111// Escape encodes an invalid URI character into a 3 character sequence
ce321570 112//
dd65d8c8
RN
113// IsEscape determines if the input string starts with an escape sequence;
114// it leaves the position of the input string unchanged either way
ce321570
RN
115//
116// Unescape unencodes all 3 character URL escape sequences in a wxString
dd65d8c8
RN
117// ---------------------------------------------------------------------------
118
c9f78968 119wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
dd65d8c8 120{
c9f78968
VS
121 wxChar c1(*s);
122 wxChar c2(*(s + 1));
8404931e 123
c9f78968
VS
124 wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
125
126 return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
dd65d8c8
RN
127}
128
86470d43
RN
129wxString wxURI::Unescape(const wxString& uri)
130{
131 wxString new_uri;
132
c9f78968 133 for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
86470d43 134 {
c9f78968 135 if ( *i == wxT('%') )
86470d43 136 {
c9f78968 137 new_uri += wxURI::TranslateEscape(i + 1);
86470d43
RN
138 i += 2;
139 }
d8d7193d 140 else
c9f78968 141 new_uri += *i;
86470d43
RN
142 }
143
144 return new_uri;
145}
146
dd65d8c8
RN
147void wxURI::Escape(wxString& s, const wxChar& c)
148{
149 const wxChar* hdig = wxT("0123456789abcdef");
ce321570 150 s += wxT('%');
dd65d8c8 151 s += hdig[(c >> 4) & 15];
846978d7 152 s += hdig[c & 15];
dd65d8c8
RN
153}
154
155bool wxURI::IsEscape(const wxChar*& uri)
156{
ce321570
RN
157 // pct-encoded = "%" HEXDIG HEXDIG
158 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
dd65d8c8 159 return true;
dd65d8c8
RN
160 else
161 return false;
162}
163
4860d40d
RN
164// ---------------------------------------------------------------------------
165// GetUser
166// GetPassword
167//
168// Gets the username and password via the old URL method.
169// ---------------------------------------------------------------------------
170wxString wxURI::GetUser() const
171{
172 size_t dwPasswordPos = m_userinfo.find(':');
173
174 if (dwPasswordPos == wxString::npos)
175 dwPasswordPos = 0;
176
177 return m_userinfo(0, dwPasswordPos);
178}
179
180wxString wxURI::GetPassword() const
181{
182 size_t dwPasswordPos = m_userinfo.find(':');
183
184 if (dwPasswordPos == wxString::npos)
185 return wxT("");
186 else
187 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
188}
189
dd65d8c8 190// ---------------------------------------------------------------------------
86470d43 191// BuildURI
dd65d8c8 192//
846978d7 193// BuildURI() builds the entire URI into a useable
dd65d8c8 194// representation, including proper identification characters such as slashes
ce321570
RN
195//
196// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
197// the components that accept escape sequences
dd65d8c8
RN
198// ---------------------------------------------------------------------------
199
86470d43 200wxString wxURI::BuildURI() const
846978d7 201{
dd65d8c8
RN
202 wxString ret;
203
204 if (HasScheme())
205 ret = ret + m_scheme + wxT(":");
206
207 if (HasServer())
208 {
209 ret += wxT("//");
210
4860d40d
RN
211 if (HasUserInfo())
212 ret = ret + m_userinfo + wxT("@");
dd65d8c8
RN
213
214 ret += m_server;
215
216 if (HasPort())
217 ret = ret + wxT(":") + m_port;
218 }
219
220 ret += m_path;
221
222 if (HasQuery())
223 ret = ret + wxT("?") + m_query;
224
225 if (HasFragment())
226 ret = ret + wxT("#") + m_fragment;
227
228 return ret;
229}
230
86470d43
RN
231wxString wxURI::BuildUnescapedURI() const
232{
233 wxString ret;
234
235 if (HasScheme())
236 ret = ret + m_scheme + wxT(":");
237
238 if (HasServer())
239 {
240 ret += wxT("//");
241
4860d40d
RN
242 if (HasUserInfo())
243 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
86470d43
RN
244
245 if (m_hostType == wxURI_REGNAME)
246 ret += wxURI::Unescape(m_server);
247 else
248 ret += m_server;
249
250 if (HasPort())
251 ret = ret + wxT(":") + m_port;
252 }
253
254 ret += wxURI::Unescape(m_path);
255
256 if (HasQuery())
257 ret = ret + wxT("?") + wxURI::Unescape(m_query);
258
259 if (HasFragment())
260 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
261
262 return ret;
263}
264
dd65d8c8 265// ---------------------------------------------------------------------------
ce321570 266// Assignment
dd65d8c8
RN
267// ---------------------------------------------------------------------------
268
b60b2ec8
RN
269wxURI& wxURI::Assign(const wxURI& uri)
270{
271 //assign fields
272 m_fields = uri.m_fields;
273
274 //ref over components
275 m_scheme = uri.m_scheme;
4860d40d 276 m_userinfo = uri.m_userinfo;
b60b2ec8
RN
277 m_server = uri.m_server;
278 m_hostType = uri.m_hostType;
279 m_port = uri.m_port;
280 m_path = uri.m_path;
281 m_query = uri.m_query;
282 m_fragment = uri.m_fragment;
dd65d8c8
RN
283
284 return *this;
285}
286
ce321570
RN
287wxURI& wxURI::operator = (const wxURI& uri)
288{
289 return Assign(uri);
290}
291
b60b2ec8 292wxURI& wxURI::operator = (const wxString& string)
846978d7 293{
dd65d8c8
RN
294 Create(string);
295 return *this;
296}
297
ce321570
RN
298// ---------------------------------------------------------------------------
299// Comparison
300// ---------------------------------------------------------------------------
301
dd65d8c8 302bool wxURI::operator == (const wxURI& uri) const
846978d7 303{
dd65d8c8
RN
304 if (HasScheme())
305 {
306 if(m_scheme != uri.m_scheme)
307 return false;
308 }
309 else if (uri.HasScheme())
310 return false;
311
312
313 if (HasServer())
314 {
4860d40d 315 if (HasUserInfo())
dd65d8c8 316 {
4860d40d 317 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
318 return false;
319 }
4860d40d 320 else if (uri.HasUserInfo())
dd65d8c8
RN
321 return false;
322
323 if (m_server != uri.m_server ||
324 m_hostType != uri.m_hostType)
325 return false;
326
327 if (HasPort())
328 {
329 if(m_port != uri.m_port)
330 return false;
331 }
332 else if (uri.HasPort())
333 return false;
334 }
335 else if (uri.HasServer())
336 return false;
337
338
339 if (HasPath())
340 {
341 if(m_path != uri.m_path)
342 return false;
343 }
344 else if (uri.HasPath())
345 return false;
346
347 if (HasQuery())
348 {
349 if (m_query != uri.m_query)
350 return false;
351 }
352 else if (uri.HasQuery())
353 return false;
354
355 if (HasFragment())
356 {
357 if (m_fragment != uri.m_fragment)
358 return false;
359 }
360 else if (uri.HasFragment())
361 return false;
362
363 return true;
364}
365
366// ---------------------------------------------------------------------------
367// IsReference
368//
369// if there is no authority or scheme, it is a reference
370// ---------------------------------------------------------------------------
371
372bool wxURI::IsReference() const
373{ return !HasScheme() || !HasServer(); }
374
375// ---------------------------------------------------------------------------
376// Parse
377//
378// Master URI parsing method. Just calls the individual parsing methods
379//
380// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 381// URI-reference = URI / relative
dd65d8c8
RN
382// ---------------------------------------------------------------------------
383
86501081 384const wxChar* wxURI::Parse(const wxChar *uri)
dd65d8c8
RN
385{
386 uri = ParseScheme(uri);
387 uri = ParseAuthority(uri);
388 uri = ParsePath(uri);
389 uri = ParseQuery(uri);
390 return ParseFragment(uri);
391}
392
393// ---------------------------------------------------------------------------
394// ParseXXX
395//
396// Individual parsers for each URI component
397// ---------------------------------------------------------------------------
398
86501081 399const wxChar* wxURI::ParseScheme(const wxChar *uri)
dd65d8c8
RN
400{
401 wxASSERT(uri != NULL);
402
403 //copy of the uri - used for figuring out
404 //length of each component
405 const wxChar* uricopy = uri;
406
407 //Does the uri have a scheme (first character alpha)?
408 if (IsAlpha(*uri))
409 {
410 m_scheme += *uri++;
411
412 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 413 while (IsAlpha(*uri) || IsDigit(*uri) ||
ce321570
RN
414 *uri == wxT('+') ||
415 *uri == wxT('-') ||
846978d7
WS
416 *uri == wxT('.'))
417 {
418 m_scheme += *uri++;
dd65d8c8
RN
419 }
420
421 //valid scheme?
ce321570 422 if (*uri == wxT(':'))
846978d7 423 {
dd65d8c8
RN
424 //mark the scheme as valid
425 m_fields |= wxURI_SCHEME;
426
427 //move reference point up to input buffer
428 uricopy = ++uri;
429 }
846978d7 430 else
dd65d8c8 431 //relative uri with relative path reference
525d8583 432 m_scheme = wxEmptyString;
dd65d8c8 433 }
846978d7 434// else
dd65d8c8
RN
435 //relative uri with _possible_ relative path reference
436
437 return uricopy;
438}
439
440const wxChar* wxURI::ParseAuthority(const wxChar* uri)
441{
442 // authority = [ userinfo "@" ] host [ ":" port ]
846978d7 443 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
dd65d8c8 444 {
97ad053b 445 //skip past the two slashes
dd65d8c8
RN
446 uri += 2;
447
97ad053b
VZ
448 // ############# DEVIATION FROM RFC #########################
449 // Don't parse the server component for file URIs
450 if(m_scheme != wxT("file"))
451 {
452 //normal way
4860d40d 453 uri = ParseUserInfo(uri);
dd65d8c8
RN
454 uri = ParseServer(uri);
455 return ParsePort(uri);
97ad053b 456 }
dd65d8c8
RN
457 }
458
459 return uri;
460}
461
4860d40d 462const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
dd65d8c8
RN
463{
464 wxASSERT(uri != NULL);
465
466 //copy of the uri - used for figuring out
467 //length of each component
468 const wxChar* uricopy = uri;
469
470 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
846978d7 471 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 472 {
24ca04e7 473 if(IsUnreserved(*uri) ||
ce321570 474 IsSubDelim(*uri) || *uri == wxT(':'))
4860d40d 475 m_userinfo += *uri++;
24ca04e7
VZ
476 else if (IsEscape(uri))
477 {
478 m_userinfo += *uri++;
479 m_userinfo += *uri++;
480 m_userinfo += *uri++;
481 }
dd65d8c8 482 else
4860d40d 483 Escape(m_userinfo, *uri++);
dd65d8c8
RN
484 }
485
ce321570 486 if(*uri == wxT('@'))
dd65d8c8
RN
487 {
488 //valid userinfo
4860d40d 489 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
490
491 uricopy = ++uri;
492 }
493 else
4860d40d 494 m_userinfo = wxEmptyString;
dd65d8c8
RN
495
496 return uricopy;
497}
498
499const wxChar* wxURI::ParseServer(const wxChar* uri)
500{
501 wxASSERT(uri != NULL);
502
503 //copy of the uri - used for figuring out
504 //length of each component
505 const wxChar* uricopy = uri;
506
507 // host = IP-literal / IPv4address / reg-name
508 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
ce321570 509 if (*uri == wxT('['))
dd65d8c8 510 {
e854db32
RN
511 ++uri; //some compilers don't support *&ing a ++*
512 if (ParseIPv6address(uri) && *uri == wxT(']'))
dd65d8c8
RN
513 {
514 ++uri;
515 m_hostType = wxURI_IPV6ADDRESS;
846978d7 516
dd65d8c8 517 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 518 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
519 theBuffer.SetLength(uri-uricopy);
520 }
521 else
522 {
523 uri = uricopy;
524
e854db32
RN
525 ++uri; //some compilers don't support *&ing a ++*
526 if (ParseIPvFuture(uri) && *uri == wxT(']'))
dd65d8c8
RN
527 {
528 ++uri;
846978d7
WS
529 m_hostType = wxURI_IPVFUTURE;
530
dd65d8c8 531 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 532 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
533 theBuffer.SetLength(uri-uricopy);
534 }
846978d7 535 else
dd65d8c8
RN
536 uri = uricopy;
537 }
538 }
846978d7 539 else
dd65d8c8
RN
540 {
541 if (ParseIPv4address(uri))
542 {
543 m_hostType = wxURI_IPV4ADDRESS;
544
545 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 546 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
547 theBuffer.SetLength(uri-uricopy);
548 }
846978d7 549 else
dd65d8c8
RN
550 uri = uricopy;
551 }
552
553 if(m_hostType == wxURI_REGNAME)
554 {
555 uri = uricopy;
556 // reg-name = *( unreserved / pct-encoded / sub-delims )
846978d7 557 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 558 {
24ca04e7
VZ
559 if(IsUnreserved(*uri) || IsSubDelim(*uri))
560 m_server += *uri++;
561 else if (IsEscape(uri))
562 {
dd65d8c8 563 m_server += *uri++;
24ca04e7
VZ
564 m_server += *uri++;
565 m_server += *uri++;
566 }
dd65d8c8
RN
567 else
568 Escape(m_server, *uri++);
846978d7 569 }
dd65d8c8
RN
570 }
571
572 //mark the server as valid
573 m_fields |= wxURI_SERVER;
574
575 return uri;
576}
577
846978d7 578
dd65d8c8
RN
579const wxChar* wxURI::ParsePort(const wxChar* uri)
580{
581 wxASSERT(uri != NULL);
582
583 // port = *DIGIT
ce321570 584 if(*uri == wxT(':'))
dd65d8c8
RN
585 {
586 ++uri;
846978d7 587 while(IsDigit(*uri))
dd65d8c8
RN
588 {
589 m_port += *uri++;
846978d7 590 }
dd65d8c8
RN
591
592 //mark the port as valid
593 m_fields |= wxURI_PORT;
594 }
595
596 return uri;
597}
598
8404931e 599const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
dd65d8c8
RN
600{
601 wxASSERT(uri != NULL);
602
603 //copy of the uri - used for figuring out
604 //length of each component
605 const wxChar* uricopy = uri;
606
607 /// hier-part = "//" authority path-abempty
608 /// / path-absolute
609 /// / path-rootless
610 /// / path-empty
611 ///
612 /// relative-part = "//" authority path-abempty
613 /// / path-absolute
614 /// / path-noscheme
615 /// / path-empty
616 ///
617 /// path-abempty = *( "/" segment )
618 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
619 /// path-noscheme = segment-nz-nc *( "/" segment )
620 /// path-rootless = segment-nz *( "/" segment )
621 /// path-empty = 0<pchar>
622 ///
623 /// segment = *pchar
624 /// segment-nz = 1*pchar
625 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
626 /// ; non-zero-length segment without any colon ":"
627 ///
628 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
ce321570 629 if (*uri == wxT('/'))
dd65d8c8
RN
630 {
631 m_path += *uri++;
632
846978d7
WS
633 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
634 {
24ca04e7 635 if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 636 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 637 m_path += *uri++;
24ca04e7
VZ
638 else if (IsEscape(uri))
639 {
640 m_path += *uri++;
641 m_path += *uri++;
642 m_path += *uri++;
643 }
846978d7
WS
644 else
645 Escape(m_path, *uri++);
dd65d8c8
RN
646 }
647
648 if (bNormalize)
649 {
650 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
81727065
VS
651#if wxUSE_STL || wxUSE_UNICODE_UTF8
652 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
2c09fb3b 653 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 654#endif
dd65d8c8
RN
655 Normalize(theBuffer, true);
656 theBuffer.SetLength(wxStrlen(theBuffer));
657 }
658 //mark the path as valid
659 m_fields |= wxURI_PATH;
660 }
661 else if(*uri) //Relative path
662 {
663 if (bReference)
664 {
665 //no colon allowed
846978d7 666 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 667 {
24ca04e7 668 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 669 *uri == wxT('@') || *uri == wxT('/'))
846978d7 670 m_path += *uri++;
24ca04e7
VZ
671 else if (IsEscape(uri))
672 {
673 m_path += *uri++;
674 m_path += *uri++;
675 m_path += *uri++;
676 }
846978d7
WS
677 else
678 Escape(m_path, *uri++);
dd65d8c8 679 }
846978d7 680 }
dd65d8c8
RN
681 else
682 {
846978d7 683 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 684 {
24ca04e7 685 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 686 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 687 m_path += *uri++;
24ca04e7
VZ
688 else if (IsEscape(uri))
689 {
690 m_path += *uri++;
691 m_path += *uri++;
692 m_path += *uri++;
693 }
846978d7
WS
694 else
695 Escape(m_path, *uri++);
dd65d8c8
RN
696 }
697 }
698
699 if (uri != uricopy)
846978d7 700 {
dd65d8c8
RN
701 if (bNormalize)
702 {
703 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
81727065
VS
704#if wxUSE_STL || wxUSE_UNICODE_UTF8
705 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
2c09fb3b 706 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 707#endif
dd65d8c8
RN
708 Normalize(theBuffer);
709 theBuffer.SetLength(wxStrlen(theBuffer));
710 }
711
712 //mark the path as valid
713 m_fields |= wxURI_PATH;
714 }
715 }
716
717 return uri;
718}
719
720
721const wxChar* wxURI::ParseQuery(const wxChar* uri)
722{
723 wxASSERT(uri != NULL);
724
725 // query = *( pchar / "/" / "?" )
ce321570 726 if (*uri == wxT('?'))
dd65d8c8
RN
727 {
728 ++uri;
ce321570 729 while(*uri && *uri != wxT('#'))
dd65d8c8 730 {
24ca04e7 731 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 732 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 733 m_query += *uri++;
24ca04e7
VZ
734 else if (IsEscape(uri))
735 {
736 m_query += *uri++;
737 m_query += *uri++;
738 m_query += *uri++;
739 }
dd65d8c8 740 else
846978d7 741 Escape(m_query, *uri++);
dd65d8c8
RN
742 }
743
744 //mark the server as valid
745 m_fields |= wxURI_QUERY;
746 }
747
748 return uri;
749}
750
751
752const wxChar* wxURI::ParseFragment(const wxChar* uri)
753{
754 wxASSERT(uri != NULL);
755
756 // fragment = *( pchar / "/" / "?" )
ce321570 757 if (*uri == wxT('#'))
dd65d8c8
RN
758 {
759 ++uri;
760 while(*uri)
761 {
24ca04e7 762 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 763 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 764 m_fragment += *uri++;
24ca04e7
VZ
765 else if (IsEscape(uri))
766 {
767 m_fragment += *uri++;
768 m_fragment += *uri++;
769 m_fragment += *uri++;
770 }
dd65d8c8 771 else
846978d7 772 Escape(m_fragment, *uri++);
dd65d8c8
RN
773 }
774
775 //mark the server as valid
776 m_fields |= wxURI_FRAGMENT;
777 }
778
779 return uri;
780}
781
782// ---------------------------------------------------------------------------
ce321570 783// Resolve
dd65d8c8 784//
ce321570 785// Builds missing components of this uri from a base uri
dd65d8c8 786//
ce321570
RN
787// A version of the algorithm outlined in the RFC is used here
788// (it is shown in comments)
789//
846978d7 790// Note that an empty URI inherits all components
dd65d8c8
RN
791// ---------------------------------------------------------------------------
792
8404931e 793void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 794{
846978d7 795 wxASSERT_MSG(!base.IsReference(),
dd65d8c8
RN
796 wxT("wxURI to inherit from must not be a reference!"));
797
ce321570 798 // If we arn't being strict, enable the older (pre-RFC2396)
dd65d8c8
RN
799 // loophole that allows this uri to inherit other
800 // properties from the base uri - even if the scheme
801 // is defined
8404931e
VZ
802 if ( !(flags & wxURI_STRICT) &&
803 HasScheme() && base.HasScheme() &&
804 m_scheme == base.m_scheme )
846978d7 805 {
dd65d8c8
RN
806 m_fields -= wxURI_SCHEME;
807 }
808
809
810 // Do nothing if this is an absolute wxURI
811 // if defined(R.scheme) then
812 // T.scheme = R.scheme;
813 // T.authority = R.authority;
814 // T.path = remove_dot_segments(R.path);
815 // T.query = R.query;
816 if (HasScheme())
817 {
818 return;
819 }
820
ea4daac4 821 //No scheme - inherit
dd65d8c8
RN
822 m_scheme = base.m_scheme;
823 m_fields |= wxURI_SCHEME;
824
825 // All we need to do for relative URIs with an
826 // authority component is just inherit the scheme
827 // if defined(R.authority) then
828 // T.authority = R.authority;
829 // T.path = remove_dot_segments(R.path);
830 // T.query = R.query;
831 if (HasServer())
832 {
833 return;
834 }
835
836 //No authority - inherit
4860d40d 837 if (base.HasUserInfo())
dd65d8c8 838 {
4860d40d
RN
839 m_userinfo = base.m_userinfo;
840 m_fields |= wxURI_USERINFO;
dd65d8c8 841 }
846978d7 842
dd65d8c8
RN
843 m_server = base.m_server;
844 m_hostType = base.m_hostType;
845 m_fields |= wxURI_SERVER;
846978d7 846
dd65d8c8
RN
847 if (base.HasPort())
848 {
849 m_port = base.m_port;
850 m_fields |= wxURI_PORT;
851 }
846978d7 852
dd65d8c8
RN
853
854 // Simple path inheritance from base
855 if (!HasPath())
856 {
857 // T.path = Base.path;
858 m_path = base.m_path;
859 m_fields |= wxURI_PATH;
846978d7 860
dd65d8c8
RN
861
862 // if defined(R.query) then
863 // T.query = R.query;
864 // else
865 // T.query = Base.query;
866 // endif;
867 if (!HasQuery())
868 {
869 m_query = base.m_query;
870 m_fields |= wxURI_QUERY;
871 }
872 }
873 else
874 {
875 // if (R.path starts-with "/") then
876 // T.path = remove_dot_segments(R.path);
877 // else
878 // T.path = merge(Base.path, R.path);
879 // T.path = remove_dot_segments(T.path);
880 // endif;
881 // T.query = R.query;
ce321570 882 if (m_path[0u] != wxT('/'))
dd65d8c8 883 {
ea4daac4 884 //Merge paths
c9f78968
VS
885 wxString::const_iterator op = m_path.begin();
886 wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
dd65d8c8
RN
887
888 //not a ending directory? move up
ce321570 889 if (base.m_path[0] && *(bp-1) != wxT('/'))
c9f78968 890 UpTree(base.m_path.begin(), bp);
dd65d8c8
RN
891
892 //normalize directories
846978d7 893 while(*op == wxT('.') && *(op+1) == wxT('.') &&
ce321570 894 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
dd65d8c8 895 {
c9f78968 896 UpTree(base.m_path.begin(), bp);
dd65d8c8
RN
897
898 if (*(op+2) == '\0')
899 op += 2;
900 else
901 op += 3;
902 }
903
c9f78968
VS
904 m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
905 m_path.substr((op - m_path.begin()), m_path.length());
dd65d8c8
RN
906 }
907 }
ce321570 908
846978d7 909 //T.fragment = R.fragment;
dd65d8c8
RN
910}
911
912// ---------------------------------------------------------------------------
846978d7 913// UpTree
dd65d8c8 914//
ce321570 915// Moves a URI path up a directory
dd65d8c8
RN
916// ---------------------------------------------------------------------------
917
ce321570 918//static
c9f78968
VS
919void wxURI::UpTree(wxString::const_iterator uristart,
920 wxString::const_iterator& uri)
921{
922 if (uri != uristart && *(uri-1) == wxT('/'))
923 {
924 uri -= 2;
925 }
926
927 for(;uri != uristart; --uri)
928 {
929 if (*uri == wxT('/'))
930 {
931 ++uri;
932 break;
933 }
934 }
935
936 //!!!TODO:HACK!!!//
937 if (uri == uristart && *uri == wxT('/'))
938 ++uri;
939 //!!!//
940}
941
942// FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
943/*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
dd65d8c8 944{
ce321570 945 if (uri != uristart && *(uri-1) == wxT('/'))
dd65d8c8
RN
946 {
947 uri -= 2;
948 }
846978d7 949
dd65d8c8
RN
950 for(;uri != uristart; --uri)
951 {
ce321570 952 if (*uri == wxT('/'))
dd65d8c8
RN
953 {
954 ++uri;
955 break;
956 }
957 }
958
959 //!!!TODO:HACK!!!//
ce321570 960 if (uri == uristart && *uri == wxT('/'))
dd65d8c8
RN
961 ++uri;
962 //!!!//
963}
c9f78968 964// end of FIXME-UTF8
dd65d8c8 965
ce321570
RN
966// ---------------------------------------------------------------------------
967// Normalize
968//
969// Normalizes directories in-place
970//
971// I.E. ./ and . are ignored
972//
973// ../ and .. are removed if a directory is before it, along
974// with that directory (leading .. and ../ are kept)
975// ---------------------------------------------------------------------------
976
977//static
8404931e 978void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
dd65d8c8
RN
979{
980 wxChar* cp = s;
981 wxChar* bp = s;
982
ce321570 983 if(s[0] == wxT('/'))
dd65d8c8
RN
984 ++bp;
985
986 while(*cp)
987 {
ce321570
RN
988 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
989 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
990 {
991 //. _or_ ./ - ignore
992 if (*(cp+1) == '\0')
993 cp += 1;
994 else
995 cp += 2;
996 }
846978d7 997 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
ce321570
RN
998 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
999 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
1000 {
1001 //.. _or_ ../ - go up the tree
1002 if (s != bp)
1003 {
1004 UpTree((const wxChar*)bp, (const wxChar*&)s);
1005
1006 if (*(cp+2) == '\0')
1007 cp += 2;
1008 else
1009 cp += 3;
1010 }
1011 else if (!bIgnoreLeads)
1012
1013 {
1014 *bp++ = *cp++;
1015 *bp++ = *cp++;
1016 if (*cp)
1017 *bp++ = *cp++;
1018
1019 s = bp;
1020 }
1021 else
1022 {
1023 if (*(cp+2) == '\0')
1024 cp += 2;
1025 else
1026 cp += 3;
1027 }
1028 }
1029 else
846978d7 1030 *s++ = *cp++;
dd65d8c8
RN
1031 }
1032
1033 *s = '\0';
1034}
1035
1036// ---------------------------------------------------------------------------
ce321570
RN
1037// ParseH16
1038//
1039// Parses 1 to 4 hex values. Returns true if the first character of the input
846978d7 1040// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
1041// the input string back to its original position on failure.
1042// ---------------------------------------------------------------------------
1043
1044bool wxURI::ParseH16(const wxChar*& uri)
1045{
1046 // h16 = 1*4HEXDIG
1047 if(!IsHex(*++uri))
1048 return false;
1049
1050 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1051 ++uri;
1052
1053 return true;
1054}
1055
1056// ---------------------------------------------------------------------------
1057// ParseIPXXX
1058//
846978d7
WS
1059// Parses a certain version of an IP address and moves the input string past
1060// it. Returns true if the input string contains the proper version of an ip
1061// address. It is the caller's responsibility to move the input string back
ce321570 1062// to its original position on failure.
dd65d8c8
RN
1063// ---------------------------------------------------------------------------
1064
1065bool wxURI::ParseIPv4address(const wxChar*& uri)
1066{
1067 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1068 //
1069 //dec-octet = DIGIT ; 0-9
1070 // / %x31-39 DIGIT ; 10-99
1071 // / "1" 2DIGIT ; 100-199
1072 // / "2" %x30-34 DIGIT ; 200-249
1073 // / "25" %x30-35 ; 250-255
1074 size_t iIPv4 = 0;
1075 if (IsDigit(*uri))
1076 {
1077 ++iIPv4;
1078
846978d7 1079
dd65d8c8
RN
1080 //each ip part must be between 0-255 (dupe of version in for loop)
1081 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1082 //100 or less (note !)
846978d7
WS
1083 !( (*(uri-2) < wxT('2')) ||
1084 //240 or less
1085 (*(uri-2) == wxT('2') &&
ce321570 1086 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1087 )
1088 )
1089 )
1090 {
1091 return false;
1092 }
1093
1094 if(IsDigit(*uri))++uri;
1095
1096 //compilers should unroll this loop
1097 for(; iIPv4 < 4; ++iIPv4)
1098 {
ce321570 1099 if (*uri != wxT('.') || !IsDigit(*++uri))
dd65d8c8
RN
1100 break;
1101
1102 //each ip part must be between 0-255
1103 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1104 //100 or less (note !)
846978d7
WS
1105 !( (*(uri-2) < wxT('2')) ||
1106 //240 or less
1107 (*(uri-2) == wxT('2') &&
ce321570 1108 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1109 )
1110 )
1111 )
1112 {
1113 return false;
1114 }
1115 if(IsDigit(*uri))++uri;
1116 }
1117 }
1118 return iIPv4 == 4;
1119}
1120
dd65d8c8
RN
1121bool wxURI::ParseIPv6address(const wxChar*& uri)
1122{
1123 // IPv6address = 6( h16 ":" ) ls32
1124 // / "::" 5( h16 ":" ) ls32
1125 // / [ h16 ] "::" 4( h16 ":" ) ls32
1126 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1127 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1128 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1129 // / [ *4( h16 ":" ) h16 ] "::" ls32
1130 // / [ *5( h16 ":" ) h16 ] "::" h16
1131 // / [ *6( h16 ":" ) h16 ] "::"
1132
1133 size_t numPrefix = 0,
1134 maxPostfix;
1135
1136 bool bEndHex = false;
1137
1138 for( ; numPrefix < 6; ++numPrefix)
1139 {
1140 if(!ParseH16(uri))
1141 {
1142 --uri;
1143 bEndHex = true;
1144 break;
1145 }
846978d7 1146
ce321570 1147 if(*uri != wxT(':'))
dd65d8c8
RN
1148 {
1149 break;
1150 }
1151 }
1152
1153 if(!bEndHex && !ParseH16(uri))
1154 {
1155 --uri;
1156
1157 if (numPrefix)
1158 return false;
1159
ce321570 1160 if (*uri == wxT(':'))
dd65d8c8 1161 {
ce321570 1162 if (*++uri != wxT(':'))
dd65d8c8
RN
1163 return false;
1164
1165 maxPostfix = 5;
1166 }
1167 else
1168 maxPostfix = 6;
1169 }
1170 else
1171 {
ce321570 1172 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
dd65d8c8
RN
1173 {
1174 if (numPrefix != 6)
1175 return false;
1176
ce321570 1177 while (*--uri != wxT(':')) {}
dd65d8c8
RN
1178 ++uri;
1179
1180 const wxChar* uristart = uri;
1181 //parse ls32
1182 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1183 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1184 return true;
1185
1186 uri = uristart;
1187
1188 if (ParseIPv4address(uri))
1189 return true;
1190 else
1191 return false;
1192 }
1193 else
1194 {
1195 uri += 2;
846978d7 1196
dd65d8c8
RN
1197 if (numPrefix > 3)
1198 maxPostfix = 0;
1199 else
1200 maxPostfix = 4 - numPrefix;
1201 }
1202 }
1203
1204 bool bAllowAltEnding = maxPostfix == 0;
1205
1206 for(; maxPostfix != 0; --maxPostfix)
1207 {
ce321570 1208 if(!ParseH16(uri) || *uri != wxT(':'))
dd65d8c8
RN
1209 return false;
1210 }
1211
1212 if(numPrefix <= 4)
1213 {
1214 const wxChar* uristart = uri;
1215 //parse ls32
1216 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1217 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1218 return true;
1219
1220 uri = uristart;
1221
1222 if (ParseIPv4address(uri))
1223 return true;
1224
1225 uri = uristart;
846978d7 1226
dd65d8c8
RN
1227 if (!bAllowAltEnding)
1228 return false;
1229 }
1230
1231 if(numPrefix <= 5 && ParseH16(uri))
1232 return true;
1233
1234 return true;
1235}
1236
1237bool wxURI::ParseIPvFuture(const wxChar*& uri)
1238{
1239 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
ce321570 1240 if (*++uri != wxT('v') || !IsHex(*++uri))
dd65d8c8
RN
1241 return false;
1242
1243 while (IsHex(*++uri)) {}
1244
ce321570 1245 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
dd65d8c8
RN
1246 return false;
1247
ce321570 1248 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
dd65d8c8
RN
1249
1250 return true;
1251}
1252
1253
1254// ---------------------------------------------------------------------------
ce321570
RN
1255// CharToHex
1256//
// Converts a character into a numeric hexadecimal value, or 0 if the
// passed in character is not a valid hex character
dd65d8c8
RN
1259// ---------------------------------------------------------------------------
1260
ce321570 1261//static
409a7ba7 1262wxChar wxURI::CharToHex(const wxChar& c)
dd65d8c8 1263{
1676a194
WS
1264 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1265 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1266 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
dd65d8c8 1267
846978d7 1268 return 0;
dd65d8c8
RN
1269}
1270
ce321570
RN
1271// ---------------------------------------------------------------------------
1272// IsXXX
1273//
1274// Returns true if the passed in character meets the criteria of the method
1275// ---------------------------------------------------------------------------
1276
dd65d8c8
RN
1277//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1278bool wxURI::IsUnreserved (const wxChar& c)
846978d7 1279{ return IsAlpha(c) || IsDigit(c) ||
ce321570
RN
1280 c == wxT('-') ||
1281 c == wxT('.') ||
1282 c == wxT('_') ||
1283 c == wxT('~') //tilde
846978d7 1284 ;
dd65d8c8
RN
1285}
1286
1287bool wxURI::IsReserved (const wxChar& c)
846978d7 1288{
dd65d8c8
RN
1289 return IsGenDelim(c) || IsSubDelim(c);
1290}
1291
1292//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1293bool wxURI::IsGenDelim (const wxChar& c)
1294{
ce321570
RN
1295 return c == wxT(':') ||
1296 c == wxT('/') ||
1297 c == wxT('?') ||
1298 c == wxT('#') ||
1299 c == wxT('[') ||
1300 c == wxT(']') ||
1301 c == wxT('@');
dd65d8c8
RN
1302}
1303
1304//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1305//! / "*" / "+" / "," / ";" / "="
1306bool wxURI::IsSubDelim (const wxChar& c)
1307{
ce321570
RN
1308 return c == wxT('!') ||
1309 c == wxT('$') ||
1310 c == wxT('&') ||
1311 c == wxT('\'') ||
1312 c == wxT('(') ||
1313 c == wxT(')') ||
1314 c == wxT('*') ||
1315 c == wxT('+') ||
1316 c == wxT(',') ||
1317 c == wxT(';') ||
846978d7 1318 c == wxT('=')
dd65d8c8
RN
1319 ;
1320}
1321
1322bool wxURI::IsHex(const wxChar& c)
ce321570 1323{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
dd65d8c8
RN
1324
1325bool wxURI::IsAlpha(const wxChar& c)
ce321570 1326{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
dd65d8c8
RN
1327
1328bool wxURI::IsDigit(const wxChar& c)
ce321570 1329{ return c >= wxT('0') && c <= wxT('9'); }
dd65d8c8
RN
1330
1331
dd65d8c8
RN
1332//end of uri.cpp
1333
1334
1335