]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
Applied patch by Johan van Zyl.
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
3// Purpose: Implementation of a uri parser
4// Author: Ryan Norton
5// Created: 10/26/04
6// RCS-ID: $Id$
7// Copyright: (c) 2004 Ryan Norton
8// Licence: wxWindows
9/////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// declarations
13// ===========================================================================
14
15// ---------------------------------------------------------------------------
16// headers
17// ---------------------------------------------------------------------------
18
19#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21#endif
22
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
30#include "wx/uri.h"
31
32// ---------------------------------------------------------------------------
33// definitions
34// ---------------------------------------------------------------------------
35
36IMPLEMENT_CLASS(wxURI, wxObject);
37
38// ===========================================================================
39// implementation
40// ===========================================================================
41
42// ---------------------------------------------------------------------------
43// utilities
44// ---------------------------------------------------------------------------
45
46// ---------------------------------------------------------------------------
47//
48// wxURI
49//
50// ---------------------------------------------------------------------------
51
52// ---------------------------------------------------------------------------
53// Constructors
54// ---------------------------------------------------------------------------
55
56wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57{
58}
846978d7 59
dd65d8c8
RN
60wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61{
62 Create(uri);
63}
64
60431236 65wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
dd65d8c8 66{
b60b2ec8 67 Assign(uri);
dd65d8c8
RN
68}
69
70// ---------------------------------------------------------------------------
71// Destructor and cleanup
72// ---------------------------------------------------------------------------
73
74wxURI::~wxURI()
75{
76 Clear();
77}
78
79void wxURI::Clear()
80{
4860d40d 81 m_scheme = m_userinfo = m_server = m_port = m_path =
525d8583 82 m_query = m_fragment = wxEmptyString;
dd65d8c8
RN
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87}
88
89// ---------------------------------------------------------------------------
90// Create
91//
846978d7 92// This creates the URI - all we do here is call the main parsing method
dd65d8c8
RN
93// ---------------------------------------------------------------------------
94
86470d43 95const wxChar* wxURI::Create(const wxString& uri)
846978d7 96{
dd65d8c8
RN
97 if (m_fields)
98 Clear();
99
846978d7
WS
100 return Parse(uri);
101}
dd65d8c8
RN
102
103// ---------------------------------------------------------------------------
ce321570 104// Escape Methods
dd65d8c8 105//
846978d7 106// TranslateEscape unencodes a 3 character URL escape sequence
ce321570 107//
dd65d8c8 108// Escape encodes an invalid URI character into a 3 character sequence
ce321570 109//
dd65d8c8
RN
110// IsEscape determines if the input string contains an escape sequence,
111// the input string itself is not moved; callers consume the three characters
ce321570
RN
112//
113// Unescape unencodes all 3 character URL escape sequences in a wxString
dd65d8c8
RN
114// ---------------------------------------------------------------------------
115
86470d43 116wxChar wxURI::TranslateEscape(const wxChar* s)
dd65d8c8 117{
6f0344c7 118 wxASSERT_MSG( IsHex(s[0]) && IsHex(s[1]), wxT("Invalid escape sequence!"));
8404931e 119
6f0344c7 120 return (wxChar)( CharToHex(s[0]) << 4 ) | CharToHex(s[1]);
dd65d8c8
RN
121}
122
86470d43
RN
123wxString wxURI::Unescape(const wxString& uri)
124{
125 wxString new_uri;
126
127 for(size_t i = 0; i < uri.length(); ++i)
128 {
129 if (uri[i] == wxT('%'))
130 {
131 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
132 i += 2;
133 }
d8d7193d
RN
134 else
135 new_uri += uri[i];
86470d43
RN
136 }
137
138 return new_uri;
139}
140
dd65d8c8
RN
141void wxURI::Escape(wxString& s, const wxChar& c)
142{
143 const wxChar* hdig = wxT("0123456789abcdef");
ce321570 144 s += wxT('%');
dd65d8c8 145 s += hdig[(c >> 4) & 15];
846978d7 146 s += hdig[c & 15];
dd65d8c8
RN
147}
148
149bool wxURI::IsEscape(const wxChar*& uri)
150{
ce321570
RN
151 // pct-encoded = "%" HEXDIG HEXDIG
152 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
dd65d8c8 153 return true;
dd65d8c8
RN
154 else
155 return false;
156}
157
4860d40d
RN
158// ---------------------------------------------------------------------------
159// GetUser
160// GetPassword
161//
162// Gets the username and password via the old URL method.
163// ---------------------------------------------------------------------------
164wxString wxURI::GetUser() const
165{
166 size_t dwPasswordPos = m_userinfo.find(':');
167
168 if (dwPasswordPos == wxString::npos)
169 dwPasswordPos = 0;
170
171 return m_userinfo(0, dwPasswordPos);
172}
173
174wxString wxURI::GetPassword() const
175{
176 size_t dwPasswordPos = m_userinfo.find(':');
177
178 if (dwPasswordPos == wxString::npos)
179 return wxT("");
180 else
181 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
182}
183
dd65d8c8 184// ---------------------------------------------------------------------------
86470d43 185// BuildURI
dd65d8c8 186//
846978d7 187// BuildURI() builds the entire URI into a useable
dd65d8c8 188// representation, including proper identification characters such as slashes
ce321570
RN
189//
190// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
191// the components that accept escape sequences
dd65d8c8
RN
192// ---------------------------------------------------------------------------
193
86470d43 194wxString wxURI::BuildURI() const
846978d7 195{
dd65d8c8
RN
196 wxString ret;
197
198 if (HasScheme())
199 ret = ret + m_scheme + wxT(":");
200
201 if (HasServer())
202 {
203 ret += wxT("//");
204
4860d40d
RN
205 if (HasUserInfo())
206 ret = ret + m_userinfo + wxT("@");
dd65d8c8
RN
207
208 ret += m_server;
209
210 if (HasPort())
211 ret = ret + wxT(":") + m_port;
212 }
213
214 ret += m_path;
215
216 if (HasQuery())
217 ret = ret + wxT("?") + m_query;
218
219 if (HasFragment())
220 ret = ret + wxT("#") + m_fragment;
221
222 return ret;
223}
224
86470d43
RN
225wxString wxURI::BuildUnescapedURI() const
226{
227 wxString ret;
228
229 if (HasScheme())
230 ret = ret + m_scheme + wxT(":");
231
232 if (HasServer())
233 {
234 ret += wxT("//");
235
4860d40d
RN
236 if (HasUserInfo())
237 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
86470d43
RN
238
239 if (m_hostType == wxURI_REGNAME)
240 ret += wxURI::Unescape(m_server);
241 else
242 ret += m_server;
243
244 if (HasPort())
245 ret = ret + wxT(":") + m_port;
246 }
247
248 ret += wxURI::Unescape(m_path);
249
250 if (HasQuery())
251 ret = ret + wxT("?") + wxURI::Unescape(m_query);
252
253 if (HasFragment())
254 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
255
256 return ret;
257}
258
dd65d8c8 259// ---------------------------------------------------------------------------
ce321570 260// Assignment
dd65d8c8
RN
261// ---------------------------------------------------------------------------
262
b60b2ec8
RN
263wxURI& wxURI::Assign(const wxURI& uri)
264{
265 //assign fields
266 m_fields = uri.m_fields;
267
268 //ref over components
269 m_scheme = uri.m_scheme;
4860d40d 270 m_userinfo = uri.m_userinfo;
b60b2ec8
RN
271 m_server = uri.m_server;
272 m_hostType = uri.m_hostType;
273 m_port = uri.m_port;
274 m_path = uri.m_path;
275 m_query = uri.m_query;
276 m_fragment = uri.m_fragment;
dd65d8c8
RN
277
278 return *this;
279}
280
ce321570
RN
281wxURI& wxURI::operator = (const wxURI& uri)
282{
283 return Assign(uri);
284}
285
b60b2ec8 286wxURI& wxURI::operator = (const wxString& string)
846978d7 287{
dd65d8c8
RN
288 Create(string);
289 return *this;
290}
291
ce321570
RN
292// ---------------------------------------------------------------------------
293// Comparison
294// ---------------------------------------------------------------------------
295
dd65d8c8 296bool wxURI::operator == (const wxURI& uri) const
846978d7 297{
dd65d8c8
RN
298 if (HasScheme())
299 {
300 if(m_scheme != uri.m_scheme)
301 return false;
302 }
303 else if (uri.HasScheme())
304 return false;
305
306
307 if (HasServer())
308 {
4860d40d 309 if (HasUserInfo())
dd65d8c8 310 {
4860d40d 311 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
312 return false;
313 }
4860d40d 314 else if (uri.HasUserInfo())
dd65d8c8
RN
315 return false;
316
317 if (m_server != uri.m_server ||
318 m_hostType != uri.m_hostType)
319 return false;
320
321 if (HasPort())
322 {
323 if(m_port != uri.m_port)
324 return false;
325 }
326 else if (uri.HasPort())
327 return false;
328 }
329 else if (uri.HasServer())
330 return false;
331
332
333 if (HasPath())
334 {
335 if(m_path != uri.m_path)
336 return false;
337 }
338 else if (uri.HasPath())
339 return false;
340
341 if (HasQuery())
342 {
343 if (m_query != uri.m_query)
344 return false;
345 }
346 else if (uri.HasQuery())
347 return false;
348
349 if (HasFragment())
350 {
351 if (m_fragment != uri.m_fragment)
352 return false;
353 }
354 else if (uri.HasFragment())
355 return false;
356
357 return true;
358}
359
360// ---------------------------------------------------------------------------
361// IsReference
362//
363// if there is no authority or scheme, it is a reference
364// ---------------------------------------------------------------------------
365
366bool wxURI::IsReference() const
367{ return !HasScheme() || !HasServer(); }
368
369// ---------------------------------------------------------------------------
370// Parse
371//
372// Master URI parsing method. Just calls the individual parsing methods
373//
374// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 375// URI-reference = URI / relative
dd65d8c8
RN
376// ---------------------------------------------------------------------------
377
378const wxChar* wxURI::Parse(const wxChar* uri)
379{
380 uri = ParseScheme(uri);
381 uri = ParseAuthority(uri);
382 uri = ParsePath(uri);
383 uri = ParseQuery(uri);
384 return ParseFragment(uri);
385}
386
387// ---------------------------------------------------------------------------
388// ParseXXX
389//
390// Individual parsers for each URI component
391// ---------------------------------------------------------------------------
392
393const wxChar* wxURI::ParseScheme(const wxChar* uri)
394{
395 wxASSERT(uri != NULL);
396
397 //copy of the uri - used for figuring out
398 //length of each component
399 const wxChar* uricopy = uri;
400
401 //Does the uri have a scheme (first character alpha)?
402 if (IsAlpha(*uri))
403 {
404 m_scheme += *uri++;
405
406 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 407 while (IsAlpha(*uri) || IsDigit(*uri) ||
ce321570
RN
408 *uri == wxT('+') ||
409 *uri == wxT('-') ||
846978d7
WS
410 *uri == wxT('.'))
411 {
412 m_scheme += *uri++;
dd65d8c8
RN
413 }
414
415 //valid scheme?
ce321570 416 if (*uri == wxT(':'))
846978d7 417 {
dd65d8c8
RN
418 //mark the scheme as valid
419 m_fields |= wxURI_SCHEME;
420
421 //move reference point up to input buffer
422 uricopy = ++uri;
423 }
846978d7 424 else
dd65d8c8 425 //relative uri with relative path reference
525d8583 426 m_scheme = wxEmptyString;
dd65d8c8 427 }
846978d7 428// else
dd65d8c8
RN
429 //relative uri with _possible_ relative path reference
430
431 return uricopy;
432}
433
434const wxChar* wxURI::ParseAuthority(const wxChar* uri)
435{
436 // authority = [ userinfo "@" ] host [ ":" port ]
846978d7 437 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
dd65d8c8
RN
438 {
439 uri += 2;
440
4860d40d 441 uri = ParseUserInfo(uri);
dd65d8c8
RN
442 uri = ParseServer(uri);
443 return ParsePort(uri);
444 }
445
446 return uri;
447}
448
4860d40d 449const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
dd65d8c8
RN
450{
451 wxASSERT(uri != NULL);
452
453 //copy of the uri - used for figuring out
454 //length of each component
455 const wxChar* uricopy = uri;
456
457 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
846978d7 458 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 459 {
24ca04e7 460 if(IsUnreserved(*uri) ||
ce321570 461 IsSubDelim(*uri) || *uri == wxT(':'))
4860d40d 462 m_userinfo += *uri++;
24ca04e7
VZ
463 else if (IsEscape(uri))
464 {
465 m_userinfo += *uri++;
466 m_userinfo += *uri++;
467 m_userinfo += *uri++;
468 }
dd65d8c8 469 else
4860d40d 470 Escape(m_userinfo, *uri++);
dd65d8c8
RN
471 }
472
ce321570 473 if(*uri == wxT('@'))
dd65d8c8
RN
474 {
475 //valid userinfo
4860d40d 476 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
477
478 uricopy = ++uri;
479 }
480 else
4860d40d 481 m_userinfo = wxEmptyString;
dd65d8c8
RN
482
483 return uricopy;
484}
485
486const wxChar* wxURI::ParseServer(const wxChar* uri)
487{
488 wxASSERT(uri != NULL);
489
490 //copy of the uri - used for figuring out
491 //length of each component
492 const wxChar* uricopy = uri;
493
494 // host = IP-literal / IPv4address / reg-name
495 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
ce321570 496 if (*uri == wxT('['))
dd65d8c8 497 {
e854db32
RN
498 ++uri; //some compilers don't support *&ing a ++*
499 if (ParseIPv6address(uri) && *uri == wxT(']'))
dd65d8c8
RN
500 {
501 ++uri;
502 m_hostType = wxURI_IPV6ADDRESS;
846978d7 503
dd65d8c8 504 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 505 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
506 theBuffer.SetLength(uri-uricopy);
507 }
508 else
509 {
510 uri = uricopy;
511
e854db32
RN
512 ++uri; //some compilers don't support *&ing a ++*
513 if (ParseIPvFuture(uri) && *uri == wxT(']'))
dd65d8c8
RN
514 {
515 ++uri;
846978d7
WS
516 m_hostType = wxURI_IPVFUTURE;
517
dd65d8c8 518 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 519 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
520 theBuffer.SetLength(uri-uricopy);
521 }
846978d7 522 else
dd65d8c8
RN
523 uri = uricopy;
524 }
525 }
846978d7 526 else
dd65d8c8
RN
527 {
528 if (ParseIPv4address(uri))
529 {
530 m_hostType = wxURI_IPV4ADDRESS;
531
532 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 533 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
534 theBuffer.SetLength(uri-uricopy);
535 }
846978d7 536 else
dd65d8c8
RN
537 uri = uricopy;
538 }
539
540 if(m_hostType == wxURI_REGNAME)
541 {
542 uri = uricopy;
543 // reg-name = *( unreserved / pct-encoded / sub-delims )
846978d7 544 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 545 {
24ca04e7
VZ
546 if(IsUnreserved(*uri) || IsSubDelim(*uri))
547 m_server += *uri++;
548 else if (IsEscape(uri))
549 {
dd65d8c8 550 m_server += *uri++;
24ca04e7
VZ
551 m_server += *uri++;
552 m_server += *uri++;
553 }
dd65d8c8
RN
554 else
555 Escape(m_server, *uri++);
846978d7 556 }
dd65d8c8
RN
557 }
558
559 //mark the server as valid
560 m_fields |= wxURI_SERVER;
561
562 return uri;
563}
564
846978d7 565
dd65d8c8
RN
566const wxChar* wxURI::ParsePort(const wxChar* uri)
567{
568 wxASSERT(uri != NULL);
569
570 // port = *DIGIT
ce321570 571 if(*uri == wxT(':'))
dd65d8c8
RN
572 {
573 ++uri;
846978d7 574 while(IsDigit(*uri))
dd65d8c8
RN
575 {
576 m_port += *uri++;
846978d7 577 }
dd65d8c8
RN
578
579 //mark the port as valid
580 m_fields |= wxURI_PORT;
581 }
582
583 return uri;
584}
585
8404931e 586const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
dd65d8c8
RN
587{
588 wxASSERT(uri != NULL);
589
590 //copy of the uri - used for figuring out
591 //length of each component
592 const wxChar* uricopy = uri;
593
594 /// hier-part = "//" authority path-abempty
595 /// / path-absolute
596 /// / path-rootless
597 /// / path-empty
598 ///
599 /// relative-part = "//" authority path-abempty
600 /// / path-absolute
601 /// / path-noscheme
602 /// / path-empty
603 ///
604 /// path-abempty = *( "/" segment )
605 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
606 /// path-noscheme = segment-nz-nc *( "/" segment )
607 /// path-rootless = segment-nz *( "/" segment )
608 /// path-empty = 0<pchar>
609 ///
610 /// segment = *pchar
611 /// segment-nz = 1*pchar
612 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
613 /// ; non-zero-length segment without any colon ":"
614 ///
615 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
ce321570 616 if (*uri == wxT('/'))
dd65d8c8
RN
617 {
618 m_path += *uri++;
619
846978d7
WS
620 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
621 {
24ca04e7 622 if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 623 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 624 m_path += *uri++;
24ca04e7
VZ
625 else if (IsEscape(uri))
626 {
627 m_path += *uri++;
628 m_path += *uri++;
629 m_path += *uri++;
630 }
846978d7
WS
631 else
632 Escape(m_path, *uri++);
dd65d8c8
RN
633 }
634
635 if (bNormalize)
636 {
637 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 638#if wxUSE_STL
2c09fb3b 639 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 640#endif
dd65d8c8
RN
641 Normalize(theBuffer, true);
642 theBuffer.SetLength(wxStrlen(theBuffer));
643 }
644 //mark the path as valid
645 m_fields |= wxURI_PATH;
646 }
647 else if(*uri) //Relative path
648 {
649 if (bReference)
650 {
651 //no colon allowed
846978d7 652 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 653 {
24ca04e7 654 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 655 *uri == wxT('@') || *uri == wxT('/'))
846978d7 656 m_path += *uri++;
24ca04e7
VZ
657 else if (IsEscape(uri))
658 {
659 m_path += *uri++;
660 m_path += *uri++;
661 m_path += *uri++;
662 }
846978d7
WS
663 else
664 Escape(m_path, *uri++);
dd65d8c8 665 }
846978d7 666 }
dd65d8c8
RN
667 else
668 {
846978d7 669 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 670 {
24ca04e7 671 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 672 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 673 m_path += *uri++;
24ca04e7
VZ
674 else if (IsEscape(uri))
675 {
676 m_path += *uri++;
677 m_path += *uri++;
678 m_path += *uri++;
679 }
846978d7
WS
680 else
681 Escape(m_path, *uri++);
dd65d8c8
RN
682 }
683 }
684
685 if (uri != uricopy)
846978d7 686 {
dd65d8c8
RN
687 if (bNormalize)
688 {
689 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 690#if wxUSE_STL
2c09fb3b 691 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 692#endif
dd65d8c8
RN
693 Normalize(theBuffer);
694 theBuffer.SetLength(wxStrlen(theBuffer));
695 }
696
697 //mark the path as valid
698 m_fields |= wxURI_PATH;
699 }
700 }
701
702 return uri;
703}
704
705
706const wxChar* wxURI::ParseQuery(const wxChar* uri)
707{
708 wxASSERT(uri != NULL);
709
710 // query = *( pchar / "/" / "?" )
ce321570 711 if (*uri == wxT('?'))
dd65d8c8
RN
712 {
713 ++uri;
ce321570 714 while(*uri && *uri != wxT('#'))
dd65d8c8 715 {
24ca04e7 716 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 717 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 718 m_query += *uri++;
24ca04e7
VZ
719 else if (IsEscape(uri))
720 {
721 m_query += *uri++;
722 m_query += *uri++;
723 m_query += *uri++;
724 }
dd65d8c8 725 else
846978d7 726 Escape(m_query, *uri++);
dd65d8c8
RN
727 }
728
729 //mark the server as valid
730 m_fields |= wxURI_QUERY;
731 }
732
733 return uri;
734}
735
736
737const wxChar* wxURI::ParseFragment(const wxChar* uri)
738{
739 wxASSERT(uri != NULL);
740
741 // fragment = *( pchar / "/" / "?" )
ce321570 742 if (*uri == wxT('#'))
dd65d8c8
RN
743 {
744 ++uri;
745 while(*uri)
746 {
24ca04e7 747 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 748 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 749 m_fragment += *uri++;
24ca04e7
VZ
750 else if (IsEscape(uri))
751 {
752 m_fragment += *uri++;
753 m_fragment += *uri++;
754 m_fragment += *uri++;
755 }
dd65d8c8 756 else
846978d7 757 Escape(m_fragment, *uri++);
dd65d8c8
RN
758 }
759
760 //mark the server as valid
761 m_fields |= wxURI_FRAGMENT;
762 }
763
764 return uri;
765}
766
767// ---------------------------------------------------------------------------
ce321570 768// Resolve
dd65d8c8 769//
ce321570 770// Builds missing components of this uri from a base uri
dd65d8c8 771//
ce321570
RN
772// A version of the algorithm outlined in the RFC is used here
773// (it is shown in comments)
774//
846978d7 775// Note that an empty URI inherits all components
dd65d8c8
RN
776// ---------------------------------------------------------------------------
777
8404931e 778void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 779{
846978d7 780 wxASSERT_MSG(!base.IsReference(),
dd65d8c8
RN
781 wxT("wxURI to inherit from must not be a reference!"));
782
ce321570 783 // If we arn't being strict, enable the older (pre-RFC2396)
dd65d8c8
RN
784 // loophole that allows this uri to inherit other
785 // properties from the base uri - even if the scheme
786 // is defined
8404931e
VZ
787 if ( !(flags & wxURI_STRICT) &&
788 HasScheme() && base.HasScheme() &&
789 m_scheme == base.m_scheme )
846978d7 790 {
dd65d8c8
RN
791 m_fields -= wxURI_SCHEME;
792 }
793
794
795 // Do nothing if this is an absolute wxURI
796 // if defined(R.scheme) then
797 // T.scheme = R.scheme;
798 // T.authority = R.authority;
799 // T.path = remove_dot_segments(R.path);
800 // T.query = R.query;
801 if (HasScheme())
802 {
803 return;
804 }
805
ea4daac4 806 //No scheme - inherit
dd65d8c8
RN
807 m_scheme = base.m_scheme;
808 m_fields |= wxURI_SCHEME;
809
810 // All we need to do for relative URIs with an
811 // authority component is just inherit the scheme
812 // if defined(R.authority) then
813 // T.authority = R.authority;
814 // T.path = remove_dot_segments(R.path);
815 // T.query = R.query;
816 if (HasServer())
817 {
818 return;
819 }
820
821 //No authority - inherit
4860d40d 822 if (base.HasUserInfo())
dd65d8c8 823 {
4860d40d
RN
824 m_userinfo = base.m_userinfo;
825 m_fields |= wxURI_USERINFO;
dd65d8c8 826 }
846978d7 827
dd65d8c8
RN
828 m_server = base.m_server;
829 m_hostType = base.m_hostType;
830 m_fields |= wxURI_SERVER;
846978d7 831
dd65d8c8
RN
832 if (base.HasPort())
833 {
834 m_port = base.m_port;
835 m_fields |= wxURI_PORT;
836 }
846978d7 837
dd65d8c8
RN
838
839 // Simple path inheritance from base
840 if (!HasPath())
841 {
842 // T.path = Base.path;
843 m_path = base.m_path;
844 m_fields |= wxURI_PATH;
846978d7 845
dd65d8c8
RN
846
847 // if defined(R.query) then
848 // T.query = R.query;
849 // else
850 // T.query = Base.query;
851 // endif;
852 if (!HasQuery())
853 {
854 m_query = base.m_query;
855 m_fields |= wxURI_QUERY;
856 }
857 }
858 else
859 {
860 // if (R.path starts-with "/") then
861 // T.path = remove_dot_segments(R.path);
862 // else
863 // T.path = merge(Base.path, R.path);
864 // T.path = remove_dot_segments(T.path);
865 // endif;
866 // T.query = R.query;
ce321570 867 if (m_path[0u] != wxT('/'))
dd65d8c8 868 {
ea4daac4 869 //Merge paths
dd65d8c8
RN
870 const wxChar* op = m_path.c_str();
871 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
872
873 //not a ending directory? move up
ce321570 874 if (base.m_path[0] && *(bp-1) != wxT('/'))
dd65d8c8
RN
875 UpTree(base.m_path, bp);
876
877 //normalize directories
846978d7 878 while(*op == wxT('.') && *(op+1) == wxT('.') &&
ce321570 879 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
dd65d8c8
RN
880 {
881 UpTree(base.m_path, bp);
882
883 if (*(op+2) == '\0')
884 op += 2;
885 else
886 op += 3;
887 }
888
846978d7 889 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
ba5a47ae 890 m_path.substr((op - m_path.c_str()), m_path.Length());
dd65d8c8
RN
891 }
892 }
ce321570 893
846978d7 894 //T.fragment = R.fragment;
dd65d8c8
RN
895}
896
897// ---------------------------------------------------------------------------
846978d7 898// UpTree
dd65d8c8 899//
ce321570 900// Moves a URI path up a directory
dd65d8c8
RN
901// ---------------------------------------------------------------------------
902
ce321570 903//static
dd65d8c8
RN
904void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
905{
ce321570 906 if (uri != uristart && *(uri-1) == wxT('/'))
dd65d8c8
RN
907 {
908 uri -= 2;
909 }
846978d7 910
dd65d8c8
RN
911 for(;uri != uristart; --uri)
912 {
ce321570 913 if (*uri == wxT('/'))
dd65d8c8
RN
914 {
915 ++uri;
916 break;
917 }
918 }
919
920 //!!!TODO:HACK!!!//
ce321570 921 if (uri == uristart && *uri == wxT('/'))
dd65d8c8
RN
922 ++uri;
923 //!!!//
924}
925
ce321570
RN
926// ---------------------------------------------------------------------------
927// Normalize
928//
929// Normalizes directories in-place
930//
931// I.E. ./ and . are ignored
932//
933// ../ and .. are removed if a directory is before it, along
934// with that directory (leading .. and ../ are kept)
935// ---------------------------------------------------------------------------
936
937//static
8404931e 938void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
dd65d8c8
RN
939{
940 wxChar* cp = s;
941 wxChar* bp = s;
942
ce321570 943 if(s[0] == wxT('/'))
dd65d8c8
RN
944 ++bp;
945
946 while(*cp)
947 {
ce321570
RN
948 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
949 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
950 {
951 //. _or_ ./ - ignore
952 if (*(cp+1) == '\0')
953 cp += 1;
954 else
955 cp += 2;
956 }
846978d7 957 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
ce321570
RN
958 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
959 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
960 {
961 //.. _or_ ../ - go up the tree
962 if (s != bp)
963 {
964 UpTree((const wxChar*)bp, (const wxChar*&)s);
965
966 if (*(cp+2) == '\0')
967 cp += 2;
968 else
969 cp += 3;
970 }
971 else if (!bIgnoreLeads)
972
973 {
974 *bp++ = *cp++;
975 *bp++ = *cp++;
976 if (*cp)
977 *bp++ = *cp++;
978
979 s = bp;
980 }
981 else
982 {
983 if (*(cp+2) == '\0')
984 cp += 2;
985 else
986 cp += 3;
987 }
988 }
989 else
846978d7 990 *s++ = *cp++;
dd65d8c8
RN
991 }
992
993 *s = '\0';
994}
995
996// ---------------------------------------------------------------------------
ce321570
RN
997// ParseH16
998//
999// Parses 1 to 4 hex values. Returns true if the first character of the input
846978d7 1000// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
1001// the input string back to its original position on failure.
1002// ---------------------------------------------------------------------------
1003
1004bool wxURI::ParseH16(const wxChar*& uri)
1005{
1006 // h16 = 1*4HEXDIG
1007 if(!IsHex(*++uri))
1008 return false;
1009
1010 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1011 ++uri;
1012
1013 return true;
1014}
1015
1016// ---------------------------------------------------------------------------
1017// ParseIPXXX
1018//
846978d7
WS
1019// Parses a certain version of an IP address and moves the input string past
1020// it. Returns true if the input string contains the proper version of an ip
1021// address. It is the caller's responsibility to move the input string back
ce321570 1022// to its original position on failure.
dd65d8c8
RN
1023// ---------------------------------------------------------------------------
1024
1025bool wxURI::ParseIPv4address(const wxChar*& uri)
1026{
1027 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1028 //
1029 //dec-octet = DIGIT ; 0-9
1030 // / %x31-39 DIGIT ; 10-99
1031 // / "1" 2DIGIT ; 100-199
1032 // / "2" %x30-34 DIGIT ; 200-249
1033 // / "25" %x30-35 ; 250-255
1034 size_t iIPv4 = 0;
1035 if (IsDigit(*uri))
1036 {
1037 ++iIPv4;
1038
846978d7 1039
dd65d8c8
RN
1040 //each ip part must be between 0-255 (dupe of version in for loop)
1041 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1042 //100 or less (note !)
846978d7
WS
1043 !( (*(uri-2) < wxT('2')) ||
1044 //240 or less
1045 (*(uri-2) == wxT('2') &&
ce321570 1046 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1047 )
1048 )
1049 )
1050 {
1051 return false;
1052 }
1053
1054 if(IsDigit(*uri))++uri;
1055
1056 //compilers should unroll this loop
1057 for(; iIPv4 < 4; ++iIPv4)
1058 {
ce321570 1059 if (*uri != wxT('.') || !IsDigit(*++uri))
dd65d8c8
RN
1060 break;
1061
1062 //each ip part must be between 0-255
1063 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1064 //100 or less (note !)
846978d7
WS
1065 !( (*(uri-2) < wxT('2')) ||
1066 //240 or less
1067 (*(uri-2) == wxT('2') &&
ce321570 1068 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1069 )
1070 )
1071 )
1072 {
1073 return false;
1074 }
1075 if(IsDigit(*uri))++uri;
1076 }
1077 }
1078 return iIPv4 == 4;
1079}
1080
dd65d8c8
RN
1081bool wxURI::ParseIPv6address(const wxChar*& uri)
1082{
1083 // IPv6address = 6( h16 ":" ) ls32
1084 // / "::" 5( h16 ":" ) ls32
1085 // / [ h16 ] "::" 4( h16 ":" ) ls32
1086 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1087 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1088 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1089 // / [ *4( h16 ":" ) h16 ] "::" ls32
1090 // / [ *5( h16 ":" ) h16 ] "::" h16
1091 // / [ *6( h16 ":" ) h16 ] "::"
1092
1093 size_t numPrefix = 0,
1094 maxPostfix;
1095
1096 bool bEndHex = false;
1097
1098 for( ; numPrefix < 6; ++numPrefix)
1099 {
1100 if(!ParseH16(uri))
1101 {
1102 --uri;
1103 bEndHex = true;
1104 break;
1105 }
846978d7 1106
ce321570 1107 if(*uri != wxT(':'))
dd65d8c8
RN
1108 {
1109 break;
1110 }
1111 }
1112
1113 if(!bEndHex && !ParseH16(uri))
1114 {
1115 --uri;
1116
1117 if (numPrefix)
1118 return false;
1119
ce321570 1120 if (*uri == wxT(':'))
dd65d8c8 1121 {
ce321570 1122 if (*++uri != wxT(':'))
dd65d8c8
RN
1123 return false;
1124
1125 maxPostfix = 5;
1126 }
1127 else
1128 maxPostfix = 6;
1129 }
1130 else
1131 {
ce321570 1132 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
dd65d8c8
RN
1133 {
1134 if (numPrefix != 6)
1135 return false;
1136
ce321570 1137 while (*--uri != wxT(':')) {}
dd65d8c8
RN
1138 ++uri;
1139
1140 const wxChar* uristart = uri;
1141 //parse ls32
1142 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1143 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1144 return true;
1145
1146 uri = uristart;
1147
1148 if (ParseIPv4address(uri))
1149 return true;
1150 else
1151 return false;
1152 }
1153 else
1154 {
1155 uri += 2;
846978d7 1156
dd65d8c8
RN
1157 if (numPrefix > 3)
1158 maxPostfix = 0;
1159 else
1160 maxPostfix = 4 - numPrefix;
1161 }
1162 }
1163
1164 bool bAllowAltEnding = maxPostfix == 0;
1165
1166 for(; maxPostfix != 0; --maxPostfix)
1167 {
ce321570 1168 if(!ParseH16(uri) || *uri != wxT(':'))
dd65d8c8
RN
1169 return false;
1170 }
1171
1172 if(numPrefix <= 4)
1173 {
1174 const wxChar* uristart = uri;
1175 //parse ls32
1176 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1177 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1178 return true;
1179
1180 uri = uristart;
1181
1182 if (ParseIPv4address(uri))
1183 return true;
1184
1185 uri = uristart;
846978d7 1186
dd65d8c8
RN
1187 if (!bAllowAltEnding)
1188 return false;
1189 }
1190
1191 if(numPrefix <= 5 && ParseH16(uri))
1192 return true;
1193
1194 return true;
1195}
1196
1197bool wxURI::ParseIPvFuture(const wxChar*& uri)
1198{
1199 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
ce321570 1200 if (*++uri != wxT('v') || !IsHex(*++uri))
dd65d8c8
RN
1201 return false;
1202
1203 while (IsHex(*++uri)) {}
1204
ce321570 1205 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
dd65d8c8
RN
1206 return false;
1207
ce321570 1208 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
dd65d8c8
RN
1209
1210 return true;
1211}
1212
1213
1214// ---------------------------------------------------------------------------
ce321570
RN
1215// CharToHex
1216//
846978d7 1217// Converts a character into a numeric hexidecimal value, or 0 if the
ce321570 1218// passed in character is not a valid hex character
dd65d8c8
RN
1219// ---------------------------------------------------------------------------
1220
ce321570 1221//static
409a7ba7 1222wxChar wxURI::CharToHex(const wxChar& c)
dd65d8c8 1223{
1676a194
WS
1224 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1225 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1226 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
dd65d8c8 1227
846978d7 1228 return 0;
dd65d8c8
RN
1229}
1230
ce321570
RN
1231// ---------------------------------------------------------------------------
1232// IsXXX
1233//
1234// Returns true if the passed in character meets the criteria of the method
1235// ---------------------------------------------------------------------------
1236
dd65d8c8
RN
1237//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1238bool wxURI::IsUnreserved (const wxChar& c)
846978d7 1239{ return IsAlpha(c) || IsDigit(c) ||
ce321570
RN
1240 c == wxT('-') ||
1241 c == wxT('.') ||
1242 c == wxT('_') ||
1243 c == wxT('~') //tilde
846978d7 1244 ;
dd65d8c8
RN
1245}
1246
1247bool wxURI::IsReserved (const wxChar& c)
846978d7 1248{
dd65d8c8
RN
1249 return IsGenDelim(c) || IsSubDelim(c);
1250}
1251
1252//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1253bool wxURI::IsGenDelim (const wxChar& c)
1254{
ce321570
RN
1255 return c == wxT(':') ||
1256 c == wxT('/') ||
1257 c == wxT('?') ||
1258 c == wxT('#') ||
1259 c == wxT('[') ||
1260 c == wxT(']') ||
1261 c == wxT('@');
dd65d8c8
RN
1262}
1263
1264//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1265//! / "*" / "+" / "," / ";" / "="
1266bool wxURI::IsSubDelim (const wxChar& c)
1267{
ce321570
RN
1268 return c == wxT('!') ||
1269 c == wxT('$') ||
1270 c == wxT('&') ||
1271 c == wxT('\'') ||
1272 c == wxT('(') ||
1273 c == wxT(')') ||
1274 c == wxT('*') ||
1275 c == wxT('+') ||
1276 c == wxT(',') ||
1277 c == wxT(';') ||
846978d7 1278 c == wxT('=')
dd65d8c8
RN
1279 ;
1280}
1281
1282bool wxURI::IsHex(const wxChar& c)
ce321570 1283{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
dd65d8c8
RN
1284
1285bool wxURI::IsAlpha(const wxChar& c)
ce321570 1286{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
dd65d8c8
RN
1287
1288bool wxURI::IsDigit(const wxChar& c)
ce321570 1289{ return c >= wxT('0') && c <= wxT('9'); }
dd65d8c8
RN
1290
1291
1292// ---------------------------------------------------------------------------
1293//
00a1d2e0 1294// wxURL Compatibility
dd65d8c8 1295//
dd65d8c8
RN
1296// ---------------------------------------------------------------------------
1297
1298#if wxUSE_URL
1299
86470d43
RN
1300#if WXWIN_COMPATIBILITY_2_4
1301
dd65d8c8
RN
1302#include "wx/url.h"
1303
a6fb8636
WS
1304wxString wxURL::GetProtocolName() const
1305{
1306 return m_scheme;
1307}
1308
1309wxString wxURL::GetHostName() const
1310{
1311 return m_server;
1312}
1313
1314wxString wxURL::GetPath() const
1315{
1316 return m_path;
1317}
1318
997ba01b
RN
1319//Note that this old code really doesn't convert to a URI that well and looks
1320//more like a dirty hack than anything else...
1321
1322wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
dd65d8c8 1323{
997ba01b
RN
1324 wxString out_str;
1325 wxString hexa_code;
1326 size_t i;
1327
1328 for (i = 0; i < uri.Len(); i++)
1329 {
1330 wxChar c = uri.GetChar(i);
1331
1332 if (c == wxT(' '))
1333 {
1334 // GRG, Apr/2000: changed to "%20" instead of '+'
1335
1336 out_str += wxT("%20");
1337 }
1338 else
1339 {
1340 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1341 //
1342 // - Alphanumeric characters are never escaped
1343 // - Unreserved marks are never escaped
1344 // - Delimiters must be escaped if they appear within a component
1345 // but not if they are used to separate components. Here we have
1346 // no clear way to distinguish between these two cases, so they
1347 // are escaped unless they are passed in the 'delims' parameter
1348 // (allowed delimiters).
1349
1350 static const wxChar marks[] = wxT("-_.!~*()'");
1351
1352 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1353 {
1354 hexa_code.Printf(wxT("%%%02X"), c);
1355 out_str += hexa_code;
1356 }
1357 else
1358 {
1359 out_str += c;
1360 }
1361 }
1362 }
1363
1364 return out_str;
dd65d8c8
RN
1365}
1366
1367wxString wxURL::ConvertFromURI(const wxString& uri)
1368{
86470d43 1369 return wxURI::Unescape(uri);
dd65d8c8
RN
1370}
1371
86470d43
RN
1372#endif //WXWIN_COMPATIBILITY_2_4
1373
dd65d8c8
RN
1374#endif //wxUSE_URL
1375
1376//end of uri.cpp
1377
1378
1379