]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
compilation test for Unicode build
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
3// Purpose: Implementation of a uri parser
4// Author: Ryan Norton
5// Created: 10/26/04
6// RCS-ID: $Id$
7// Copyright: (c) 2004 Ryan Norton
8// Licence: wxWindows
9/////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// declarations
13// ===========================================================================
14
15// ---------------------------------------------------------------------------
16// headers
17// ---------------------------------------------------------------------------
18
dd65d8c8
RN
19// For compilers that support precompilation, includes "wx.h".
20#include "wx/wxprec.h"
21
22#ifdef __BORLANDC__
23 #pragma hdrstop
24#endif
25
26#include "wx/uri.h"
27
28// ---------------------------------------------------------------------------
29// definitions
30// ---------------------------------------------------------------------------
31
4115960d 32IMPLEMENT_CLASS(wxURI, wxObject)
dd65d8c8
RN
33
34// ===========================================================================
35// implementation
36// ===========================================================================
37
38// ---------------------------------------------------------------------------
39// utilities
40// ---------------------------------------------------------------------------
41
42// ---------------------------------------------------------------------------
43//
44// wxURI
45//
46// ---------------------------------------------------------------------------
47
48// ---------------------------------------------------------------------------
49// Constructors
50// ---------------------------------------------------------------------------
51
52wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
53{
54}
846978d7 55
dd65d8c8
RN
56wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
57{
58 Create(uri);
59}
60
60431236 61wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
dd65d8c8 62{
b60b2ec8 63 Assign(uri);
dd65d8c8
RN
64}
65
66// ---------------------------------------------------------------------------
67// Destructor and cleanup
68// ---------------------------------------------------------------------------
69
70wxURI::~wxURI()
71{
72 Clear();
73}
74
75void wxURI::Clear()
76{
4860d40d 77 m_scheme = m_userinfo = m_server = m_port = m_path =
525d8583 78 m_query = m_fragment = wxEmptyString;
dd65d8c8
RN
79
80 m_hostType = wxURI_REGNAME;
81
82 m_fields = 0;
83}
84
85// ---------------------------------------------------------------------------
86// Create
87//
846978d7 88// This creates the URI - all we do here is call the main parsing method
dd65d8c8
RN
89// ---------------------------------------------------------------------------
90
86470d43 91const wxChar* wxURI::Create(const wxString& uri)
846978d7 92{
dd65d8c8
RN
93 if (m_fields)
94 Clear();
95
846978d7
WS
96 return Parse(uri);
97}
dd65d8c8
RN
98
99// ---------------------------------------------------------------------------
ce321570 100// Escape Methods
dd65d8c8 101//
846978d7 102// TranslateEscape unencodes a 3 character URL escape sequence
ce321570 103//
dd65d8c8 104// Escape encodes an invalid URI character into a 3 character sequence
ce321570 105//
dd65d8c8
RN
// IsEscape determines if the input string starts with an escape sequence
// ("%" followed by two hex digits); it does not advance the input string
ce321570
RN
108//
109// Unescape unencodes all 3 character URL escape sequences in a wxString
dd65d8c8
RN
110// ---------------------------------------------------------------------------
111
86470d43 112wxChar wxURI::TranslateEscape(const wxChar* s)
dd65d8c8 113{
6f0344c7 114 wxASSERT_MSG( IsHex(s[0]) && IsHex(s[1]), wxT("Invalid escape sequence!"));
8404931e 115
17a1ebd1 116 return wx_truncate_cast(wxChar, (CharToHex(s[0]) << 4 ) | CharToHex(s[1]));
dd65d8c8
RN
117}
118
86470d43
RN
119wxString wxURI::Unescape(const wxString& uri)
120{
121 wxString new_uri;
122
123 for(size_t i = 0; i < uri.length(); ++i)
124 {
125 if (uri[i] == wxT('%'))
126 {
127 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
128 i += 2;
129 }
d8d7193d
RN
130 else
131 new_uri += uri[i];
86470d43
RN
132 }
133
134 return new_uri;
135}
136
dd65d8c8
RN
137void wxURI::Escape(wxString& s, const wxChar& c)
138{
139 const wxChar* hdig = wxT("0123456789abcdef");
ce321570 140 s += wxT('%');
dd65d8c8 141 s += hdig[(c >> 4) & 15];
846978d7 142 s += hdig[c & 15];
dd65d8c8
RN
143}
144
145bool wxURI::IsEscape(const wxChar*& uri)
146{
ce321570
RN
147 // pct-encoded = "%" HEXDIG HEXDIG
148 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
dd65d8c8 149 return true;
dd65d8c8
RN
150 else
151 return false;
152}
153
4860d40d
RN
154// ---------------------------------------------------------------------------
155// GetUser
156// GetPassword
157//
158// Gets the username and password via the old URL method.
159// ---------------------------------------------------------------------------
160wxString wxURI::GetUser() const
161{
162 size_t dwPasswordPos = m_userinfo.find(':');
163
164 if (dwPasswordPos == wxString::npos)
165 dwPasswordPos = 0;
166
167 return m_userinfo(0, dwPasswordPos);
168}
169
170wxString wxURI::GetPassword() const
171{
172 size_t dwPasswordPos = m_userinfo.find(':');
173
174 if (dwPasswordPos == wxString::npos)
175 return wxT("");
176 else
177 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
178}
179
dd65d8c8 180// ---------------------------------------------------------------------------
86470d43 181// BuildURI
dd65d8c8 182//
846978d7 183// BuildURI() builds the entire URI into a useable
dd65d8c8 184// representation, including proper identification characters such as slashes
ce321570
RN
185//
186// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
187// the components that accept escape sequences
dd65d8c8
RN
188// ---------------------------------------------------------------------------
189
86470d43 190wxString wxURI::BuildURI() const
846978d7 191{
dd65d8c8
RN
192 wxString ret;
193
194 if (HasScheme())
195 ret = ret + m_scheme + wxT(":");
196
197 if (HasServer())
198 {
199 ret += wxT("//");
200
4860d40d
RN
201 if (HasUserInfo())
202 ret = ret + m_userinfo + wxT("@");
dd65d8c8
RN
203
204 ret += m_server;
205
206 if (HasPort())
207 ret = ret + wxT(":") + m_port;
208 }
209
210 ret += m_path;
211
212 if (HasQuery())
213 ret = ret + wxT("?") + m_query;
214
215 if (HasFragment())
216 ret = ret + wxT("#") + m_fragment;
217
218 return ret;
219}
220
86470d43
RN
221wxString wxURI::BuildUnescapedURI() const
222{
223 wxString ret;
224
225 if (HasScheme())
226 ret = ret + m_scheme + wxT(":");
227
228 if (HasServer())
229 {
230 ret += wxT("//");
231
4860d40d
RN
232 if (HasUserInfo())
233 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
86470d43
RN
234
235 if (m_hostType == wxURI_REGNAME)
236 ret += wxURI::Unescape(m_server);
237 else
238 ret += m_server;
239
240 if (HasPort())
241 ret = ret + wxT(":") + m_port;
242 }
243
244 ret += wxURI::Unescape(m_path);
245
246 if (HasQuery())
247 ret = ret + wxT("?") + wxURI::Unescape(m_query);
248
249 if (HasFragment())
250 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
251
252 return ret;
253}
254
dd65d8c8 255// ---------------------------------------------------------------------------
ce321570 256// Assignment
dd65d8c8
RN
257// ---------------------------------------------------------------------------
258
b60b2ec8
RN
259wxURI& wxURI::Assign(const wxURI& uri)
260{
261 //assign fields
262 m_fields = uri.m_fields;
263
264 //ref over components
265 m_scheme = uri.m_scheme;
4860d40d 266 m_userinfo = uri.m_userinfo;
b60b2ec8
RN
267 m_server = uri.m_server;
268 m_hostType = uri.m_hostType;
269 m_port = uri.m_port;
270 m_path = uri.m_path;
271 m_query = uri.m_query;
272 m_fragment = uri.m_fragment;
dd65d8c8
RN
273
274 return *this;
275}
276
ce321570
RN
277wxURI& wxURI::operator = (const wxURI& uri)
278{
279 return Assign(uri);
280}
281
b60b2ec8 282wxURI& wxURI::operator = (const wxString& string)
846978d7 283{
dd65d8c8
RN
284 Create(string);
285 return *this;
286}
287
ce321570
RN
288// ---------------------------------------------------------------------------
289// Comparison
290// ---------------------------------------------------------------------------
291
dd65d8c8 292bool wxURI::operator == (const wxURI& uri) const
846978d7 293{
dd65d8c8
RN
294 if (HasScheme())
295 {
296 if(m_scheme != uri.m_scheme)
297 return false;
298 }
299 else if (uri.HasScheme())
300 return false;
301
302
303 if (HasServer())
304 {
4860d40d 305 if (HasUserInfo())
dd65d8c8 306 {
4860d40d 307 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
308 return false;
309 }
4860d40d 310 else if (uri.HasUserInfo())
dd65d8c8
RN
311 return false;
312
313 if (m_server != uri.m_server ||
314 m_hostType != uri.m_hostType)
315 return false;
316
317 if (HasPort())
318 {
319 if(m_port != uri.m_port)
320 return false;
321 }
322 else if (uri.HasPort())
323 return false;
324 }
325 else if (uri.HasServer())
326 return false;
327
328
329 if (HasPath())
330 {
331 if(m_path != uri.m_path)
332 return false;
333 }
334 else if (uri.HasPath())
335 return false;
336
337 if (HasQuery())
338 {
339 if (m_query != uri.m_query)
340 return false;
341 }
342 else if (uri.HasQuery())
343 return false;
344
345 if (HasFragment())
346 {
347 if (m_fragment != uri.m_fragment)
348 return false;
349 }
350 else if (uri.HasFragment())
351 return false;
352
353 return true;
354}
355
356// ---------------------------------------------------------------------------
357// IsReference
358//
359// if there is no authority or scheme, it is a reference
360// ---------------------------------------------------------------------------
361
362bool wxURI::IsReference() const
363{ return !HasScheme() || !HasServer(); }
364
365// ---------------------------------------------------------------------------
366// Parse
367//
368// Master URI parsing method. Just calls the individual parsing methods
369//
370// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 371// URI-reference = URI / relative
dd65d8c8
RN
372// ---------------------------------------------------------------------------
373
374const wxChar* wxURI::Parse(const wxChar* uri)
375{
376 uri = ParseScheme(uri);
377 uri = ParseAuthority(uri);
378 uri = ParsePath(uri);
379 uri = ParseQuery(uri);
380 return ParseFragment(uri);
381}
382
383// ---------------------------------------------------------------------------
384// ParseXXX
385//
386// Individual parsers for each URI component
387// ---------------------------------------------------------------------------
388
389const wxChar* wxURI::ParseScheme(const wxChar* uri)
390{
391 wxASSERT(uri != NULL);
392
393 //copy of the uri - used for figuring out
394 //length of each component
395 const wxChar* uricopy = uri;
396
397 //Does the uri have a scheme (first character alpha)?
398 if (IsAlpha(*uri))
399 {
400 m_scheme += *uri++;
401
402 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 403 while (IsAlpha(*uri) || IsDigit(*uri) ||
ce321570
RN
404 *uri == wxT('+') ||
405 *uri == wxT('-') ||
846978d7
WS
406 *uri == wxT('.'))
407 {
408 m_scheme += *uri++;
dd65d8c8
RN
409 }
410
411 //valid scheme?
ce321570 412 if (*uri == wxT(':'))
846978d7 413 {
dd65d8c8
RN
414 //mark the scheme as valid
415 m_fields |= wxURI_SCHEME;
416
417 //move reference point up to input buffer
418 uricopy = ++uri;
419 }
846978d7 420 else
dd65d8c8 421 //relative uri with relative path reference
525d8583 422 m_scheme = wxEmptyString;
dd65d8c8 423 }
846978d7 424// else
dd65d8c8
RN
425 //relative uri with _possible_ relative path reference
426
427 return uricopy;
428}
429
430const wxChar* wxURI::ParseAuthority(const wxChar* uri)
431{
432 // authority = [ userinfo "@" ] host [ ":" port ]
846978d7 433 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
dd65d8c8
RN
434 {
435 uri += 2;
436
4860d40d 437 uri = ParseUserInfo(uri);
dd65d8c8
RN
438 uri = ParseServer(uri);
439 return ParsePort(uri);
440 }
441
442 return uri;
443}
444
4860d40d 445const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
dd65d8c8
RN
446{
447 wxASSERT(uri != NULL);
448
449 //copy of the uri - used for figuring out
450 //length of each component
451 const wxChar* uricopy = uri;
452
453 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
846978d7 454 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 455 {
24ca04e7 456 if(IsUnreserved(*uri) ||
ce321570 457 IsSubDelim(*uri) || *uri == wxT(':'))
4860d40d 458 m_userinfo += *uri++;
24ca04e7
VZ
459 else if (IsEscape(uri))
460 {
461 m_userinfo += *uri++;
462 m_userinfo += *uri++;
463 m_userinfo += *uri++;
464 }
dd65d8c8 465 else
4860d40d 466 Escape(m_userinfo, *uri++);
dd65d8c8
RN
467 }
468
ce321570 469 if(*uri == wxT('@'))
dd65d8c8
RN
470 {
471 //valid userinfo
4860d40d 472 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
473
474 uricopy = ++uri;
475 }
476 else
4860d40d 477 m_userinfo = wxEmptyString;
dd65d8c8
RN
478
479 return uricopy;
480}
481
482const wxChar* wxURI::ParseServer(const wxChar* uri)
483{
484 wxASSERT(uri != NULL);
485
486 //copy of the uri - used for figuring out
487 //length of each component
488 const wxChar* uricopy = uri;
489
490 // host = IP-literal / IPv4address / reg-name
491 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
ce321570 492 if (*uri == wxT('['))
dd65d8c8 493 {
e854db32
RN
494 ++uri; //some compilers don't support *&ing a ++*
495 if (ParseIPv6address(uri) && *uri == wxT(']'))
dd65d8c8
RN
496 {
497 ++uri;
498 m_hostType = wxURI_IPV6ADDRESS;
846978d7 499
dd65d8c8 500 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 501 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
502 theBuffer.SetLength(uri-uricopy);
503 }
504 else
505 {
506 uri = uricopy;
507
e854db32
RN
508 ++uri; //some compilers don't support *&ing a ++*
509 if (ParseIPvFuture(uri) && *uri == wxT(']'))
dd65d8c8
RN
510 {
511 ++uri;
846978d7
WS
512 m_hostType = wxURI_IPVFUTURE;
513
dd65d8c8 514 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 515 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
516 theBuffer.SetLength(uri-uricopy);
517 }
846978d7 518 else
dd65d8c8
RN
519 uri = uricopy;
520 }
521 }
846978d7 522 else
dd65d8c8
RN
523 {
524 if (ParseIPv4address(uri))
525 {
526 m_hostType = wxURI_IPV4ADDRESS;
527
528 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 529 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
530 theBuffer.SetLength(uri-uricopy);
531 }
846978d7 532 else
dd65d8c8
RN
533 uri = uricopy;
534 }
535
536 if(m_hostType == wxURI_REGNAME)
537 {
538 uri = uricopy;
539 // reg-name = *( unreserved / pct-encoded / sub-delims )
846978d7 540 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 541 {
24ca04e7
VZ
542 if(IsUnreserved(*uri) || IsSubDelim(*uri))
543 m_server += *uri++;
544 else if (IsEscape(uri))
545 {
dd65d8c8 546 m_server += *uri++;
24ca04e7
VZ
547 m_server += *uri++;
548 m_server += *uri++;
549 }
dd65d8c8
RN
550 else
551 Escape(m_server, *uri++);
846978d7 552 }
dd65d8c8
RN
553 }
554
555 //mark the server as valid
556 m_fields |= wxURI_SERVER;
557
558 return uri;
559}
560
846978d7 561
dd65d8c8
RN
562const wxChar* wxURI::ParsePort(const wxChar* uri)
563{
564 wxASSERT(uri != NULL);
565
566 // port = *DIGIT
ce321570 567 if(*uri == wxT(':'))
dd65d8c8
RN
568 {
569 ++uri;
846978d7 570 while(IsDigit(*uri))
dd65d8c8
RN
571 {
572 m_port += *uri++;
846978d7 573 }
dd65d8c8
RN
574
575 //mark the port as valid
576 m_fields |= wxURI_PORT;
577 }
578
579 return uri;
580}
581
8404931e 582const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
dd65d8c8
RN
583{
584 wxASSERT(uri != NULL);
585
586 //copy of the uri - used for figuring out
587 //length of each component
588 const wxChar* uricopy = uri;
589
590 /// hier-part = "//" authority path-abempty
591 /// / path-absolute
592 /// / path-rootless
593 /// / path-empty
594 ///
595 /// relative-part = "//" authority path-abempty
596 /// / path-absolute
597 /// / path-noscheme
598 /// / path-empty
599 ///
600 /// path-abempty = *( "/" segment )
601 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
602 /// path-noscheme = segment-nz-nc *( "/" segment )
603 /// path-rootless = segment-nz *( "/" segment )
604 /// path-empty = 0<pchar>
605 ///
606 /// segment = *pchar
607 /// segment-nz = 1*pchar
608 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
609 /// ; non-zero-length segment without any colon ":"
610 ///
611 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
ce321570 612 if (*uri == wxT('/'))
dd65d8c8
RN
613 {
614 m_path += *uri++;
615
846978d7
WS
616 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
617 {
24ca04e7 618 if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 619 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 620 m_path += *uri++;
24ca04e7
VZ
621 else if (IsEscape(uri))
622 {
623 m_path += *uri++;
624 m_path += *uri++;
625 m_path += *uri++;
626 }
846978d7
WS
627 else
628 Escape(m_path, *uri++);
dd65d8c8
RN
629 }
630
631 if (bNormalize)
632 {
633 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 634#if wxUSE_STL
2c09fb3b 635 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 636#endif
dd65d8c8
RN
637 Normalize(theBuffer, true);
638 theBuffer.SetLength(wxStrlen(theBuffer));
639 }
640 //mark the path as valid
641 m_fields |= wxURI_PATH;
642 }
643 else if(*uri) //Relative path
644 {
645 if (bReference)
646 {
647 //no colon allowed
846978d7 648 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 649 {
24ca04e7 650 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 651 *uri == wxT('@') || *uri == wxT('/'))
846978d7 652 m_path += *uri++;
24ca04e7
VZ
653 else if (IsEscape(uri))
654 {
655 m_path += *uri++;
656 m_path += *uri++;
657 m_path += *uri++;
658 }
846978d7
WS
659 else
660 Escape(m_path, *uri++);
dd65d8c8 661 }
846978d7 662 }
dd65d8c8
RN
663 else
664 {
846978d7 665 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 666 {
24ca04e7 667 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 668 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 669 m_path += *uri++;
24ca04e7
VZ
670 else if (IsEscape(uri))
671 {
672 m_path += *uri++;
673 m_path += *uri++;
674 m_path += *uri++;
675 }
846978d7
WS
676 else
677 Escape(m_path, *uri++);
dd65d8c8
RN
678 }
679 }
680
681 if (uri != uricopy)
846978d7 682 {
dd65d8c8
RN
683 if (bNormalize)
684 {
685 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 686#if wxUSE_STL
2c09fb3b 687 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 688#endif
dd65d8c8
RN
689 Normalize(theBuffer);
690 theBuffer.SetLength(wxStrlen(theBuffer));
691 }
692
693 //mark the path as valid
694 m_fields |= wxURI_PATH;
695 }
696 }
697
698 return uri;
699}
700
701
702const wxChar* wxURI::ParseQuery(const wxChar* uri)
703{
704 wxASSERT(uri != NULL);
705
706 // query = *( pchar / "/" / "?" )
ce321570 707 if (*uri == wxT('?'))
dd65d8c8
RN
708 {
709 ++uri;
ce321570 710 while(*uri && *uri != wxT('#'))
dd65d8c8 711 {
24ca04e7 712 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 713 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 714 m_query += *uri++;
24ca04e7
VZ
715 else if (IsEscape(uri))
716 {
717 m_query += *uri++;
718 m_query += *uri++;
719 m_query += *uri++;
720 }
dd65d8c8 721 else
846978d7 722 Escape(m_query, *uri++);
dd65d8c8
RN
723 }
724
725 //mark the server as valid
726 m_fields |= wxURI_QUERY;
727 }
728
729 return uri;
730}
731
732
733const wxChar* wxURI::ParseFragment(const wxChar* uri)
734{
735 wxASSERT(uri != NULL);
736
737 // fragment = *( pchar / "/" / "?" )
ce321570 738 if (*uri == wxT('#'))
dd65d8c8
RN
739 {
740 ++uri;
741 while(*uri)
742 {
24ca04e7 743 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 744 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 745 m_fragment += *uri++;
24ca04e7
VZ
746 else if (IsEscape(uri))
747 {
748 m_fragment += *uri++;
749 m_fragment += *uri++;
750 m_fragment += *uri++;
751 }
dd65d8c8 752 else
846978d7 753 Escape(m_fragment, *uri++);
dd65d8c8
RN
754 }
755
756 //mark the server as valid
757 m_fields |= wxURI_FRAGMENT;
758 }
759
760 return uri;
761}
762
763// ---------------------------------------------------------------------------
ce321570 764// Resolve
dd65d8c8 765//
ce321570 766// Builds missing components of this uri from a base uri
dd65d8c8 767//
ce321570
RN
768// A version of the algorithm outlined in the RFC is used here
769// (it is shown in comments)
770//
846978d7 771// Note that an empty URI inherits all components
dd65d8c8
RN
772// ---------------------------------------------------------------------------
773
8404931e 774void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 775{
846978d7 776 wxASSERT_MSG(!base.IsReference(),
dd65d8c8
RN
777 wxT("wxURI to inherit from must not be a reference!"));
778
ce321570 779 // If we arn't being strict, enable the older (pre-RFC2396)
dd65d8c8
RN
780 // loophole that allows this uri to inherit other
781 // properties from the base uri - even if the scheme
782 // is defined
8404931e
VZ
783 if ( !(flags & wxURI_STRICT) &&
784 HasScheme() && base.HasScheme() &&
785 m_scheme == base.m_scheme )
846978d7 786 {
dd65d8c8
RN
787 m_fields -= wxURI_SCHEME;
788 }
789
790
791 // Do nothing if this is an absolute wxURI
792 // if defined(R.scheme) then
793 // T.scheme = R.scheme;
794 // T.authority = R.authority;
795 // T.path = remove_dot_segments(R.path);
796 // T.query = R.query;
797 if (HasScheme())
798 {
799 return;
800 }
801
ea4daac4 802 //No scheme - inherit
dd65d8c8
RN
803 m_scheme = base.m_scheme;
804 m_fields |= wxURI_SCHEME;
805
806 // All we need to do for relative URIs with an
807 // authority component is just inherit the scheme
808 // if defined(R.authority) then
809 // T.authority = R.authority;
810 // T.path = remove_dot_segments(R.path);
811 // T.query = R.query;
812 if (HasServer())
813 {
814 return;
815 }
816
817 //No authority - inherit
4860d40d 818 if (base.HasUserInfo())
dd65d8c8 819 {
4860d40d
RN
820 m_userinfo = base.m_userinfo;
821 m_fields |= wxURI_USERINFO;
dd65d8c8 822 }
846978d7 823
dd65d8c8
RN
824 m_server = base.m_server;
825 m_hostType = base.m_hostType;
826 m_fields |= wxURI_SERVER;
846978d7 827
dd65d8c8
RN
828 if (base.HasPort())
829 {
830 m_port = base.m_port;
831 m_fields |= wxURI_PORT;
832 }
846978d7 833
dd65d8c8
RN
834
835 // Simple path inheritance from base
836 if (!HasPath())
837 {
838 // T.path = Base.path;
839 m_path = base.m_path;
840 m_fields |= wxURI_PATH;
846978d7 841
dd65d8c8
RN
842
843 // if defined(R.query) then
844 // T.query = R.query;
845 // else
846 // T.query = Base.query;
847 // endif;
848 if (!HasQuery())
849 {
850 m_query = base.m_query;
851 m_fields |= wxURI_QUERY;
852 }
853 }
854 else
855 {
856 // if (R.path starts-with "/") then
857 // T.path = remove_dot_segments(R.path);
858 // else
859 // T.path = merge(Base.path, R.path);
860 // T.path = remove_dot_segments(T.path);
861 // endif;
862 // T.query = R.query;
ce321570 863 if (m_path[0u] != wxT('/'))
dd65d8c8 864 {
ea4daac4 865 //Merge paths
dd65d8c8
RN
866 const wxChar* op = m_path.c_str();
867 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
868
869 //not a ending directory? move up
ce321570 870 if (base.m_path[0] && *(bp-1) != wxT('/'))
dd65d8c8
RN
871 UpTree(base.m_path, bp);
872
873 //normalize directories
846978d7 874 while(*op == wxT('.') && *(op+1) == wxT('.') &&
ce321570 875 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
dd65d8c8
RN
876 {
877 UpTree(base.m_path, bp);
878
879 if (*(op+2) == '\0')
880 op += 2;
881 else
882 op += 3;
883 }
884
d8746da2
DS
885 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
886 m_path.substr((op - m_path.c_str()), m_path.Length());
dd65d8c8
RN
887 }
888 }
ce321570 889
846978d7 890 //T.fragment = R.fragment;
dd65d8c8
RN
891}
892
893// ---------------------------------------------------------------------------
846978d7 894// UpTree
dd65d8c8 895//
ce321570 896// Moves a URI path up a directory
dd65d8c8
RN
897// ---------------------------------------------------------------------------
898
ce321570 899//static
dd65d8c8
RN
900void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
901{
ce321570 902 if (uri != uristart && *(uri-1) == wxT('/'))
dd65d8c8
RN
903 {
904 uri -= 2;
905 }
846978d7 906
dd65d8c8
RN
907 for(;uri != uristart; --uri)
908 {
ce321570 909 if (*uri == wxT('/'))
dd65d8c8
RN
910 {
911 ++uri;
912 break;
913 }
914 }
915
916 //!!!TODO:HACK!!!//
ce321570 917 if (uri == uristart && *uri == wxT('/'))
dd65d8c8
RN
918 ++uri;
919 //!!!//
920}
921
ce321570
RN
922// ---------------------------------------------------------------------------
923// Normalize
924//
925// Normalizes directories in-place
926//
927// I.E. ./ and . are ignored
928//
929// ../ and .. are removed if a directory is before it, along
930// with that directory (leading .. and ../ are kept)
931// ---------------------------------------------------------------------------
932
933//static
8404931e 934void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
dd65d8c8
RN
935{
936 wxChar* cp = s;
937 wxChar* bp = s;
938
ce321570 939 if(s[0] == wxT('/'))
dd65d8c8
RN
940 ++bp;
941
942 while(*cp)
943 {
ce321570
RN
944 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
945 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
946 {
947 //. _or_ ./ - ignore
948 if (*(cp+1) == '\0')
949 cp += 1;
950 else
951 cp += 2;
952 }
846978d7 953 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
ce321570
RN
954 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
955 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
956 {
957 //.. _or_ ../ - go up the tree
958 if (s != bp)
959 {
960 UpTree((const wxChar*)bp, (const wxChar*&)s);
961
962 if (*(cp+2) == '\0')
963 cp += 2;
964 else
965 cp += 3;
966 }
967 else if (!bIgnoreLeads)
968
969 {
970 *bp++ = *cp++;
971 *bp++ = *cp++;
972 if (*cp)
973 *bp++ = *cp++;
974
975 s = bp;
976 }
977 else
978 {
979 if (*(cp+2) == '\0')
980 cp += 2;
981 else
982 cp += 3;
983 }
984 }
985 else
846978d7 986 *s++ = *cp++;
dd65d8c8
RN
987 }
988
989 *s = '\0';
990}
991
992// ---------------------------------------------------------------------------
ce321570
RN
993// ParseH16
994//
995// Parses 1 to 4 hex values. Returns true if the first character of the input
// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
997// the input string back to its original position on failure.
998// ---------------------------------------------------------------------------
999
1000bool wxURI::ParseH16(const wxChar*& uri)
1001{
1002 // h16 = 1*4HEXDIG
1003 if(!IsHex(*++uri))
1004 return false;
1005
1006 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1007 ++uri;
1008
1009 return true;
1010}
1011
1012// ---------------------------------------------------------------------------
1013// ParseIPXXX
1014//
846978d7
WS
1015// Parses a certain version of an IP address and moves the input string past
1016// it. Returns true if the input string contains the proper version of an ip
// address. It is the caller's responsibility to move the input string back
ce321570 1018// to its original position on failure.
dd65d8c8
RN
1019// ---------------------------------------------------------------------------
1020
1021bool wxURI::ParseIPv4address(const wxChar*& uri)
1022{
1023 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1024 //
1025 //dec-octet = DIGIT ; 0-9
1026 // / %x31-39 DIGIT ; 10-99
1027 // / "1" 2DIGIT ; 100-199
1028 // / "2" %x30-34 DIGIT ; 200-249
1029 // / "25" %x30-35 ; 250-255
1030 size_t iIPv4 = 0;
1031 if (IsDigit(*uri))
1032 {
1033 ++iIPv4;
1034
846978d7 1035
dd65d8c8
RN
1036 //each ip part must be between 0-255 (dupe of version in for loop)
1037 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1038 //100 or less (note !)
846978d7
WS
1039 !( (*(uri-2) < wxT('2')) ||
1040 //240 or less
1041 (*(uri-2) == wxT('2') &&
ce321570 1042 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1043 )
1044 )
1045 )
1046 {
1047 return false;
1048 }
1049
1050 if(IsDigit(*uri))++uri;
1051
1052 //compilers should unroll this loop
1053 for(; iIPv4 < 4; ++iIPv4)
1054 {
ce321570 1055 if (*uri != wxT('.') || !IsDigit(*++uri))
dd65d8c8
RN
1056 break;
1057
1058 //each ip part must be between 0-255
1059 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1060 //100 or less (note !)
846978d7
WS
1061 !( (*(uri-2) < wxT('2')) ||
1062 //240 or less
1063 (*(uri-2) == wxT('2') &&
ce321570 1064 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1065 )
1066 )
1067 )
1068 {
1069 return false;
1070 }
1071 if(IsDigit(*uri))++uri;
1072 }
1073 }
1074 return iIPv4 == 4;
1075}
1076
dd65d8c8
RN
1077bool wxURI::ParseIPv6address(const wxChar*& uri)
1078{
1079 // IPv6address = 6( h16 ":" ) ls32
1080 // / "::" 5( h16 ":" ) ls32
1081 // / [ h16 ] "::" 4( h16 ":" ) ls32
1082 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1083 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1084 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1085 // / [ *4( h16 ":" ) h16 ] "::" ls32
1086 // / [ *5( h16 ":" ) h16 ] "::" h16
1087 // / [ *6( h16 ":" ) h16 ] "::"
1088
1089 size_t numPrefix = 0,
1090 maxPostfix;
1091
1092 bool bEndHex = false;
1093
1094 for( ; numPrefix < 6; ++numPrefix)
1095 {
1096 if(!ParseH16(uri))
1097 {
1098 --uri;
1099 bEndHex = true;
1100 break;
1101 }
846978d7 1102
ce321570 1103 if(*uri != wxT(':'))
dd65d8c8
RN
1104 {
1105 break;
1106 }
1107 }
1108
1109 if(!bEndHex && !ParseH16(uri))
1110 {
1111 --uri;
1112
1113 if (numPrefix)
1114 return false;
1115
ce321570 1116 if (*uri == wxT(':'))
dd65d8c8 1117 {
ce321570 1118 if (*++uri != wxT(':'))
dd65d8c8
RN
1119 return false;
1120
1121 maxPostfix = 5;
1122 }
1123 else
1124 maxPostfix = 6;
1125 }
1126 else
1127 {
ce321570 1128 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
dd65d8c8
RN
1129 {
1130 if (numPrefix != 6)
1131 return false;
1132
ce321570 1133 while (*--uri != wxT(':')) {}
dd65d8c8
RN
1134 ++uri;
1135
1136 const wxChar* uristart = uri;
1137 //parse ls32
1138 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1139 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1140 return true;
1141
1142 uri = uristart;
1143
1144 if (ParseIPv4address(uri))
1145 return true;
1146 else
1147 return false;
1148 }
1149 else
1150 {
1151 uri += 2;
846978d7 1152
dd65d8c8
RN
1153 if (numPrefix > 3)
1154 maxPostfix = 0;
1155 else
1156 maxPostfix = 4 - numPrefix;
1157 }
1158 }
1159
1160 bool bAllowAltEnding = maxPostfix == 0;
1161
1162 for(; maxPostfix != 0; --maxPostfix)
1163 {
ce321570 1164 if(!ParseH16(uri) || *uri != wxT(':'))
dd65d8c8
RN
1165 return false;
1166 }
1167
1168 if(numPrefix <= 4)
1169 {
1170 const wxChar* uristart = uri;
1171 //parse ls32
1172 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1173 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1174 return true;
1175
1176 uri = uristart;
1177
1178 if (ParseIPv4address(uri))
1179 return true;
1180
1181 uri = uristart;
846978d7 1182
dd65d8c8
RN
1183 if (!bAllowAltEnding)
1184 return false;
1185 }
1186
1187 if(numPrefix <= 5 && ParseH16(uri))
1188 return true;
1189
1190 return true;
1191}
1192
1193bool wxURI::ParseIPvFuture(const wxChar*& uri)
1194{
1195 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
ce321570 1196 if (*++uri != wxT('v') || !IsHex(*++uri))
dd65d8c8
RN
1197 return false;
1198
1199 while (IsHex(*++uri)) {}
1200
ce321570 1201 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
dd65d8c8
RN
1202 return false;
1203
ce321570 1204 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
dd65d8c8
RN
1205
1206 return true;
1207}
1208
1209
1210// ---------------------------------------------------------------------------
ce321570
RN
1211// CharToHex
1212//
// Converts a character into a numeric hexadecimal value, or 0 if the
ce321570 1214// passed in character is not a valid hex character
dd65d8c8
RN
1215// ---------------------------------------------------------------------------
1216
ce321570 1217//static
409a7ba7 1218wxChar wxURI::CharToHex(const wxChar& c)
dd65d8c8 1219{
1676a194
WS
1220 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1221 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1222 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
dd65d8c8 1223
846978d7 1224 return 0;
dd65d8c8
RN
1225}
1226
ce321570
RN
1227// ---------------------------------------------------------------------------
1228// IsXXX
1229//
1230// Returns true if the passed in character meets the criteria of the method
1231// ---------------------------------------------------------------------------
1232
dd65d8c8
RN
1233//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1234bool wxURI::IsUnreserved (const wxChar& c)
846978d7 1235{ return IsAlpha(c) || IsDigit(c) ||
ce321570
RN
1236 c == wxT('-') ||
1237 c == wxT('.') ||
1238 c == wxT('_') ||
1239 c == wxT('~') //tilde
846978d7 1240 ;
dd65d8c8
RN
1241}
1242
1243bool wxURI::IsReserved (const wxChar& c)
846978d7 1244{
dd65d8c8
RN
1245 return IsGenDelim(c) || IsSubDelim(c);
1246}
1247
1248//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1249bool wxURI::IsGenDelim (const wxChar& c)
1250{
ce321570
RN
1251 return c == wxT(':') ||
1252 c == wxT('/') ||
1253 c == wxT('?') ||
1254 c == wxT('#') ||
1255 c == wxT('[') ||
1256 c == wxT(']') ||
1257 c == wxT('@');
dd65d8c8
RN
1258}
1259
1260//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1261//! / "*" / "+" / "," / ";" / "="
1262bool wxURI::IsSubDelim (const wxChar& c)
1263{
ce321570
RN
1264 return c == wxT('!') ||
1265 c == wxT('$') ||
1266 c == wxT('&') ||
1267 c == wxT('\'') ||
1268 c == wxT('(') ||
1269 c == wxT(')') ||
1270 c == wxT('*') ||
1271 c == wxT('+') ||
1272 c == wxT(',') ||
1273 c == wxT(';') ||
846978d7 1274 c == wxT('=')
dd65d8c8
RN
1275 ;
1276}
1277
1278bool wxURI::IsHex(const wxChar& c)
ce321570 1279{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
dd65d8c8
RN
1280
1281bool wxURI::IsAlpha(const wxChar& c)
ce321570 1282{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
dd65d8c8
RN
1283
1284bool wxURI::IsDigit(const wxChar& c)
ce321570 1285{ return c >= wxT('0') && c <= wxT('9'); }
dd65d8c8
RN
1286
1287
dd65d8c8
RN
1288//end of uri.cpp
1289
1290
1291