]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
update frm Ivan Masar
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
3// Purpose: Implementation of a uri parser
4// Author: Ryan Norton
5// Created: 10/26/04
6// RCS-ID: $Id$
7// Copyright: (c) 2004 Ryan Norton
8// Licence: wxWindows
9/////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// declarations
13// ===========================================================================
14
15// ---------------------------------------------------------------------------
16// headers
17// ---------------------------------------------------------------------------
18
dd65d8c8
RN
19// For compilers that support precompilation, includes "wx.h".
20#include "wx/wxprec.h"
21
22#ifdef __BORLANDC__
23 #pragma hdrstop
24#endif
25
26#include "wx/uri.h"
27
28// ---------------------------------------------------------------------------
29// definitions
30// ---------------------------------------------------------------------------
31
4115960d 32IMPLEMENT_CLASS(wxURI, wxObject)
dd65d8c8
RN
33
34// ===========================================================================
35// implementation
36// ===========================================================================
37
38// ---------------------------------------------------------------------------
39// utilities
40// ---------------------------------------------------------------------------
41
42// ---------------------------------------------------------------------------
43//
44// wxURI
45//
46// ---------------------------------------------------------------------------
47
48// ---------------------------------------------------------------------------
49// Constructors
50// ---------------------------------------------------------------------------
51
52wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
53{
54}
846978d7 55
dd65d8c8
RN
56wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
57{
58 Create(uri);
59}
60
60431236 61wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
dd65d8c8 62{
b60b2ec8 63 Assign(uri);
dd65d8c8
RN
64}
65
66// ---------------------------------------------------------------------------
67// Destructor and cleanup
68// ---------------------------------------------------------------------------
69
70wxURI::~wxURI()
71{
72 Clear();
73}
74
75void wxURI::Clear()
76{
4860d40d 77 m_scheme = m_userinfo = m_server = m_port = m_path =
525d8583 78 m_query = m_fragment = wxEmptyString;
dd65d8c8
RN
79
80 m_hostType = wxURI_REGNAME;
81
82 m_fields = 0;
83}
84
85// ---------------------------------------------------------------------------
86// Create
87//
846978d7 88// This creates the URI - all we do here is call the main parsing method
dd65d8c8
RN
89// ---------------------------------------------------------------------------
90
86470d43 91const wxChar* wxURI::Create(const wxString& uri)
846978d7 92{
dd65d8c8
RN
93 if (m_fields)
94 Clear();
95
846978d7
WS
96 return Parse(uri);
97}
dd65d8c8
RN
98
99// ---------------------------------------------------------------------------
ce321570 100// Escape Methods
dd65d8c8 101//
846978d7 102// TranslateEscape unencodes a 3 character URL escape sequence
ce321570 103//
dd65d8c8 104// Escape encodes an invalid URI character into a 3 character sequence
ce321570 105//
dd65d8c8
RN
106// IsEscape determines if the input string starts with an escape sequence;
107// it does not move the input string, so the caller must skip the sequence
ce321570
RN
108//
109// Unescape unencodes all 3 character URL escape sequences in a wxString
dd65d8c8
RN
110// ---------------------------------------------------------------------------
111
c9f78968 112wxUniChar wxURI::TranslateEscape(const wxString::const_iterator& s)
dd65d8c8 113{
c9f78968
VS
114 wxChar c1(*s);
115 wxChar c2(*(s + 1));
8404931e 116
c9f78968
VS
117 wxASSERT_MSG( IsHex(c1) && IsHex(c2), wxT("Invalid escape sequence!"));
118
119 return wx_truncate_cast(wxChar, (CharToHex(c1) << 4 ) | CharToHex(c2));
dd65d8c8
RN
120}
121
86470d43
RN
122wxString wxURI::Unescape(const wxString& uri)
123{
124 wxString new_uri;
125
c9f78968 126 for (wxString::const_iterator i = uri.begin(); i != uri.end(); ++i)
86470d43 127 {
c9f78968 128 if ( *i == wxT('%') )
86470d43 129 {
c9f78968 130 new_uri += wxURI::TranslateEscape(i + 1);
86470d43
RN
131 i += 2;
132 }
d8d7193d 133 else
c9f78968 134 new_uri += *i;
86470d43
RN
135 }
136
137 return new_uri;
138}
139
dd65d8c8
RN
140void wxURI::Escape(wxString& s, const wxChar& c)
141{
142 const wxChar* hdig = wxT("0123456789abcdef");
ce321570 143 s += wxT('%');
dd65d8c8 144 s += hdig[(c >> 4) & 15];
846978d7 145 s += hdig[c & 15];
dd65d8c8
RN
146}
147
148bool wxURI::IsEscape(const wxChar*& uri)
149{
ce321570
RN
150 // pct-encoded = "%" HEXDIG HEXDIG
151 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
dd65d8c8 152 return true;
dd65d8c8
RN
153 else
154 return false;
155}
156
4860d40d
RN
157// ---------------------------------------------------------------------------
158// GetUser
159// GetPassword
160//
161// Gets the username and password via the old URL method.
162// ---------------------------------------------------------------------------
163wxString wxURI::GetUser() const
164{
165 size_t dwPasswordPos = m_userinfo.find(':');
166
167 if (dwPasswordPos == wxString::npos)
168 dwPasswordPos = 0;
169
170 return m_userinfo(0, dwPasswordPos);
171}
172
173wxString wxURI::GetPassword() const
174{
175 size_t dwPasswordPos = m_userinfo.find(':');
176
177 if (dwPasswordPos == wxString::npos)
178 return wxT("");
179 else
180 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
181}
182
dd65d8c8 183// ---------------------------------------------------------------------------
86470d43 184// BuildURI
dd65d8c8 185//
846978d7 186// BuildURI() builds the entire URI into a usable
dd65d8c8 187// representation, including proper identification characters such as slashes
ce321570
RN
188//
189// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
190// the components that accept escape sequences
dd65d8c8
RN
191// ---------------------------------------------------------------------------
192
86470d43 193wxString wxURI::BuildURI() const
846978d7 194{
dd65d8c8
RN
195 wxString ret;
196
197 if (HasScheme())
198 ret = ret + m_scheme + wxT(":");
199
200 if (HasServer())
201 {
202 ret += wxT("//");
203
4860d40d
RN
204 if (HasUserInfo())
205 ret = ret + m_userinfo + wxT("@");
dd65d8c8
RN
206
207 ret += m_server;
208
209 if (HasPort())
210 ret = ret + wxT(":") + m_port;
211 }
212
213 ret += m_path;
214
215 if (HasQuery())
216 ret = ret + wxT("?") + m_query;
217
218 if (HasFragment())
219 ret = ret + wxT("#") + m_fragment;
220
221 return ret;
222}
223
86470d43
RN
224wxString wxURI::BuildUnescapedURI() const
225{
226 wxString ret;
227
228 if (HasScheme())
229 ret = ret + m_scheme + wxT(":");
230
231 if (HasServer())
232 {
233 ret += wxT("//");
234
4860d40d
RN
235 if (HasUserInfo())
236 ret = ret + wxURI::Unescape(m_userinfo) + wxT("@");
86470d43
RN
237
238 if (m_hostType == wxURI_REGNAME)
239 ret += wxURI::Unescape(m_server);
240 else
241 ret += m_server;
242
243 if (HasPort())
244 ret = ret + wxT(":") + m_port;
245 }
246
247 ret += wxURI::Unescape(m_path);
248
249 if (HasQuery())
250 ret = ret + wxT("?") + wxURI::Unescape(m_query);
251
252 if (HasFragment())
253 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
254
255 return ret;
256}
257
dd65d8c8 258// ---------------------------------------------------------------------------
ce321570 259// Assignment
dd65d8c8
RN
260// ---------------------------------------------------------------------------
261
b60b2ec8
RN
262wxURI& wxURI::Assign(const wxURI& uri)
263{
264 //assign fields
265 m_fields = uri.m_fields;
266
267 //ref over components
268 m_scheme = uri.m_scheme;
4860d40d 269 m_userinfo = uri.m_userinfo;
b60b2ec8
RN
270 m_server = uri.m_server;
271 m_hostType = uri.m_hostType;
272 m_port = uri.m_port;
273 m_path = uri.m_path;
274 m_query = uri.m_query;
275 m_fragment = uri.m_fragment;
dd65d8c8
RN
276
277 return *this;
278}
279
ce321570
RN
280wxURI& wxURI::operator = (const wxURI& uri)
281{
282 return Assign(uri);
283}
284
b60b2ec8 285wxURI& wxURI::operator = (const wxString& string)
846978d7 286{
dd65d8c8
RN
287 Create(string);
288 return *this;
289}
290
ce321570
RN
291// ---------------------------------------------------------------------------
292// Comparison
293// ---------------------------------------------------------------------------
294
dd65d8c8 295bool wxURI::operator == (const wxURI& uri) const
846978d7 296{
dd65d8c8
RN
297 if (HasScheme())
298 {
299 if(m_scheme != uri.m_scheme)
300 return false;
301 }
302 else if (uri.HasScheme())
303 return false;
304
305
306 if (HasServer())
307 {
4860d40d 308 if (HasUserInfo())
dd65d8c8 309 {
4860d40d 310 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
311 return false;
312 }
4860d40d 313 else if (uri.HasUserInfo())
dd65d8c8
RN
314 return false;
315
316 if (m_server != uri.m_server ||
317 m_hostType != uri.m_hostType)
318 return false;
319
320 if (HasPort())
321 {
322 if(m_port != uri.m_port)
323 return false;
324 }
325 else if (uri.HasPort())
326 return false;
327 }
328 else if (uri.HasServer())
329 return false;
330
331
332 if (HasPath())
333 {
334 if(m_path != uri.m_path)
335 return false;
336 }
337 else if (uri.HasPath())
338 return false;
339
340 if (HasQuery())
341 {
342 if (m_query != uri.m_query)
343 return false;
344 }
345 else if (uri.HasQuery())
346 return false;
347
348 if (HasFragment())
349 {
350 if (m_fragment != uri.m_fragment)
351 return false;
352 }
353 else if (uri.HasFragment())
354 return false;
355
356 return true;
357}
358
359// ---------------------------------------------------------------------------
360// IsReference
361//
362// if there is no authority or scheme, it is a reference
363// ---------------------------------------------------------------------------
364
365bool wxURI::IsReference() const
366{ return !HasScheme() || !HasServer(); }
367
368// ---------------------------------------------------------------------------
369// Parse
370//
371// Master URI parsing method. Just calls the individual parsing methods
372//
373// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 374// URI-reference = URI / relative
dd65d8c8
RN
375// ---------------------------------------------------------------------------
376
377const wxChar* wxURI::Parse(const wxChar* uri)
378{
379 uri = ParseScheme(uri);
380 uri = ParseAuthority(uri);
381 uri = ParsePath(uri);
382 uri = ParseQuery(uri);
383 return ParseFragment(uri);
384}
385
386// ---------------------------------------------------------------------------
387// ParseXXX
388//
389// Individual parsers for each URI component
390// ---------------------------------------------------------------------------
391
392const wxChar* wxURI::ParseScheme(const wxChar* uri)
393{
394 wxASSERT(uri != NULL);
395
396 //copy of the uri - used for figuring out
397 //length of each component
398 const wxChar* uricopy = uri;
399
400 //Does the uri have a scheme (first character alpha)?
401 if (IsAlpha(*uri))
402 {
403 m_scheme += *uri++;
404
405 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 406 while (IsAlpha(*uri) || IsDigit(*uri) ||
ce321570
RN
407 *uri == wxT('+') ||
408 *uri == wxT('-') ||
846978d7
WS
409 *uri == wxT('.'))
410 {
411 m_scheme += *uri++;
dd65d8c8
RN
412 }
413
414 //valid scheme?
ce321570 415 if (*uri == wxT(':'))
846978d7 416 {
dd65d8c8
RN
417 //mark the scheme as valid
418 m_fields |= wxURI_SCHEME;
419
420 //move reference point up to input buffer
421 uricopy = ++uri;
422 }
846978d7 423 else
dd65d8c8 424 //relative uri with relative path reference
525d8583 425 m_scheme = wxEmptyString;
dd65d8c8 426 }
846978d7 427// else
dd65d8c8
RN
428 //relative uri with _possible_ relative path reference
429
430 return uricopy;
431}
432
433const wxChar* wxURI::ParseAuthority(const wxChar* uri)
434{
435 // authority = [ userinfo "@" ] host [ ":" port ]
846978d7 436 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
dd65d8c8 437 {
97ad053b 438 //skip past the two slashes
dd65d8c8
RN
439 uri += 2;
440
97ad053b
VZ
441 // ############# DEVIATION FROM RFC #########################
442 // Don't parse the server component for file URIs
443 if(m_scheme != wxT("file"))
444 {
445 //normal way
4860d40d 446 uri = ParseUserInfo(uri);
dd65d8c8
RN
447 uri = ParseServer(uri);
448 return ParsePort(uri);
97ad053b 449 }
dd65d8c8
RN
450 }
451
452 return uri;
453}
454
4860d40d 455const wxChar* wxURI::ParseUserInfo(const wxChar* uri)
dd65d8c8
RN
456{
457 wxASSERT(uri != NULL);
458
459 //copy of the uri - used for figuring out
460 //length of each component
461 const wxChar* uricopy = uri;
462
463 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
846978d7 464 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 465 {
24ca04e7 466 if(IsUnreserved(*uri) ||
ce321570 467 IsSubDelim(*uri) || *uri == wxT(':'))
4860d40d 468 m_userinfo += *uri++;
24ca04e7
VZ
469 else if (IsEscape(uri))
470 {
471 m_userinfo += *uri++;
472 m_userinfo += *uri++;
473 m_userinfo += *uri++;
474 }
dd65d8c8 475 else
4860d40d 476 Escape(m_userinfo, *uri++);
dd65d8c8
RN
477 }
478
ce321570 479 if(*uri == wxT('@'))
dd65d8c8
RN
480 {
481 //valid userinfo
4860d40d 482 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
483
484 uricopy = ++uri;
485 }
486 else
4860d40d 487 m_userinfo = wxEmptyString;
dd65d8c8
RN
488
489 return uricopy;
490}
491
// Parses the host component and stores it in m_server.
//
// The alternatives are tried in this order: a bracketed IP literal (IPv6
// first, then IPvFuture), a dotted IPv4 address, and finally a reg-name.
// m_hostType is set to the matching wxURI_IPV6ADDRESS / wxURI_IPVFUTURE /
// wxURI_IPV4ADDRESS value; the reg-name fallback runs whenever m_hostType is
// still wxURI_REGNAME afterwards. The wxURI_SERVER flag is always set, even
// if the collected name is empty. Returns the pointer after the host.
492const wxChar* wxURI::ParseServer(const wxChar* uri)
493{
494 wxASSERT(uri != NULL);
495
496 //copy of the uri - used for figuring out
497 //length of each component
498 const wxChar* uricopy = uri;
499
500 // host = IP-literal / IPv4address / reg-name
501 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
ce321570 502 if (*uri == wxT('['))
dd65d8c8 503 {
e854db32
RN
504 ++uri; //some compilers don't support *&ing a ++*
505 if (ParseIPv6address(uri) && *uri == wxT(']'))
dd65d8c8
RN
506 {
507 ++uri;
508 m_hostType = wxURI_IPV6ADDRESS;
846978d7 509
// store the literal including its brackets: uricopy still points at '['
// and uri is now just past ']'
dd65d8c8 510 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 511 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
512 theBuffer.SetLength(uri-uricopy);
513 }
514 else
515 {
// not IPv6: rewind to the '[' and try the IPvFuture form instead
516 uri = uricopy;
517
e854db32
RN
518 ++uri; //some compilers don't support *&ing a ++*
519 if (ParseIPvFuture(uri) && *uri == wxT(']'))
dd65d8c8
RN
520 {
521 ++uri;
846978d7
WS
522 m_hostType = wxURI_IPVFUTURE;
523
dd65d8c8 524 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 525 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
526 theBuffer.SetLength(uri-uricopy);
527 }
846978d7 528 else
dd65d8c8
RN
529 uri = uricopy;
530 }
531 }
846978d7 532 else
dd65d8c8
RN
533 {
534 if (ParseIPv4address(uri))
535 {
536 m_hostType = wxURI_IPV4ADDRESS;
537
538 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 539 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
540 theBuffer.SetLength(uri-uricopy);
541 }
846978d7 542 else
dd65d8c8
RN
543 uri = uricopy;
544 }
545
// none of the IP forms matched (the host type was left untouched), so the
// host is collected as a registered name
546 if(m_hostType == wxURI_REGNAME)
547 {
548 uri = uricopy;
549 // reg-name = *( unreserved / pct-encoded / sub-delims )
846978d7 550 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 551 {
24ca04e7
VZ
552 if(IsUnreserved(*uri) || IsSubDelim(*uri))
553 m_server += *uri++;
554 else if (IsEscape(uri))
555 {
// existing "%XX" escape: copy its three characters unchanged
dd65d8c8 556 m_server += *uri++;
24ca04e7
VZ
557 m_server += *uri++;
558 m_server += *uri++;
559 }
dd65d8c8
RN
560 else
561 Escape(m_server, *uri++);
846978d7 562 }
dd65d8c8
RN
563 }
564
565 //mark the server as valid
566 m_fields |= wxURI_SERVER;
567
568 return uri;
569}
570
846978d7 571
dd65d8c8
RN
572const wxChar* wxURI::ParsePort(const wxChar* uri)
573{
574 wxASSERT(uri != NULL);
575
576 // port = *DIGIT
ce321570 577 if(*uri == wxT(':'))
dd65d8c8
RN
578 {
579 ++uri;
846978d7 580 while(IsDigit(*uri))
dd65d8c8
RN
581 {
582 m_port += *uri++;
846978d7 583 }
dd65d8c8
RN
584
585 //mark the port as valid
586 m_fields |= wxURI_PORT;
587 }
588
589 return uri;
590}
591
// Parses the path component into m_path and returns the pointer to the
// first character after it ('?', '#' or the terminating NUL).
//
// uri        - text to parse, positioned at the start of the path
// bReference - for a path that does not start with '/': when true, ':' is
//              not accepted verbatim (it ends up percent-escaped)
// bNormalize - when true, the collected path is passed through Normalize()
//
// Characters that may not appear verbatim are percent-escaped; existing
// "%XX" escapes are copied unchanged. wxURI_PATH is set when the path starts
// with '/' or when at least one character was consumed.
8404931e 592const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
dd65d8c8
RN
593{
594 wxASSERT(uri != NULL);
595
596 //copy of the uri - used for figuring out
597 //length of each component
598 const wxChar* uricopy = uri;
599
600 /// hier-part = "//" authority path-abempty
601 /// / path-absolute
602 /// / path-rootless
603 /// / path-empty
604 ///
605 /// relative-part = "//" authority path-abempty
606 /// / path-absolute
607 /// / path-noscheme
608 /// / path-empty
609 ///
610 /// path-abempty = *( "/" segment )
611 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
612 /// path-noscheme = segment-nz-nc *( "/" segment )
613 /// path-rootless = segment-nz *( "/" segment )
614 /// path-empty = 0<pchar>
615 ///
616 /// segment = *pchar
617 /// segment-nz = 1*pchar
618 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
619 /// ; non-zero-length segment without any colon ":"
620 ///
621 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
// Case 1: the path starts with '/'
ce321570 622 if (*uri == wxT('/'))
dd65d8c8
RN
623 {
624 m_path += *uri++;
625
846978d7
WS
626 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
627 {
24ca04e7 628 if( IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 629 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 630 m_path += *uri++;
24ca04e7
VZ
631 else if (IsEscape(uri))
632 {
633 m_path += *uri++;
634 m_path += *uri++;
635 m_path += *uri++;
636 }
846978d7
WS
637 else
638 Escape(m_path, *uri++);
dd65d8c8
RN
639 }
640
641 if (bNormalize)
642 {
// Normalize() edits a raw character buffer in place; the buffer is
// then cut to the length of the normalized text
643 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
81727065
VS
644#if wxUSE_STL || wxUSE_UNICODE_UTF8
645 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
2c09fb3b 646 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 647#endif
dd65d8c8
RN
648 Normalize(theBuffer, true);
649 theBuffer.SetLength(wxStrlen(theBuffer));
650 }
651 //mark the path as valid
652 m_fields |= wxURI_PATH;
653 }
// Case 2: non-empty text that does not start with '/'
654 else if(*uri) //Relative path
655 {
656 if (bReference)
657 {
658 //no colon allowed
846978d7 659 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 660 {
24ca04e7 661 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 662 *uri == wxT('@') || *uri == wxT('/'))
846978d7 663 m_path += *uri++;
24ca04e7
VZ
664 else if (IsEscape(uri))
665 {
666 m_path += *uri++;
667 m_path += *uri++;
668 m_path += *uri++;
669 }
846978d7
WS
670 else
671 Escape(m_path, *uri++);
dd65d8c8 672 }
846978d7 673 }
dd65d8c8
RN
674 else
675 {
846978d7 676 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 677 {
24ca04e7 678 if(IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 679 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7 680 m_path += *uri++;
24ca04e7
VZ
681 else if (IsEscape(uri))
682 {
683 m_path += *uri++;
684 m_path += *uri++;
685 m_path += *uri++;
686 }
846978d7
WS
687 else
688 Escape(m_path, *uri++);
dd65d8c8
RN
689 }
690 }
691
// only record a path if something was actually consumed (the text may
// have started directly with '?' or '#')
692 if (uri != uricopy)
846978d7 693 {
dd65d8c8
RN
694 if (bNormalize)
695 {
696 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
81727065
VS
697#if wxUSE_STL || wxUSE_UNICODE_UTF8
698 // FIXME-UTF8: have some wxReadWriteStringBuffer instead?
2c09fb3b 699 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 700#endif
dd65d8c8
RN
701 Normalize(theBuffer);
702 theBuffer.SetLength(wxStrlen(theBuffer));
703 }
704
705 //mark the path as valid
706 m_fields |= wxURI_PATH;
707 }
708 }
709
710 return uri;
711}
712
713
714const wxChar* wxURI::ParseQuery(const wxChar* uri)
715{
716 wxASSERT(uri != NULL);
717
718 // query = *( pchar / "/" / "?" )
ce321570 719 if (*uri == wxT('?'))
dd65d8c8
RN
720 {
721 ++uri;
ce321570 722 while(*uri && *uri != wxT('#'))
dd65d8c8 723 {
24ca04e7 724 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 725 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 726 m_query += *uri++;
24ca04e7
VZ
727 else if (IsEscape(uri))
728 {
729 m_query += *uri++;
730 m_query += *uri++;
731 m_query += *uri++;
732 }
dd65d8c8 733 else
846978d7 734 Escape(m_query, *uri++);
dd65d8c8
RN
735 }
736
737 //mark the server as valid
738 m_fields |= wxURI_QUERY;
739 }
740
741 return uri;
742}
743
744
745const wxChar* wxURI::ParseFragment(const wxChar* uri)
746{
747 wxASSERT(uri != NULL);
748
749 // fragment = *( pchar / "/" / "?" )
ce321570 750 if (*uri == wxT('#'))
dd65d8c8
RN
751 {
752 ++uri;
753 while(*uri)
754 {
24ca04e7 755 if (IsUnreserved(*uri) || IsSubDelim(*uri) ||
ce321570 756 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 757 m_fragment += *uri++;
24ca04e7
VZ
758 else if (IsEscape(uri))
759 {
760 m_fragment += *uri++;
761 m_fragment += *uri++;
762 m_fragment += *uri++;
763 }
dd65d8c8 764 else
846978d7 765 Escape(m_fragment, *uri++);
dd65d8c8
RN
766 }
767
768 //mark the server as valid
769 m_fields |= wxURI_FRAGMENT;
770 }
771
772 return uri;
773}
774
775// ---------------------------------------------------------------------------
ce321570 776// Resolve
dd65d8c8 777//
ce321570 778// Builds missing components of this uri from a base uri
dd65d8c8 779//
ce321570
RN
780// A version of the algorithm outlined in the RFC is used here
781// (it is shown in comments)
782//
846978d7 783// Note that an empty URI inherits all components
dd65d8c8
RN
784// ---------------------------------------------------------------------------
785
// Fills in the components this URI lacks from the given base URI.
//
// base  - URI to inherit from; asserted not to be a reference
// flags - if wxURI_STRICT is not set, a scheme equal to the base's scheme
//         is treated as if it were absent
//
// Nothing is changed when this URI (still) has a scheme. Otherwise the
// scheme is inherited; if this URI has its own server nothing more is done.
// Otherwise userinfo, server and port are inherited too, and the path is
// either inherited (together with the query, if missing) or, when it does
// not start with '/', merged with the base path. The fragment is never
// touched.
8404931e 786void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 787{
846978d7 788 wxASSERT_MSG(!base.IsReference(),
dd65d8c8
RN
789 wxT("wxURI to inherit from must not be a reference!"));
790
ce321570 791 // If we aren't being strict, enable the older (pre-RFC2396)
dd65d8c8
RN
792 // loophole that allows this uri to inherit other
793 // properties from the base uri - even if the scheme
794 // is defined
8404931e
VZ
795 if ( !(flags & wxURI_STRICT) &&
796 HasScheme() && base.HasScheme() &&
797 m_scheme == base.m_scheme )
846978d7 798 {
dd65d8c8
RN
// the flag is known to be set here, so the subtraction just clears it
799 m_fields -= wxURI_SCHEME;
800 }
801
802
803 // Do nothing if this is an absolute wxURI
804 // if defined(R.scheme) then
805 // T.scheme = R.scheme;
806 // T.authority = R.authority;
807 // T.path = remove_dot_segments(R.path);
808 // T.query = R.query;
809 if (HasScheme())
810 {
811 return;
812 }
813
ea4daac4 814 //No scheme - inherit
dd65d8c8
RN
815 m_scheme = base.m_scheme;
816 m_fields |= wxURI_SCHEME;
817
818 // All we need to do for relative URIs with an
819 // authority component is just inherit the scheme
820 // if defined(R.authority) then
821 // T.authority = R.authority;
822 // T.path = remove_dot_segments(R.path);
823 // T.query = R.query;
824 if (HasServer())
825 {
826 return;
827 }
828
829 //No authority - inherit
4860d40d 830 if (base.HasUserInfo())
dd65d8c8 831 {
4860d40d
RN
832 m_userinfo = base.m_userinfo;
833 m_fields |= wxURI_USERINFO;
dd65d8c8 834 }
846978d7 835
dd65d8c8
RN
836 m_server = base.m_server;
837 m_hostType = base.m_hostType;
838 m_fields |= wxURI_SERVER;
846978d7 839
dd65d8c8
RN
840 if (base.HasPort())
841 {
842 m_port = base.m_port;
843 m_fields |= wxURI_PORT;
844 }
846978d7 845
dd65d8c8
RN
846
847 // Simple path inheritance from base
848 if (!HasPath())
849 {
850 // T.path = Base.path;
851 m_path = base.m_path;
852 m_fields |= wxURI_PATH;
846978d7 853
dd65d8c8
RN
854
855 // if defined(R.query) then
856 // T.query = R.query;
857 // else
858 // T.query = Base.query;
859 // endif;
860 if (!HasQuery())
861 {
862 m_query = base.m_query;
863 m_fields |= wxURI_QUERY;
864 }
865 }
866 else
867 {
868 // if (R.path starts-with "/") then
869 // T.path = remove_dot_segments(R.path);
870 // else
871 // T.path = merge(Base.path, R.path);
872 // T.path = remove_dot_segments(T.path);
873 // endif;
874 // T.query = R.query;
ce321570 875 if (m_path[0u] != wxT('/'))
dd65d8c8 876 {
ea4daac4 877 //Merge paths
// op walks forward over leading ".." segments of our own path;
// bp marks the end of the part of the base path that is kept
c9f78968
VS
878 wxString::const_iterator op = m_path.begin();
879 wxString::const_iterator bp = base.m_path.begin() + base.m_path.length();
dd65d8c8
RN
880
881 //not a ending directory? move up
ce321570 882 if (base.m_path[0] && *(bp-1) != wxT('/'))
c9f78968 883 UpTree(base.m_path.begin(), bp);
dd65d8c8
RN
884
885 //normalize directories
// NOTE(review): the tests below read up to two positions past op and
// compare against '\0'; this presumably relies on the string being
// NUL-terminated behind the iterator - confirm for all build modes
846978d7 886 while(*op == wxT('.') && *(op+1) == wxT('.') &&
ce321570 887 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
dd65d8c8 888 {
c9f78968 889 UpTree(base.m_path.begin(), bp);
dd65d8c8
RN
890
891 if (*(op+2) == '\0')
892 op += 2;
893 else
894 op += 3;
895 }
896
// kept prefix of the base path + what is left of our own path
c9f78968
VS
897 m_path = base.m_path.substr(0, bp - base.m_path.begin()) +
898 m_path.substr((op - m_path.begin()), m_path.length());
dd65d8c8
RN
899 }
900 }
ce321570 901
846978d7 902 //T.fragment = R.fragment;
dd65d8c8
RN
903}
904
905// ---------------------------------------------------------------------------
846978d7 906// UpTree
dd65d8c8 907//
ce321570 908// Moves a URI path up a directory
dd65d8c8
RN
909// ---------------------------------------------------------------------------
910
ce321570 911//static
c9f78968
VS
912void wxURI::UpTree(wxString::const_iterator uristart,
913 wxString::const_iterator& uri)
914{
915 if (uri != uristart && *(uri-1) == wxT('/'))
916 {
917 uri -= 2;
918 }
919
920 for(;uri != uristart; --uri)
921 {
922 if (*uri == wxT('/'))
923 {
924 ++uri;
925 break;
926 }
927 }
928
929 //!!!TODO:HACK!!!//
930 if (uri == uristart && *uri == wxT('/'))
931 ++uri;
932 //!!!//
933}
934
935// FIXME-UTF8: fix Normalize() to use iterators instead of having this method!
// Raw-pointer variant of UpTree(): moves uri, which marks the end of a path
// prefix, one directory up so that it ends up just past the '/' closing the
// parent directory (or at uristart, or one past a '/' found at uristart).
//
// NOTE(review): when uri == uristart + 1 and *uristart is '/', the
// "uri -= 2" below yields uristart - 1, which is then dereferenced by the
// loop. That is only valid if uristart is not the very beginning of the
// underlying buffer - verify against the callers.
936/*static*/ void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
dd65d8c8 937{
ce321570 938 if (uri != uristart && *(uri-1) == wxT('/'))
dd65d8c8
RN
939 {
// skip the trailing slash and the character before it
940 uri -= 2;
941 }
846978d7 942
dd65d8c8
RN
// walk back to the previous slash and stop right after it
943 for(;uri != uristart; --uri)
944 {
ce321570 945 if (*uri == wxT('/'))
dd65d8c8
RN
946 {
947 ++uri;
948 break;
949 }
950 }
951
952 //!!!TODO:HACK!!!//
ce321570 953 if (uri == uristart && *uri == wxT('/'))
dd65d8c8
RN
954 ++uri;
955 //!!!//
956}
c9f78968 957// end of FIXME-UTF8
dd65d8c8 958
ce321570
RN
959// ---------------------------------------------------------------------------
960// Normalize
961//
962// Normalizes directories in-place
963//
964// I.E. ./ and . are ignored
965//
966// ../ and .. are removed if a directory is before it, along
967// with that directory (leading .. and ../ are kept)
968// ---------------------------------------------------------------------------
969
970//static
// Removes "." and ".." segments from the NUL-terminated path in s, editing
// the buffer in place.
//
// s            - buffer to rewrite; it doubles as the write cursor
// bIgnoreLeads - when true, ".." segments that cannot be resolved (nothing
//                has been written yet) are dropped; when false they are kept
//
// Three cursors are used: cp reads the original text, s writes the result,
// and bp is the floor below which ".." may not remove anything (just past a
// leading '/', and past any leading ".." segments that were kept).
8404931e 971void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
dd65d8c8
RN
972{
973 wxChar* cp = s;
974 wxChar* bp = s;
975
ce321570 976 if(s[0] == wxT('/'))
dd65d8c8
RN
977 ++bp;
978
979 while(*cp)
980 {
ce321570
RN
// a "." segment: a dot at the start of a segment, followed by '/' or the end
981 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
982 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
983 {
984 //. _or_ ./ - ignore
985 if (*(cp+1) == '\0')
986 cp += 1;
987 else
988 cp += 2;
989 }
846978d7 990 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
ce321570
RN
991 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
992 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
993 {
994 //.. _or_ ../ - go up the tree
995 if (s != bp)
996 {
// something has been written above the floor: drop the last
// written directory by moving the write cursor back
997 UpTree((const wxChar*)bp, (const wxChar*&)s);
998
999 if (*(cp+2) == '\0')
1000 cp += 2;
1001 else
1002 cp += 3;
1003 }
1004 else if (!bIgnoreLeads)
1005
1006 {
// nothing to remove: keep the ".." (and its '/', if any) and raise
// the floor past it so later ".." segments cannot remove it
1007 *bp++ = *cp++;
1008 *bp++ = *cp++;
1009 if (*cp)
1010 *bp++ = *cp++;
1011
1012 s = bp;
1013 }
1014 else
1015 {
// nothing to remove and leading ".." are ignored: just skip it
1016 if (*(cp+2) == '\0')
1017 cp += 2;
1018 else
1019 cp += 3;
1020 }
1021 }
1022 else
846978d7 1023 *s++ = *cp++;
dd65d8c8
RN
1024 }
1025
1026 *s = '\0';
1027}
1028
1029// ---------------------------------------------------------------------------
ce321570
RN
1030// ParseH16
1031//
1032// Parses 1 to 4 hex values. Returns true if the first character of the input
846978d7 1033// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
1034// the input string back to its original position on failure.
1035// ---------------------------------------------------------------------------
1036
1037bool wxURI::ParseH16(const wxChar*& uri)
1038{
1039 // h16 = 1*4HEXDIG
1040 if(!IsHex(*++uri))
1041 return false;
1042
1043 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
1044 ++uri;
1045
1046 return true;
1047}
1048
1049// ---------------------------------------------------------------------------
1050// ParseIPXXX
1051//
846978d7
WS
1052// Parses a certain version of an IP address and moves the input string past
1053// it. Returns true if the input string contains the proper version of an ip
1054// address. It is the caller's responsibility to move the input string back
ce321570 1055// to its original position on failure.
dd65d8c8
RN
1056// ---------------------------------------------------------------------------
1057
// Parses a dotted-decimal IPv4 address starting at uri.
//
// Returns true when four decimal groups separated by '.' were read, leaving
// uri just past the last digit consumed. Returns false otherwise, including
// when a three-digit group is greater than 255; uri is then left wherever
// parsing stopped and the caller has to restore it.
1058bool wxURI::ParseIPv4address(const wxChar*& uri)
1059{
1060 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
1061 //
1062 //dec-octet = DIGIT ; 0-9
1063 // / %x31-39 DIGIT ; 10-99
1064 // / "1" 2DIGIT ; 100-199
1065 // / "2" %x30-34 DIGIT ; 200-249
1066 // / "25" %x30-35 ; 250-255
// number of groups read so far
1067 size_t iIPv4 = 0;
1068 if (IsDigit(*uri))
1069 {
1070 ++iIPv4;
1071
846978d7 1072
dd65d8c8
RN
1073 //each ip part must be between 0-255 (dupe of version in for loop)
// the range test only applies when the group has three digits; at that
// point uri is on the third digit, uri-1 and uri-2 on the first two
1074 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1075 //first digit is 0 or 1: value below 200 (note !)
846978d7
WS
1076 !( (*(uri-2) < wxT('2')) ||
1077 //first digit is 2: value must not exceed 255
1078 (*(uri-2) == wxT('2') &&
ce321570 1079 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1080 )
1081 )
1082 )
1083 {
1084 return false;
1085 }
1086
// uri is still on a digit only if the group had three digits: step past it
1087 if(IsDigit(*uri))++uri;
1088
1089 //compilers should unroll this loop
1090 for(; iIPv4 < 4; ++iIPv4)
1091 {
ce321570 1092 if (*uri != wxT('.') || !IsDigit(*++uri))
dd65d8c8
RN
1093 break;
1094
1095 //each ip part must be between 0-255
1096 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1097 //first digit is 0 or 1: value below 200 (note !)
846978d7
WS
1098 !( (*(uri-2) < wxT('2')) ||
1099 //first digit is 2: value must not exceed 255
1100 (*(uri-2) == wxT('2') &&
ce321570 1101 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1102 )
1103 )
1104 )
1105 {
1106 return false;
1107 }
1108 if(IsDigit(*uri))++uri;
1109 }
1110 }
1111 return iIPv4 == 4;
1112}
1113
dd65d8c8
RN
// Parses an IPv6 address according to the grammar quoted below.
//
// Returns false when the text is rejected; uri is then left wherever
// parsing stopped and the caller has to restore it.
//
// NOTE(review): ParseH16() advances the pointer before looking at the first
// character, so this function appears to expect uri to point one position
// before the first hex group - confirm against the caller.
1114bool wxURI::ParseIPv6address(const wxChar*& uri)
1115{
1116 // IPv6address = 6( h16 ":" ) ls32
1117 // / "::" 5( h16 ":" ) ls32
1118 // / [ h16 ] "::" 4( h16 ":" ) ls32
1119 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1120 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1121 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1122 // / [ *4( h16 ":" ) h16 ] "::" ls32
1123 // / [ *5( h16 ":" ) h16 ] "::" h16
1124 // / [ *6( h16 ":" ) h16 ] "::"
1125
// numPrefix  - number of leading "h16:" groups read by the first loop
// maxPostfix - number of "h16:" groups that must follow before the ending
1126 size_t numPrefix = 0,
1127 maxPostfix;
1128
// set when the prefix loop stopped because a group did not start with a
// hex digit
1129 bool bEndHex = false;
1130
1131 for( ; numPrefix < 6; ++numPrefix)
1132 {
1133 if(!ParseH16(uri))
1134 {
// undo the advance made by the failed ParseH16()
1135 --uri;
1136 bEndHex = true;
1137 break;
1138 }
846978d7 1139
ce321570 1140 if(*uri != wxT(':'))
dd65d8c8
RN
1141 {
1142 break;
1143 }
1144 }
1145
1146 if(!bEndHex && !ParseH16(uri))
1147 {
1148 --uri;
1149
1150 if (numPrefix)
1151 return false;
1152
ce321570 1153 if (*uri == wxT(':'))
dd65d8c8 1154 {
ce321570 1155 if (*++uri != wxT(':'))
dd65d8c8
RN
1156 return false;
1157
1158 maxPostfix = 5;
1159 }
1160 else
1161 maxPostfix = 6;
1162 }
1163 else
1164 {
ce321570 1165 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
dd65d8c8
RN
1166 {
// no "::" here: only the full form with six prefix groups is accepted
1167 if (numPrefix != 6)
1168 return false;
1169
// back up to just after the previous ':' and parse the ending from there
ce321570 1170 while (*--uri != wxT(':')) {}
dd65d8c8
RN
1171 ++uri;
1172
1173 const wxChar* uristart = uri;
1174 //parse ls32
1175 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1176 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1177 return true;
1178
1179 uri = uristart;
1180
1181 if (ParseIPv4address(uri))
1182 return true;
1183 else
1184 return false;
1185 }
1186 else
1187 {
// skip the "::"
1188 uri += 2;
846978d7 1189
dd65d8c8
RN
1190 if (numPrefix > 3)
1191 maxPostfix = 0;
1192 else
1193 maxPostfix = 4 - numPrefix;
1194 }
1195 }
1196
// the shorter endings are only tried when no further "h16:" group is required
1197 bool bAllowAltEnding = maxPostfix == 0;
1198
1199 for(; maxPostfix != 0; --maxPostfix)
1200 {
ce321570 1201 if(!ParseH16(uri) || *uri != wxT(':'))
dd65d8c8
RN
1202 return false;
1203 }
1204
1205 if(numPrefix <= 4)
1206 {
1207 const wxChar* uristart = uri;
1208 //parse ls32
1209 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1210 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1211 return true;
1212
1213 uri = uristart;
1214
1215 if (ParseIPv4address(uri))
1216 return true;
1217
1218 uri = uristart;
846978d7 1219
dd65d8c8
RN
1220 if (!bAllowAltEnding)
1221 return false;
1222 }
1223
// both outcomes below return true; the ParseH16() call only matters for how
// far uri is advanced
1224 if(numPrefix <= 5 && ParseH16(uri))
1225 return true;
1226
1227 return true;
1228}
1229
1230bool wxURI::ParseIPvFuture(const wxChar*& uri)
1231{
1232 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
ce321570 1233 if (*++uri != wxT('v') || !IsHex(*++uri))
dd65d8c8
RN
1234 return false;
1235
1236 while (IsHex(*++uri)) {}
1237
ce321570 1238 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
dd65d8c8
RN
1239 return false;
1240
ce321570 1241 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
dd65d8c8
RN
1242
1243 return true;
1244}
1245
1246
1247// ---------------------------------------------------------------------------
ce321570
RN
1248// CharToHex
1249//
846978d7 1250// Converts a character into a numeric hexidecimal value, or 0 if the
ce321570 1251// passed in character is not a valid hex character
dd65d8c8
RN
1252// ---------------------------------------------------------------------------
1253
ce321570 1254//static
409a7ba7 1255wxChar wxURI::CharToHex(const wxChar& c)
dd65d8c8 1256{
1676a194
WS
1257 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1258 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1259 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
dd65d8c8 1260
846978d7 1261 return 0;
dd65d8c8
RN
1262}
1263
ce321570
RN
1264// ---------------------------------------------------------------------------
1265// IsXXX
1266//
1267// Returns true if the passed in character meets the criteria of the method
1268// ---------------------------------------------------------------------------
1269
dd65d8c8
RN
1270//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1271bool wxURI::IsUnreserved (const wxChar& c)
846978d7 1272{ return IsAlpha(c) || IsDigit(c) ||
ce321570
RN
1273 c == wxT('-') ||
1274 c == wxT('.') ||
1275 c == wxT('_') ||
1276 c == wxT('~') //tilde
846978d7 1277 ;
dd65d8c8
RN
1278}
1279
1280bool wxURI::IsReserved (const wxChar& c)
846978d7 1281{
dd65d8c8
RN
1282 return IsGenDelim(c) || IsSubDelim(c);
1283}
1284
1285//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1286bool wxURI::IsGenDelim (const wxChar& c)
1287{
ce321570
RN
1288 return c == wxT(':') ||
1289 c == wxT('/') ||
1290 c == wxT('?') ||
1291 c == wxT('#') ||
1292 c == wxT('[') ||
1293 c == wxT(']') ||
1294 c == wxT('@');
dd65d8c8
RN
1295}
1296
1297//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1298//! / "*" / "+" / "," / ";" / "="
1299bool wxURI::IsSubDelim (const wxChar& c)
1300{
ce321570
RN
1301 return c == wxT('!') ||
1302 c == wxT('$') ||
1303 c == wxT('&') ||
1304 c == wxT('\'') ||
1305 c == wxT('(') ||
1306 c == wxT(')') ||
1307 c == wxT('*') ||
1308 c == wxT('+') ||
1309 c == wxT(',') ||
1310 c == wxT(';') ||
846978d7 1311 c == wxT('=')
dd65d8c8
RN
1312 ;
1313}
1314
1315bool wxURI::IsHex(const wxChar& c)
ce321570 1316{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
dd65d8c8
RN
1317
1318bool wxURI::IsAlpha(const wxChar& c)
ce321570 1319{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
dd65d8c8
RN
1320
1321bool wxURI::IsDigit(const wxChar& c)
ce321570 1322{ return c >= wxT('0') && c <= wxT('9'); }
dd65d8c8
RN
1323
1324
dd65d8c8
RN
1325//end of uri.cpp
1326
1327
1328