]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
As small as possible reorganization within wxDateTime to please PCH in DLL build...
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
3// Purpose: Implementation of a uri parser
4// Author: Ryan Norton
5// Created: 10/26/04
6// RCS-ID: $Id$
7// Copyright: (c) 2004 Ryan Norton
8// Licence: wxWindows
9/////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// declarations
13// ===========================================================================
14
15// ---------------------------------------------------------------------------
16// headers
17// ---------------------------------------------------------------------------
18
19#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21#endif
22
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
30#include "wx/uri.h"
31
32// ---------------------------------------------------------------------------
33// definitions
34// ---------------------------------------------------------------------------
35
36IMPLEMENT_CLASS(wxURI, wxObject);
37
38// ===========================================================================
39// implementation
40// ===========================================================================
41
42// ---------------------------------------------------------------------------
43// utilities
44// ---------------------------------------------------------------------------
45
46// ---------------------------------------------------------------------------
47//
48// wxURI
49//
50// ---------------------------------------------------------------------------
51
52// ---------------------------------------------------------------------------
53// Constructors
54// ---------------------------------------------------------------------------
55
56wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57{
58}
846978d7 59
dd65d8c8
RN
60wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61{
62 Create(uri);
63}
64
60431236 65wxURI::wxURI(const wxURI& uri) : wxObject(), m_hostType(wxURI_REGNAME), m_fields(0)
dd65d8c8 66{
b60b2ec8 67 Assign(uri);
dd65d8c8
RN
68}
69
70// ---------------------------------------------------------------------------
71// Destructor and cleanup
72// ---------------------------------------------------------------------------
73
74wxURI::~wxURI()
75{
76 Clear();
77}
78
79void wxURI::Clear()
80{
81 m_scheme = m_user = m_server = m_port = m_path =
82 m_query = m_fragment = wxT("");
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87}
88
89// ---------------------------------------------------------------------------
90// Create
91//
846978d7 92// This creates the URI - all we do here is call the main parsing method
dd65d8c8
RN
93// ---------------------------------------------------------------------------
94
86470d43 95const wxChar* wxURI::Create(const wxString& uri)
846978d7 96{
dd65d8c8
RN
97 if (m_fields)
98 Clear();
99
846978d7
WS
100 return Parse(uri);
101}
dd65d8c8
RN
102
103// ---------------------------------------------------------------------------
ce321570 104// Escape Methods
dd65d8c8 105//
846978d7 106// TranslateEscape unencodes a 3 character URL escape sequence
ce321570 107//
dd65d8c8 108// Escape encodes an invalid URI character into a 3 character sequence
ce321570 109//
dd65d8c8
RN
110// IsEscape determines if the input string contains an escape sequence,
111// if it does, then it moves the input string past the escape sequence
ce321570
RN
112//
113// Unescape unencodes all 3 character URL escape sequences in a wxString
dd65d8c8
RN
114// ---------------------------------------------------------------------------
115
86470d43 116wxChar wxURI::TranslateEscape(const wxChar* s)
dd65d8c8
RN
117{
118 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
8404931e 119
d8d7193d 120 //<<4 == 16
6356d52a 121 return (wxChar)( CharToHex(*s) << 4 ) | CharToHex(*++s);
dd65d8c8
RN
122}
123
86470d43
RN
124wxString wxURI::Unescape(const wxString& uri)
125{
126 wxString new_uri;
127
128 for(size_t i = 0; i < uri.length(); ++i)
129 {
130 if (uri[i] == wxT('%'))
131 {
132 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
133 i += 2;
134 }
d8d7193d
RN
135 else
136 new_uri += uri[i];
86470d43
RN
137 }
138
139 return new_uri;
140}
141
dd65d8c8
RN
142void wxURI::Escape(wxString& s, const wxChar& c)
143{
144 const wxChar* hdig = wxT("0123456789abcdef");
ce321570 145 s += wxT('%');
dd65d8c8 146 s += hdig[(c >> 4) & 15];
846978d7 147 s += hdig[c & 15];
dd65d8c8
RN
148}
149
150bool wxURI::IsEscape(const wxChar*& uri)
151{
ce321570
RN
152 // pct-encoded = "%" HEXDIG HEXDIG
153 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
dd65d8c8
RN
154 {
155 uri += 3;
156 return true;
157 }
158 else
159 return false;
160}
161
162// ---------------------------------------------------------------------------
86470d43 163// BuildURI
dd65d8c8 164//
846978d7 165// BuildURI() builds the entire URI into a useable
dd65d8c8 166// representation, including proper identification characters such as slashes
ce321570
RN
167//
168// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
169// the components that accept escape sequences
dd65d8c8
RN
170// ---------------------------------------------------------------------------
171
86470d43 172wxString wxURI::BuildURI() const
846978d7 173{
dd65d8c8
RN
174 wxString ret;
175
176 if (HasScheme())
177 ret = ret + m_scheme + wxT(":");
178
179 if (HasServer())
180 {
181 ret += wxT("//");
182
183 if (HasUser())
184 ret = ret + m_user + wxT("@");
185
186 ret += m_server;
187
188 if (HasPort())
189 ret = ret + wxT(":") + m_port;
190 }
191
192 ret += m_path;
193
194 if (HasQuery())
195 ret = ret + wxT("?") + m_query;
196
197 if (HasFragment())
198 ret = ret + wxT("#") + m_fragment;
199
200 return ret;
201}
202
86470d43
RN
203wxString wxURI::BuildUnescapedURI() const
204{
205 wxString ret;
206
207 if (HasScheme())
208 ret = ret + m_scheme + wxT(":");
209
210 if (HasServer())
211 {
212 ret += wxT("//");
213
214 if (HasUser())
215 ret = ret + wxURI::Unescape(m_user) + wxT("@");
216
217 if (m_hostType == wxURI_REGNAME)
218 ret += wxURI::Unescape(m_server);
219 else
220 ret += m_server;
221
222 if (HasPort())
223 ret = ret + wxT(":") + m_port;
224 }
225
226 ret += wxURI::Unescape(m_path);
227
228 if (HasQuery())
229 ret = ret + wxT("?") + wxURI::Unescape(m_query);
230
231 if (HasFragment())
232 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
233
234 return ret;
235}
236
dd65d8c8 237// ---------------------------------------------------------------------------
ce321570 238// Assignment
dd65d8c8
RN
239// ---------------------------------------------------------------------------
240
b60b2ec8
RN
241wxURI& wxURI::Assign(const wxURI& uri)
242{
243 //assign fields
244 m_fields = uri.m_fields;
245
246 //ref over components
247 m_scheme = uri.m_scheme;
248 m_user = uri.m_user;
249 m_server = uri.m_server;
250 m_hostType = uri.m_hostType;
251 m_port = uri.m_port;
252 m_path = uri.m_path;
253 m_query = uri.m_query;
254 m_fragment = uri.m_fragment;
dd65d8c8
RN
255
256 return *this;
257}
258
ce321570
RN
259wxURI& wxURI::operator = (const wxURI& uri)
260{
261 return Assign(uri);
262}
263
b60b2ec8 264wxURI& wxURI::operator = (const wxString& string)
846978d7 265{
dd65d8c8
RN
266 Create(string);
267 return *this;
268}
269
ce321570
RN
270// ---------------------------------------------------------------------------
271// Comparison
272// ---------------------------------------------------------------------------
273
dd65d8c8 274bool wxURI::operator == (const wxURI& uri) const
846978d7 275{
dd65d8c8
RN
276 if (HasScheme())
277 {
278 if(m_scheme != uri.m_scheme)
279 return false;
280 }
281 else if (uri.HasScheme())
282 return false;
283
284
285 if (HasServer())
286 {
287 if (HasUser())
288 {
289 if (m_user != uri.m_user)
290 return false;
291 }
292 else if (uri.HasUser())
293 return false;
294
295 if (m_server != uri.m_server ||
296 m_hostType != uri.m_hostType)
297 return false;
298
299 if (HasPort())
300 {
301 if(m_port != uri.m_port)
302 return false;
303 }
304 else if (uri.HasPort())
305 return false;
306 }
307 else if (uri.HasServer())
308 return false;
309
310
311 if (HasPath())
312 {
313 if(m_path != uri.m_path)
314 return false;
315 }
316 else if (uri.HasPath())
317 return false;
318
319 if (HasQuery())
320 {
321 if (m_query != uri.m_query)
322 return false;
323 }
324 else if (uri.HasQuery())
325 return false;
326
327 if (HasFragment())
328 {
329 if (m_fragment != uri.m_fragment)
330 return false;
331 }
332 else if (uri.HasFragment())
333 return false;
334
335 return true;
336}
337
338// ---------------------------------------------------------------------------
339// IsReference
340//
341// if there is no authority or scheme, it is a reference
342// ---------------------------------------------------------------------------
343
344bool wxURI::IsReference() const
345{ return !HasScheme() || !HasServer(); }
346
347// ---------------------------------------------------------------------------
348// Parse
349//
350// Master URI parsing method. Just calls the individual parsing methods
351//
352// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
353// URI-reference = URI / relative-URI
354// ---------------------------------------------------------------------------
355
356const wxChar* wxURI::Parse(const wxChar* uri)
357{
358 uri = ParseScheme(uri);
359 uri = ParseAuthority(uri);
360 uri = ParsePath(uri);
361 uri = ParseQuery(uri);
362 return ParseFragment(uri);
363}
364
365// ---------------------------------------------------------------------------
366// ParseXXX
367//
368// Individual parsers for each URI component
369// ---------------------------------------------------------------------------
370
371const wxChar* wxURI::ParseScheme(const wxChar* uri)
372{
373 wxASSERT(uri != NULL);
374
375 //copy of the uri - used for figuring out
376 //length of each component
377 const wxChar* uricopy = uri;
378
379 //Does the uri have a scheme (first character alpha)?
380 if (IsAlpha(*uri))
381 {
382 m_scheme += *uri++;
383
384 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 385 while (IsAlpha(*uri) || IsDigit(*uri) ||
ce321570
RN
386 *uri == wxT('+') ||
387 *uri == wxT('-') ||
846978d7
WS
388 *uri == wxT('.'))
389 {
390 m_scheme += *uri++;
dd65d8c8
RN
391 }
392
393 //valid scheme?
ce321570 394 if (*uri == wxT(':'))
846978d7 395 {
dd65d8c8
RN
396 //mark the scheme as valid
397 m_fields |= wxURI_SCHEME;
398
399 //move reference point up to input buffer
400 uricopy = ++uri;
401 }
846978d7 402 else
dd65d8c8
RN
403 //relative uri with relative path reference
404 m_scheme = wxT("");
405 }
846978d7 406// else
dd65d8c8
RN
407 //relative uri with _possible_ relative path reference
408
409 return uricopy;
410}
411
412const wxChar* wxURI::ParseAuthority(const wxChar* uri)
413{
414 // authority = [ userinfo "@" ] host [ ":" port ]
846978d7 415 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
dd65d8c8
RN
416 {
417 uri += 2;
418
419 uri = ParseUser(uri);
420 uri = ParseServer(uri);
421 return ParsePort(uri);
422 }
423
424 return uri;
425}
426
427const wxChar* wxURI::ParseUser(const wxChar* uri)
428{
429 wxASSERT(uri != NULL);
430
431 //copy of the uri - used for figuring out
432 //length of each component
433 const wxChar* uricopy = uri;
434
435 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
846978d7 436 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 437 {
846978d7 438 if(IsUnreserved(*uri) || IsEscape(uri) ||
ce321570 439 IsSubDelim(*uri) || *uri == wxT(':'))
dd65d8c8
RN
440 m_user += *uri++;
441 else
442 Escape(m_user, *uri++);
443 }
444
ce321570 445 if(*uri == wxT('@'))
dd65d8c8
RN
446 {
447 //valid userinfo
448 m_fields |= wxURI_USER;
449
450 uricopy = ++uri;
451 }
452 else
453 m_user = wxT("");
454
455 return uricopy;
456}
457
458const wxChar* wxURI::ParseServer(const wxChar* uri)
459{
460 wxASSERT(uri != NULL);
461
462 //copy of the uri - used for figuring out
463 //length of each component
464 const wxChar* uricopy = uri;
465
466 // host = IP-literal / IPv4address / reg-name
467 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
ce321570 468 if (*uri == wxT('['))
dd65d8c8 469 {
e854db32
RN
470 ++uri; //some compilers don't support *&ing a ++*
471 if (ParseIPv6address(uri) && *uri == wxT(']'))
dd65d8c8
RN
472 {
473 ++uri;
474 m_hostType = wxURI_IPV6ADDRESS;
846978d7 475
dd65d8c8 476 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 477 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
478 theBuffer.SetLength(uri-uricopy);
479 }
480 else
481 {
482 uri = uricopy;
483
e854db32
RN
484 ++uri; //some compilers don't support *&ing a ++*
485 if (ParseIPvFuture(uri) && *uri == wxT(']'))
dd65d8c8
RN
486 {
487 ++uri;
846978d7
WS
488 m_hostType = wxURI_IPVFUTURE;
489
dd65d8c8 490 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 491 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
492 theBuffer.SetLength(uri-uricopy);
493 }
846978d7 494 else
dd65d8c8
RN
495 uri = uricopy;
496 }
497 }
846978d7 498 else
dd65d8c8
RN
499 {
500 if (ParseIPv4address(uri))
501 {
502 m_hostType = wxURI_IPV4ADDRESS;
503
504 wxStringBufferLength theBuffer(m_server, uri - uricopy);
2c09fb3b 505 wxTmemcpy(theBuffer, uricopy, uri-uricopy);
dd65d8c8
RN
506 theBuffer.SetLength(uri-uricopy);
507 }
846978d7 508 else
dd65d8c8
RN
509 uri = uricopy;
510 }
511
512 if(m_hostType == wxURI_REGNAME)
513 {
514 uri = uricopy;
515 // reg-name = *( unreserved / pct-encoded / sub-delims )
846978d7 516 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
517 {
518 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
519 m_server += *uri++;
520 else
521 Escape(m_server, *uri++);
846978d7 522 }
dd65d8c8
RN
523 }
524
525 //mark the server as valid
526 m_fields |= wxURI_SERVER;
527
528 return uri;
529}
530
846978d7 531
dd65d8c8
RN
532const wxChar* wxURI::ParsePort(const wxChar* uri)
533{
534 wxASSERT(uri != NULL);
535
536 // port = *DIGIT
ce321570 537 if(*uri == wxT(':'))
dd65d8c8
RN
538 {
539 ++uri;
846978d7 540 while(IsDigit(*uri))
dd65d8c8
RN
541 {
542 m_port += *uri++;
846978d7 543 }
dd65d8c8
RN
544
545 //mark the port as valid
546 m_fields |= wxURI_PORT;
547 }
548
549 return uri;
550}
551
8404931e 552const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
dd65d8c8
RN
553{
554 wxASSERT(uri != NULL);
555
556 //copy of the uri - used for figuring out
557 //length of each component
558 const wxChar* uricopy = uri;
559
560 /// hier-part = "//" authority path-abempty
561 /// / path-absolute
562 /// / path-rootless
563 /// / path-empty
564 ///
565 /// relative-part = "//" authority path-abempty
566 /// / path-absolute
567 /// / path-noscheme
568 /// / path-empty
569 ///
570 /// path-abempty = *( "/" segment )
571 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
572 /// path-noscheme = segment-nz-nc *( "/" segment )
573 /// path-rootless = segment-nz *( "/" segment )
574 /// path-empty = 0<pchar>
575 ///
576 /// segment = *pchar
577 /// segment-nz = 1*pchar
578 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
579 /// ; non-zero-length segment without any colon ":"
580 ///
581 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
ce321570 582 if (*uri == wxT('/'))
dd65d8c8
RN
583 {
584 m_path += *uri++;
585
846978d7
WS
586 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
587 {
dd65d8c8 588 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 589 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
590 m_path += *uri++;
591 else
592 Escape(m_path, *uri++);
dd65d8c8
RN
593 }
594
595 if (bNormalize)
596 {
597 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 598#if wxUSE_STL
2c09fb3b 599 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 600#endif
dd65d8c8
RN
601 Normalize(theBuffer, true);
602 theBuffer.SetLength(wxStrlen(theBuffer));
603 }
604 //mark the path as valid
605 m_fields |= wxURI_PATH;
606 }
607 else if(*uri) //Relative path
608 {
609 if (bReference)
610 {
611 //no colon allowed
846978d7 612 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
613 {
614 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 615 *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
616 m_path += *uri++;
617 else
618 Escape(m_path, *uri++);
dd65d8c8 619 }
846978d7 620 }
dd65d8c8
RN
621 else
622 {
846978d7 623 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
624 {
625 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 626 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
627 m_path += *uri++;
628 else
629 Escape(m_path, *uri++);
dd65d8c8
RN
630 }
631 }
632
633 if (uri != uricopy)
846978d7 634 {
dd65d8c8
RN
635 if (bNormalize)
636 {
637 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21 638#if wxUSE_STL
2c09fb3b 639 wxTmemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
d21d3f21 640#endif
dd65d8c8
RN
641 Normalize(theBuffer);
642 theBuffer.SetLength(wxStrlen(theBuffer));
643 }
644
645 //mark the path as valid
646 m_fields |= wxURI_PATH;
647 }
648 }
649
650 return uri;
651}
652
653
654const wxChar* wxURI::ParseQuery(const wxChar* uri)
655{
656 wxASSERT(uri != NULL);
657
658 // query = *( pchar / "/" / "?" )
ce321570 659 if (*uri == wxT('?'))
dd65d8c8
RN
660 {
661 ++uri;
ce321570 662 while(*uri && *uri != wxT('#'))
dd65d8c8
RN
663 {
664 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 665 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 666 m_query += *uri++;
dd65d8c8 667 else
846978d7 668 Escape(m_query, *uri++);
dd65d8c8
RN
669 }
670
671 //mark the server as valid
672 m_fields |= wxURI_QUERY;
673 }
674
675 return uri;
676}
677
678
679const wxChar* wxURI::ParseFragment(const wxChar* uri)
680{
681 wxASSERT(uri != NULL);
682
683 // fragment = *( pchar / "/" / "?" )
ce321570 684 if (*uri == wxT('#'))
dd65d8c8
RN
685 {
686 ++uri;
687 while(*uri)
688 {
689 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 690 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 691 m_fragment += *uri++;
dd65d8c8 692 else
846978d7 693 Escape(m_fragment, *uri++);
dd65d8c8
RN
694 }
695
696 //mark the server as valid
697 m_fields |= wxURI_FRAGMENT;
698 }
699
700 return uri;
701}
702
703// ---------------------------------------------------------------------------
ce321570 704// Resolve
dd65d8c8 705//
ce321570 706// Builds missing components of this uri from a base uri
dd65d8c8 707//
ce321570
RN
708// A version of the algorithm outlined in the RFC is used here
709// (it is shown in comments)
710//
846978d7 711// Note that an empty URI inherits all components
dd65d8c8
RN
712// ---------------------------------------------------------------------------
713
8404931e 714void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 715{
846978d7 716 wxASSERT_MSG(!base.IsReference(),
dd65d8c8
RN
717 wxT("wxURI to inherit from must not be a reference!"));
718
ce321570 719 // If we arn't being strict, enable the older (pre-RFC2396)
dd65d8c8
RN
720 // loophole that allows this uri to inherit other
721 // properties from the base uri - even if the scheme
722 // is defined
8404931e
VZ
723 if ( !(flags & wxURI_STRICT) &&
724 HasScheme() && base.HasScheme() &&
725 m_scheme == base.m_scheme )
846978d7 726 {
dd65d8c8
RN
727 m_fields -= wxURI_SCHEME;
728 }
729
730
731 // Do nothing if this is an absolute wxURI
732 // if defined(R.scheme) then
733 // T.scheme = R.scheme;
734 // T.authority = R.authority;
735 // T.path = remove_dot_segments(R.path);
736 // T.query = R.query;
737 if (HasScheme())
738 {
739 return;
740 }
741
ea4daac4 742 //No scheme - inherit
dd65d8c8
RN
743 m_scheme = base.m_scheme;
744 m_fields |= wxURI_SCHEME;
745
746 // All we need to do for relative URIs with an
747 // authority component is just inherit the scheme
748 // if defined(R.authority) then
749 // T.authority = R.authority;
750 // T.path = remove_dot_segments(R.path);
751 // T.query = R.query;
752 if (HasServer())
753 {
754 return;
755 }
756
757 //No authority - inherit
758 if (base.HasUser())
759 {
760 m_user = base.m_user;
761 m_fields |= wxURI_USER;
762 }
846978d7 763
dd65d8c8
RN
764 m_server = base.m_server;
765 m_hostType = base.m_hostType;
766 m_fields |= wxURI_SERVER;
846978d7 767
dd65d8c8
RN
768 if (base.HasPort())
769 {
770 m_port = base.m_port;
771 m_fields |= wxURI_PORT;
772 }
846978d7 773
dd65d8c8
RN
774
775 // Simple path inheritance from base
776 if (!HasPath())
777 {
778 // T.path = Base.path;
779 m_path = base.m_path;
780 m_fields |= wxURI_PATH;
846978d7 781
dd65d8c8
RN
782
783 // if defined(R.query) then
784 // T.query = R.query;
785 // else
786 // T.query = Base.query;
787 // endif;
788 if (!HasQuery())
789 {
790 m_query = base.m_query;
791 m_fields |= wxURI_QUERY;
792 }
793 }
794 else
795 {
796 // if (R.path starts-with "/") then
797 // T.path = remove_dot_segments(R.path);
798 // else
799 // T.path = merge(Base.path, R.path);
800 // T.path = remove_dot_segments(T.path);
801 // endif;
802 // T.query = R.query;
ce321570 803 if (m_path[0u] != wxT('/'))
dd65d8c8 804 {
ea4daac4 805 //Merge paths
dd65d8c8
RN
806 const wxChar* op = m_path.c_str();
807 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
808
809 //not a ending directory? move up
ce321570 810 if (base.m_path[0] && *(bp-1) != wxT('/'))
dd65d8c8
RN
811 UpTree(base.m_path, bp);
812
813 //normalize directories
846978d7 814 while(*op == wxT('.') && *(op+1) == wxT('.') &&
ce321570 815 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
dd65d8c8
RN
816 {
817 UpTree(base.m_path, bp);
818
819 if (*(op+2) == '\0')
820 op += 2;
821 else
822 op += 3;
823 }
824
846978d7 825 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
ba5a47ae 826 m_path.substr((op - m_path.c_str()), m_path.Length());
dd65d8c8
RN
827 }
828 }
ce321570 829
846978d7 830 //T.fragment = R.fragment;
dd65d8c8
RN
831}
832
833// ---------------------------------------------------------------------------
846978d7 834// UpTree
dd65d8c8 835//
ce321570 836// Moves a URI path up a directory
dd65d8c8
RN
837// ---------------------------------------------------------------------------
838
ce321570 839//static
dd65d8c8
RN
840void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
841{
ce321570 842 if (uri != uristart && *(uri-1) == wxT('/'))
dd65d8c8
RN
843 {
844 uri -= 2;
845 }
846978d7 846
dd65d8c8
RN
847 for(;uri != uristart; --uri)
848 {
ce321570 849 if (*uri == wxT('/'))
dd65d8c8
RN
850 {
851 ++uri;
852 break;
853 }
854 }
855
856 //!!!TODO:HACK!!!//
ce321570 857 if (uri == uristart && *uri == wxT('/'))
dd65d8c8
RN
858 ++uri;
859 //!!!//
860}
861
ce321570
RN
862// ---------------------------------------------------------------------------
863// Normalize
864//
865// Normalizes directories in-place
866//
867// I.E. ./ and . are ignored
868//
869// ../ and .. are removed if a directory is before it, along
870// with that directory (leading .. and ../ are kept)
871// ---------------------------------------------------------------------------
872
873//static
8404931e 874void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
dd65d8c8
RN
875{
876 wxChar* cp = s;
877 wxChar* bp = s;
878
ce321570 879 if(s[0] == wxT('/'))
dd65d8c8
RN
880 ++bp;
881
882 while(*cp)
883 {
ce321570
RN
884 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
885 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
886 {
887 //. _or_ ./ - ignore
888 if (*(cp+1) == '\0')
889 cp += 1;
890 else
891 cp += 2;
892 }
846978d7 893 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
ce321570
RN
894 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
895 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
896 {
897 //.. _or_ ../ - go up the tree
898 if (s != bp)
899 {
900 UpTree((const wxChar*)bp, (const wxChar*&)s);
901
902 if (*(cp+2) == '\0')
903 cp += 2;
904 else
905 cp += 3;
906 }
907 else if (!bIgnoreLeads)
908
909 {
910 *bp++ = *cp++;
911 *bp++ = *cp++;
912 if (*cp)
913 *bp++ = *cp++;
914
915 s = bp;
916 }
917 else
918 {
919 if (*(cp+2) == '\0')
920 cp += 2;
921 else
922 cp += 3;
923 }
924 }
925 else
846978d7 926 *s++ = *cp++;
dd65d8c8
RN
927 }
928
929 *s = '\0';
930}
931
932// ---------------------------------------------------------------------------
ce321570
RN
933// ParseH16
934//
935// Parses 1 to 4 hex values. Returns true if the first character of the input
846978d7 936// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
937// the input string back to its original position on failure.
938// ---------------------------------------------------------------------------
939
940bool wxURI::ParseH16(const wxChar*& uri)
941{
942 // h16 = 1*4HEXDIG
943 if(!IsHex(*++uri))
944 return false;
945
946 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
947 ++uri;
948
949 return true;
950}
951
952// ---------------------------------------------------------------------------
953// ParseIPXXX
954//
846978d7
WS
955// Parses a certain version of an IP address and moves the input string past
956// it. Returns true if the input string contains the proper version of an ip
957// address. It is the caller's responsibility to move the input string back
ce321570 958// to its original position on failure.
dd65d8c8
RN
959// ---------------------------------------------------------------------------
960
961bool wxURI::ParseIPv4address(const wxChar*& uri)
962{
963 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
964 //
965 //dec-octet = DIGIT ; 0-9
966 // / %x31-39 DIGIT ; 10-99
967 // / "1" 2DIGIT ; 100-199
968 // / "2" %x30-34 DIGIT ; 200-249
969 // / "25" %x30-35 ; 250-255
970 size_t iIPv4 = 0;
971 if (IsDigit(*uri))
972 {
973 ++iIPv4;
974
846978d7 975
dd65d8c8
RN
976 //each ip part must be between 0-255 (dupe of version in for loop)
977 if( IsDigit(*++uri) && IsDigit(*++uri) &&
978 //100 or less (note !)
846978d7
WS
979 !( (*(uri-2) < wxT('2')) ||
980 //240 or less
981 (*(uri-2) == wxT('2') &&
ce321570 982 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
983 )
984 )
985 )
986 {
987 return false;
988 }
989
990 if(IsDigit(*uri))++uri;
991
992 //compilers should unroll this loop
993 for(; iIPv4 < 4; ++iIPv4)
994 {
ce321570 995 if (*uri != wxT('.') || !IsDigit(*++uri))
dd65d8c8
RN
996 break;
997
998 //each ip part must be between 0-255
999 if( IsDigit(*++uri) && IsDigit(*++uri) &&
1000 //100 or less (note !)
846978d7
WS
1001 !( (*(uri-2) < wxT('2')) ||
1002 //240 or less
1003 (*(uri-2) == wxT('2') &&
ce321570 1004 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1005 )
1006 )
1007 )
1008 {
1009 return false;
1010 }
1011 if(IsDigit(*uri))++uri;
1012 }
1013 }
1014 return iIPv4 == 4;
1015}
1016
dd65d8c8
RN
1017bool wxURI::ParseIPv6address(const wxChar*& uri)
1018{
1019 // IPv6address = 6( h16 ":" ) ls32
1020 // / "::" 5( h16 ":" ) ls32
1021 // / [ h16 ] "::" 4( h16 ":" ) ls32
1022 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1023 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1024 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1025 // / [ *4( h16 ":" ) h16 ] "::" ls32
1026 // / [ *5( h16 ":" ) h16 ] "::" h16
1027 // / [ *6( h16 ":" ) h16 ] "::"
1028
1029 size_t numPrefix = 0,
1030 maxPostfix;
1031
1032 bool bEndHex = false;
1033
1034 for( ; numPrefix < 6; ++numPrefix)
1035 {
1036 if(!ParseH16(uri))
1037 {
1038 --uri;
1039 bEndHex = true;
1040 break;
1041 }
846978d7 1042
ce321570 1043 if(*uri != wxT(':'))
dd65d8c8
RN
1044 {
1045 break;
1046 }
1047 }
1048
1049 if(!bEndHex && !ParseH16(uri))
1050 {
1051 --uri;
1052
1053 if (numPrefix)
1054 return false;
1055
ce321570 1056 if (*uri == wxT(':'))
dd65d8c8 1057 {
ce321570 1058 if (*++uri != wxT(':'))
dd65d8c8
RN
1059 return false;
1060
1061 maxPostfix = 5;
1062 }
1063 else
1064 maxPostfix = 6;
1065 }
1066 else
1067 {
ce321570 1068 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
dd65d8c8
RN
1069 {
1070 if (numPrefix != 6)
1071 return false;
1072
ce321570 1073 while (*--uri != wxT(':')) {}
dd65d8c8
RN
1074 ++uri;
1075
1076 const wxChar* uristart = uri;
1077 //parse ls32
1078 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1079 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1080 return true;
1081
1082 uri = uristart;
1083
1084 if (ParseIPv4address(uri))
1085 return true;
1086 else
1087 return false;
1088 }
1089 else
1090 {
1091 uri += 2;
846978d7 1092
dd65d8c8
RN
1093 if (numPrefix > 3)
1094 maxPostfix = 0;
1095 else
1096 maxPostfix = 4 - numPrefix;
1097 }
1098 }
1099
1100 bool bAllowAltEnding = maxPostfix == 0;
1101
1102 for(; maxPostfix != 0; --maxPostfix)
1103 {
ce321570 1104 if(!ParseH16(uri) || *uri != wxT(':'))
dd65d8c8
RN
1105 return false;
1106 }
1107
1108 if(numPrefix <= 4)
1109 {
1110 const wxChar* uristart = uri;
1111 //parse ls32
1112 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1113 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1114 return true;
1115
1116 uri = uristart;
1117
1118 if (ParseIPv4address(uri))
1119 return true;
1120
1121 uri = uristart;
846978d7 1122
dd65d8c8
RN
1123 if (!bAllowAltEnding)
1124 return false;
1125 }
1126
1127 if(numPrefix <= 5 && ParseH16(uri))
1128 return true;
1129
1130 return true;
1131}
1132
1133bool wxURI::ParseIPvFuture(const wxChar*& uri)
1134{
1135 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
ce321570 1136 if (*++uri != wxT('v') || !IsHex(*++uri))
dd65d8c8
RN
1137 return false;
1138
1139 while (IsHex(*++uri)) {}
1140
ce321570 1141 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
dd65d8c8
RN
1142 return false;
1143
ce321570 1144 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
dd65d8c8
RN
1145
1146 return true;
1147}
1148
1149
1150// ---------------------------------------------------------------------------
ce321570
RN
1151// CharToHex
1152//
846978d7 1153// Converts a character into a numeric hexadecimal value, or 0 if the
ce321570 1154// passed in character is not a valid hex character
dd65d8c8
RN
1155// ---------------------------------------------------------------------------
1156
ce321570 1157//static
409a7ba7 1158wxChar wxURI::CharToHex(const wxChar& c)
dd65d8c8 1159{
1676a194
WS
1160 if ((c >= wxT('A')) && (c <= wxT('Z'))) return wxChar(c - wxT('A') + 0x0A);
1161 if ((c >= wxT('a')) && (c <= wxT('z'))) return wxChar(c - wxT('a') + 0x0a);
1162 if ((c >= wxT('0')) && (c <= wxT('9'))) return wxChar(c - wxT('0') + 0x00);
dd65d8c8 1163
846978d7 1164 return 0;
dd65d8c8
RN
1165}
1166
ce321570
RN
1167// ---------------------------------------------------------------------------
1168// IsXXX
1169//
1170// Returns true if the passed in character meets the criteria of the method
1171// ---------------------------------------------------------------------------
1172
dd65d8c8
RN
1173//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1174bool wxURI::IsUnreserved (const wxChar& c)
846978d7 1175{ return IsAlpha(c) || IsDigit(c) ||
ce321570
RN
1176 c == wxT('-') ||
1177 c == wxT('.') ||
1178 c == wxT('_') ||
1179 c == wxT('~') //tilde
846978d7 1180 ;
dd65d8c8
RN
1181}
1182
1183bool wxURI::IsReserved (const wxChar& c)
846978d7 1184{
dd65d8c8
RN
1185 return IsGenDelim(c) || IsSubDelim(c);
1186}
1187
1188//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1189bool wxURI::IsGenDelim (const wxChar& c)
1190{
ce321570
RN
1191 return c == wxT(':') ||
1192 c == wxT('/') ||
1193 c == wxT('?') ||
1194 c == wxT('#') ||
1195 c == wxT('[') ||
1196 c == wxT(']') ||
1197 c == wxT('@');
dd65d8c8
RN
1198}
1199
1200//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1201//! / "*" / "+" / "," / ";" / "="
1202bool wxURI::IsSubDelim (const wxChar& c)
1203{
ce321570
RN
1204 return c == wxT('!') ||
1205 c == wxT('$') ||
1206 c == wxT('&') ||
1207 c == wxT('\'') ||
1208 c == wxT('(') ||
1209 c == wxT(')') ||
1210 c == wxT('*') ||
1211 c == wxT('+') ||
1212 c == wxT(',') ||
1213 c == wxT(';') ||
846978d7 1214 c == wxT('=')
dd65d8c8
RN
1215 ;
1216}
1217
1218bool wxURI::IsHex(const wxChar& c)
ce321570 1219{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
dd65d8c8
RN
1220
1221bool wxURI::IsAlpha(const wxChar& c)
ce321570 1222{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
dd65d8c8
RN
1223
1224bool wxURI::IsDigit(const wxChar& c)
ce321570 1225{ return c >= wxT('0') && c <= wxT('9'); }
dd65d8c8
RN
1226
1227
1228// ---------------------------------------------------------------------------
1229//
1230// wxURL Compatibility
1231//
dd65d8c8
RN
1232// ---------------------------------------------------------------------------
1233
1234#if wxUSE_URL
1235
86470d43
RN
1236#if WXWIN_COMPATIBILITY_2_4
1237
dd65d8c8
RN
1238#include "wx/url.h"
1239
a6fb8636
WS
1240wxString wxURL::GetProtocolName() const
1241{
1242 return m_scheme;
1243}
1244
1245wxString wxURL::GetHostName() const
1246{
1247 return m_server;
1248}
1249
1250wxString wxURL::GetPath() const
1251{
1252 return m_path;
1253}
1254
997ba01b
RN
1255//Note that this old code really doesn't convert to a URI that well and looks
1256//more like a dirty hack than anything else...
1257
1258wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
dd65d8c8 1259{
997ba01b
RN
1260 wxString out_str;
1261 wxString hexa_code;
1262 size_t i;
1263
1264 for (i = 0; i < uri.Len(); i++)
1265 {
1266 wxChar c = uri.GetChar(i);
1267
1268 if (c == wxT(' '))
1269 {
1270 // GRG, Apr/2000: changed to "%20" instead of '+'
1271
1272 out_str += wxT("%20");
1273 }
1274 else
1275 {
1276 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1277 //
1278 // - Alphanumeric characters are never escaped
1279 // - Unreserved marks are never escaped
1280 // - Delimiters must be escaped if they appear within a component
1281 // but not if they are used to separate components. Here we have
1282 // no clear way to distinguish between these two cases, so they
1283 // are escaped unless they are passed in the 'delims' parameter
1284 // (allowed delimiters).
1285
1286 static const wxChar marks[] = wxT("-_.!~*()'");
1287
1288 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1289 {
1290 hexa_code.Printf(wxT("%%%02X"), c);
1291 out_str += hexa_code;
1292 }
1293 else
1294 {
1295 out_str += c;
1296 }
1297 }
1298 }
1299
1300 return out_str;
dd65d8c8
RN
1301}
1302
1303wxString wxURL::ConvertFromURI(const wxString& uri)
1304{
86470d43 1305 return wxURI::Unescape(uri);
dd65d8c8
RN
1306}
1307
86470d43
RN
1308#endif //WXWIN_COMPATIBILITY_2_4
1309
dd65d8c8
RN
1310#endif //wxUSE_URL
1311
1312//end of uri.cpp
1313
1314
1315