]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
We need a char, not wxChar.
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
3// Purpose: Implementation of a uri parser
4// Author: Ryan Norton
5// Created: 10/26/04
6// RCS-ID: $Id$
7// Copyright: (c) 2004 Ryan Norton
8// Licence: wxWindows
9/////////////////////////////////////////////////////////////////////////////
10
11// ===========================================================================
12// declarations
13// ===========================================================================
14
15// ---------------------------------------------------------------------------
16// headers
17// ---------------------------------------------------------------------------
18
19#if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21#endif
22
23// For compilers that support precompilation, includes "wx.h".
24#include "wx/wxprec.h"
25
26#ifdef __BORLANDC__
27 #pragma hdrstop
28#endif
29
30#include "wx/uri.h"
31
32// ---------------------------------------------------------------------------
33// definitions
34// ---------------------------------------------------------------------------
35
36IMPLEMENT_CLASS(wxURI, wxObject);
37
38// ===========================================================================
39// implementation
40// ===========================================================================
41
42// ---------------------------------------------------------------------------
43// utilities
44// ---------------------------------------------------------------------------
45
46// ---------------------------------------------------------------------------
47//
48// wxURI
49//
50// ---------------------------------------------------------------------------
51
52// ---------------------------------------------------------------------------
53// Constructors
54// ---------------------------------------------------------------------------
55
56wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57{
58}
846978d7 59
dd65d8c8
RN
60wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61{
62 Create(uri);
63}
64
65wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
66{
b60b2ec8 67 Assign(uri);
dd65d8c8
RN
68}
69
70// ---------------------------------------------------------------------------
71// Destructor and cleanup
72// ---------------------------------------------------------------------------
73
74wxURI::~wxURI()
75{
76 Clear();
77}
78
79void wxURI::Clear()
80{
81 m_scheme = m_user = m_server = m_port = m_path =
82 m_query = m_fragment = wxT("");
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87}
88
89// ---------------------------------------------------------------------------
90// Create
91//
846978d7 92// This creates the URI - all we do here is call the main parsing method
dd65d8c8
RN
93// ---------------------------------------------------------------------------
94
86470d43 95const wxChar* wxURI::Create(const wxString& uri)
846978d7 96{
dd65d8c8
RN
97 if (m_fields)
98 Clear();
99
846978d7
WS
100 return Parse(uri);
101}
dd65d8c8
RN
102
103// ---------------------------------------------------------------------------
ce321570 104// Escape Methods
dd65d8c8 105//
846978d7 106// TranslateEscape unencodes a 3 character URL escape sequence
ce321570 107//
dd65d8c8 108// Escape encodes an invalid URI character into a 3 character sequence
ce321570 109//
dd65d8c8
RN
110// IsEscape determines if the input string contains an escape sequence,
111// if it does, then it moves the input string past the escape sequence
ce321570
RN
112//
113// Unescape unencodes all 3 character URL escape sequences in a wxString
dd65d8c8
RN
114// ---------------------------------------------------------------------------
115
86470d43 116wxChar wxURI::TranslateEscape(const wxChar* s)
dd65d8c8
RN
117{
118 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
8404931e 119
409a7ba7 120 return CharToHex(*s) * 0x10 + CharToHex(*++s);
dd65d8c8
RN
121}
122
86470d43
RN
123wxString wxURI::Unescape(const wxString& uri)
124{
125 wxString new_uri;
126
127 for(size_t i = 0; i < uri.length(); ++i)
128 {
129 if (uri[i] == wxT('%'))
130 {
131 new_uri += wxURI::TranslateEscape( &(uri.c_str()[i+1]) );
132 i += 2;
133 }
134 }
135
136 return new_uri;
137}
138
dd65d8c8
RN
139void wxURI::Escape(wxString& s, const wxChar& c)
140{
141 const wxChar* hdig = wxT("0123456789abcdef");
ce321570 142 s += wxT('%');
dd65d8c8 143 s += hdig[(c >> 4) & 15];
846978d7 144 s += hdig[c & 15];
dd65d8c8
RN
145}
146
147bool wxURI::IsEscape(const wxChar*& uri)
148{
ce321570
RN
149 // pct-encoded = "%" HEXDIG HEXDIG
150 if(*uri == wxT('%') && IsHex(*(uri+1)) && IsHex(*(uri+2)))
dd65d8c8
RN
151 {
152 uri += 3;
153 return true;
154 }
155 else
156 return false;
157}
158
159// ---------------------------------------------------------------------------
86470d43 160// BuildURI
dd65d8c8 161//
846978d7 162// BuildURI() builds the entire URI into a useable
dd65d8c8 163// representation, including proper identification characters such as slashes
ce321570
RN
164//
165// BuildUnescapedURI() does the same thing as BuildURI(), only it unescapes
166// the components that accept escape sequences
dd65d8c8
RN
167// ---------------------------------------------------------------------------
168
86470d43 169wxString wxURI::BuildURI() const
846978d7 170{
dd65d8c8
RN
171 wxString ret;
172
173 if (HasScheme())
174 ret = ret + m_scheme + wxT(":");
175
176 if (HasServer())
177 {
178 ret += wxT("//");
179
180 if (HasUser())
181 ret = ret + m_user + wxT("@");
182
183 ret += m_server;
184
185 if (HasPort())
186 ret = ret + wxT(":") + m_port;
187 }
188
189 ret += m_path;
190
191 if (HasQuery())
192 ret = ret + wxT("?") + m_query;
193
194 if (HasFragment())
195 ret = ret + wxT("#") + m_fragment;
196
197 return ret;
198}
199
86470d43
RN
200wxString wxURI::BuildUnescapedURI() const
201{
202 wxString ret;
203
204 if (HasScheme())
205 ret = ret + m_scheme + wxT(":");
206
207 if (HasServer())
208 {
209 ret += wxT("//");
210
211 if (HasUser())
212 ret = ret + wxURI::Unescape(m_user) + wxT("@");
213
214 if (m_hostType == wxURI_REGNAME)
215 ret += wxURI::Unescape(m_server);
216 else
217 ret += m_server;
218
219 if (HasPort())
220 ret = ret + wxT(":") + m_port;
221 }
222
223 ret += wxURI::Unescape(m_path);
224
225 if (HasQuery())
226 ret = ret + wxT("?") + wxURI::Unescape(m_query);
227
228 if (HasFragment())
229 ret = ret + wxT("#") + wxURI::Unescape(m_fragment);
230
231 return ret;
232}
233
dd65d8c8 234// ---------------------------------------------------------------------------
ce321570 235// Assignment
dd65d8c8
RN
236// ---------------------------------------------------------------------------
237
b60b2ec8
RN
238wxURI& wxURI::Assign(const wxURI& uri)
239{
240 //assign fields
241 m_fields = uri.m_fields;
242
243 //ref over components
244 m_scheme = uri.m_scheme;
245 m_user = uri.m_user;
246 m_server = uri.m_server;
247 m_hostType = uri.m_hostType;
248 m_port = uri.m_port;
249 m_path = uri.m_path;
250 m_query = uri.m_query;
251 m_fragment = uri.m_fragment;
dd65d8c8
RN
252
253 return *this;
254}
255
ce321570
RN
256wxURI& wxURI::operator = (const wxURI& uri)
257{
258 return Assign(uri);
259}
260
b60b2ec8 261wxURI& wxURI::operator = (const wxString& string)
846978d7 262{
dd65d8c8
RN
263 Create(string);
264 return *this;
265}
266
ce321570
RN
267// ---------------------------------------------------------------------------
268// Comparison
269// ---------------------------------------------------------------------------
270
dd65d8c8 271bool wxURI::operator == (const wxURI& uri) const
846978d7 272{
dd65d8c8
RN
273 if (HasScheme())
274 {
275 if(m_scheme != uri.m_scheme)
276 return false;
277 }
278 else if (uri.HasScheme())
279 return false;
280
281
282 if (HasServer())
283 {
284 if (HasUser())
285 {
286 if (m_user != uri.m_user)
287 return false;
288 }
289 else if (uri.HasUser())
290 return false;
291
292 if (m_server != uri.m_server ||
293 m_hostType != uri.m_hostType)
294 return false;
295
296 if (HasPort())
297 {
298 if(m_port != uri.m_port)
299 return false;
300 }
301 else if (uri.HasPort())
302 return false;
303 }
304 else if (uri.HasServer())
305 return false;
306
307
308 if (HasPath())
309 {
310 if(m_path != uri.m_path)
311 return false;
312 }
313 else if (uri.HasPath())
314 return false;
315
316 if (HasQuery())
317 {
318 if (m_query != uri.m_query)
319 return false;
320 }
321 else if (uri.HasQuery())
322 return false;
323
324 if (HasFragment())
325 {
326 if (m_fragment != uri.m_fragment)
327 return false;
328 }
329 else if (uri.HasFragment())
330 return false;
331
332 return true;
333}
334
335// ---------------------------------------------------------------------------
336// IsReference
337//
338// if there is no authority or scheme, it is a reference
339// ---------------------------------------------------------------------------
340
341bool wxURI::IsReference() const
342{ return !HasScheme() || !HasServer(); }
343
344// ---------------------------------------------------------------------------
345// Parse
346//
347// Master URI parsing method. Just calls the individual parsing methods
348//
349// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
350// URI-reference = URI / relative-ref
351// ---------------------------------------------------------------------------
352
353const wxChar* wxURI::Parse(const wxChar* uri)
354{
355 uri = ParseScheme(uri);
356 uri = ParseAuthority(uri);
357 uri = ParsePath(uri);
358 uri = ParseQuery(uri);
359 return ParseFragment(uri);
360}
361
362// ---------------------------------------------------------------------------
363// ParseXXX
364//
365// Individual parsers for each URI component
366// ---------------------------------------------------------------------------
367
368const wxChar* wxURI::ParseScheme(const wxChar* uri)
369{
370 wxASSERT(uri != NULL);
371
372 //copy of the uri - used for figuring out
373 //length of each component
374 const wxChar* uricopy = uri;
375
376 //Does the uri have a scheme (first character alpha)?
377 if (IsAlpha(*uri))
378 {
379 m_scheme += *uri++;
380
381 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 382 while (IsAlpha(*uri) || IsDigit(*uri) ||
ce321570
RN
383 *uri == wxT('+') ||
384 *uri == wxT('-') ||
846978d7
WS
385 *uri == wxT('.'))
386 {
387 m_scheme += *uri++;
dd65d8c8
RN
388 }
389
390 //valid scheme?
ce321570 391 if (*uri == wxT(':'))
846978d7 392 {
dd65d8c8
RN
393 //mark the scheme as valid
394 m_fields |= wxURI_SCHEME;
395
396 //move reference point up to input buffer
397 uricopy = ++uri;
398 }
846978d7 399 else
dd65d8c8
RN
400 //relative uri with relative path reference
401 m_scheme = wxT("");
402 }
846978d7 403// else
dd65d8c8
RN
404 //relative uri with _possible_ relative path reference
405
406 return uricopy;
407}
408
409const wxChar* wxURI::ParseAuthority(const wxChar* uri)
410{
411 // authority = [ userinfo "@" ] host [ ":" port ]
846978d7 412 if (*uri == wxT('/') && *(uri+1) == wxT('/'))
dd65d8c8
RN
413 {
414 uri += 2;
415
416 uri = ParseUser(uri);
417 uri = ParseServer(uri);
418 return ParsePort(uri);
419 }
420
421 return uri;
422}
423
424const wxChar* wxURI::ParseUser(const wxChar* uri)
425{
426 wxASSERT(uri != NULL);
427
428 //copy of the uri - used for figuring out
429 //length of each component
430 const wxChar* uricopy = uri;
431
432 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
846978d7 433 while(*uri && *uri != wxT('@') && *uri != wxT('/') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8 434 {
846978d7 435 if(IsUnreserved(*uri) || IsEscape(uri) ||
ce321570 436 IsSubDelim(*uri) || *uri == wxT(':'))
dd65d8c8
RN
437 m_user += *uri++;
438 else
439 Escape(m_user, *uri++);
440 }
441
ce321570 442 if(*uri == wxT('@'))
dd65d8c8
RN
443 {
444 //valid userinfo
445 m_fields |= wxURI_USER;
446
447 uricopy = ++uri;
448 }
449 else
450 m_user = wxT("");
451
452 return uricopy;
453}
454
455const wxChar* wxURI::ParseServer(const wxChar* uri)
456{
457 wxASSERT(uri != NULL);
458
459 //copy of the uri - used for figuring out
460 //length of each component
461 const wxChar* uricopy = uri;
462
463 // host = IP-literal / IPv4address / reg-name
464 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
ce321570 465 if (*uri == wxT('['))
dd65d8c8 466 {
ce321570 467 if (ParseIPv6address(++uri) && *uri == wxT(']'))
dd65d8c8
RN
468 {
469 ++uri;
470 m_hostType = wxURI_IPV6ADDRESS;
846978d7 471
dd65d8c8
RN
472 wxStringBufferLength theBuffer(m_server, uri - uricopy);
473 wxMemcpy(theBuffer, uricopy, uri-uricopy);
474 theBuffer.SetLength(uri-uricopy);
475 }
476 else
477 {
478 uri = uricopy;
479
ce321570 480 if (ParseIPvFuture(++uri) && *uri == wxT(']'))
dd65d8c8
RN
481 {
482 ++uri;
846978d7
WS
483 m_hostType = wxURI_IPVFUTURE;
484
dd65d8c8
RN
485 wxStringBufferLength theBuffer(m_server, uri - uricopy);
486 wxMemcpy(theBuffer, uricopy, uri-uricopy);
487 theBuffer.SetLength(uri-uricopy);
488 }
846978d7 489 else
dd65d8c8
RN
490 uri = uricopy;
491 }
492 }
846978d7 493 else
dd65d8c8
RN
494 {
495 if (ParseIPv4address(uri))
496 {
497 m_hostType = wxURI_IPV4ADDRESS;
498
499 wxStringBufferLength theBuffer(m_server, uri - uricopy);
500 wxMemcpy(theBuffer, uricopy, uri-uricopy);
501 theBuffer.SetLength(uri-uricopy);
502 }
846978d7 503 else
dd65d8c8
RN
504 uri = uricopy;
505 }
506
507 if(m_hostType == wxURI_REGNAME)
508 {
509 uri = uricopy;
510 // reg-name = *( unreserved / pct-encoded / sub-delims )
846978d7 511 while(*uri && *uri != wxT('/') && *uri != wxT(':') && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
512 {
513 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
514 m_server += *uri++;
515 else
516 Escape(m_server, *uri++);
846978d7 517 }
dd65d8c8
RN
518 }
519
520 //mark the server as valid
521 m_fields |= wxURI_SERVER;
522
523 return uri;
524}
525
846978d7 526
dd65d8c8
RN
527const wxChar* wxURI::ParsePort(const wxChar* uri)
528{
529 wxASSERT(uri != NULL);
530
531 // port = *DIGIT
ce321570 532 if(*uri == wxT(':'))
dd65d8c8
RN
533 {
534 ++uri;
846978d7 535 while(IsDigit(*uri))
dd65d8c8
RN
536 {
537 m_port += *uri++;
846978d7 538 }
dd65d8c8
RN
539
540 //mark the port as valid
541 m_fields |= wxURI_PORT;
542 }
543
544 return uri;
545}
546
8404931e 547const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
dd65d8c8
RN
548{
549 wxASSERT(uri != NULL);
550
551 //copy of the uri - used for figuring out
552 //length of each component
553 const wxChar* uricopy = uri;
554
555 /// hier-part = "//" authority path-abempty
556 /// / path-absolute
557 /// / path-rootless
558 /// / path-empty
559 ///
560 /// relative-part = "//" authority path-abempty
561 /// / path-absolute
562 /// / path-noscheme
563 /// / path-empty
564 ///
565 /// path-abempty = *( "/" segment )
566 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
567 /// path-noscheme = segment-nz-nc *( "/" segment )
568 /// path-rootless = segment-nz *( "/" segment )
569 /// path-empty = 0<pchar>
570 ///
571 /// segment = *pchar
572 /// segment-nz = 1*pchar
573 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
574 /// ; non-zero-length segment without any colon ":"
575 ///
576 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
ce321570 577 if (*uri == wxT('/'))
dd65d8c8
RN
578 {
579 m_path += *uri++;
580
846978d7
WS
581 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
582 {
dd65d8c8 583 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 584 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
585 m_path += *uri++;
586 else
587 Escape(m_path, *uri++);
dd65d8c8
RN
588 }
589
590 if (bNormalize)
591 {
592 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21
RN
593#if wxUSE_STL
594 wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
595#endif
dd65d8c8
RN
596 Normalize(theBuffer, true);
597 theBuffer.SetLength(wxStrlen(theBuffer));
598 }
599 //mark the path as valid
600 m_fields |= wxURI_PATH;
601 }
602 else if(*uri) //Relative path
603 {
604 if (bReference)
605 {
606 //no colon allowed
846978d7 607 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
608 {
609 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 610 *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
611 m_path += *uri++;
612 else
613 Escape(m_path, *uri++);
dd65d8c8 614 }
846978d7 615 }
dd65d8c8
RN
616 else
617 {
846978d7 618 while(*uri && *uri != wxT('#') && *uri != wxT('?'))
dd65d8c8
RN
619 {
620 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 621 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/'))
846978d7
WS
622 m_path += *uri++;
623 else
624 Escape(m_path, *uri++);
dd65d8c8
RN
625 }
626 }
627
628 if (uri != uricopy)
846978d7 629 {
dd65d8c8
RN
630 if (bNormalize)
631 {
632 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
d21d3f21
RN
633#if wxUSE_STL
634 wxMemcpy(theBuffer, m_path.c_str(), m_path.length()+1);
635#endif
dd65d8c8
RN
636 Normalize(theBuffer);
637 theBuffer.SetLength(wxStrlen(theBuffer));
638 }
639
640 //mark the path as valid
641 m_fields |= wxURI_PATH;
642 }
643 }
644
645 return uri;
646}
647
648
649const wxChar* wxURI::ParseQuery(const wxChar* uri)
650{
651 wxASSERT(uri != NULL);
652
653 // query = *( pchar / "/" / "?" )
ce321570 654 if (*uri == wxT('?'))
dd65d8c8
RN
655 {
656 ++uri;
ce321570 657 while(*uri && *uri != wxT('#'))
dd65d8c8
RN
658 {
659 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 660 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 661 m_query += *uri++;
dd65d8c8 662 else
846978d7 663 Escape(m_query, *uri++);
dd65d8c8
RN
664 }
665
666 //mark the server as valid
667 m_fields |= wxURI_QUERY;
668 }
669
670 return uri;
671}
672
673
674const wxChar* wxURI::ParseFragment(const wxChar* uri)
675{
676 wxASSERT(uri != NULL);
677
678 // fragment = *( pchar / "/" / "?" )
ce321570 679 if (*uri == wxT('#'))
dd65d8c8
RN
680 {
681 ++uri;
682 while(*uri)
683 {
684 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
ce321570 685 *uri == wxT(':') || *uri == wxT('@') || *uri == wxT('/') || *uri == wxT('?'))
846978d7 686 m_fragment += *uri++;
dd65d8c8 687 else
846978d7 688 Escape(m_fragment, *uri++);
dd65d8c8
RN
689 }
690
691 //mark the server as valid
692 m_fields |= wxURI_FRAGMENT;
693 }
694
695 return uri;
696}
697
698// ---------------------------------------------------------------------------
ce321570 699// Resolve
dd65d8c8 700//
ce321570 701// Builds missing components of this uri from a base uri
dd65d8c8 702//
ce321570
RN
703// A version of the algorithm outlined in the RFC is used here
704// (it is shown in comments)
705//
846978d7 706// Note that an empty URI inherits all components
dd65d8c8
RN
707// ---------------------------------------------------------------------------
708
8404931e 709void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 710{
846978d7 711 wxASSERT_MSG(!base.IsReference(),
dd65d8c8
RN
712 wxT("wxURI to inherit from must not be a reference!"));
713
ce321570 714 // If we arn't being strict, enable the older (pre-RFC2396)
dd65d8c8
RN
715 // loophole that allows this uri to inherit other
716 // properties from the base uri - even if the scheme
717 // is defined
8404931e
VZ
718 if ( !(flags & wxURI_STRICT) &&
719 HasScheme() && base.HasScheme() &&
720 m_scheme == base.m_scheme )
846978d7 721 {
dd65d8c8
RN
722 m_fields -= wxURI_SCHEME;
723 }
724
725
726 // Do nothing if this is an absolute wxURI
727 // if defined(R.scheme) then
728 // T.scheme = R.scheme;
729 // T.authority = R.authority;
730 // T.path = remove_dot_segments(R.path);
731 // T.query = R.query;
732 if (HasScheme())
733 {
734 return;
735 }
736
737 //No sheme - inherit
738 m_scheme = base.m_scheme;
739 m_fields |= wxURI_SCHEME;
740
741 // All we need to do for relative URIs with an
742 // authority component is just inherit the scheme
743 // if defined(R.authority) then
744 // T.authority = R.authority;
745 // T.path = remove_dot_segments(R.path);
746 // T.query = R.query;
747 if (HasServer())
748 {
749 return;
750 }
751
752 //No authority - inherit
753 if (base.HasUser())
754 {
755 m_user = base.m_user;
756 m_fields |= wxURI_USER;
757 }
846978d7 758
dd65d8c8
RN
759 m_server = base.m_server;
760 m_hostType = base.m_hostType;
761 m_fields |= wxURI_SERVER;
846978d7 762
dd65d8c8
RN
763 if (base.HasPort())
764 {
765 m_port = base.m_port;
766 m_fields |= wxURI_PORT;
767 }
846978d7 768
dd65d8c8
RN
769
770 // Simple path inheritance from base
771 if (!HasPath())
772 {
773 // T.path = Base.path;
774 m_path = base.m_path;
775 m_fields |= wxURI_PATH;
846978d7 776
dd65d8c8
RN
777
778 // if defined(R.query) then
779 // T.query = R.query;
780 // else
781 // T.query = Base.query;
782 // endif;
783 if (!HasQuery())
784 {
785 m_query = base.m_query;
786 m_fields |= wxURI_QUERY;
787 }
788 }
789 else
790 {
791 // if (R.path starts-with "/") then
792 // T.path = remove_dot_segments(R.path);
793 // else
794 // T.path = merge(Base.path, R.path);
795 // T.path = remove_dot_segments(T.path);
796 // endif;
797 // T.query = R.query;
ce321570 798 if (m_path[0u] != wxT('/'))
dd65d8c8
RN
799 {
800 //Marge paths
801 const wxChar* op = m_path.c_str();
802 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
803
804 //not a ending directory? move up
ce321570 805 if (base.m_path[0] && *(bp-1) != wxT('/'))
dd65d8c8
RN
806 UpTree(base.m_path, bp);
807
808 //normalize directories
846978d7 809 while(*op == wxT('.') && *(op+1) == wxT('.') &&
ce321570 810 (*(op+2) == '\0' || *(op+2) == wxT('/')) )
dd65d8c8
RN
811 {
812 UpTree(base.m_path, bp);
813
814 if (*(op+2) == '\0')
815 op += 2;
816 else
817 op += 3;
818 }
819
846978d7 820 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
ba5a47ae 821 m_path.substr((op - m_path.c_str()), m_path.Length());
dd65d8c8
RN
822 }
823 }
ce321570 824
846978d7 825 //T.fragment = R.fragment;
dd65d8c8
RN
826}
827
828// ---------------------------------------------------------------------------
846978d7 829// UpTree
dd65d8c8 830//
ce321570 831// Moves a URI path up a directory
dd65d8c8
RN
832// ---------------------------------------------------------------------------
833
ce321570 834//static
dd65d8c8
RN
835void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
836{
ce321570 837 if (uri != uristart && *(uri-1) == wxT('/'))
dd65d8c8
RN
838 {
839 uri -= 2;
840 }
846978d7 841
dd65d8c8
RN
842 for(;uri != uristart; --uri)
843 {
ce321570 844 if (*uri == wxT('/'))
dd65d8c8
RN
845 {
846 ++uri;
847 break;
848 }
849 }
850
851 //!!!TODO:HACK!!!//
ce321570 852 if (uri == uristart && *uri == wxT('/'))
dd65d8c8
RN
853 ++uri;
854 //!!!//
855}
856
ce321570
RN
857// ---------------------------------------------------------------------------
858// Normalize
859//
860// Normalizes directories in-place
861//
862// I.E. ./ and . are ignored
863//
864// ../ and .. are removed if a directory is before it, along
865// with that directory (leading .. and ../ are kept)
866// ---------------------------------------------------------------------------
867
868//static
8404931e 869void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
dd65d8c8
RN
870{
871 wxChar* cp = s;
872 wxChar* bp = s;
873
ce321570 874 if(s[0] == wxT('/'))
dd65d8c8
RN
875 ++bp;
876
877 while(*cp)
878 {
ce321570
RN
879 if (*cp == wxT('.') && (*(cp+1) == wxT('/') || *(cp+1) == '\0')
880 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
881 {
882 //. _or_ ./ - ignore
883 if (*(cp+1) == '\0')
884 cp += 1;
885 else
886 cp += 2;
887 }
846978d7 888 else if (*cp == wxT('.') && *(cp+1) == wxT('.') &&
ce321570
RN
889 (*(cp+2) == wxT('/') || *(cp+2) == '\0')
890 && (bp == cp || *(cp-1) == wxT('/')))
dd65d8c8
RN
891 {
892 //.. _or_ ../ - go up the tree
893 if (s != bp)
894 {
895 UpTree((const wxChar*)bp, (const wxChar*&)s);
896
897 if (*(cp+2) == '\0')
898 cp += 2;
899 else
900 cp += 3;
901 }
902 else if (!bIgnoreLeads)
903
904 {
905 *bp++ = *cp++;
906 *bp++ = *cp++;
907 if (*cp)
908 *bp++ = *cp++;
909
910 s = bp;
911 }
912 else
913 {
914 if (*(cp+2) == '\0')
915 cp += 2;
916 else
917 cp += 3;
918 }
919 }
920 else
846978d7 921 *s++ = *cp++;
dd65d8c8
RN
922 }
923
924 *s = '\0';
925}
926
927// ---------------------------------------------------------------------------
ce321570
RN
928// ParseH16
929//
930// Parses 1 to 4 hex values. Returns true if the first character of the input
846978d7 931// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
932// the input string back to its original position on failure.
933// ---------------------------------------------------------------------------
934
935bool wxURI::ParseH16(const wxChar*& uri)
936{
937 // h16 = 1*4HEXDIG
938 if(!IsHex(*++uri))
939 return false;
940
941 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
942 ++uri;
943
944 return true;
945}
946
947// ---------------------------------------------------------------------------
948// ParseIPXXX
949//
846978d7
WS
950// Parses a certain version of an IP address and moves the input string past
951// it. Returns true if the input string contains the proper version of an ip
952// address. It is the caller's responsibility to move the input string back
ce321570 953// to its original position on failure.
dd65d8c8
RN
954// ---------------------------------------------------------------------------
955
956bool wxURI::ParseIPv4address(const wxChar*& uri)
957{
958 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
959 //
960 //dec-octet = DIGIT ; 0-9
961 // / %x31-39 DIGIT ; 10-99
962 // / "1" 2DIGIT ; 100-199
963 // / "2" %x30-34 DIGIT ; 200-249
964 // / "25" %x30-35 ; 250-255
965 size_t iIPv4 = 0;
966 if (IsDigit(*uri))
967 {
968 ++iIPv4;
969
846978d7 970
dd65d8c8
RN
971 //each ip part must be between 0-255 (dupe of version in for loop)
972 if( IsDigit(*++uri) && IsDigit(*++uri) &&
973 //100 or less (note !)
846978d7
WS
974 !( (*(uri-2) < wxT('2')) ||
975 //240 or less
976 (*(uri-2) == wxT('2') &&
ce321570 977 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
978 )
979 )
980 )
981 {
982 return false;
983 }
984
985 if(IsDigit(*uri))++uri;
986
987 //compilers should unroll this loop
988 for(; iIPv4 < 4; ++iIPv4)
989 {
ce321570 990 if (*uri != wxT('.') || !IsDigit(*++uri))
dd65d8c8
RN
991 break;
992
993 //each ip part must be between 0-255
994 if( IsDigit(*++uri) && IsDigit(*++uri) &&
995 //100 or less (note !)
846978d7
WS
996 !( (*(uri-2) < wxT('2')) ||
997 //240 or less
998 (*(uri-2) == wxT('2') &&
ce321570 999 (*(uri-1) < wxT('5') || (*(uri-1) == wxT('5') && *uri <= wxT('5')))
dd65d8c8
RN
1000 )
1001 )
1002 )
1003 {
1004 return false;
1005 }
1006 if(IsDigit(*uri))++uri;
1007 }
1008 }
1009 return iIPv4 == 4;
1010}
1011
dd65d8c8
RN
1012bool wxURI::ParseIPv6address(const wxChar*& uri)
1013{
1014 // IPv6address = 6( h16 ":" ) ls32
1015 // / "::" 5( h16 ":" ) ls32
1016 // / [ h16 ] "::" 4( h16 ":" ) ls32
1017 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
1018 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
1019 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
1020 // / [ *4( h16 ":" ) h16 ] "::" ls32
1021 // / [ *5( h16 ":" ) h16 ] "::" h16
1022 // / [ *6( h16 ":" ) h16 ] "::"
1023
1024 size_t numPrefix = 0,
1025 maxPostfix;
1026
1027 bool bEndHex = false;
1028
1029 for( ; numPrefix < 6; ++numPrefix)
1030 {
1031 if(!ParseH16(uri))
1032 {
1033 --uri;
1034 bEndHex = true;
1035 break;
1036 }
846978d7 1037
ce321570 1038 if(*uri != wxT(':'))
dd65d8c8
RN
1039 {
1040 break;
1041 }
1042 }
1043
1044 if(!bEndHex && !ParseH16(uri))
1045 {
1046 --uri;
1047
1048 if (numPrefix)
1049 return false;
1050
ce321570 1051 if (*uri == wxT(':'))
dd65d8c8 1052 {
ce321570 1053 if (*++uri != wxT(':'))
dd65d8c8
RN
1054 return false;
1055
1056 maxPostfix = 5;
1057 }
1058 else
1059 maxPostfix = 6;
1060 }
1061 else
1062 {
ce321570 1063 if (*uri != wxT(':') || *(uri+1) != wxT(':'))
dd65d8c8
RN
1064 {
1065 if (numPrefix != 6)
1066 return false;
1067
ce321570 1068 while (*--uri != wxT(':')) {}
dd65d8c8
RN
1069 ++uri;
1070
1071 const wxChar* uristart = uri;
1072 //parse ls32
1073 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1074 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1075 return true;
1076
1077 uri = uristart;
1078
1079 if (ParseIPv4address(uri))
1080 return true;
1081 else
1082 return false;
1083 }
1084 else
1085 {
1086 uri += 2;
846978d7 1087
dd65d8c8
RN
1088 if (numPrefix > 3)
1089 maxPostfix = 0;
1090 else
1091 maxPostfix = 4 - numPrefix;
1092 }
1093 }
1094
1095 bool bAllowAltEnding = maxPostfix == 0;
1096
1097 for(; maxPostfix != 0; --maxPostfix)
1098 {
ce321570 1099 if(!ParseH16(uri) || *uri != wxT(':'))
dd65d8c8
RN
1100 return false;
1101 }
1102
1103 if(numPrefix <= 4)
1104 {
1105 const wxChar* uristart = uri;
1106 //parse ls32
1107 // ls32 = ( h16 ":" h16 ) / IPv4address
846978d7 1108 if (ParseH16(uri) && *uri == wxT(':') && ParseH16(uri))
dd65d8c8
RN
1109 return true;
1110
1111 uri = uristart;
1112
1113 if (ParseIPv4address(uri))
1114 return true;
1115
1116 uri = uristart;
846978d7 1117
dd65d8c8
RN
1118 if (!bAllowAltEnding)
1119 return false;
1120 }
1121
1122 if(numPrefix <= 5 && ParseH16(uri))
1123 return true;
1124
1125 return true;
1126}
1127
1128bool wxURI::ParseIPvFuture(const wxChar*& uri)
1129{
1130 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
ce321570 1131 if (*++uri != wxT('v') || !IsHex(*++uri))
dd65d8c8
RN
1132 return false;
1133
1134 while (IsHex(*++uri)) {}
1135
ce321570 1136 if (*uri != wxT('.') || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')))
dd65d8c8
RN
1137 return false;
1138
ce321570 1139 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == wxT(':')) {}
dd65d8c8
RN
1140
1141 return true;
1142}
1143
1144
1145// ---------------------------------------------------------------------------
ce321570
RN
1146// CharToHex
1147//
846978d7 1148// Converts a character into a numeric hexadecimal value, or 0 if the
ce321570 1149// passed in character is not a valid hex character
dd65d8c8
RN
1150// ---------------------------------------------------------------------------
1151
ce321570 1152//static
409a7ba7 1153wxChar wxURI::CharToHex(const wxChar& c)
dd65d8c8 1154{
846978d7
WS
1155 if ((c >= wxT('A')) && (c <= wxT('Z'))) return c - wxT('A') + 0x0A;
1156 if ((c >= wxT('a')) && (c <= wxT('z'))) return c - wxT('a') + 0x0a;
1157 if ((c >= wxT('0')) && (c <= wxT('9'))) return c - wxT('0') + 0x00;
dd65d8c8 1158
846978d7 1159 return 0;
dd65d8c8
RN
1160}
1161
ce321570
RN
1162// ---------------------------------------------------------------------------
1163// IsXXX
1164//
1165// Returns true if the passed in character meets the criteria of the method
1166// ---------------------------------------------------------------------------
1167
dd65d8c8
RN
1168//! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1169bool wxURI::IsUnreserved (const wxChar& c)
846978d7 1170{ return IsAlpha(c) || IsDigit(c) ||
ce321570
RN
1171 c == wxT('-') ||
1172 c == wxT('.') ||
1173 c == wxT('_') ||
1174 c == wxT('~') //tilde
846978d7 1175 ;
dd65d8c8
RN
1176}
1177
1178bool wxURI::IsReserved (const wxChar& c)
846978d7 1179{
dd65d8c8
RN
1180 return IsGenDelim(c) || IsSubDelim(c);
1181}
1182
1183//! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1184bool wxURI::IsGenDelim (const wxChar& c)
1185{
ce321570
RN
1186 return c == wxT(':') ||
1187 c == wxT('/') ||
1188 c == wxT('?') ||
1189 c == wxT('#') ||
1190 c == wxT('[') ||
1191 c == wxT(']') ||
1192 c == wxT('@');
dd65d8c8
RN
1193}
1194
1195//! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1196//! / "*" / "+" / "," / ";" / "="
1197bool wxURI::IsSubDelim (const wxChar& c)
1198{
ce321570
RN
1199 return c == wxT('!') ||
1200 c == wxT('$') ||
1201 c == wxT('&') ||
1202 c == wxT('\'') ||
1203 c == wxT('(') ||
1204 c == wxT(')') ||
1205 c == wxT('*') ||
1206 c == wxT('+') ||
1207 c == wxT(',') ||
1208 c == wxT(';') ||
846978d7 1209 c == wxT('=')
dd65d8c8
RN
1210 ;
1211}
1212
1213bool wxURI::IsHex(const wxChar& c)
ce321570 1214{ return IsDigit(c) || (c >= wxT('a') && c <= wxT('f')) || (c >= wxT('A') && c <= wxT('F')); }
dd65d8c8
RN
1215
1216bool wxURI::IsAlpha(const wxChar& c)
ce321570 1217{ return (c >= wxT('a') && c <= wxT('z')) || (c >= wxT('A') && c <= wxT('Z')); }
dd65d8c8
RN
1218
1219bool wxURI::IsDigit(const wxChar& c)
ce321570 1220{ return c >= wxT('0') && c <= wxT('9'); }
dd65d8c8
RN
1221
1222
1223// ---------------------------------------------------------------------------
1224//
1225// wxURL Compatibility
1226//
dd65d8c8
RN
1227// ---------------------------------------------------------------------------
1228
1229#if wxUSE_URL
1230
86470d43
RN
1231#if WXWIN_COMPATIBILITY_2_4
1232
dd65d8c8
RN
1233#include "wx/url.h"
1234
997ba01b
RN
1235//Note that this old code really doesn't convert to a URI that well and looks
1236//more like a dirty hack than anything else...
1237
1238wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
dd65d8c8 1239{
997ba01b
RN
1240 wxString out_str;
1241 wxString hexa_code;
1242 size_t i;
1243
1244 for (i = 0; i < uri.Len(); i++)
1245 {
1246 wxChar c = uri.GetChar(i);
1247
1248 if (c == wxT(' '))
1249 {
1250 // GRG, Apr/2000: changed to "%20" instead of '+'
1251
1252 out_str += wxT("%20");
1253 }
1254 else
1255 {
1256 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1257 //
1258 // - Alphanumeric characters are never escaped
1259 // - Unreserved marks are never escaped
1260 // - Delimiters must be escaped if they appear within a component
1261 // but not if they are used to separate components. Here we have
1262 // no clear way to distinguish between these two cases, so they
1263 // are escaped unless they are passed in the 'delims' parameter
1264 // (allowed delimiters).
1265
1266 static const wxChar marks[] = wxT("-_.!~*()'");
1267
1268 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1269 {
1270 hexa_code.Printf(wxT("%%%02X"), c);
1271 out_str += hexa_code;
1272 }
1273 else
1274 {
1275 out_str += c;
1276 }
1277 }
1278 }
1279
1280 return out_str;
dd65d8c8
RN
1281}
1282
1283wxString wxURL::ConvertFromURI(const wxString& uri)
1284{
86470d43 1285 return wxURI::Unescape(uri);
dd65d8c8
RN
1286}
1287
86470d43
RN
1288#endif //WXWIN_COMPATIBILITY_2_4
1289
dd65d8c8
RN
1290#endif //wxUSE_URL
1291
1292//end of uri.cpp
1293
1294
1295