]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
fix Purpose comment
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8
RN
1/////////////////////////////////////////////////////////////////////////////
2// Name: uri.cpp
2186321f
VZ
3// Purpose: Implementation of a URI parser
4// Author: Ryan Norton,
5// Vadim Zeitlin (UTF-8 URI support, many other changes)
dd65d8c8
RN
6// Created: 10/26/04
7// RCS-ID: $Id$
2186321f
VZ
8// Copyright: (c) 2004 Ryan Norton,
9// 2008 Vadim Zeitlin
10// Licence: wxWindows licence
dd65d8c8
RN
11/////////////////////////////////////////////////////////////////////////////
12
13// ===========================================================================
14// declarations
15// ===========================================================================
16
17// ---------------------------------------------------------------------------
18// headers
19// ---------------------------------------------------------------------------
20
dd65d8c8
RN
21// For compilers that support precompilation, includes "wx.h".
22#include "wx/wxprec.h"
23
24#ifdef __BORLANDC__
25 #pragma hdrstop
26#endif
27
0bf751e7
VS
28#ifndef WX_PRECOMP
29 #include "wx/crt.h"
30#endif
31
dd65d8c8
RN
32#include "wx/uri.h"
33
34// ---------------------------------------------------------------------------
35// definitions
36// ---------------------------------------------------------------------------
37
4115960d 38IMPLEMENT_CLASS(wxURI, wxObject)
dd65d8c8
RN
39
40// ===========================================================================
2186321f 41// wxURI implementation
dd65d8c8
RN
42// ===========================================================================
43
44// ---------------------------------------------------------------------------
2186321f 45// Constructors and cleanup
dd65d8c8
RN
46// ---------------------------------------------------------------------------
47
2186321f
VZ
48wxURI::wxURI()
49 : m_hostType(wxURI_REGNAME),
50 m_fields(0)
dd65d8c8
RN
51{
52}
846978d7 53
2186321f
VZ
54wxURI::wxURI(const wxString& uri)
55 : m_hostType(wxURI_REGNAME),
56 m_fields(0)
dd65d8c8
RN
57{
58 Create(uri);
59}
60
2186321f 61bool wxURI::Create(const wxString& uri)
dd65d8c8 62{
2186321f
VZ
63 if (m_fields)
64 Clear();
dd65d8c8 65
2186321f 66 return Parse(uri.utf8_str());
dd65d8c8
RN
67}
68
69void wxURI::Clear()
70{
2186321f
VZ
71 m_scheme =
72 m_userinfo =
73 m_server =
74 m_port =
75 m_path =
76 m_query =
77 m_fragment = wxEmptyString;
dd65d8c8
RN
78
79 m_hostType = wxURI_REGNAME;
80
81 m_fields = 0;
82}
83
84// ---------------------------------------------------------------------------
2186321f 85// Escaped characters handling
dd65d8c8
RN
86// ---------------------------------------------------------------------------
87
2186321f
VZ
88// Converts a character into a numeric hexadecimal value, or -1 if the passed
89// in character is not a valid hex character
dd65d8c8 90
2186321f
VZ
91/* static */
92int wxURI::CharToHex(char c)
93{
94 if ((c >= 'A') && (c <= 'Z'))
95 return c - 'A' + 10;
96 if ((c >= 'a') && (c <= 'z'))
97 return c - 'a' + 10;
98 if ((c >= '0') && (c <= '9'))
99 return c - '0';
100
101 return -1;
846978d7 102}
dd65d8c8 103
2186321f 104int wxURI::DecodeEscape(wxString::const_iterator& i)
dd65d8c8 105{
2186321f
VZ
106 int hi = CharToHex(*++i);
107 if ( hi == -1 )
108 return -1;
8404931e 109
2186321f
VZ
110 int lo = CharToHex(*++i);
111 if ( lo == -1 )
112 return -1;
c9f78968 113
2186321f 114 return (hi << 4) | lo;
dd65d8c8
RN
115}
116
2186321f 117/* static */
86470d43
RN
118wxString wxURI::Unescape(const wxString& uri)
119{
2186321f
VZ
120 // the unescaped version can't be longer than the original one
121 wxCharBuffer buf(uri.length());
122 char *p = buf.data();
86470d43 123
2186321f 124 for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p )
86470d43 125 {
2186321f
VZ
126 char c = *i;
127 if ( c == '%' )
86470d43 128 {
2186321f
VZ
129 int n = wxURI::DecodeEscape(i);
130 if ( n == -1 )
131 return wxString();
132
133 wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" );
134
5c33522f 135 c = static_cast<char>(n);
86470d43 136 }
2186321f
VZ
137
138 *p = c;
86470d43
RN
139 }
140
2186321f 141 *p = '\0';
86470d43 142
2186321f
VZ
143 // by default assume that the URI is in UTF-8, this is the most common
144 // practice
145 wxString s = wxString::FromUTF8(buf);
146 if ( s.empty() )
147 {
148 // if it isn't, use latin-1 as a fallback -- at least this always
149 // succeeds
150 s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf);
151 }
152
153 return s;
dd65d8c8
RN
154}
155
2186321f 156void wxURI::AppendNextEscaped(wxString& s, const char *& p)
dd65d8c8 157{
2186321f
VZ
158 // check for an already encoded character:
159 //
ce321570 160 // pct-encoded = "%" HEXDIG HEXDIG
2186321f
VZ
161 if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
162 {
163 s += *p++;
164 s += *p++;
165 s += *p++;
166 }
167 else // really needs escaping
168 {
169 static const char* hexDigits = "0123456789abcdef";
170
171 const char c = *p++;
172
173 s += '%';
174 s += hexDigits[(c >> 4) & 15];
175 s += hexDigits[c & 15];
176 }
dd65d8c8
RN
177}
178
4860d40d
RN
179// ---------------------------------------------------------------------------
180// GetUser
181// GetPassword
182//
183// Gets the username and password via the old URL method.
184// ---------------------------------------------------------------------------
185wxString wxURI::GetUser() const
186{
187 size_t dwPasswordPos = m_userinfo.find(':');
188
189 if (dwPasswordPos == wxString::npos)
190 dwPasswordPos = 0;
2186321f 191
4860d40d
RN
192 return m_userinfo(0, dwPasswordPos);
193}
194
195wxString wxURI::GetPassword() const
196{
197 size_t dwPasswordPos = m_userinfo.find(':');
198
199 if (dwPasswordPos == wxString::npos)
2186321f 200 return "";
4860d40d 201 else
2186321f 202 return m_userinfo(dwPasswordPos+1, m_userinfo.length() + 1);
dd65d8c8
RN
203}
204
2186321f
VZ
205// combine all URI fields in a single string, applying funcDecode to each
206// component which it may make sense to decode (i.e. "unescape")
207wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
86470d43
RN
208{
209 wxString ret;
210
211 if (HasScheme())
2186321f 212 ret += m_scheme + ":";
86470d43
RN
213
214 if (HasServer())
215 {
2186321f 216 ret += "//";
86470d43 217
4860d40d 218 if (HasUserInfo())
2186321f 219 ret += funcDecode(m_userinfo) + "@";
86470d43
RN
220
221 if (m_hostType == wxURI_REGNAME)
2186321f 222 ret += funcDecode(m_server);
86470d43
RN
223 else
224 ret += m_server;
225
226 if (HasPort())
2186321f 227 ret += ":" + m_port;
86470d43
RN
228 }
229
2186321f 230 ret += funcDecode(m_path);
86470d43
RN
231
232 if (HasQuery())
2186321f 233 ret += "?" + funcDecode(m_query);
86470d43
RN
234
235 if (HasFragment())
2186321f 236 ret += "#" + funcDecode(m_fragment);
86470d43
RN
237
238 return ret;
239}
240
ce321570
RN
241// ---------------------------------------------------------------------------
242// Comparison
243// ---------------------------------------------------------------------------
244
2186321f 245bool wxURI::operator==(const wxURI& uri) const
846978d7 246{
dd65d8c8
RN
247 if (HasScheme())
248 {
249 if(m_scheme != uri.m_scheme)
250 return false;
251 }
252 else if (uri.HasScheme())
253 return false;
254
255
256 if (HasServer())
257 {
4860d40d 258 if (HasUserInfo())
dd65d8c8 259 {
4860d40d 260 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
261 return false;
262 }
4860d40d 263 else if (uri.HasUserInfo())
dd65d8c8
RN
264 return false;
265
266 if (m_server != uri.m_server ||
267 m_hostType != uri.m_hostType)
268 return false;
269
270 if (HasPort())
271 {
272 if(m_port != uri.m_port)
273 return false;
274 }
275 else if (uri.HasPort())
276 return false;
277 }
278 else if (uri.HasServer())
279 return false;
280
281
282 if (HasPath())
283 {
284 if(m_path != uri.m_path)
285 return false;
286 }
287 else if (uri.HasPath())
288 return false;
289
290 if (HasQuery())
291 {
292 if (m_query != uri.m_query)
293 return false;
294 }
295 else if (uri.HasQuery())
296 return false;
297
298 if (HasFragment())
299 {
300 if (m_fragment != uri.m_fragment)
301 return false;
302 }
303 else if (uri.HasFragment())
304 return false;
305
306 return true;
307}
308
309// ---------------------------------------------------------------------------
310// IsReference
311//
312// if there is no authority or scheme, it is a reference
313// ---------------------------------------------------------------------------
314
315bool wxURI::IsReference() const
2186321f
VZ
316{
317 return !HasScheme() || !HasServer();
318}
dd65d8c8
RN
319
320// ---------------------------------------------------------------------------
321// Parse
322//
323// Master URI parsing method. Just calls the individual parsing methods
324//
325// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 326// URI-reference = URI / relative
dd65d8c8
RN
327// ---------------------------------------------------------------------------
328
2186321f 329bool wxURI::Parse(const char *uri)
dd65d8c8
RN
330{
331 uri = ParseScheme(uri);
2186321f
VZ
332 if ( uri )
333 uri = ParseAuthority(uri);
334 if ( uri )
335 uri = ParsePath(uri);
336 if ( uri )
337 uri = ParseQuery(uri);
338 if ( uri )
339 uri = ParseFragment(uri);
340
341 // we only succeed if we parsed the entire string
342 return uri && *uri == '\0';
dd65d8c8
RN
343}
344
2186321f 345const char* wxURI::ParseScheme(const char *uri)
dd65d8c8 346{
2186321f 347 const char * const start = uri;
dd65d8c8 348
2186321f
VZ
349 // assume that we have a scheme if we have the valid start of it
350 if ( IsAlpha(*uri) )
dd65d8c8
RN
351 {
352 m_scheme += *uri++;
353
354 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 355 while (IsAlpha(*uri) || IsDigit(*uri) ||
2186321f
VZ
356 *uri == '+' ||
357 *uri == '-' ||
358 *uri == '.')
846978d7
WS
359 {
360 m_scheme += *uri++;
dd65d8c8
RN
361 }
362
363 //valid scheme?
2186321f 364 if (*uri == ':')
846978d7 365 {
dd65d8c8
RN
366 //mark the scheme as valid
367 m_fields |= wxURI_SCHEME;
368
369 //move reference point up to input buffer
2186321f
VZ
370 ++uri;
371 }
372 else // no valid scheme finally
373 {
374 uri = start; // rewind
375 m_scheme.clear();
dd65d8c8 376 }
dd65d8c8 377 }
2186321f 378 //else: can't have schema, possible a relative URI
dd65d8c8 379
2186321f 380 return uri;
dd65d8c8
RN
381}
382
2186321f 383const char* wxURI::ParseAuthority(const char* uri)
dd65d8c8
RN
384{
385 // authority = [ userinfo "@" ] host [ ":" port ]
2186321f 386 if ( uri[0] == '/' && uri[1] == '/' )
dd65d8c8 387 {
97ad053b 388 //skip past the two slashes
dd65d8c8
RN
389 uri += 2;
390
97ad053b
VZ
391 // ############# DEVIATION FROM RFC #########################
392 // Don't parse the server component for file URIs
2186321f 393 if(m_scheme != "file")
97ad053b
VZ
394 {
395 //normal way
2186321f
VZ
396 uri = ParseUserInfo(uri);
397 uri = ParseServer(uri);
398 return ParsePort(uri);
97ad053b 399 }
dd65d8c8
RN
400 }
401
402 return uri;
403}
404
2186321f 405const char* wxURI::ParseUserInfo(const char* uri)
dd65d8c8 406{
2186321f 407 const char * const start = uri;
dd65d8c8
RN
408
409 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
2186321f 410 while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
dd65d8c8 411 {
2186321f 412 if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
24ca04e7 413 m_userinfo += *uri++;
dd65d8c8 414 else
2186321f 415 AppendNextEscaped(m_userinfo, uri);
dd65d8c8
RN
416 }
417
2186321f 418 if ( *uri++ == '@' )
dd65d8c8 419 {
2186321f 420 // valid userinfo
4860d40d 421 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
422 }
423 else
2186321f
VZ
424 {
425 uri = start; // rewind
426 m_userinfo.clear();
427 }
dd65d8c8 428
2186321f 429 return uri;
dd65d8c8
RN
430}
431
2186321f 432const char* wxURI::ParseServer(const char* uri)
dd65d8c8 433{
2186321f 434 const char * const start = uri;
dd65d8c8
RN
435
436 // host = IP-literal / IPv4address / reg-name
437 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
2186321f 438 if (*uri == '[')
dd65d8c8 439 {
2186321f
VZ
440 ++uri;
441 if (ParseIPv6address(uri) && *uri == ']')
dd65d8c8 442 {
dd65d8c8 443 m_hostType = wxURI_IPV6ADDRESS;
846978d7 444
c4dbb953 445 m_server.assign(start + 1, uri - start - 1);
2186321f 446 ++uri;
dd65d8c8
RN
447 }
448 else
449 {
2186321f 450 uri = start + 1; // skip the leading '[' again
dd65d8c8 451
2186321f 452 if (ParseIPvFuture(uri) && *uri == ']')
dd65d8c8 453 {
846978d7
WS
454 m_hostType = wxURI_IPVFUTURE;
455
c4dbb953 456 m_server.assign(start + 1, uri - start - 1);
2186321f
VZ
457 ++uri;
458 }
459 else // unrecognized IP literal
460 {
461 uri = start;
dd65d8c8 462 }
dd65d8c8
RN
463 }
464 }
2186321f 465 else // IPv4 or a reg-name
dd65d8c8
RN
466 {
467 if (ParseIPv4address(uri))
468 {
469 m_hostType = wxURI_IPV4ADDRESS;
470
c4dbb953 471 m_server.assign(start, uri - start);
dd65d8c8 472 }
846978d7 473 else
2186321f
VZ
474 {
475 uri = start;
476 }
dd65d8c8
RN
477 }
478
2186321f 479 if ( m_hostType == wxURI_REGNAME )
dd65d8c8 480 {
2186321f 481 uri = start;
dd65d8c8 482 // reg-name = *( unreserved / pct-encoded / sub-delims )
2186321f 483 while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
dd65d8c8 484 {
2186321f 485 if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
24ca04e7 486 m_server += *uri++;
dd65d8c8 487 else
2186321f 488 AppendNextEscaped(m_server, uri);
846978d7 489 }
dd65d8c8
RN
490 }
491
dd65d8c8
RN
492 m_fields |= wxURI_SERVER;
493
494 return uri;
495}
496
846978d7 497
2186321f 498const char* wxURI::ParsePort(const char* uri)
dd65d8c8 499{
dd65d8c8 500 // port = *DIGIT
2186321f 501 if( *uri == ':' )
dd65d8c8
RN
502 {
503 ++uri;
2186321f 504 while ( IsDigit(*uri) )
dd65d8c8
RN
505 {
506 m_port += *uri++;
846978d7 507 }
dd65d8c8 508
dd65d8c8
RN
509 m_fields |= wxURI_PORT;
510 }
511
512 return uri;
513}
514
2186321f 515const char* wxURI::ParsePath(const char* uri)
dd65d8c8 516{
dd65d8c8
RN
517 /// hier-part = "//" authority path-abempty
518 /// / path-absolute
519 /// / path-rootless
520 /// / path-empty
521 ///
522 /// relative-part = "//" authority path-abempty
523 /// / path-absolute
524 /// / path-noscheme
525 /// / path-empty
526 ///
527 /// path-abempty = *( "/" segment )
528 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
529 /// path-noscheme = segment-nz-nc *( "/" segment )
530 /// path-rootless = segment-nz *( "/" segment )
531 /// path-empty = 0<pchar>
532 ///
533 /// segment = *pchar
534 /// segment-nz = 1*pchar
535 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
536 /// ; non-zero-length segment without any colon ":"
537 ///
538 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
dd65d8c8 539
2186321f
VZ
540 if ( IsEndPath(*uri) )
541 return uri;
dd65d8c8 542
2186321f
VZ
543 const bool isAbs = *uri == '/';
544 if ( isAbs )
545 m_path += *uri++;
546
547 wxArrayString segments;
548 wxString segment;
549 for ( ;; )
dd65d8c8 550 {
2186321f
VZ
551 const bool endPath = IsEndPath(*uri);
552 if ( endPath || *uri == '/' )
dd65d8c8 553 {
2186321f
VZ
554 // end of a segment, look at what we got
555 if ( segment == ".." )
dd65d8c8 556 {
2186321f
VZ
557 if ( !segments.empty() && *segments.rbegin() != ".." )
558 segments.pop_back();
559 else if ( !isAbs )
560 segments.push_back("..");
dd65d8c8 561 }
2186321f 562 else if ( segment == "." )
dd65d8c8 563 {
2186321f
VZ
564 // normally we ignore "." but the last one should be taken into
565 // account as "path/." is the same as "path/" and not just "path"
566 if ( endPath )
567 segments.push_back("");
dd65d8c8 568 }
2186321f 569 else // normal segment
dd65d8c8 570 {
2186321f 571 segments.push_back(segment);
dd65d8c8
RN
572 }
573
2186321f
VZ
574 if ( endPath )
575 break;
576
577 segment.clear();
578 ++uri;
579 continue;
dd65d8c8 580 }
2186321f
VZ
581
582 if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
583 segment += *uri++;
584 else
585 AppendNextEscaped(segment, uri);
dd65d8c8
RN
586 }
587
2186321f
VZ
588 m_path += wxJoin(segments, '/', '\0');
589 m_fields |= wxURI_PATH;
590
dd65d8c8
RN
591 return uri;
592}
593
594
2186321f 595const char* wxURI::ParseQuery(const char* uri)
dd65d8c8 596{
dd65d8c8 597 // query = *( pchar / "/" / "?" )
2186321f 598 if ( *uri == '?' )
dd65d8c8
RN
599 {
600 ++uri;
2186321f 601 while ( *uri && *uri != '#' )
dd65d8c8 602 {
2186321f
VZ
603 if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
604 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
605 m_query += *uri++;
dd65d8c8 606 else
2186321f 607 AppendNextEscaped(m_query, uri);
dd65d8c8
RN
608 }
609
dd65d8c8
RN
610 m_fields |= wxURI_QUERY;
611 }
612
613 return uri;
614}
615
616
2186321f 617const char* wxURI::ParseFragment(const char* uri)
dd65d8c8 618{
dd65d8c8 619 // fragment = *( pchar / "/" / "?" )
2186321f 620 if ( *uri == '#' )
dd65d8c8
RN
621 {
622 ++uri;
2186321f 623 while ( *uri )
dd65d8c8 624 {
2186321f
VZ
625 if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
626 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
627 m_fragment += *uri++;
dd65d8c8 628 else
2186321f 629 AppendNextEscaped(m_fragment, uri);
dd65d8c8
RN
630 }
631
dd65d8c8
RN
632 m_fields |= wxURI_FRAGMENT;
633 }
634
635 return uri;
636}
637
638// ---------------------------------------------------------------------------
ce321570 639// Resolve
dd65d8c8 640//
ce321570 641// Builds missing components of this uri from a base uri
dd65d8c8 642//
ce321570
RN
643// A version of the algorithm outlined in the RFC is used here
644// (it is shown in comments)
645//
846978d7 646// Note that an empty URI inherits all components
dd65d8c8
RN
647// ---------------------------------------------------------------------------
648
2186321f
VZ
649/* static */
650wxArrayString wxURI::SplitInSegments(const wxString& path)
651{
652 return wxSplit(path, '/', '\0' /* no escape character */);
653}
654
8404931e 655void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 656{
846978d7 657 wxASSERT_MSG(!base.IsReference(),
2186321f 658 "wxURI to inherit from must not be a reference!");
dd65d8c8 659
2186321f
VZ
660 // If we aren't being strict, enable the older (pre-RFC2396) loophole that
661 // allows this uri to inherit other properties from the base uri - even if
662 // the scheme is defined
8404931e
VZ
663 if ( !(flags & wxURI_STRICT) &&
664 HasScheme() && base.HasScheme() &&
665 m_scheme == base.m_scheme )
846978d7 666 {
dd65d8c8
RN
667 m_fields -= wxURI_SCHEME;
668 }
669
670
671 // Do nothing if this is an absolute wxURI
672 // if defined(R.scheme) then
673 // T.scheme = R.scheme;
674 // T.authority = R.authority;
675 // T.path = remove_dot_segments(R.path);
676 // T.query = R.query;
677 if (HasScheme())
dd65d8c8 678 return;
dd65d8c8 679
ea4daac4 680 //No scheme - inherit
dd65d8c8
RN
681 m_scheme = base.m_scheme;
682 m_fields |= wxURI_SCHEME;
683
684 // All we need to do for relative URIs with an
685 // authority component is just inherit the scheme
686 // if defined(R.authority) then
687 // T.authority = R.authority;
688 // T.path = remove_dot_segments(R.path);
689 // T.query = R.query;
690 if (HasServer())
dd65d8c8 691 return;
dd65d8c8
RN
692
693 //No authority - inherit
4860d40d 694 if (base.HasUserInfo())
dd65d8c8 695 {
4860d40d
RN
696 m_userinfo = base.m_userinfo;
697 m_fields |= wxURI_USERINFO;
dd65d8c8 698 }
846978d7 699
dd65d8c8
RN
700 m_server = base.m_server;
701 m_hostType = base.m_hostType;
702 m_fields |= wxURI_SERVER;
846978d7 703
dd65d8c8
RN
704 if (base.HasPort())
705 {
706 m_port = base.m_port;
707 m_fields |= wxURI_PORT;
708 }
846978d7 709
dd65d8c8
RN
710
711 // Simple path inheritance from base
712 if (!HasPath())
713 {
714 // T.path = Base.path;
715 m_path = base.m_path;
716 m_fields |= wxURI_PATH;
846978d7 717
dd65d8c8
RN
718
719 // if defined(R.query) then
720 // T.query = R.query;
721 // else
722 // T.query = Base.query;
723 // endif;
724 if (!HasQuery())
725 {
726 m_query = base.m_query;
727 m_fields |= wxURI_QUERY;
728 }
729 }
2186321f 730 else if ( m_path.empty() || m_path[0u] != '/' )
dd65d8c8
RN
731 {
732 // if (R.path starts-with "/") then
733 // T.path = remove_dot_segments(R.path);
734 // else
735 // T.path = merge(Base.path, R.path);
736 // T.path = remove_dot_segments(T.path);
737 // endif;
738 // T.query = R.query;
2186321f
VZ
739 //
740 // So we don't do anything for absolute paths and implement merge for
741 // the relative ones
c9f78968 742
2186321f
VZ
743 wxArrayString our(SplitInSegments(m_path)),
744 result(SplitInSegments(base.m_path));
c9f78968 745
2186321f
VZ
746 if ( !result.empty() )
747 result.pop_back();
846978d7 748
2186321f 749 if ( our.empty() )
dd65d8c8 750 {
2186321f
VZ
751 // if we have an empty path it means we were constructed from a "."
752 // string or something similar (e.g. "././././"), it should count
753 // as (empty) segment
754 our.push_back("");
dd65d8c8 755 }
dd65d8c8 756
2186321f
VZ
757 const wxArrayString::const_iterator end = our.end();
758 for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
dd65d8c8 759 {
2186321f 760 if ( i->empty() || *i == "." )
dd65d8c8 761 {
2186321f
VZ
762 // as in ParsePath(), while normally we ignore the empty
763 // segments, we need to take account of them at the end
764 if ( i == end - 1 )
765 result.push_back("");
766 continue;
dd65d8c8 767 }
dd65d8c8 768
2186321f 769 if ( *i == ".." )
dd65d8c8 770 {
2186321f
VZ
771 if ( !result.empty() )
772 {
773 result.pop_back();
dd65d8c8 774
2186321f
VZ
775 if ( i == end - 1 )
776 result.push_back("");
777 }
778 //else: just ignore, extra ".." don't accumulate
dd65d8c8
RN
779 }
780 else
781 {
2186321f
VZ
782 if ( result.empty() )
783 {
784 // ensure that the resulting path will always be absolute
785 result.push_back("");
786 }
787
788 result.push_back(*i);
dd65d8c8
RN
789 }
790 }
2186321f
VZ
791
792 m_path = wxJoin(result, '/', '\0');
dd65d8c8
RN
793 }
794
2186321f 795 //T.fragment = R.fragment;
dd65d8c8
RN
796}
797
798// ---------------------------------------------------------------------------
ce321570
RN
799// ParseH16
800//
801// Parses 1 to 4 hex values. Returns true if the first character of the input
2186321f 802// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
803// the input string back to its original position on failure.
804// ---------------------------------------------------------------------------
805
2186321f 806bool wxURI::ParseH16(const char*& uri)
ce321570
RN
807{
808 // h16 = 1*4HEXDIG
809 if(!IsHex(*++uri))
810 return false;
811
812 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
813 ++uri;
814
815 return true;
816}
817
818// ---------------------------------------------------------------------------
819// ParseIPXXX
820//
846978d7
WS
821// Parses a certain version of an IP address and moves the input string past
822// it. Returns true if the input string contains the proper version of an ip
823// address. It is the caller's responsibility to move the input string back
ce321570 824// to its original position on failure.
dd65d8c8
RN
825// ---------------------------------------------------------------------------
826
2186321f 827bool wxURI::ParseIPv4address(const char*& uri)
dd65d8c8
RN
828{
829 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
830 //
831 //dec-octet = DIGIT ; 0-9
832 // / %x31-39 DIGIT ; 10-99
833 // / "1" 2DIGIT ; 100-199
834 // / "2" %x30-34 DIGIT ; 200-249
835 // / "25" %x30-35 ; 250-255
836 size_t iIPv4 = 0;
837 if (IsDigit(*uri))
838 {
839 ++iIPv4;
840
846978d7 841
dd65d8c8
RN
842 //each ip part must be between 0-255 (dupe of version in for loop)
843 if( IsDigit(*++uri) && IsDigit(*++uri) &&
844 //100 or less (note !)
2186321f 845 !( (*(uri-2) < '2') ||
846978d7 846 //240 or less
2186321f
VZ
847 (*(uri-2) == '2' &&
848 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
dd65d8c8
RN
849 )
850 )
851 )
852 {
853 return false;
854 }
855
856 if(IsDigit(*uri))++uri;
857
858 //compilers should unroll this loop
859 for(; iIPv4 < 4; ++iIPv4)
860 {
2186321f 861 if (*uri != '.' || !IsDigit(*++uri))
dd65d8c8
RN
862 break;
863
864 //each ip part must be between 0-255
865 if( IsDigit(*++uri) && IsDigit(*++uri) &&
866 //100 or less (note !)
2186321f 867 !( (*(uri-2) < '2') ||
846978d7 868 //240 or less
2186321f
VZ
869 (*(uri-2) == '2' &&
870 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
dd65d8c8
RN
871 )
872 )
873 )
874 {
875 return false;
876 }
877 if(IsDigit(*uri))++uri;
878 }
879 }
880 return iIPv4 == 4;
881}
882
2186321f 883bool wxURI::ParseIPv6address(const char*& uri)
dd65d8c8
RN
884{
885 // IPv6address = 6( h16 ":" ) ls32
886 // / "::" 5( h16 ":" ) ls32
887 // / [ h16 ] "::" 4( h16 ":" ) ls32
888 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
889 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
890 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
891 // / [ *4( h16 ":" ) h16 ] "::" ls32
892 // / [ *5( h16 ":" ) h16 ] "::" h16
893 // / [ *6( h16 ":" ) h16 ] "::"
894
895 size_t numPrefix = 0,
896 maxPostfix;
897
898 bool bEndHex = false;
899
900 for( ; numPrefix < 6; ++numPrefix)
901 {
902 if(!ParseH16(uri))
903 {
904 --uri;
905 bEndHex = true;
906 break;
907 }
846978d7 908
2186321f 909 if(*uri != ':')
dd65d8c8
RN
910 {
911 break;
912 }
913 }
914
915 if(!bEndHex && !ParseH16(uri))
916 {
917 --uri;
918
919 if (numPrefix)
920 return false;
921
2186321f 922 if (*uri == ':')
dd65d8c8 923 {
2186321f 924 if (*++uri != ':')
dd65d8c8
RN
925 return false;
926
927 maxPostfix = 5;
928 }
929 else
930 maxPostfix = 6;
931 }
932 else
933 {
2186321f 934 if (*uri != ':' || *(uri+1) != ':')
dd65d8c8
RN
935 {
936 if (numPrefix != 6)
937 return false;
938
2186321f 939 while (*--uri != ':') {}
dd65d8c8
RN
940 ++uri;
941
2186321f 942 const char * const start = uri;
dd65d8c8
RN
943 //parse ls32
944 // ls32 = ( h16 ":" h16 ) / IPv4address
2186321f 945 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
dd65d8c8
RN
946 return true;
947
2186321f 948 uri = start;
dd65d8c8
RN
949
950 if (ParseIPv4address(uri))
951 return true;
952 else
953 return false;
954 }
955 else
956 {
957 uri += 2;
846978d7 958
dd65d8c8
RN
959 if (numPrefix > 3)
960 maxPostfix = 0;
961 else
962 maxPostfix = 4 - numPrefix;
963 }
964 }
965
966 bool bAllowAltEnding = maxPostfix == 0;
967
968 for(; maxPostfix != 0; --maxPostfix)
969 {
2186321f 970 if(!ParseH16(uri) || *uri != ':')
dd65d8c8
RN
971 return false;
972 }
973
974 if(numPrefix <= 4)
975 {
2186321f 976 const char * const start = uri;
dd65d8c8
RN
977 //parse ls32
978 // ls32 = ( h16 ":" h16 ) / IPv4address
2186321f 979 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
dd65d8c8
RN
980 return true;
981
2186321f 982 uri = start;
dd65d8c8
RN
983
984 if (ParseIPv4address(uri))
985 return true;
986
2186321f 987 uri = start;
846978d7 988
dd65d8c8
RN
989 if (!bAllowAltEnding)
990 return false;
991 }
992
993 if(numPrefix <= 5 && ParseH16(uri))
994 return true;
995
996 return true;
997}
998
2186321f 999bool wxURI::ParseIPvFuture(const char*& uri)
dd65d8c8
RN
1000{
1001 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
2186321f 1002 if (*++uri != 'v' || !IsHex(*++uri))
dd65d8c8
RN
1003 return false;
1004
2186321f
VZ
1005 while (IsHex(*++uri))
1006 ;
dd65d8c8 1007
2186321f 1008 if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
dd65d8c8
RN
1009 return false;
1010
2186321f 1011 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
dd65d8c8
RN
1012
1013 return true;
1014}
1015
1016
ce321570
RN
1017// ---------------------------------------------------------------------------
1018// IsXXX
1019//
1020// Returns true if the passed in character meets the criteria of the method
1021// ---------------------------------------------------------------------------
1022
2186321f
VZ
1023// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1024bool wxURI::IsUnreserved(char c)
1025{
1026 return IsAlpha(c) ||
1027 IsDigit(c) ||
1028 c == '-' ||
1029 c == '.' ||
1030 c == '_' ||
1031 c == '~'
846978d7 1032 ;
dd65d8c8
RN
1033}
1034
2186321f 1035bool wxURI::IsReserved(char c)
846978d7 1036{
dd65d8c8
RN
1037 return IsGenDelim(c) || IsSubDelim(c);
1038}
1039
2186321f
VZ
1040// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1041bool wxURI::IsGenDelim(char c)
dd65d8c8 1042{
2186321f
VZ
1043 return c == ':' ||
1044 c == '/' ||
1045 c == '?' ||
1046 c == '#' ||
1047 c == '[' ||
1048 c == ']' ||
1049 c == '@';
dd65d8c8
RN
1050}
1051
2186321f
VZ
1052// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1053// / "*" / "+" / "," / ";" / "="
1054bool wxURI::IsSubDelim(char c)
dd65d8c8 1055{
2186321f
VZ
1056 return c == '!' ||
1057 c == '$' ||
1058 c == '&' ||
1059 c == '\'' ||
1060 c == '(' ||
1061 c == ')' ||
1062 c == '*' ||
1063 c == '+' ||
1064 c == ',' ||
1065 c == ';' ||
1066 c == '='
dd65d8c8
RN
1067 ;
1068}
1069
2186321f
VZ
1070bool wxURI::IsHex(char c)
1071{
1072 return IsDigit(c) ||
1073 (c >= 'a' && c <= 'f') ||
1074 (c >= 'A' && c <= 'F');
1075}
dd65d8c8 1076
2186321f
VZ
1077bool wxURI::IsAlpha(char c)
1078{
1079 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
1080}
dd65d8c8 1081
2186321f
VZ
1082bool wxURI::IsDigit(char c)
1083{
1084 return c >= '0' && c <= '9';
1085}
dd65d8c8 1086
2186321f
VZ
1087bool wxURI::IsEndPath(char c)
1088{
1089 return c == '\0' || c == '#' || c == '?';
1090}
dd65d8c8 1091