]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
Always link with expat in monolithic build.
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8 1/////////////////////////////////////////////////////////////////////////////
80fdcdb9 2// Name: src/common/uri.cpp
2186321f
VZ
3// Purpose: Implementation of a URI parser
4// Author: Ryan Norton,
5// Vadim Zeitlin (UTF-8 URI support, many other changes)
dd65d8c8
RN
6// Created: 10/26/04
7// RCS-ID: $Id$
2186321f
VZ
8// Copyright: (c) 2004 Ryan Norton,
9// 2008 Vadim Zeitlin
10// Licence: wxWindows licence
dd65d8c8
RN
11/////////////////////////////////////////////////////////////////////////////
12
13// ===========================================================================
14// declarations
15// ===========================================================================
16
17// ---------------------------------------------------------------------------
18// headers
19// ---------------------------------------------------------------------------
20
dd65d8c8
RN
21// For compilers that support precompilation, includes "wx.h".
22#include "wx/wxprec.h"
23
24#ifdef __BORLANDC__
25 #pragma hdrstop
26#endif
27
0bf751e7
VS
28#ifndef WX_PRECOMP
29 #include "wx/crt.h"
30#endif
31
dd65d8c8
RN
32#include "wx/uri.h"
33
34// ---------------------------------------------------------------------------
35// definitions
36// ---------------------------------------------------------------------------
37
4115960d 38IMPLEMENT_CLASS(wxURI, wxObject)
dd65d8c8
RN
39
40// ===========================================================================
2186321f 41// wxURI implementation
dd65d8c8
RN
42// ===========================================================================
43
44// ---------------------------------------------------------------------------
2186321f 45// Constructors and cleanup
dd65d8c8
RN
46// ---------------------------------------------------------------------------
47
2186321f
VZ
48wxURI::wxURI()
49 : m_hostType(wxURI_REGNAME),
50 m_fields(0)
dd65d8c8
RN
51{
52}
846978d7 53
2186321f
VZ
54wxURI::wxURI(const wxString& uri)
55 : m_hostType(wxURI_REGNAME),
56 m_fields(0)
dd65d8c8
RN
57{
58 Create(uri);
59}
60
2186321f 61bool wxURI::Create(const wxString& uri)
dd65d8c8 62{
2186321f
VZ
63 if (m_fields)
64 Clear();
dd65d8c8 65
2186321f 66 return Parse(uri.utf8_str());
dd65d8c8
RN
67}
68
69void wxURI::Clear()
70{
2186321f
VZ
71 m_scheme =
72 m_userinfo =
73 m_server =
74 m_port =
75 m_path =
76 m_query =
77 m_fragment = wxEmptyString;
dd65d8c8
RN
78
79 m_hostType = wxURI_REGNAME;
80
81 m_fields = 0;
82}
83
84// ---------------------------------------------------------------------------
2186321f 85// Escaped characters handling
dd65d8c8
RN
86// ---------------------------------------------------------------------------
87
2186321f
VZ
88// Converts a character into a numeric hexadecimal value, or -1 if the passed
89// in character is not a valid hex character
dd65d8c8 90
2186321f
VZ
91/* static */
92int wxURI::CharToHex(char c)
93{
94 if ((c >= 'A') && (c <= 'Z'))
95 return c - 'A' + 10;
96 if ((c >= 'a') && (c <= 'z'))
97 return c - 'a' + 10;
98 if ((c >= '0') && (c <= '9'))
99 return c - '0';
100
101 return -1;
846978d7 102}
dd65d8c8 103
2186321f 104int wxURI::DecodeEscape(wxString::const_iterator& i)
dd65d8c8 105{
2186321f
VZ
106 int hi = CharToHex(*++i);
107 if ( hi == -1 )
108 return -1;
8404931e 109
2186321f
VZ
110 int lo = CharToHex(*++i);
111 if ( lo == -1 )
112 return -1;
c9f78968 113
2186321f 114 return (hi << 4) | lo;
dd65d8c8
RN
115}
116
2186321f 117/* static */
86470d43
RN
118wxString wxURI::Unescape(const wxString& uri)
119{
2186321f
VZ
120 // the unescaped version can't be longer than the original one
121 wxCharBuffer buf(uri.length());
122 char *p = buf.data();
86470d43 123
2186321f 124 for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p )
86470d43 125 {
2186321f
VZ
126 char c = *i;
127 if ( c == '%' )
86470d43 128 {
2186321f
VZ
129 int n = wxURI::DecodeEscape(i);
130 if ( n == -1 )
131 return wxString();
132
133 wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" );
134
5c33522f 135 c = static_cast<char>(n);
86470d43 136 }
2186321f
VZ
137
138 *p = c;
86470d43
RN
139 }
140
2186321f 141 *p = '\0';
86470d43 142
2186321f
VZ
143 // by default assume that the URI is in UTF-8, this is the most common
144 // practice
145 wxString s = wxString::FromUTF8(buf);
146 if ( s.empty() )
147 {
148 // if it isn't, use latin-1 as a fallback -- at least this always
149 // succeeds
150 s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf);
151 }
152
153 return s;
dd65d8c8
RN
154}
155
2186321f 156void wxURI::AppendNextEscaped(wxString& s, const char *& p)
dd65d8c8 157{
2186321f
VZ
158 // check for an already encoded character:
159 //
ce321570 160 // pct-encoded = "%" HEXDIG HEXDIG
2186321f
VZ
161 if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
162 {
163 s += *p++;
164 s += *p++;
165 s += *p++;
166 }
167 else // really needs escaping
168 {
169 static const char* hexDigits = "0123456789abcdef";
170
171 const char c = *p++;
172
173 s += '%';
174 s += hexDigits[(c >> 4) & 15];
175 s += hexDigits[c & 15];
176 }
dd65d8c8
RN
177}
178
4860d40d
RN
179// ---------------------------------------------------------------------------
180// GetUser
181// GetPassword
182//
183// Gets the username and password via the old URL method.
184// ---------------------------------------------------------------------------
185wxString wxURI::GetUser() const
186{
62e3e6c2
VZ
187 // if there is no colon at all, find() returns npos and this method returns
188 // the entire string which is correct as it means that password was omitted
189 return m_userinfo(0, m_userinfo.find(':'));
4860d40d
RN
190}
191
192wxString wxURI::GetPassword() const
193{
62e3e6c2 194 size_t posColon = m_userinfo.find(':');
4860d40d 195
62e3e6c2 196 if ( posColon == wxString::npos )
2186321f 197 return "";
62e3e6c2
VZ
198
199 return m_userinfo(posColon + 1, wxString::npos);
dd65d8c8
RN
200}
201
2186321f
VZ
202// combine all URI fields in a single string, applying funcDecode to each
203// component which it may make sense to decode (i.e. "unescape")
204wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
86470d43
RN
205{
206 wxString ret;
207
208 if (HasScheme())
2186321f 209 ret += m_scheme + ":";
86470d43
RN
210
211 if (HasServer())
212 {
2186321f 213 ret += "//";
86470d43 214
4860d40d 215 if (HasUserInfo())
2186321f 216 ret += funcDecode(m_userinfo) + "@";
86470d43
RN
217
218 if (m_hostType == wxURI_REGNAME)
2186321f 219 ret += funcDecode(m_server);
86470d43
RN
220 else
221 ret += m_server;
222
223 if (HasPort())
2186321f 224 ret += ":" + m_port;
86470d43
RN
225 }
226
2186321f 227 ret += funcDecode(m_path);
86470d43
RN
228
229 if (HasQuery())
2186321f 230 ret += "?" + funcDecode(m_query);
86470d43
RN
231
232 if (HasFragment())
2186321f 233 ret += "#" + funcDecode(m_fragment);
86470d43
RN
234
235 return ret;
236}
237
ce321570
RN
238// ---------------------------------------------------------------------------
239// Comparison
240// ---------------------------------------------------------------------------
241
2186321f 242bool wxURI::operator==(const wxURI& uri) const
846978d7 243{
dd65d8c8
RN
244 if (HasScheme())
245 {
246 if(m_scheme != uri.m_scheme)
247 return false;
248 }
249 else if (uri.HasScheme())
250 return false;
251
252
253 if (HasServer())
254 {
4860d40d 255 if (HasUserInfo())
dd65d8c8 256 {
4860d40d 257 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
258 return false;
259 }
4860d40d 260 else if (uri.HasUserInfo())
dd65d8c8
RN
261 return false;
262
263 if (m_server != uri.m_server ||
264 m_hostType != uri.m_hostType)
265 return false;
266
267 if (HasPort())
268 {
269 if(m_port != uri.m_port)
270 return false;
271 }
272 else if (uri.HasPort())
273 return false;
274 }
275 else if (uri.HasServer())
276 return false;
277
278
279 if (HasPath())
280 {
281 if(m_path != uri.m_path)
282 return false;
283 }
284 else if (uri.HasPath())
285 return false;
286
287 if (HasQuery())
288 {
289 if (m_query != uri.m_query)
290 return false;
291 }
292 else if (uri.HasQuery())
293 return false;
294
295 if (HasFragment())
296 {
297 if (m_fragment != uri.m_fragment)
298 return false;
299 }
300 else if (uri.HasFragment())
301 return false;
302
303 return true;
304}
305
306// ---------------------------------------------------------------------------
307// IsReference
308//
309// if there is no authority or scheme, it is a reference
310// ---------------------------------------------------------------------------
311
312bool wxURI::IsReference() const
2186321f
VZ
313{
314 return !HasScheme() || !HasServer();
315}
dd65d8c8
RN
316
317// ---------------------------------------------------------------------------
318// Parse
319//
320// Master URI parsing method. Just calls the individual parsing methods
321//
322// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 323// URI-reference = URI / relative
dd65d8c8
RN
324// ---------------------------------------------------------------------------
325
2186321f 326bool wxURI::Parse(const char *uri)
dd65d8c8
RN
327{
328 uri = ParseScheme(uri);
2186321f
VZ
329 if ( uri )
330 uri = ParseAuthority(uri);
331 if ( uri )
332 uri = ParsePath(uri);
333 if ( uri )
334 uri = ParseQuery(uri);
335 if ( uri )
336 uri = ParseFragment(uri);
337
338 // we only succeed if we parsed the entire string
339 return uri && *uri == '\0';
dd65d8c8
RN
340}
341
2186321f 342const char* wxURI::ParseScheme(const char *uri)
dd65d8c8 343{
2186321f 344 const char * const start = uri;
dd65d8c8 345
2186321f
VZ
346 // assume that we have a scheme if we have the valid start of it
347 if ( IsAlpha(*uri) )
dd65d8c8
RN
348 {
349 m_scheme += *uri++;
350
351 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 352 while (IsAlpha(*uri) || IsDigit(*uri) ||
2186321f
VZ
353 *uri == '+' ||
354 *uri == '-' ||
355 *uri == '.')
846978d7
WS
356 {
357 m_scheme += *uri++;
dd65d8c8
RN
358 }
359
360 //valid scheme?
2186321f 361 if (*uri == ':')
846978d7 362 {
dd65d8c8
RN
363 //mark the scheme as valid
364 m_fields |= wxURI_SCHEME;
365
366 //move reference point up to input buffer
2186321f
VZ
367 ++uri;
368 }
369 else // no valid scheme finally
370 {
371 uri = start; // rewind
372 m_scheme.clear();
dd65d8c8 373 }
dd65d8c8 374 }
2186321f 375 //else: can't have schema, possible a relative URI
dd65d8c8 376
2186321f 377 return uri;
dd65d8c8
RN
378}
379
2186321f 380const char* wxURI::ParseAuthority(const char* uri)
dd65d8c8
RN
381{
382 // authority = [ userinfo "@" ] host [ ":" port ]
2186321f 383 if ( uri[0] == '/' && uri[1] == '/' )
dd65d8c8 384 {
97ad053b 385 //skip past the two slashes
dd65d8c8
RN
386 uri += 2;
387
97ad053b
VZ
388 // ############# DEVIATION FROM RFC #########################
389 // Don't parse the server component for file URIs
2186321f 390 if(m_scheme != "file")
97ad053b
VZ
391 {
392 //normal way
2186321f
VZ
393 uri = ParseUserInfo(uri);
394 uri = ParseServer(uri);
395 return ParsePort(uri);
97ad053b 396 }
dd65d8c8
RN
397 }
398
399 return uri;
400}
401
2186321f 402const char* wxURI::ParseUserInfo(const char* uri)
dd65d8c8 403{
2186321f 404 const char * const start = uri;
dd65d8c8
RN
405
406 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
2186321f 407 while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
dd65d8c8 408 {
2186321f 409 if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
24ca04e7 410 m_userinfo += *uri++;
dd65d8c8 411 else
2186321f 412 AppendNextEscaped(m_userinfo, uri);
dd65d8c8
RN
413 }
414
2186321f 415 if ( *uri++ == '@' )
dd65d8c8 416 {
2186321f 417 // valid userinfo
4860d40d 418 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
419 }
420 else
2186321f
VZ
421 {
422 uri = start; // rewind
423 m_userinfo.clear();
424 }
dd65d8c8 425
2186321f 426 return uri;
dd65d8c8
RN
427}
428
2186321f 429const char* wxURI::ParseServer(const char* uri)
dd65d8c8 430{
2186321f 431 const char * const start = uri;
dd65d8c8
RN
432
433 // host = IP-literal / IPv4address / reg-name
434 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
2186321f 435 if (*uri == '[')
dd65d8c8 436 {
2186321f
VZ
437 ++uri;
438 if (ParseIPv6address(uri) && *uri == ']')
dd65d8c8 439 {
dd65d8c8 440 m_hostType = wxURI_IPV6ADDRESS;
846978d7 441
c4dbb953 442 m_server.assign(start + 1, uri - start - 1);
2186321f 443 ++uri;
dd65d8c8
RN
444 }
445 else
446 {
2186321f 447 uri = start + 1; // skip the leading '[' again
dd65d8c8 448
2186321f 449 if (ParseIPvFuture(uri) && *uri == ']')
dd65d8c8 450 {
846978d7
WS
451 m_hostType = wxURI_IPVFUTURE;
452
c4dbb953 453 m_server.assign(start + 1, uri - start - 1);
2186321f
VZ
454 ++uri;
455 }
456 else // unrecognized IP literal
457 {
458 uri = start;
dd65d8c8 459 }
dd65d8c8
RN
460 }
461 }
2186321f 462 else // IPv4 or a reg-name
dd65d8c8
RN
463 {
464 if (ParseIPv4address(uri))
465 {
466 m_hostType = wxURI_IPV4ADDRESS;
467
c4dbb953 468 m_server.assign(start, uri - start);
dd65d8c8 469 }
846978d7 470 else
2186321f
VZ
471 {
472 uri = start;
473 }
dd65d8c8
RN
474 }
475
2186321f 476 if ( m_hostType == wxURI_REGNAME )
dd65d8c8 477 {
2186321f 478 uri = start;
dd65d8c8 479 // reg-name = *( unreserved / pct-encoded / sub-delims )
2186321f 480 while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
dd65d8c8 481 {
2186321f 482 if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
24ca04e7 483 m_server += *uri++;
dd65d8c8 484 else
2186321f 485 AppendNextEscaped(m_server, uri);
846978d7 486 }
dd65d8c8
RN
487 }
488
dd65d8c8
RN
489 m_fields |= wxURI_SERVER;
490
491 return uri;
492}
493
846978d7 494
2186321f 495const char* wxURI::ParsePort(const char* uri)
dd65d8c8 496{
dd65d8c8 497 // port = *DIGIT
2186321f 498 if( *uri == ':' )
dd65d8c8
RN
499 {
500 ++uri;
2186321f 501 while ( IsDigit(*uri) )
dd65d8c8
RN
502 {
503 m_port += *uri++;
846978d7 504 }
dd65d8c8 505
dd65d8c8
RN
506 m_fields |= wxURI_PORT;
507 }
508
509 return uri;
510}
511
2186321f 512const char* wxURI::ParsePath(const char* uri)
dd65d8c8 513{
dd65d8c8
RN
514 /// hier-part = "//" authority path-abempty
515 /// / path-absolute
516 /// / path-rootless
517 /// / path-empty
518 ///
519 /// relative-part = "//" authority path-abempty
520 /// / path-absolute
521 /// / path-noscheme
522 /// / path-empty
523 ///
524 /// path-abempty = *( "/" segment )
525 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
526 /// path-noscheme = segment-nz-nc *( "/" segment )
527 /// path-rootless = segment-nz *( "/" segment )
528 /// path-empty = 0<pchar>
529 ///
530 /// segment = *pchar
531 /// segment-nz = 1*pchar
532 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
533 /// ; non-zero-length segment without any colon ":"
534 ///
535 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
dd65d8c8 536
2186321f
VZ
537 if ( IsEndPath(*uri) )
538 return uri;
dd65d8c8 539
2186321f
VZ
540 const bool isAbs = *uri == '/';
541 if ( isAbs )
542 m_path += *uri++;
543
544 wxArrayString segments;
545 wxString segment;
546 for ( ;; )
dd65d8c8 547 {
2186321f
VZ
548 const bool endPath = IsEndPath(*uri);
549 if ( endPath || *uri == '/' )
dd65d8c8 550 {
2186321f
VZ
551 // end of a segment, look at what we got
552 if ( segment == ".." )
dd65d8c8 553 {
2186321f
VZ
554 if ( !segments.empty() && *segments.rbegin() != ".." )
555 segments.pop_back();
556 else if ( !isAbs )
557 segments.push_back("..");
dd65d8c8 558 }
2186321f 559 else if ( segment == "." )
dd65d8c8 560 {
2186321f
VZ
561 // normally we ignore "." but the last one should be taken into
562 // account as "path/." is the same as "path/" and not just "path"
563 if ( endPath )
564 segments.push_back("");
dd65d8c8 565 }
2186321f 566 else // normal segment
dd65d8c8 567 {
2186321f 568 segments.push_back(segment);
dd65d8c8
RN
569 }
570
2186321f
VZ
571 if ( endPath )
572 break;
573
574 segment.clear();
575 ++uri;
576 continue;
dd65d8c8 577 }
2186321f
VZ
578
579 if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
580 segment += *uri++;
581 else
582 AppendNextEscaped(segment, uri);
dd65d8c8
RN
583 }
584
2186321f
VZ
585 m_path += wxJoin(segments, '/', '\0');
586 m_fields |= wxURI_PATH;
587
dd65d8c8
RN
588 return uri;
589}
590
591
2186321f 592const char* wxURI::ParseQuery(const char* uri)
dd65d8c8 593{
dd65d8c8 594 // query = *( pchar / "/" / "?" )
2186321f 595 if ( *uri == '?' )
dd65d8c8
RN
596 {
597 ++uri;
2186321f 598 while ( *uri && *uri != '#' )
dd65d8c8 599 {
2186321f
VZ
600 if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
601 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
602 m_query += *uri++;
dd65d8c8 603 else
2186321f 604 AppendNextEscaped(m_query, uri);
dd65d8c8
RN
605 }
606
dd65d8c8
RN
607 m_fields |= wxURI_QUERY;
608 }
609
610 return uri;
611}
612
613
2186321f 614const char* wxURI::ParseFragment(const char* uri)
dd65d8c8 615{
dd65d8c8 616 // fragment = *( pchar / "/" / "?" )
2186321f 617 if ( *uri == '#' )
dd65d8c8
RN
618 {
619 ++uri;
2186321f 620 while ( *uri )
dd65d8c8 621 {
2186321f
VZ
622 if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
623 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
624 m_fragment += *uri++;
dd65d8c8 625 else
2186321f 626 AppendNextEscaped(m_fragment, uri);
dd65d8c8
RN
627 }
628
dd65d8c8
RN
629 m_fields |= wxURI_FRAGMENT;
630 }
631
632 return uri;
633}
634
635// ---------------------------------------------------------------------------
ce321570 636// Resolve
dd65d8c8 637//
ce321570 638// Builds missing components of this uri from a base uri
dd65d8c8 639//
ce321570
RN
640// A version of the algorithm outlined in the RFC is used here
641// (it is shown in comments)
642//
846978d7 643// Note that an empty URI inherits all components
dd65d8c8
RN
644// ---------------------------------------------------------------------------
645
2186321f
VZ
646/* static */
647wxArrayString wxURI::SplitInSegments(const wxString& path)
648{
649 return wxSplit(path, '/', '\0' /* no escape character */);
650}
651
8404931e 652void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 653{
846978d7 654 wxASSERT_MSG(!base.IsReference(),
2186321f 655 "wxURI to inherit from must not be a reference!");
dd65d8c8 656
2186321f
VZ
657 // If we aren't being strict, enable the older (pre-RFC2396) loophole that
658 // allows this uri to inherit other properties from the base uri - even if
659 // the scheme is defined
8404931e
VZ
660 if ( !(flags & wxURI_STRICT) &&
661 HasScheme() && base.HasScheme() &&
662 m_scheme == base.m_scheme )
846978d7 663 {
dd65d8c8
RN
664 m_fields -= wxURI_SCHEME;
665 }
666
667
668 // Do nothing if this is an absolute wxURI
669 // if defined(R.scheme) then
670 // T.scheme = R.scheme;
671 // T.authority = R.authority;
672 // T.path = remove_dot_segments(R.path);
673 // T.query = R.query;
674 if (HasScheme())
dd65d8c8 675 return;
dd65d8c8 676
ea4daac4 677 //No scheme - inherit
dd65d8c8
RN
678 m_scheme = base.m_scheme;
679 m_fields |= wxURI_SCHEME;
680
681 // All we need to do for relative URIs with an
682 // authority component is just inherit the scheme
683 // if defined(R.authority) then
684 // T.authority = R.authority;
685 // T.path = remove_dot_segments(R.path);
686 // T.query = R.query;
687 if (HasServer())
dd65d8c8 688 return;
dd65d8c8
RN
689
690 //No authority - inherit
4860d40d 691 if (base.HasUserInfo())
dd65d8c8 692 {
4860d40d
RN
693 m_userinfo = base.m_userinfo;
694 m_fields |= wxURI_USERINFO;
dd65d8c8 695 }
846978d7 696
dd65d8c8
RN
697 m_server = base.m_server;
698 m_hostType = base.m_hostType;
699 m_fields |= wxURI_SERVER;
846978d7 700
dd65d8c8
RN
701 if (base.HasPort())
702 {
703 m_port = base.m_port;
704 m_fields |= wxURI_PORT;
705 }
846978d7 706
dd65d8c8
RN
707
708 // Simple path inheritance from base
709 if (!HasPath())
710 {
711 // T.path = Base.path;
712 m_path = base.m_path;
713 m_fields |= wxURI_PATH;
846978d7 714
dd65d8c8
RN
715
716 // if defined(R.query) then
717 // T.query = R.query;
718 // else
719 // T.query = Base.query;
720 // endif;
721 if (!HasQuery())
722 {
723 m_query = base.m_query;
724 m_fields |= wxURI_QUERY;
725 }
726 }
2186321f 727 else if ( m_path.empty() || m_path[0u] != '/' )
dd65d8c8
RN
728 {
729 // if (R.path starts-with "/") then
730 // T.path = remove_dot_segments(R.path);
731 // else
732 // T.path = merge(Base.path, R.path);
733 // T.path = remove_dot_segments(T.path);
734 // endif;
735 // T.query = R.query;
2186321f
VZ
736 //
737 // So we don't do anything for absolute paths and implement merge for
738 // the relative ones
c9f78968 739
2186321f
VZ
740 wxArrayString our(SplitInSegments(m_path)),
741 result(SplitInSegments(base.m_path));
c9f78968 742
2186321f
VZ
743 if ( !result.empty() )
744 result.pop_back();
846978d7 745
2186321f 746 if ( our.empty() )
dd65d8c8 747 {
2186321f
VZ
748 // if we have an empty path it means we were constructed from a "."
749 // string or something similar (e.g. "././././"), it should count
750 // as (empty) segment
751 our.push_back("");
dd65d8c8 752 }
dd65d8c8 753
2186321f
VZ
754 const wxArrayString::const_iterator end = our.end();
755 for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
dd65d8c8 756 {
2186321f 757 if ( i->empty() || *i == "." )
dd65d8c8 758 {
2186321f
VZ
759 // as in ParsePath(), while normally we ignore the empty
760 // segments, we need to take account of them at the end
761 if ( i == end - 1 )
762 result.push_back("");
763 continue;
dd65d8c8 764 }
dd65d8c8 765
2186321f 766 if ( *i == ".." )
dd65d8c8 767 {
2186321f
VZ
768 if ( !result.empty() )
769 {
770 result.pop_back();
dd65d8c8 771
2186321f
VZ
772 if ( i == end - 1 )
773 result.push_back("");
774 }
775 //else: just ignore, extra ".." don't accumulate
dd65d8c8
RN
776 }
777 else
778 {
2186321f
VZ
779 if ( result.empty() )
780 {
781 // ensure that the resulting path will always be absolute
782 result.push_back("");
783 }
784
785 result.push_back(*i);
dd65d8c8
RN
786 }
787 }
2186321f
VZ
788
789 m_path = wxJoin(result, '/', '\0');
dd65d8c8
RN
790 }
791
2186321f 792 //T.fragment = R.fragment;
dd65d8c8
RN
793}
794
795// ---------------------------------------------------------------------------
ce321570
RN
796// ParseH16
797//
798// Parses 1 to 4 hex values. Returns true if the first character of the input
2186321f 799// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
800// the input string back to its original position on failure.
801// ---------------------------------------------------------------------------
802
2186321f 803bool wxURI::ParseH16(const char*& uri)
ce321570
RN
804{
805 // h16 = 1*4HEXDIG
806 if(!IsHex(*++uri))
807 return false;
808
809 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
810 ++uri;
811
812 return true;
813}
814
815// ---------------------------------------------------------------------------
816// ParseIPXXX
817//
846978d7
WS
818// Parses a certain version of an IP address and moves the input string past
819// it. Returns true if the input string contains the proper version of an ip
820// address. It is the caller's responsibility to move the input string back
ce321570 821// to its original position on failure.
dd65d8c8
RN
822// ---------------------------------------------------------------------------
823
2186321f 824bool wxURI::ParseIPv4address(const char*& uri)
dd65d8c8
RN
825{
826 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
827 //
828 //dec-octet = DIGIT ; 0-9
829 // / %x31-39 DIGIT ; 10-99
830 // / "1" 2DIGIT ; 100-199
831 // / "2" %x30-34 DIGIT ; 200-249
832 // / "25" %x30-35 ; 250-255
833 size_t iIPv4 = 0;
834 if (IsDigit(*uri))
835 {
836 ++iIPv4;
837
846978d7 838
dd65d8c8
RN
839 //each ip part must be between 0-255 (dupe of version in for loop)
840 if( IsDigit(*++uri) && IsDigit(*++uri) &&
841 //100 or less (note !)
2186321f 842 !( (*(uri-2) < '2') ||
846978d7 843 //240 or less
2186321f
VZ
844 (*(uri-2) == '2' &&
845 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
dd65d8c8
RN
846 )
847 )
848 )
849 {
850 return false;
851 }
852
853 if(IsDigit(*uri))++uri;
854
855 //compilers should unroll this loop
856 for(; iIPv4 < 4; ++iIPv4)
857 {
2186321f 858 if (*uri != '.' || !IsDigit(*++uri))
dd65d8c8
RN
859 break;
860
861 //each ip part must be between 0-255
862 if( IsDigit(*++uri) && IsDigit(*++uri) &&
863 //100 or less (note !)
2186321f 864 !( (*(uri-2) < '2') ||
846978d7 865 //240 or less
2186321f
VZ
866 (*(uri-2) == '2' &&
867 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
dd65d8c8
RN
868 )
869 )
870 )
871 {
872 return false;
873 }
874 if(IsDigit(*uri))++uri;
875 }
876 }
877 return iIPv4 == 4;
878}
879
2186321f 880bool wxURI::ParseIPv6address(const char*& uri)
dd65d8c8
RN
881{
882 // IPv6address = 6( h16 ":" ) ls32
883 // / "::" 5( h16 ":" ) ls32
884 // / [ h16 ] "::" 4( h16 ":" ) ls32
885 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
886 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
887 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
888 // / [ *4( h16 ":" ) h16 ] "::" ls32
889 // / [ *5( h16 ":" ) h16 ] "::" h16
890 // / [ *6( h16 ":" ) h16 ] "::"
891
892 size_t numPrefix = 0,
893 maxPostfix;
894
895 bool bEndHex = false;
896
897 for( ; numPrefix < 6; ++numPrefix)
898 {
899 if(!ParseH16(uri))
900 {
901 --uri;
902 bEndHex = true;
903 break;
904 }
846978d7 905
2186321f 906 if(*uri != ':')
dd65d8c8
RN
907 {
908 break;
909 }
910 }
911
912 if(!bEndHex && !ParseH16(uri))
913 {
914 --uri;
915
916 if (numPrefix)
917 return false;
918
2186321f 919 if (*uri == ':')
dd65d8c8 920 {
2186321f 921 if (*++uri != ':')
dd65d8c8
RN
922 return false;
923
924 maxPostfix = 5;
925 }
926 else
927 maxPostfix = 6;
928 }
929 else
930 {
2186321f 931 if (*uri != ':' || *(uri+1) != ':')
dd65d8c8
RN
932 {
933 if (numPrefix != 6)
934 return false;
935
2186321f 936 while (*--uri != ':') {}
dd65d8c8
RN
937 ++uri;
938
2186321f 939 const char * const start = uri;
dd65d8c8
RN
940 //parse ls32
941 // ls32 = ( h16 ":" h16 ) / IPv4address
2186321f 942 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
dd65d8c8
RN
943 return true;
944
2186321f 945 uri = start;
dd65d8c8
RN
946
947 if (ParseIPv4address(uri))
948 return true;
949 else
950 return false;
951 }
952 else
953 {
954 uri += 2;
846978d7 955
dd65d8c8
RN
956 if (numPrefix > 3)
957 maxPostfix = 0;
958 else
959 maxPostfix = 4 - numPrefix;
960 }
961 }
962
963 bool bAllowAltEnding = maxPostfix == 0;
964
965 for(; maxPostfix != 0; --maxPostfix)
966 {
2186321f 967 if(!ParseH16(uri) || *uri != ':')
dd65d8c8
RN
968 return false;
969 }
970
971 if(numPrefix <= 4)
972 {
2186321f 973 const char * const start = uri;
dd65d8c8
RN
974 //parse ls32
975 // ls32 = ( h16 ":" h16 ) / IPv4address
2186321f 976 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
dd65d8c8
RN
977 return true;
978
2186321f 979 uri = start;
dd65d8c8
RN
980
981 if (ParseIPv4address(uri))
982 return true;
983
2186321f 984 uri = start;
846978d7 985
dd65d8c8
RN
986 if (!bAllowAltEnding)
987 return false;
988 }
989
990 if(numPrefix <= 5 && ParseH16(uri))
991 return true;
992
993 return true;
994}
995
2186321f 996bool wxURI::ParseIPvFuture(const char*& uri)
dd65d8c8
RN
997{
998 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
2186321f 999 if (*++uri != 'v' || !IsHex(*++uri))
dd65d8c8
RN
1000 return false;
1001
2186321f
VZ
1002 while (IsHex(*++uri))
1003 ;
dd65d8c8 1004
2186321f 1005 if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
dd65d8c8
RN
1006 return false;
1007
2186321f 1008 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
dd65d8c8
RN
1009
1010 return true;
1011}
1012
1013
ce321570
RN
1014// ---------------------------------------------------------------------------
1015// IsXXX
1016//
1017// Returns true if the passed in character meets the criteria of the method
1018// ---------------------------------------------------------------------------
1019
2186321f
VZ
1020// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1021bool wxURI::IsUnreserved(char c)
1022{
1023 return IsAlpha(c) ||
1024 IsDigit(c) ||
1025 c == '-' ||
1026 c == '.' ||
1027 c == '_' ||
1028 c == '~'
846978d7 1029 ;
dd65d8c8
RN
1030}
1031
2186321f 1032bool wxURI::IsReserved(char c)
846978d7 1033{
dd65d8c8
RN
1034 return IsGenDelim(c) || IsSubDelim(c);
1035}
1036
2186321f
VZ
1037// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1038bool wxURI::IsGenDelim(char c)
dd65d8c8 1039{
2186321f
VZ
1040 return c == ':' ||
1041 c == '/' ||
1042 c == '?' ||
1043 c == '#' ||
1044 c == '[' ||
1045 c == ']' ||
1046 c == '@';
dd65d8c8
RN
1047}
1048
2186321f
VZ
1049// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1050// / "*" / "+" / "," / ";" / "="
1051bool wxURI::IsSubDelim(char c)
dd65d8c8 1052{
2186321f
VZ
1053 return c == '!' ||
1054 c == '$' ||
1055 c == '&' ||
1056 c == '\'' ||
1057 c == '(' ||
1058 c == ')' ||
1059 c == '*' ||
1060 c == '+' ||
1061 c == ',' ||
1062 c == ';' ||
1063 c == '='
dd65d8c8
RN
1064 ;
1065}
1066
2186321f
VZ
1067bool wxURI::IsHex(char c)
1068{
1069 return IsDigit(c) ||
1070 (c >= 'a' && c <= 'f') ||
1071 (c >= 'A' && c <= 'F');
1072}
dd65d8c8 1073
2186321f
VZ
1074bool wxURI::IsAlpha(char c)
1075{
1076 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
1077}
dd65d8c8 1078
2186321f
VZ
1079bool wxURI::IsDigit(char c)
1080{
1081 return c >= '0' && c <= '9';
1082}
dd65d8c8 1083
2186321f
VZ
1084bool wxURI::IsEndPath(char c)
1085{
1086 return c == '\0' || c == '#' || c == '?';
1087}
dd65d8c8 1088