]> git.saurik.com Git - wxWidgets.git/blame - src/common/uri.cpp
Revert "Make wxMSW stack walking methods work with Unicode identifiers."
[wxWidgets.git] / src / common / uri.cpp
CommitLineData
dd65d8c8 1/////////////////////////////////////////////////////////////////////////////
80fdcdb9 2// Name: src/common/uri.cpp
2186321f
VZ
3// Purpose: Implementation of a URI parser
4// Author: Ryan Norton,
5// Vadim Zeitlin (UTF-8 URI support, many other changes)
dd65d8c8 6// Created: 10/26/04
2186321f
VZ
7// Copyright: (c) 2004 Ryan Norton,
8// 2008 Vadim Zeitlin
9// Licence: wxWindows licence
dd65d8c8
RN
10/////////////////////////////////////////////////////////////////////////////
11
12// ===========================================================================
13// declarations
14// ===========================================================================
15
16// ---------------------------------------------------------------------------
17// headers
18// ---------------------------------------------------------------------------
19
dd65d8c8
RN
20// For compilers that support precompilation, includes "wx.h".
21#include "wx/wxprec.h"
22
23#ifdef __BORLANDC__
24 #pragma hdrstop
25#endif
26
0bf751e7
VS
27#ifndef WX_PRECOMP
28 #include "wx/crt.h"
29#endif
30
dd65d8c8
RN
31#include "wx/uri.h"
32
33// ---------------------------------------------------------------------------
34// definitions
35// ---------------------------------------------------------------------------
36
// Class-information macro invoked for wxURI with wxObject named as its base
// (presumably the wxWidgets RTTI registration -- confirm against wx/object.h).
4115960d 37IMPLEMENT_CLASS(wxURI, wxObject)
dd65d8c8
RN
38
39// ===========================================================================
2186321f 40// wxURI implementation
dd65d8c8
RN
41// ===========================================================================
42
43// ---------------------------------------------------------------------------
2186321f 44// Constructors and cleanup
dd65d8c8
RN
45// ---------------------------------------------------------------------------
46
2186321f
VZ
47wxURI::wxURI()
48 : m_hostType(wxURI_REGNAME),
49 m_fields(0)
dd65d8c8
RN
50{
51}
846978d7 52
2186321f
VZ
53wxURI::wxURI(const wxString& uri)
54 : m_hostType(wxURI_REGNAME),
55 m_fields(0)
dd65d8c8
RN
56{
57 Create(uri);
58}
59
2186321f 60bool wxURI::Create(const wxString& uri)
dd65d8c8 61{
2186321f
VZ
62 if (m_fields)
63 Clear();
dd65d8c8 64
2186321f 65 return Parse(uri.utf8_str());
dd65d8c8
RN
66}
67
68void wxURI::Clear()
69{
2186321f
VZ
70 m_scheme =
71 m_userinfo =
72 m_server =
73 m_port =
74 m_path =
75 m_query =
76 m_fragment = wxEmptyString;
dd65d8c8
RN
77
78 m_hostType = wxURI_REGNAME;
79
80 m_fields = 0;
81}
82
83// ---------------------------------------------------------------------------
2186321f 84// Escaped characters handling
dd65d8c8
RN
85// ---------------------------------------------------------------------------
86
2186321f
VZ
87// Converts a character into a numeric hexadecimal value, or -1 if the passed
88// in character is not a valid hex character
dd65d8c8 89
2186321f
VZ
90/* static */
91int wxURI::CharToHex(char c)
92{
93 if ((c >= 'A') && (c <= 'Z'))
94 return c - 'A' + 10;
95 if ((c >= 'a') && (c <= 'z'))
96 return c - 'a' + 10;
97 if ((c >= '0') && (c <= '9'))
98 return c - '0';
99
100 return -1;
846978d7 101}
dd65d8c8 102
2186321f 103int wxURI::DecodeEscape(wxString::const_iterator& i)
dd65d8c8 104{
2186321f
VZ
105 int hi = CharToHex(*++i);
106 if ( hi == -1 )
107 return -1;
8404931e 108
2186321f
VZ
109 int lo = CharToHex(*++i);
110 if ( lo == -1 )
111 return -1;
c9f78968 112
2186321f 113 return (hi << 4) | lo;
dd65d8c8
RN
114}
115
2186321f 116/* static */
86470d43
RN
117wxString wxURI::Unescape(const wxString& uri)
118{
2186321f
VZ
119 // the unescaped version can't be longer than the original one
120 wxCharBuffer buf(uri.length());
121 char *p = buf.data();
86470d43 122
2186321f 123 for ( wxString::const_iterator i = uri.begin(); i != uri.end(); ++i, ++p )
86470d43 124 {
2186321f
VZ
125 char c = *i;
126 if ( c == '%' )
86470d43 127 {
2186321f
VZ
128 int n = wxURI::DecodeEscape(i);
129 if ( n == -1 )
130 return wxString();
131
132 wxASSERT_MSG( n >= 0 && n <= 0xff, "unexpected character value" );
133
5c33522f 134 c = static_cast<char>(n);
86470d43 135 }
2186321f
VZ
136
137 *p = c;
86470d43
RN
138 }
139
2186321f 140 *p = '\0';
86470d43 141
2186321f
VZ
142 // by default assume that the URI is in UTF-8, this is the most common
143 // practice
144 wxString s = wxString::FromUTF8(buf);
145 if ( s.empty() )
146 {
147 // if it isn't, use latin-1 as a fallback -- at least this always
148 // succeeds
149 s = wxCSConv(wxFONTENCODING_ISO8859_1).cMB2WC(buf);
150 }
151
152 return s;
dd65d8c8
RN
153}
154
2186321f 155void wxURI::AppendNextEscaped(wxString& s, const char *& p)
dd65d8c8 156{
2186321f
VZ
157 // check for an already encoded character:
158 //
ce321570 159 // pct-encoded = "%" HEXDIG HEXDIG
2186321f
VZ
160 if ( p[0] == '%' && IsHex(p[1]) && IsHex(p[2]) )
161 {
162 s += *p++;
163 s += *p++;
164 s += *p++;
165 }
166 else // really needs escaping
167 {
168 static const char* hexDigits = "0123456789abcdef";
169
170 const char c = *p++;
171
172 s += '%';
173 s += hexDigits[(c >> 4) & 15];
174 s += hexDigits[c & 15];
175 }
dd65d8c8
RN
176}
177
4860d40d
RN
178// ---------------------------------------------------------------------------
179// GetUser
180// GetPassword
181//
182// Gets the username and password via the old URL method.
183// ---------------------------------------------------------------------------
184wxString wxURI::GetUser() const
185{
62e3e6c2
VZ
186 // if there is no colon at all, find() returns npos and this method returns
187 // the entire string which is correct as it means that password was omitted
188 return m_userinfo(0, m_userinfo.find(':'));
4860d40d
RN
189}
190
191wxString wxURI::GetPassword() const
192{
62e3e6c2 193 size_t posColon = m_userinfo.find(':');
4860d40d 194
62e3e6c2 195 if ( posColon == wxString::npos )
2186321f 196 return "";
62e3e6c2
VZ
197
198 return m_userinfo(posColon + 1, wxString::npos);
dd65d8c8
RN
199}
200
2186321f
VZ
201// combine all URI fields in a single string, applying funcDecode to each
202// component which it may make sense to decode (i.e. "unescape")
203wxString wxURI::DoBuildURI(wxString (*funcDecode)(const wxString&)) const
86470d43
RN
204{
205 wxString ret;
206
207 if (HasScheme())
2186321f 208 ret += m_scheme + ":";
86470d43
RN
209
210 if (HasServer())
211 {
2186321f 212 ret += "//";
86470d43 213
4860d40d 214 if (HasUserInfo())
2186321f 215 ret += funcDecode(m_userinfo) + "@";
86470d43
RN
216
217 if (m_hostType == wxURI_REGNAME)
2186321f 218 ret += funcDecode(m_server);
86470d43
RN
219 else
220 ret += m_server;
221
222 if (HasPort())
2186321f 223 ret += ":" + m_port;
86470d43
RN
224 }
225
2186321f 226 ret += funcDecode(m_path);
86470d43
RN
227
228 if (HasQuery())
2186321f 229 ret += "?" + funcDecode(m_query);
86470d43
RN
230
231 if (HasFragment())
2186321f 232 ret += "#" + funcDecode(m_fragment);
86470d43
RN
233
234 return ret;
235}
236
ce321570
RN
237// ---------------------------------------------------------------------------
238// Comparison
239// ---------------------------------------------------------------------------
240
2186321f 241bool wxURI::operator==(const wxURI& uri) const
846978d7 242{
dd65d8c8
RN
243 if (HasScheme())
244 {
245 if(m_scheme != uri.m_scheme)
246 return false;
247 }
248 else if (uri.HasScheme())
249 return false;
250
251
252 if (HasServer())
253 {
4860d40d 254 if (HasUserInfo())
dd65d8c8 255 {
4860d40d 256 if (m_userinfo != uri.m_userinfo)
dd65d8c8
RN
257 return false;
258 }
4860d40d 259 else if (uri.HasUserInfo())
dd65d8c8
RN
260 return false;
261
262 if (m_server != uri.m_server ||
263 m_hostType != uri.m_hostType)
264 return false;
265
266 if (HasPort())
267 {
268 if(m_port != uri.m_port)
269 return false;
270 }
271 else if (uri.HasPort())
272 return false;
273 }
274 else if (uri.HasServer())
275 return false;
276
277
278 if (HasPath())
279 {
280 if(m_path != uri.m_path)
281 return false;
282 }
283 else if (uri.HasPath())
284 return false;
285
286 if (HasQuery())
287 {
288 if (m_query != uri.m_query)
289 return false;
290 }
291 else if (uri.HasQuery())
292 return false;
293
294 if (HasFragment())
295 {
296 if (m_fragment != uri.m_fragment)
297 return false;
298 }
299 else if (uri.HasFragment())
300 return false;
301
302 return true;
303}
304
305// ---------------------------------------------------------------------------
306// IsReference
307//
308// if there is no authority or scheme, it is a reference
309// ---------------------------------------------------------------------------
310
311bool wxURI::IsReference() const
2186321f
VZ
312{
313 return !HasScheme() || !HasServer();
314}
dd65d8c8
RN
315
316// ---------------------------------------------------------------------------
317// Parse
318//
319// Master URI parsing method. Just calls the individual parsing methods
320//
321// URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
4cc52142 322// URI-reference = URI / relative
dd65d8c8
RN
323// ---------------------------------------------------------------------------
324
2186321f 325bool wxURI::Parse(const char *uri)
dd65d8c8
RN
326{
327 uri = ParseScheme(uri);
2186321f
VZ
328 if ( uri )
329 uri = ParseAuthority(uri);
330 if ( uri )
331 uri = ParsePath(uri);
332 if ( uri )
333 uri = ParseQuery(uri);
334 if ( uri )
335 uri = ParseFragment(uri);
336
337 // we only succeed if we parsed the entire string
338 return uri && *uri == '\0';
dd65d8c8
RN
339}
340
2186321f 341const char* wxURI::ParseScheme(const char *uri)
dd65d8c8 342{
2186321f 343 const char * const start = uri;
dd65d8c8 344
2186321f
VZ
345 // assume that we have a scheme if we have the valid start of it
346 if ( IsAlpha(*uri) )
dd65d8c8
RN
347 {
348 m_scheme += *uri++;
349
350 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
846978d7 351 while (IsAlpha(*uri) || IsDigit(*uri) ||
2186321f
VZ
352 *uri == '+' ||
353 *uri == '-' ||
354 *uri == '.')
846978d7
WS
355 {
356 m_scheme += *uri++;
dd65d8c8
RN
357 }
358
359 //valid scheme?
2186321f 360 if (*uri == ':')
846978d7 361 {
dd65d8c8
RN
362 //mark the scheme as valid
363 m_fields |= wxURI_SCHEME;
364
365 //move reference point up to input buffer
2186321f
VZ
366 ++uri;
367 }
368 else // no valid scheme finally
369 {
370 uri = start; // rewind
371 m_scheme.clear();
dd65d8c8 372 }
dd65d8c8 373 }
2186321f 374 //else: can't have schema, possible a relative URI
dd65d8c8 375
2186321f 376 return uri;
dd65d8c8
RN
377}
378
2186321f 379const char* wxURI::ParseAuthority(const char* uri)
dd65d8c8
RN
380{
381 // authority = [ userinfo "@" ] host [ ":" port ]
2186321f 382 if ( uri[0] == '/' && uri[1] == '/' )
dd65d8c8 383 {
97ad053b 384 //skip past the two slashes
dd65d8c8
RN
385 uri += 2;
386
97ad053b
VZ
387 // ############# DEVIATION FROM RFC #########################
388 // Don't parse the server component for file URIs
2186321f 389 if(m_scheme != "file")
97ad053b
VZ
390 {
391 //normal way
2186321f
VZ
392 uri = ParseUserInfo(uri);
393 uri = ParseServer(uri);
394 return ParsePort(uri);
97ad053b 395 }
dd65d8c8
RN
396 }
397
398 return uri;
399}
400
2186321f 401const char* wxURI::ParseUserInfo(const char* uri)
dd65d8c8 402{
2186321f 403 const char * const start = uri;
dd65d8c8
RN
404
405 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
2186321f 406 while ( *uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?' )
dd65d8c8 407 {
2186321f 408 if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' )
24ca04e7 409 m_userinfo += *uri++;
dd65d8c8 410 else
2186321f 411 AppendNextEscaped(m_userinfo, uri);
dd65d8c8
RN
412 }
413
2186321f 414 if ( *uri++ == '@' )
dd65d8c8 415 {
2186321f 416 // valid userinfo
4860d40d 417 m_fields |= wxURI_USERINFO;
dd65d8c8
RN
418 }
419 else
2186321f
VZ
420 {
421 uri = start; // rewind
422 m_userinfo.clear();
423 }
dd65d8c8 424
2186321f 425 return uri;
dd65d8c8
RN
426}
427
2186321f 428const char* wxURI::ParseServer(const char* uri)
dd65d8c8 429{
2186321f 430 const char * const start = uri;
dd65d8c8
RN
431
432 // host = IP-literal / IPv4address / reg-name
433 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
2186321f 434 if (*uri == '[')
dd65d8c8 435 {
2186321f
VZ
436 ++uri;
437 if (ParseIPv6address(uri) && *uri == ']')
dd65d8c8 438 {
dd65d8c8 439 m_hostType = wxURI_IPV6ADDRESS;
846978d7 440
c4dbb953 441 m_server.assign(start + 1, uri - start - 1);
2186321f 442 ++uri;
dd65d8c8
RN
443 }
444 else
445 {
2186321f 446 uri = start + 1; // skip the leading '[' again
dd65d8c8 447
2186321f 448 if (ParseIPvFuture(uri) && *uri == ']')
dd65d8c8 449 {
846978d7
WS
450 m_hostType = wxURI_IPVFUTURE;
451
c4dbb953 452 m_server.assign(start + 1, uri - start - 1);
2186321f
VZ
453 ++uri;
454 }
455 else // unrecognized IP literal
456 {
457 uri = start;
dd65d8c8 458 }
dd65d8c8
RN
459 }
460 }
2186321f 461 else // IPv4 or a reg-name
dd65d8c8
RN
462 {
463 if (ParseIPv4address(uri))
464 {
465 m_hostType = wxURI_IPV4ADDRESS;
466
c4dbb953 467 m_server.assign(start, uri - start);
dd65d8c8 468 }
846978d7 469 else
2186321f
VZ
470 {
471 uri = start;
472 }
dd65d8c8
RN
473 }
474
2186321f 475 if ( m_hostType == wxURI_REGNAME )
dd65d8c8 476 {
2186321f 477 uri = start;
dd65d8c8 478 // reg-name = *( unreserved / pct-encoded / sub-delims )
2186321f 479 while ( *uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?' )
dd65d8c8 480 {
2186321f 481 if ( IsUnreserved(*uri) || IsSubDelim(*uri) )
24ca04e7 482 m_server += *uri++;
dd65d8c8 483 else
2186321f 484 AppendNextEscaped(m_server, uri);
846978d7 485 }
dd65d8c8
RN
486 }
487
dd65d8c8
RN
488 m_fields |= wxURI_SERVER;
489
490 return uri;
491}
492
846978d7 493
2186321f 494const char* wxURI::ParsePort(const char* uri)
dd65d8c8 495{
dd65d8c8 496 // port = *DIGIT
2186321f 497 if( *uri == ':' )
dd65d8c8
RN
498 {
499 ++uri;
2186321f 500 while ( IsDigit(*uri) )
dd65d8c8
RN
501 {
502 m_port += *uri++;
846978d7 503 }
dd65d8c8 504
dd65d8c8
RN
505 m_fields |= wxURI_PORT;
506 }
507
508 return uri;
509}
510
2186321f 511const char* wxURI::ParsePath(const char* uri)
dd65d8c8 512{
dd65d8c8
RN
513 /// hier-part = "//" authority path-abempty
514 /// / path-absolute
515 /// / path-rootless
516 /// / path-empty
517 ///
518 /// relative-part = "//" authority path-abempty
519 /// / path-absolute
520 /// / path-noscheme
521 /// / path-empty
522 ///
523 /// path-abempty = *( "/" segment )
524 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
525 /// path-noscheme = segment-nz-nc *( "/" segment )
526 /// path-rootless = segment-nz *( "/" segment )
527 /// path-empty = 0<pchar>
528 ///
529 /// segment = *pchar
530 /// segment-nz = 1*pchar
531 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
532 /// ; non-zero-length segment without any colon ":"
533 ///
534 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
dd65d8c8 535
2186321f
VZ
536 if ( IsEndPath(*uri) )
537 return uri;
dd65d8c8 538
2186321f
VZ
539 const bool isAbs = *uri == '/';
540 if ( isAbs )
541 m_path += *uri++;
542
543 wxArrayString segments;
544 wxString segment;
545 for ( ;; )
dd65d8c8 546 {
2186321f
VZ
547 const bool endPath = IsEndPath(*uri);
548 if ( endPath || *uri == '/' )
dd65d8c8 549 {
2186321f
VZ
550 // end of a segment, look at what we got
551 if ( segment == ".." )
dd65d8c8 552 {
2186321f
VZ
553 if ( !segments.empty() && *segments.rbegin() != ".." )
554 segments.pop_back();
555 else if ( !isAbs )
556 segments.push_back("..");
dd65d8c8 557 }
2186321f 558 else if ( segment == "." )
dd65d8c8 559 {
2186321f
VZ
560 // normally we ignore "." but the last one should be taken into
561 // account as "path/." is the same as "path/" and not just "path"
562 if ( endPath )
563 segments.push_back("");
dd65d8c8 564 }
2186321f 565 else // normal segment
dd65d8c8 566 {
2186321f 567 segments.push_back(segment);
dd65d8c8
RN
568 }
569
2186321f
VZ
570 if ( endPath )
571 break;
572
573 segment.clear();
574 ++uri;
575 continue;
dd65d8c8 576 }
2186321f
VZ
577
578 if ( IsUnreserved(*uri) || IsSubDelim(*uri) || *uri == ':' || *uri == '@' )
579 segment += *uri++;
580 else
581 AppendNextEscaped(segment, uri);
dd65d8c8
RN
582 }
583
2186321f
VZ
584 m_path += wxJoin(segments, '/', '\0');
585 m_fields |= wxURI_PATH;
586
dd65d8c8
RN
587 return uri;
588}
589
590
2186321f 591const char* wxURI::ParseQuery(const char* uri)
dd65d8c8 592{
dd65d8c8 593 // query = *( pchar / "/" / "?" )
2186321f 594 if ( *uri == '?' )
dd65d8c8
RN
595 {
596 ++uri;
2186321f 597 while ( *uri && *uri != '#' )
dd65d8c8 598 {
2186321f
VZ
599 if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
600 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?' )
601 m_query += *uri++;
dd65d8c8 602 else
2186321f 603 AppendNextEscaped(m_query, uri);
dd65d8c8
RN
604 }
605
dd65d8c8
RN
606 m_fields |= wxURI_QUERY;
607 }
608
609 return uri;
610}
611
612
2186321f 613const char* wxURI::ParseFragment(const char* uri)
dd65d8c8 614{
dd65d8c8 615 // fragment = *( pchar / "/" / "?" )
2186321f 616 if ( *uri == '#' )
dd65d8c8
RN
617 {
618 ++uri;
2186321f 619 while ( *uri )
dd65d8c8 620 {
2186321f
VZ
621 if ( IsUnreserved(*uri) || IsSubDelim(*uri) ||
622 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
623 m_fragment += *uri++;
dd65d8c8 624 else
2186321f 625 AppendNextEscaped(m_fragment, uri);
dd65d8c8
RN
626 }
627
dd65d8c8
RN
628 m_fields |= wxURI_FRAGMENT;
629 }
630
631 return uri;
632}
633
634// ---------------------------------------------------------------------------
ce321570 635// Resolve
dd65d8c8 636//
ce321570 637// Builds missing components of this uri from a base uri
dd65d8c8 638//
ce321570
RN
639// A version of the algorithm outlined in the RFC is used here
640// (it is shown in comments)
641//
846978d7 642// Note that an empty URI inherits all components
dd65d8c8
RN
643// ---------------------------------------------------------------------------
644
2186321f
VZ
645/* static */
646wxArrayString wxURI::SplitInSegments(const wxString& path)
647{
648 return wxSplit(path, '/', '\0' /* no escape character */);
649}
650
8404931e 651void wxURI::Resolve(const wxURI& base, int flags)
dd65d8c8 652{
846978d7 653 wxASSERT_MSG(!base.IsReference(),
2186321f 654 "wxURI to inherit from must not be a reference!");
dd65d8c8 655
2186321f
VZ
656 // If we aren't being strict, enable the older (pre-RFC2396) loophole that
657 // allows this uri to inherit other properties from the base uri - even if
658 // the scheme is defined
8404931e
VZ
659 if ( !(flags & wxURI_STRICT) &&
660 HasScheme() && base.HasScheme() &&
661 m_scheme == base.m_scheme )
846978d7 662 {
dd65d8c8
RN
663 m_fields -= wxURI_SCHEME;
664 }
665
666
667 // Do nothing if this is an absolute wxURI
668 // if defined(R.scheme) then
669 // T.scheme = R.scheme;
670 // T.authority = R.authority;
671 // T.path = remove_dot_segments(R.path);
672 // T.query = R.query;
673 if (HasScheme())
dd65d8c8 674 return;
dd65d8c8 675
ea4daac4 676 //No scheme - inherit
dd65d8c8
RN
677 m_scheme = base.m_scheme;
678 m_fields |= wxURI_SCHEME;
679
680 // All we need to do for relative URIs with an
681 // authority component is just inherit the scheme
682 // if defined(R.authority) then
683 // T.authority = R.authority;
684 // T.path = remove_dot_segments(R.path);
685 // T.query = R.query;
686 if (HasServer())
dd65d8c8 687 return;
dd65d8c8
RN
688
689 //No authority - inherit
4860d40d 690 if (base.HasUserInfo())
dd65d8c8 691 {
4860d40d
RN
692 m_userinfo = base.m_userinfo;
693 m_fields |= wxURI_USERINFO;
dd65d8c8 694 }
846978d7 695
dd65d8c8
RN
696 m_server = base.m_server;
697 m_hostType = base.m_hostType;
698 m_fields |= wxURI_SERVER;
846978d7 699
dd65d8c8
RN
700 if (base.HasPort())
701 {
702 m_port = base.m_port;
703 m_fields |= wxURI_PORT;
704 }
846978d7 705
dd65d8c8
RN
706
707 // Simple path inheritance from base
708 if (!HasPath())
709 {
710 // T.path = Base.path;
711 m_path = base.m_path;
712 m_fields |= wxURI_PATH;
846978d7 713
dd65d8c8
RN
714
715 // if defined(R.query) then
716 // T.query = R.query;
717 // else
718 // T.query = Base.query;
719 // endif;
720 if (!HasQuery())
721 {
722 m_query = base.m_query;
723 m_fields |= wxURI_QUERY;
724 }
725 }
2186321f 726 else if ( m_path.empty() || m_path[0u] != '/' )
dd65d8c8
RN
727 {
728 // if (R.path starts-with "/") then
729 // T.path = remove_dot_segments(R.path);
730 // else
731 // T.path = merge(Base.path, R.path);
732 // T.path = remove_dot_segments(T.path);
733 // endif;
734 // T.query = R.query;
2186321f
VZ
735 //
736 // So we don't do anything for absolute paths and implement merge for
737 // the relative ones
c9f78968 738
2186321f
VZ
739 wxArrayString our(SplitInSegments(m_path)),
740 result(SplitInSegments(base.m_path));
c9f78968 741
2186321f
VZ
742 if ( !result.empty() )
743 result.pop_back();
846978d7 744
2186321f 745 if ( our.empty() )
dd65d8c8 746 {
2186321f
VZ
747 // if we have an empty path it means we were constructed from a "."
748 // string or something similar (e.g. "././././"), it should count
749 // as (empty) segment
750 our.push_back("");
dd65d8c8 751 }
dd65d8c8 752
2186321f
VZ
753 const wxArrayString::const_iterator end = our.end();
754 for ( wxArrayString::const_iterator i = our.begin(); i != end; ++i )
dd65d8c8 755 {
2186321f 756 if ( i->empty() || *i == "." )
dd65d8c8 757 {
2186321f
VZ
758 // as in ParsePath(), while normally we ignore the empty
759 // segments, we need to take account of them at the end
760 if ( i == end - 1 )
761 result.push_back("");
762 continue;
dd65d8c8 763 }
dd65d8c8 764
2186321f 765 if ( *i == ".." )
dd65d8c8 766 {
2186321f
VZ
767 if ( !result.empty() )
768 {
769 result.pop_back();
dd65d8c8 770
2186321f
VZ
771 if ( i == end - 1 )
772 result.push_back("");
773 }
774 //else: just ignore, extra ".." don't accumulate
dd65d8c8
RN
775 }
776 else
777 {
2186321f
VZ
778 if ( result.empty() )
779 {
780 // ensure that the resulting path will always be absolute
781 result.push_back("");
782 }
783
784 result.push_back(*i);
dd65d8c8
RN
785 }
786 }
2186321f
VZ
787
788 m_path = wxJoin(result, '/', '\0');
dd65d8c8
RN
789 }
790
2186321f 791 //T.fragment = R.fragment;
dd65d8c8
RN
792}
793
794// ---------------------------------------------------------------------------
ce321570
RN
795// ParseH16
796//
797// Parses 1 to 4 hex values. Returns true if the first character of the input
2186321f 798// string is a valid hex character. It is the caller's responsibility to move
ce321570
RN
799// the input string back to its original position on failure.
800// ---------------------------------------------------------------------------
801
2186321f 802bool wxURI::ParseH16(const char*& uri)
ce321570
RN
803{
804 // h16 = 1*4HEXDIG
805 if(!IsHex(*++uri))
806 return false;
807
808 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
809 ++uri;
810
811 return true;
812}
813
814// ---------------------------------------------------------------------------
815// ParseIPXXX
816//
846978d7
WS
817// Parses a certain version of an IP address and moves the input string past
818// it. Returns true if the input string contains the proper version of an ip
37424888 819// address. It is the caller's responsibility to move the input string back
ce321570 820// to its original position on failure.
dd65d8c8
RN
821// ---------------------------------------------------------------------------
822
2186321f 823bool wxURI::ParseIPv4address(const char*& uri)
dd65d8c8
RN
824{
825 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
826 //
827 //dec-octet = DIGIT ; 0-9
828 // / %x31-39 DIGIT ; 10-99
829 // / "1" 2DIGIT ; 100-199
830 // / "2" %x30-34 DIGIT ; 200-249
831 // / "25" %x30-35 ; 250-255
832 size_t iIPv4 = 0;
833 if (IsDigit(*uri))
834 {
835 ++iIPv4;
836
846978d7 837
dd65d8c8
RN
838 //each ip part must be between 0-255 (dupe of version in for loop)
839 if( IsDigit(*++uri) && IsDigit(*++uri) &&
840 //100 or less (note !)
2186321f 841 !( (*(uri-2) < '2') ||
846978d7 842 //240 or less
2186321f
VZ
843 (*(uri-2) == '2' &&
844 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
dd65d8c8
RN
845 )
846 )
847 )
848 {
849 return false;
850 }
851
852 if(IsDigit(*uri))++uri;
853
854 //compilers should unroll this loop
855 for(; iIPv4 < 4; ++iIPv4)
856 {
2186321f 857 if (*uri != '.' || !IsDigit(*++uri))
dd65d8c8
RN
858 break;
859
860 //each ip part must be between 0-255
861 if( IsDigit(*++uri) && IsDigit(*++uri) &&
862 //100 or less (note !)
2186321f 863 !( (*(uri-2) < '2') ||
846978d7 864 //240 or less
2186321f
VZ
865 (*(uri-2) == '2' &&
866 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
dd65d8c8
RN
867 )
868 )
869 )
870 {
871 return false;
872 }
873 if(IsDigit(*uri))++uri;
874 }
875 }
876 return iIPv4 == 4;
877}
878
2186321f 879bool wxURI::ParseIPv6address(const char*& uri)
dd65d8c8
RN
880{
881 // IPv6address = 6( h16 ":" ) ls32
882 // / "::" 5( h16 ":" ) ls32
883 // / [ h16 ] "::" 4( h16 ":" ) ls32
884 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
885 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
886 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
887 // / [ *4( h16 ":" ) h16 ] "::" ls32
888 // / [ *5( h16 ":" ) h16 ] "::" h16
889 // / [ *6( h16 ":" ) h16 ] "::"
890
891 size_t numPrefix = 0,
892 maxPostfix;
893
894 bool bEndHex = false;
895
896 for( ; numPrefix < 6; ++numPrefix)
897 {
898 if(!ParseH16(uri))
899 {
900 --uri;
901 bEndHex = true;
902 break;
903 }
846978d7 904
2186321f 905 if(*uri != ':')
dd65d8c8
RN
906 {
907 break;
908 }
909 }
910
911 if(!bEndHex && !ParseH16(uri))
912 {
913 --uri;
914
915 if (numPrefix)
916 return false;
917
2186321f 918 if (*uri == ':')
dd65d8c8 919 {
2186321f 920 if (*++uri != ':')
dd65d8c8
RN
921 return false;
922
923 maxPostfix = 5;
924 }
925 else
926 maxPostfix = 6;
927 }
928 else
929 {
2186321f 930 if (*uri != ':' || *(uri+1) != ':')
dd65d8c8
RN
931 {
932 if (numPrefix != 6)
933 return false;
934
2186321f 935 while (*--uri != ':') {}
dd65d8c8
RN
936 ++uri;
937
2186321f 938 const char * const start = uri;
dd65d8c8
RN
939 //parse ls32
940 // ls32 = ( h16 ":" h16 ) / IPv4address
2186321f 941 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
dd65d8c8
RN
942 return true;
943
2186321f 944 uri = start;
dd65d8c8
RN
945
946 if (ParseIPv4address(uri))
947 return true;
948 else
949 return false;
950 }
951 else
952 {
953 uri += 2;
846978d7 954
dd65d8c8
RN
955 if (numPrefix > 3)
956 maxPostfix = 0;
957 else
958 maxPostfix = 4 - numPrefix;
959 }
960 }
961
962 bool bAllowAltEnding = maxPostfix == 0;
963
964 for(; maxPostfix != 0; --maxPostfix)
965 {
2186321f 966 if(!ParseH16(uri) || *uri != ':')
dd65d8c8
RN
967 return false;
968 }
969
970 if(numPrefix <= 4)
971 {
2186321f 972 const char * const start = uri;
dd65d8c8
RN
973 //parse ls32
974 // ls32 = ( h16 ":" h16 ) / IPv4address
2186321f 975 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
dd65d8c8
RN
976 return true;
977
2186321f 978 uri = start;
dd65d8c8
RN
979
980 if (ParseIPv4address(uri))
981 return true;
982
2186321f 983 uri = start;
846978d7 984
dd65d8c8
RN
985 if (!bAllowAltEnding)
986 return false;
987 }
988
989 if(numPrefix <= 5 && ParseH16(uri))
990 return true;
991
992 return true;
993}
994
2186321f 995bool wxURI::ParseIPvFuture(const char*& uri)
dd65d8c8
RN
996{
997 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
2186321f 998 if (*++uri != 'v' || !IsHex(*++uri))
dd65d8c8
RN
999 return false;
1000
2186321f
VZ
1001 while (IsHex(*++uri))
1002 ;
dd65d8c8 1003
2186321f 1004 if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
dd65d8c8
RN
1005 return false;
1006
2186321f 1007 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
dd65d8c8
RN
1008
1009 return true;
1010}
1011
1012
ce321570
RN
1013// ---------------------------------------------------------------------------
1014// IsXXX
1015//
1016// Returns true if the passed in character meets the criteria of the method
1017// ---------------------------------------------------------------------------
1018
2186321f
VZ
1019// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1020bool wxURI::IsUnreserved(char c)
1021{
1022 return IsAlpha(c) ||
1023 IsDigit(c) ||
1024 c == '-' ||
1025 c == '.' ||
1026 c == '_' ||
1027 c == '~'
846978d7 1028 ;
dd65d8c8
RN
1029}
1030
2186321f 1031bool wxURI::IsReserved(char c)
846978d7 1032{
dd65d8c8
RN
1033 return IsGenDelim(c) || IsSubDelim(c);
1034}
1035
2186321f
VZ
1036// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1037bool wxURI::IsGenDelim(char c)
dd65d8c8 1038{
2186321f
VZ
1039 return c == ':' ||
1040 c == '/' ||
1041 c == '?' ||
1042 c == '#' ||
1043 c == '[' ||
1044 c == ']' ||
1045 c == '@';
dd65d8c8
RN
1046}
1047
2186321f
VZ
1048// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1049// / "*" / "+" / "," / ";" / "="
1050bool wxURI::IsSubDelim(char c)
dd65d8c8 1051{
2186321f
VZ
1052 return c == '!' ||
1053 c == '$' ||
1054 c == '&' ||
1055 c == '\'' ||
1056 c == '(' ||
1057 c == ')' ||
1058 c == '*' ||
1059 c == '+' ||
1060 c == ',' ||
1061 c == ';' ||
1062 c == '='
dd65d8c8
RN
1063 ;
1064}
1065
2186321f
VZ
1066bool wxURI::IsHex(char c)
1067{
1068 return IsDigit(c) ||
1069 (c >= 'a' && c <= 'f') ||
1070 (c >= 'A' && c <= 'F');
1071}
dd65d8c8 1072
2186321f
VZ
1073bool wxURI::IsAlpha(char c)
1074{
1075 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
1076}
dd65d8c8 1077
2186321f
VZ
1078bool wxURI::IsDigit(char c)
1079{
1080 return c >= '0' && c <= '9';
1081}
dd65d8c8 1082
2186321f
VZ
1083bool wxURI::IsEndPath(char c)
1084{
1085 return c == '\0' || c == '#' || c == '?';
1086}
dd65d8c8 1087