minor cleanup: don't use bool parameters, don't typedef the enums, don't abuse references
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21 #endif
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #include "wx/uri.h"
31
32 // ---------------------------------------------------------------------------
33 // definitions
34 // ---------------------------------------------------------------------------
35
36 IMPLEMENT_CLASS(wxURI, wxObject);
37
38 // ===========================================================================
39 // implementation
40 // ===========================================================================
41
42 // ---------------------------------------------------------------------------
43 // utilities
44 // ---------------------------------------------------------------------------
45
46 // ---------------------------------------------------------------------------
47 //
48 // wxURI
49 //
50 // ---------------------------------------------------------------------------
51
52 // ---------------------------------------------------------------------------
53 // Constructors
54 // ---------------------------------------------------------------------------
55
56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 }
59
60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61 {
62 Create(uri);
63 }
64
65 wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 *this = uri;
68 }
69
70 // ---------------------------------------------------------------------------
71 // Destructor and cleanup
72 // ---------------------------------------------------------------------------
73
74 wxURI::~wxURI()
75 {
76 Clear();
77 }
78
79 void wxURI::Clear()
80 {
81 m_scheme = m_user = m_server = m_port = m_path =
82 m_query = m_fragment = wxT("");
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87 }
88
89 // ---------------------------------------------------------------------------
90 // Create
91 //
92 // This creates the URI - all we do here is call the main parsing method
93 // ---------------------------------------------------------------------------
94
95 void wxURI::Create(const wxString& uri)
96 {
97 if (m_fields)
98 Clear();
99
100 Parse(uri);
101 }
102
103 // ---------------------------------------------------------------------------
104 // Escape/Unescape/IsEscape
105 //
106 // Unescape unencodes a 3 character URL escape sequence
107 // Escape encodes an invalid URI character into a 3 character sequence
108 // IsEscape determines if the input string contains an escape sequence,
109 // if it does, then it moves the input string past the escape sequence
110 // ---------------------------------------------------------------------------
111
112 wxChar wxURI::Unescape(const wxChar* s)
113 {
114 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
115
116 return CharToHex(*s) * 0x10 + CharToHex(*++s);
117 }
118
119 void wxURI::Escape(wxString& s, const wxChar& c)
120 {
121 const wxChar* hdig = wxT("0123456789abcdef");
122 s += '%';
123 s += hdig[(c >> 4) & 15];
124 s += hdig[c & 15];
125 }
126
127 bool wxURI::IsEscape(const wxChar*& uri)
128 {
129 if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2)))
130 {
131 uri += 3;
132 return true;
133 }
134 else
135 return false;
136 }
137
138 // ---------------------------------------------------------------------------
139 // HasXXX
140 // ---------------------------------------------------------------------------
141
142 bool wxURI::HasScheme() const
143 { return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; }
144
145 bool wxURI::HasUser() const
146 { return (m_fields & wxURI_USER) == wxURI_USER; }
147
148 bool wxURI::HasServer() const
149 { return (m_fields & wxURI_SERVER) == wxURI_SERVER; }
150
151 bool wxURI::HasPort() const
152 { return (m_fields & wxURI_PORT) == wxURI_PORT; }
153
154 bool wxURI::HasPath() const
155 { return (m_fields & wxURI_PATH) == wxURI_PATH; }
156
157 bool wxURI::HasQuery() const
158 { return (m_fields & wxURI_QUERY) == wxURI_QUERY; }
159
160 bool wxURI::HasFragment() const
161 { return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; }
162
163 // ---------------------------------------------------------------------------
164 // GetXXX
165 //
166 // The normal Get() actually builds the entire URI into a useable
167 // representation, including proper identification characters such as slashes
168 // ---------------------------------------------------------------------------
169
170 const wxString& wxURI::GetScheme() const
171 { return m_scheme; }
172
173 const wxString& wxURI::GetPath() const
174 { return m_path; }
175
176 const wxString& wxURI::GetQuery() const
177 { return m_query; }
178
179 const wxString& wxURI::GetFragment() const
180 { return m_fragment; }
181
182 const wxString& wxURI::GetPort() const
183 { return m_port; }
184
185 const wxString& wxURI::GetUser() const
186 { return m_user; }
187
188 const wxString& wxURI::GetServer() const
189 { return m_server; }
190
191 const wxURIHostType& wxURI::GetHostType() const
192 { return m_hostType; }
193
194 wxString wxURI::Get() const
195 {
196 wxString ret;
197
198 if (HasScheme())
199 ret = ret + m_scheme + wxT(":");
200
201 if (HasServer())
202 {
203 ret += wxT("//");
204
205 if (HasUser())
206 ret = ret + m_user + wxT("@");
207
208 ret += m_server;
209
210 if (HasPort())
211 ret = ret + wxT(":") + m_port;
212 }
213
214 ret += m_path;
215
216 if (HasQuery())
217 ret = ret + wxT("?") + m_query;
218
219 if (HasFragment())
220 ret = ret + wxT("#") + m_fragment;
221
222 return ret;
223 }
224
225 // ---------------------------------------------------------------------------
226 // operator = and ==
227 // ---------------------------------------------------------------------------
228
229 wxURI& wxURI::operator = (const wxURI& uri)
230 {
231 if (HasScheme())
232 m_scheme = uri.m_scheme;
233
234
235 if (HasServer())
236 {
237 if (HasUser())
238 m_user = uri.m_user;
239
240 m_server = uri.m_server;
241 m_hostType = uri.m_hostType;
242
243 if (HasPort())
244 m_port = uri.m_port;
245 }
246
247
248 if (HasPath())
249 m_path = uri.m_path;
250
251 if (HasQuery())
252 m_query = uri.m_query;
253
254 if (HasFragment())
255 m_fragment = uri.m_fragment;
256
257 return *this;
258 }
259
260 wxURI& wxURI::operator = (const wxChar* string)
261 {
262 Create(string);
263 return *this;
264 }
265
266 bool wxURI::operator == (const wxURI& uri) const
267 {
268 if (HasScheme())
269 {
270 if(m_scheme != uri.m_scheme)
271 return false;
272 }
273 else if (uri.HasScheme())
274 return false;
275
276
277 if (HasServer())
278 {
279 if (HasUser())
280 {
281 if (m_user != uri.m_user)
282 return false;
283 }
284 else if (uri.HasUser())
285 return false;
286
287 if (m_server != uri.m_server ||
288 m_hostType != uri.m_hostType)
289 return false;
290
291 if (HasPort())
292 {
293 if(m_port != uri.m_port)
294 return false;
295 }
296 else if (uri.HasPort())
297 return false;
298 }
299 else if (uri.HasServer())
300 return false;
301
302
303 if (HasPath())
304 {
305 if(m_path != uri.m_path)
306 return false;
307 }
308 else if (uri.HasPath())
309 return false;
310
311 if (HasQuery())
312 {
313 if (m_query != uri.m_query)
314 return false;
315 }
316 else if (uri.HasQuery())
317 return false;
318
319 if (HasFragment())
320 {
321 if (m_fragment != uri.m_fragment)
322 return false;
323 }
324 else if (uri.HasFragment())
325 return false;
326
327 return true;
328 }
329
330 // ---------------------------------------------------------------------------
331 // IsReference
332 //
333 // if there is no authority or scheme, it is a reference
334 // ---------------------------------------------------------------------------
335
336 bool wxURI::IsReference() const
337 { return !HasScheme() || !HasServer(); }
338
339 // ---------------------------------------------------------------------------
340 // Parse
341 //
342 // Master URI parsing method. Just calls the individual parsing methods
343 //
344 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
345 // URI-reference = URI / relative-ref
346 // ---------------------------------------------------------------------------
347
348 const wxChar* wxURI::Parse(const wxChar* uri)
349 {
350 uri = ParseScheme(uri);
351 uri = ParseAuthority(uri);
352 uri = ParsePath(uri);
353 uri = ParseQuery(uri);
354 return ParseFragment(uri);
355 }
356
357 // ---------------------------------------------------------------------------
358 // ParseXXX
359 //
360 // Individual parsers for each URI component
361 // ---------------------------------------------------------------------------
362
363 const wxChar* wxURI::ParseScheme(const wxChar* uri)
364 {
365 wxASSERT(uri != NULL);
366
367 //copy of the uri - used for figuring out
368 //length of each component
369 const wxChar* uricopy = uri;
370
371 //Does the uri have a scheme (first character alpha)?
372 if (IsAlpha(*uri))
373 {
374 m_scheme += *uri++;
375
376 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
377 //RN: Scheme can not be escaped
378 while (IsAlpha(*uri) || IsDigit(*uri) ||
379 *uri == '+' ||
380 *uri == '-' ||
381 *uri == '.')
382 {
383 m_scheme += *uri++;
384 }
385
386 //valid scheme?
387 if (*uri == ':')
388 {
389 //mark the scheme as valid
390 m_fields |= wxURI_SCHEME;
391
392 //move reference point up to input buffer
393 uricopy = ++uri;
394 }
395 else
396 //relative uri with relative path reference
397 m_scheme = wxT("");
398 }
399 // else
400 //relative uri with _possible_ relative path reference
401
402 return uricopy;
403 }
404
405 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
406 {
407 // authority = [ userinfo "@" ] host [ ":" port ]
408 if (*uri == '/' && *(uri+1) == '/')
409 {
410 uri += 2;
411
412 uri = ParseUser(uri);
413 uri = ParseServer(uri);
414 return ParsePort(uri);
415 }
416
417 return uri;
418 }
419
420 const wxChar* wxURI::ParseUser(const wxChar* uri)
421 {
422 wxASSERT(uri != NULL);
423
424 //copy of the uri - used for figuring out
425 //length of each component
426 const wxChar* uricopy = uri;
427
428 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
429 while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?')
430 {
431 if(IsUnreserved(*uri) || IsEscape(uri) ||
432 IsSubDelim(*uri) || *uri == ':')
433 m_user += *uri++;
434 else
435 Escape(m_user, *uri++);
436 }
437
438 if(*uri == '@')
439 {
440 //valid userinfo
441 m_fields |= wxURI_USER;
442
443 uricopy = ++uri;
444 }
445 else
446 m_user = wxT("");
447
448 return uricopy;
449 }
450
451 const wxChar* wxURI::ParseServer(const wxChar* uri)
452 {
453 wxASSERT(uri != NULL);
454
455 //copy of the uri - used for figuring out
456 //length of each component
457 const wxChar* uricopy = uri;
458
459 // host = IP-literal / IPv4address / reg-name
460 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
461 if (*uri == '[')
462 {
463 if (ParseIPv6address(++uri) && *uri == ']')
464 {
465 ++uri;
466 m_hostType = wxURI_IPV6ADDRESS;
467
468 wxStringBufferLength theBuffer(m_server, uri - uricopy);
469 wxMemcpy(theBuffer, uricopy, uri-uricopy);
470 theBuffer.SetLength(uri-uricopy);
471 }
472 else
473 {
474 uri = uricopy;
475
476 if (ParseIPvFuture(++uri) && *uri == ']')
477 {
478 ++uri;
479 m_hostType = wxURI_IPVFUTURE;
480
481 wxStringBufferLength theBuffer(m_server, uri - uricopy);
482 wxMemcpy(theBuffer, uricopy, uri-uricopy);
483 theBuffer.SetLength(uri-uricopy);
484 }
485 else
486 uri = uricopy;
487 }
488 }
489 else
490 {
491 if (ParseIPv4address(uri))
492 {
493 m_hostType = wxURI_IPV4ADDRESS;
494
495 wxStringBufferLength theBuffer(m_server, uri - uricopy);
496 wxMemcpy(theBuffer, uricopy, uri-uricopy);
497 theBuffer.SetLength(uri-uricopy);
498 }
499 else
500 uri = uricopy;
501 }
502
503 if(m_hostType == wxURI_REGNAME)
504 {
505 uri = uricopy;
506 // reg-name = *( unreserved / pct-encoded / sub-delims )
507 while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?')
508 {
509 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
510 m_server += *uri++;
511 else
512 Escape(m_server, *uri++);
513 }
514 }
515
516 //mark the server as valid
517 m_fields |= wxURI_SERVER;
518
519 return uri;
520 }
521
522
523 const wxChar* wxURI::ParsePort(const wxChar* uri)
524 {
525 wxASSERT(uri != NULL);
526
527 // port = *DIGIT
528 if(*uri == ':')
529 {
530 ++uri;
531 while(IsDigit(*uri))
532 {
533 m_port += *uri++;
534 }
535
536 //mark the port as valid
537 m_fields |= wxURI_PORT;
538 }
539
540 return uri;
541 }
542
543 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
544 {
545 wxASSERT(uri != NULL);
546
547 //copy of the uri - used for figuring out
548 //length of each component
549 const wxChar* uricopy = uri;
550
551 /// hier-part = "//" authority path-abempty
552 /// / path-absolute
553 /// / path-rootless
554 /// / path-empty
555 ///
556 /// relative-part = "//" authority path-abempty
557 /// / path-absolute
558 /// / path-noscheme
559 /// / path-empty
560 ///
561 /// path-abempty = *( "/" segment )
562 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
563 /// path-noscheme = segment-nz-nc *( "/" segment )
564 /// path-rootless = segment-nz *( "/" segment )
565 /// path-empty = 0<pchar>
566 ///
567 /// segment = *pchar
568 /// segment-nz = 1*pchar
569 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
570 /// ; non-zero-length segment without any colon ":"
571 ///
572 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
573 if (*uri == '/')
574 {
575 m_path += *uri++;
576
577 while(*uri && *uri != '#' && *uri != '?')
578 {
579 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
580 *uri == ':' || *uri == '@' || *uri == '/')
581 m_path += *uri++;
582 else
583 Escape(m_path, *uri++);
584 }
585
586 if (bNormalize)
587 {
588 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
589 Normalize(theBuffer, true);
590 theBuffer.SetLength(wxStrlen(theBuffer));
591 }
592 //mark the path as valid
593 m_fields |= wxURI_PATH;
594 }
595 else if(*uri) //Relative path
596 {
597 if (bReference)
598 {
599 //no colon allowed
600 while(*uri && *uri != '#' && *uri != '?')
601 {
602 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
603 *uri == '@' || *uri == '/')
604 m_path += *uri++;
605 else
606 Escape(m_path, *uri++);
607 }
608 }
609 else
610 {
611 while(*uri && *uri != '#' && *uri != '?')
612 {
613 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
614 *uri == ':' || *uri == '@' || *uri == '/')
615 m_path += *uri++;
616 else
617 Escape(m_path, *uri++);
618 }
619 }
620
621 if (uri != uricopy)
622 {
623 if (bNormalize)
624 {
625 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
626 Normalize(theBuffer);
627 theBuffer.SetLength(wxStrlen(theBuffer));
628 }
629
630 //mark the path as valid
631 m_fields |= wxURI_PATH;
632 }
633 }
634
635 return uri;
636 }
637
638
639 const wxChar* wxURI::ParseQuery(const wxChar* uri)
640 {
641 wxASSERT(uri != NULL);
642
643 // query = *( pchar / "/" / "?" )
644 if (*uri == '?')
645 {
646 ++uri;
647 while(*uri && *uri != '#')
648 {
649 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
650 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
651 m_query += *uri++;
652 else
653 Escape(m_query, *uri++);
654 }
655
656 //mark the server as valid
657 m_fields |= wxURI_QUERY;
658 }
659
660 return uri;
661 }
662
663
664 const wxChar* wxURI::ParseFragment(const wxChar* uri)
665 {
666 wxASSERT(uri != NULL);
667
668 // fragment = *( pchar / "/" / "?" )
669 if (*uri == '#')
670 {
671 ++uri;
672 while(*uri)
673 {
674 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
675 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
676 m_fragment += *uri++;
677 else
678 Escape(m_fragment, *uri++);
679 }
680
681 //mark the server as valid
682 m_fields |= wxURI_FRAGMENT;
683 }
684
685 return uri;
686 }
687
688 // ---------------------------------------------------------------------------
689 // Resolve URI
690 //
691 // Builds missing components of this uri from a base uri
692 //
693 // A version of the algorithm outlined in the RFC is used here
694 // (it is shown in comments)
695 // ---------------------------------------------------------------------------
696
697 void wxURI::Resolve(const wxURI& base, int flags)
698 {
699 wxASSERT_MSG(!base.IsReference(),
700 wxT("wxURI to inherit from must not be a reference!"));
701
702 // If we arn't being strict, enable the older
703 // loophole that allows this uri to inherit other
704 // properties from the base uri - even if the scheme
705 // is defined
706 if ( !(flags & wxURI_STRICT) &&
707 HasScheme() && base.HasScheme() &&
708 m_scheme == base.m_scheme )
709 {
710 m_fields -= wxURI_SCHEME;
711 }
712
713
714 // Do nothing if this is an absolute wxURI
715 // if defined(R.scheme) then
716 // T.scheme = R.scheme;
717 // T.authority = R.authority;
718 // T.path = remove_dot_segments(R.path);
719 // T.query = R.query;
720 if (HasScheme())
721 {
722 return;
723 }
724
725 //No sheme - inherit
726 m_scheme = base.m_scheme;
727 m_fields |= wxURI_SCHEME;
728
729 // All we need to do for relative URIs with an
730 // authority component is just inherit the scheme
731 // if defined(R.authority) then
732 // T.authority = R.authority;
733 // T.path = remove_dot_segments(R.path);
734 // T.query = R.query;
735 if (HasServer())
736 {
737 return;
738 }
739
740 //No authority - inherit
741 if (base.HasUser())
742 {
743 m_user = base.m_user;
744 m_fields |= wxURI_USER;
745 }
746
747 m_server = base.m_server;
748 m_hostType = base.m_hostType;
749 m_fields |= wxURI_SERVER;
750
751 if (base.HasPort())
752 {
753 m_port = base.m_port;
754 m_fields |= wxURI_PORT;
755 }
756
757
758 // Simple path inheritance from base
759 if (!HasPath())
760 {
761 // T.path = Base.path;
762 m_path = base.m_path;
763 m_fields |= wxURI_PATH;
764
765
766 // if defined(R.query) then
767 // T.query = R.query;
768 // else
769 // T.query = Base.query;
770 // endif;
771 if (!HasQuery())
772 {
773 m_query = base.m_query;
774 m_fields |= wxURI_QUERY;
775 }
776 }
777 else
778 {
779 // if (R.path starts-with "/") then
780 // T.path = remove_dot_segments(R.path);
781 // else
782 // T.path = merge(Base.path, R.path);
783 // T.path = remove_dot_segments(T.path);
784 // endif;
785 // T.query = R.query;
786 if (m_path[(const size_t&)0] != '/')
787 {
788 //Marge paths
789 const wxChar* op = m_path.c_str();
790 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
791
792 //not a ending directory? move up
793 if (base.m_path[0] && *(bp-1) != '/')
794 UpTree(base.m_path, bp);
795
796 //normalize directories
797 while(*op == '.' && *(op+1) == '.' &&
798 (*(op+2) == '\0' || *(op+2) == '/') )
799 {
800 UpTree(base.m_path, bp);
801
802 if (*(op+2) == '\0')
803 op += 2;
804 else
805 op += 3;
806 }
807
808 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
809 m_path.Mid((op - m_path.c_str()), m_path.Length());
810 }
811 }
812 }
813
814 // ---------------------------------------------------------------------------
815 // Directory Normalization (static)
816 //
817 // UpTree goes up a directory in a string and moves the pointer as such,
818 // while Normalize gets rid of duplicate/erroneous directories in a URI
819 // according to RFC 2396 and modified quite a bit to meet the unit tests
820 // in it.
821 // ---------------------------------------------------------------------------
822
823 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
824 {
825 if (uri != uristart && *(uri-1) == '/')
826 {
827 uri -= 2;
828 }
829
830 for(;uri != uristart; --uri)
831 {
832 if (*uri == '/')
833 {
834 ++uri;
835 break;
836 }
837 }
838
839 //!!!TODO:HACK!!!//
840 if (uri == uristart && *uri == '/')
841 ++uri;
842 //!!!//
843 }
844
845 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
846 {
847 wxChar* cp = s;
848 wxChar* bp = s;
849
850 if(s[0] == '/')
851 ++bp;
852
853 while(*cp)
854 {
855 if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0')
856 && (bp == cp || *(cp-1) == '/'))
857 {
858 //. _or_ ./ - ignore
859 if (*(cp+1) == '\0')
860 cp += 1;
861 else
862 cp += 2;
863 }
864 else if (*cp == '.' && *(cp+1) == '.' &&
865 (*(cp+2) == '/' || *(cp+2) == '\0')
866 && (bp == cp || *(cp-1) == '/'))
867 {
868 //.. _or_ ../ - go up the tree
869 if (s != bp)
870 {
871 UpTree((const wxChar*)bp, (const wxChar*&)s);
872
873 if (*(cp+2) == '\0')
874 cp += 2;
875 else
876 cp += 3;
877 }
878 else if (!bIgnoreLeads)
879
880 {
881 *bp++ = *cp++;
882 *bp++ = *cp++;
883 if (*cp)
884 *bp++ = *cp++;
885
886 s = bp;
887 }
888 else
889 {
890 if (*(cp+2) == '\0')
891 cp += 2;
892 else
893 cp += 3;
894 }
895 }
896 else
897 *s++ = *cp++;
898 }
899
900 *s = '\0';
901 }
902
903 // ---------------------------------------------------------------------------
904 // Misc. Parsing Methods
905 // ---------------------------------------------------------------------------
906
907 bool wxURI::ParseIPv4address(const wxChar*& uri)
908 {
909 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
910 //
911 //dec-octet = DIGIT ; 0-9
912 // / %x31-39 DIGIT ; 10-99
913 // / "1" 2DIGIT ; 100-199
914 // / "2" %x30-34 DIGIT ; 200-249
915 // / "25" %x30-35 ; 250-255
916 size_t iIPv4 = 0;
917 if (IsDigit(*uri))
918 {
919 ++iIPv4;
920
921
922 //each ip part must be between 0-255 (dupe of version in for loop)
923 if( IsDigit(*++uri) && IsDigit(*++uri) &&
924 //100 or less (note !)
925 !( (*(uri-2) < '2') ||
926 //240 or less
927 (*(uri-2) == '2' &&
928 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
929 )
930 )
931 )
932 {
933 return false;
934 }
935
936 if(IsDigit(*uri))++uri;
937
938 //compilers should unroll this loop
939 for(; iIPv4 < 4; ++iIPv4)
940 {
941 if (*uri != '.' || !IsDigit(*++uri))
942 break;
943
944 //each ip part must be between 0-255
945 if( IsDigit(*++uri) && IsDigit(*++uri) &&
946 //100 or less (note !)
947 !( (*(uri-2) < '2') ||
948 //240 or less
949 (*(uri-2) == '2' &&
950 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
951 )
952 )
953 )
954 {
955 return false;
956 }
957 if(IsDigit(*uri))++uri;
958 }
959 }
960 return iIPv4 == 4;
961 }
962
963 bool wxURI::ParseH16(const wxChar*& uri)
964 {
965 // h16 = 1*4HEXDIG
966 if(!IsHex(*++uri))
967 return false;
968
969 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
970 ++uri;
971
972 return true;
973 }
974
975 bool wxURI::ParseIPv6address(const wxChar*& uri)
976 {
977 // IPv6address = 6( h16 ":" ) ls32
978 // / "::" 5( h16 ":" ) ls32
979 // / [ h16 ] "::" 4( h16 ":" ) ls32
980 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
981 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
982 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
983 // / [ *4( h16 ":" ) h16 ] "::" ls32
984 // / [ *5( h16 ":" ) h16 ] "::" h16
985 // / [ *6( h16 ":" ) h16 ] "::"
986
987 size_t numPrefix = 0,
988 maxPostfix;
989
990 bool bEndHex = false;
991
992 for( ; numPrefix < 6; ++numPrefix)
993 {
994 if(!ParseH16(uri))
995 {
996 --uri;
997 bEndHex = true;
998 break;
999 }
1000
1001 if(*uri != ':')
1002 {
1003 break;
1004 }
1005 }
1006
1007 if(!bEndHex && !ParseH16(uri))
1008 {
1009 --uri;
1010
1011 if (numPrefix)
1012 return false;
1013
1014 if (*uri == ':')
1015 {
1016 if (*++uri != ':')
1017 return false;
1018
1019 maxPostfix = 5;
1020 }
1021 else
1022 maxPostfix = 6;
1023 }
1024 else
1025 {
1026 if (*uri != ':' || *(uri+1) != ':')
1027 {
1028 if (numPrefix != 6)
1029 return false;
1030
1031 while (*--uri != ':') {}
1032 ++uri;
1033
1034 const wxChar* uristart = uri;
1035 //parse ls32
1036 // ls32 = ( h16 ":" h16 ) / IPv4address
1037 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
1038 return true;
1039
1040 uri = uristart;
1041
1042 if (ParseIPv4address(uri))
1043 return true;
1044 else
1045 return false;
1046 }
1047 else
1048 {
1049 uri += 2;
1050
1051 if (numPrefix > 3)
1052 maxPostfix = 0;
1053 else
1054 maxPostfix = 4 - numPrefix;
1055 }
1056 }
1057
1058 bool bAllowAltEnding = maxPostfix == 0;
1059
1060 for(; maxPostfix != 0; --maxPostfix)
1061 {
1062 if(!ParseH16(uri) || *uri != ':')
1063 return false;
1064 }
1065
1066 if(numPrefix <= 4)
1067 {
1068 const wxChar* uristart = uri;
1069 //parse ls32
1070 // ls32 = ( h16 ":" h16 ) / IPv4address
1071 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
1072 return true;
1073
1074 uri = uristart;
1075
1076 if (ParseIPv4address(uri))
1077 return true;
1078
1079 uri = uristart;
1080
1081 if (!bAllowAltEnding)
1082 return false;
1083 }
1084
1085 if(numPrefix <= 5 && ParseH16(uri))
1086 return true;
1087
1088 return true;
1089 }
1090
1091 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1092 {
1093 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1094 if (*++uri != 'v' || !IsHex(*++uri))
1095 return false;
1096
1097 while (IsHex(*++uri)) {}
1098
1099 if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1100 return false;
1101
1102 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1103
1104 return true;
1105 }
1106
1107
1108 // ---------------------------------------------------------------------------
1109 // Misc methods - IsXXX and CharToHex
1110 // ---------------------------------------------------------------------------
1111
1112 int wxURI::CharToHex(const wxChar& c)
1113 {
1114 if ((c >= 'A') && (c <= 'Z')) return c - 'A' + 0x0A;
1115 if ((c >= 'a') && (c <= 'z')) return c - 'a' + 0x0a;
1116 if ((c >= '0') && (c <= '9')) return c - '0' + 0x00;
1117
1118 return 0;
1119 }
1120
1121 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1122 bool wxURI::IsUnreserved (const wxChar& c)
1123 { return IsAlpha(c) || IsDigit(c) ||
1124 c == '-' ||
1125 c == '.' ||
1126 c == '_' ||
1127 c == '~' //tilde
1128 ;
1129 }
1130
1131 bool wxURI::IsReserved (const wxChar& c)
1132 {
1133 return IsGenDelim(c) || IsSubDelim(c);
1134 }
1135
1136 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1137 bool wxURI::IsGenDelim (const wxChar& c)
1138 {
1139 return c == ':' ||
1140 c == '/' ||
1141 c == '?' ||
1142 c == '#' ||
1143 c == '[' ||
1144 c == ']' ||
1145 c == '@';
1146 }
1147
1148 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1149 //! / "*" / "+" / "," / ";" / "="
1150 bool wxURI::IsSubDelim (const wxChar& c)
1151 {
1152 return c == '!' ||
1153 c == '$' ||
1154 c == '&' ||
1155 c == '\'' ||
1156 c == '(' ||
1157 c == ')' ||
1158 c == '*' ||
1159 c == '+' ||
1160 c == ',' ||
1161 c == ';' ||
1162 c == '='
1163 ;
1164 }
1165
1166 bool wxURI::IsHex(const wxChar& c)
1167 { return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
1168
1169 bool wxURI::IsAlpha(const wxChar& c)
1170 { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
1171
1172 bool wxURI::IsDigit(const wxChar& c)
1173 { return c >= '0' && c <= '9'; }
1174
1175
1176 // ---------------------------------------------------------------------------
1177 //
1178 // wxURL Compatibility
1179 //
1180 // TODO: Use wxURI instead here...
1181 // ---------------------------------------------------------------------------
1182
1183 #if wxUSE_URL
1184
1185 #include "wx/url.h"
1186
1187 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1188 {
1189 wxString out_str;
1190 wxString hexa_code;
1191 size_t i;
1192
1193 for (i = 0; i < uri.Len(); i++)
1194 {
1195 wxChar c = uri.GetChar(i);
1196
1197 if (c == wxT(' '))
1198 {
1199 // GRG, Apr/2000: changed to "%20" instead of '+'
1200
1201 out_str += wxT("%20");
1202 }
1203 else
1204 {
1205 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1206 //
1207 // - Alphanumeric characters are never escaped
1208 // - Unreserved marks are never escaped
1209 // - Delimiters must be escaped if they appear within a component
1210 // but not if they are used to separate components. Here we have
1211 // no clear way to distinguish between these two cases, so they
1212 // are escaped unless they are passed in the 'delims' parameter
1213 // (allowed delimiters).
1214
1215 static const wxChar marks[] = wxT("-_.!~*()'");
1216
1217 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1218 {
1219 hexa_code.Printf(wxT("%%%02X"), c);
1220 out_str += hexa_code;
1221 }
1222 else
1223 {
1224 out_str += c;
1225 }
1226 }
1227 }
1228
1229 return out_str;
1230 }
1231
1232 wxString wxURL::ConvertFromURI(const wxString& uri)
1233 {
1234 wxString new_uri;
1235
1236 size_t i = 0;
1237 while (i < uri.Len())
1238 {
1239 int code;
1240 if (uri[i] == wxT('%'))
1241 {
1242 i++;
1243 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1244 code = (uri[i] - wxT('A') + 10) * 16;
1245 else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1246 code = (uri[i] - wxT('a') + 10) * 16;
1247 else
1248 code = (uri[i] - wxT('0')) * 16;
1249
1250 i++;
1251 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1252 code += (uri[i] - wxT('A')) + 10;
1253 else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1254 code += (uri[i] - wxT('a')) + 10;
1255 else
1256 code += (uri[i] - wxT('0'));
1257
1258 i++;
1259 new_uri += (wxChar)code;
1260 continue;
1261 }
1262 new_uri += uri[i];
1263 i++;
1264 }
1265 return new_uri;
1266 }
1267
1268 #endif //wxUSE_URL
1269
1270 //end of uri.cpp
1271
1272
1273