Inherit wxURL from wxURI, providing assignment, copy construction, comparison, and...
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 //
12 //TODO: RN: I had some massive doxygen docs, I need to move these
13 //in a presentable form in these sources
14 //
15
16 // ===========================================================================
17 // declarations
18 // ===========================================================================
19
20 // ---------------------------------------------------------------------------
21 // headers
22 // ---------------------------------------------------------------------------
23
24 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
25 #pragma implementation "uri.h"
26 #endif
27
28 // For compilers that support precompilation, includes "wx.h".
29 #include "wx/wxprec.h"
30
31 #ifdef __BORLANDC__
32 #pragma hdrstop
33 #endif
34
35 #include "wx/uri.h"
36
37 // ---------------------------------------------------------------------------
38 // definitions
39 // ---------------------------------------------------------------------------
40
// wxWidgets class-registration macro invoked for wxURI with wxObject named as
// its base class (presumably emits the run-time class information -- confirm
// against the macro's definition in the wx headers).
41 IMPLEMENT_CLASS(wxURI, wxObject);
42
43 // ===========================================================================
44 // implementation
45 // ===========================================================================
46
47 // ---------------------------------------------------------------------------
48 // utilities
49 // ---------------------------------------------------------------------------
50
51 // ---------------------------------------------------------------------------
52 //
53 // wxURI
54 //
55 // ---------------------------------------------------------------------------
56
57 // ---------------------------------------------------------------------------
58 // Constructors
59 // ---------------------------------------------------------------------------
60
61 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
62 {
63 }
64
65 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 Create(uri);
68 }
69
70 wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
71 {
72 Assign(uri);
73 }
74
75 // ---------------------------------------------------------------------------
76 // Destructor and cleanup
77 // ---------------------------------------------------------------------------
78
79 wxURI::~wxURI()
80 {
81 Clear();
82 }
83
84 void wxURI::Clear()
85 {
86 m_scheme = m_user = m_server = m_port = m_path =
87 m_query = m_fragment = wxT("");
88
89 m_hostType = wxURI_REGNAME;
90
91 m_fields = 0;
92 }
93
94 // ---------------------------------------------------------------------------
95 // Create
96 //
97 // This creates the URI - all we do here is call the main parsing method
98 // ---------------------------------------------------------------------------
99
100 void wxURI::Create(const wxString& uri)
101 {
102 if (m_fields)
103 Clear();
104
105 Parse(uri);
106 }
107
108 // ---------------------------------------------------------------------------
109 // Escape/Unescape/IsEscape
110 //
111 // Unescape unencodes a 3 character URL escape sequence
112 // Escape encodes an invalid URI character into a 3 character sequence
113 // IsEscape determines if the input string contains an escape sequence,
114 // if it does, then it moves the input string past the escape sequence
115 // ---------------------------------------------------------------------------
116
117 wxChar wxURI::Unescape(const wxChar* s)
118 {
119 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
120
121 return CharToHex(*s) * 0x10 + CharToHex(*++s);
122 }
123
124 void wxURI::Escape(wxString& s, const wxChar& c)
125 {
126 const wxChar* hdig = wxT("0123456789abcdef");
127 s += '%';
128 s += hdig[(c >> 4) & 15];
129 s += hdig[c & 15];
130 }
131
132 bool wxURI::IsEscape(const wxChar*& uri)
133 {
134 if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2)))
135 {
136 uri += 3;
137 return true;
138 }
139 else
140 return false;
141 }
142
143 // ---------------------------------------------------------------------------
144 // Get
145 //
146 // Get() actually builds the entire URI into a useable
147 // representation, including proper identification characters such as slashes
148 // ---------------------------------------------------------------------------
149
150 wxString wxURI::Get() const
151 {
152 wxString ret;
153
154 if (HasScheme())
155 ret = ret + m_scheme + wxT(":");
156
157 if (HasServer())
158 {
159 ret += wxT("//");
160
161 if (HasUser())
162 ret = ret + m_user + wxT("@");
163
164 ret += m_server;
165
166 if (HasPort())
167 ret = ret + wxT(":") + m_port;
168 }
169
170 ret += m_path;
171
172 if (HasQuery())
173 ret = ret + wxT("?") + m_query;
174
175 if (HasFragment())
176 ret = ret + wxT("#") + m_fragment;
177
178 return ret;
179 }
180
181 // ---------------------------------------------------------------------------
182 // operator = and ==
183 // ---------------------------------------------------------------------------
184
185 wxURI& wxURI::operator = (const wxURI& uri)
186 {
187 return Assign(uri);
188 }
189
190 wxURI& wxURI::Assign(const wxURI& uri)
191 {
192 //assign fields
193 m_fields = uri.m_fields;
194
195 //ref over components
196 m_scheme = uri.m_scheme;
197 m_user = uri.m_user;
198 m_server = uri.m_server;
199 m_hostType = uri.m_hostType;
200 m_port = uri.m_port;
201 m_path = uri.m_path;
202 m_query = uri.m_query;
203 m_fragment = uri.m_fragment;
204
205 return *this;
206 }
207
208 wxURI& wxURI::operator = (const wxString& string)
209 {
210 Create(string);
211 return *this;
212 }
213
214 bool wxURI::operator == (const wxURI& uri) const
215 {
216 if (HasScheme())
217 {
218 if(m_scheme != uri.m_scheme)
219 return false;
220 }
221 else if (uri.HasScheme())
222 return false;
223
224
225 if (HasServer())
226 {
227 if (HasUser())
228 {
229 if (m_user != uri.m_user)
230 return false;
231 }
232 else if (uri.HasUser())
233 return false;
234
235 if (m_server != uri.m_server ||
236 m_hostType != uri.m_hostType)
237 return false;
238
239 if (HasPort())
240 {
241 if(m_port != uri.m_port)
242 return false;
243 }
244 else if (uri.HasPort())
245 return false;
246 }
247 else if (uri.HasServer())
248 return false;
249
250
251 if (HasPath())
252 {
253 if(m_path != uri.m_path)
254 return false;
255 }
256 else if (uri.HasPath())
257 return false;
258
259 if (HasQuery())
260 {
261 if (m_query != uri.m_query)
262 return false;
263 }
264 else if (uri.HasQuery())
265 return false;
266
267 if (HasFragment())
268 {
269 if (m_fragment != uri.m_fragment)
270 return false;
271 }
272 else if (uri.HasFragment())
273 return false;
274
275 return true;
276 }
277
278 // ---------------------------------------------------------------------------
279 // IsReference
280 //
281 // if there is no authority or scheme, it is a reference
282 // ---------------------------------------------------------------------------
283
284 bool wxURI::IsReference() const
285 { return !HasScheme() || !HasServer(); }
286
287 // ---------------------------------------------------------------------------
288 // Parse
289 //
290 // Master URI parsing method. Just calls the individual parsing methods
291 //
292 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
293 // URI-reference = URI / relative-ref
294 // ---------------------------------------------------------------------------
295
296 const wxChar* wxURI::Parse(const wxChar* uri)
297 {
298 uri = ParseScheme(uri);
299 uri = ParseAuthority(uri);
300 uri = ParsePath(uri);
301 uri = ParseQuery(uri);
302 return ParseFragment(uri);
303 }
304
305 // ---------------------------------------------------------------------------
306 // ParseXXX
307 //
308 // Individual parsers for each URI component
309 // ---------------------------------------------------------------------------
310
311 const wxChar* wxURI::ParseScheme(const wxChar* uri)
312 {
313 wxASSERT(uri != NULL);
314
315 //copy of the uri - used for figuring out
316 //length of each component
317 const wxChar* uricopy = uri;
318
319 //Does the uri have a scheme (first character alpha)?
320 if (IsAlpha(*uri))
321 {
322 m_scheme += *uri++;
323
324 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
325 //RN: Scheme can not be escaped
326 while (IsAlpha(*uri) || IsDigit(*uri) ||
327 *uri == '+' ||
328 *uri == '-' ||
329 *uri == '.')
330 {
331 m_scheme += *uri++;
332 }
333
334 //valid scheme?
335 if (*uri == ':')
336 {
337 //mark the scheme as valid
338 m_fields |= wxURI_SCHEME;
339
340 //move reference point up to input buffer
341 uricopy = ++uri;
342 }
343 else
344 //relative uri with relative path reference
345 m_scheme = wxT("");
346 }
347 // else
348 //relative uri with _possible_ relative path reference
349
350 return uricopy;
351 }
352
353 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
354 {
355 // authority = [ userinfo "@" ] host [ ":" port ]
356 if (*uri == '/' && *(uri+1) == '/')
357 {
358 uri += 2;
359
360 uri = ParseUser(uri);
361 uri = ParseServer(uri);
362 return ParsePort(uri);
363 }
364
365 return uri;
366 }
367
368 const wxChar* wxURI::ParseUser(const wxChar* uri)
369 {
370 wxASSERT(uri != NULL);
371
372 //copy of the uri - used for figuring out
373 //length of each component
374 const wxChar* uricopy = uri;
375
376 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
377 while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?')
378 {
379 if(IsUnreserved(*uri) || IsEscape(uri) ||
380 IsSubDelim(*uri) || *uri == ':')
381 m_user += *uri++;
382 else
383 Escape(m_user, *uri++);
384 }
385
386 if(*uri == '@')
387 {
388 //valid userinfo
389 m_fields |= wxURI_USER;
390
391 uricopy = ++uri;
392 }
393 else
394 m_user = wxT("");
395
396 return uricopy;
397 }
398
399 const wxChar* wxURI::ParseServer(const wxChar* uri)
400 {
401 wxASSERT(uri != NULL);
402
403 //copy of the uri - used for figuring out
404 //length of each component
405 const wxChar* uricopy = uri;
406
407 // host = IP-literal / IPv4address / reg-name
408 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
409 if (*uri == '[')
410 {
411 if (ParseIPv6address(++uri) && *uri == ']')
412 {
413 ++uri;
414 m_hostType = wxURI_IPV6ADDRESS;
415
416 wxStringBufferLength theBuffer(m_server, uri - uricopy);
417 wxMemcpy(theBuffer, uricopy, uri-uricopy);
418 theBuffer.SetLength(uri-uricopy);
419 }
420 else
421 {
422 uri = uricopy;
423
424 if (ParseIPvFuture(++uri) && *uri == ']')
425 {
426 ++uri;
427 m_hostType = wxURI_IPVFUTURE;
428
429 wxStringBufferLength theBuffer(m_server, uri - uricopy);
430 wxMemcpy(theBuffer, uricopy, uri-uricopy);
431 theBuffer.SetLength(uri-uricopy);
432 }
433 else
434 uri = uricopy;
435 }
436 }
437 else
438 {
439 if (ParseIPv4address(uri))
440 {
441 m_hostType = wxURI_IPV4ADDRESS;
442
443 wxStringBufferLength theBuffer(m_server, uri - uricopy);
444 wxMemcpy(theBuffer, uricopy, uri-uricopy);
445 theBuffer.SetLength(uri-uricopy);
446 }
447 else
448 uri = uricopy;
449 }
450
451 if(m_hostType == wxURI_REGNAME)
452 {
453 uri = uricopy;
454 // reg-name = *( unreserved / pct-encoded / sub-delims )
455 while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?')
456 {
457 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
458 m_server += *uri++;
459 else
460 Escape(m_server, *uri++);
461 }
462 }
463
464 //mark the server as valid
465 m_fields |= wxURI_SERVER;
466
467 return uri;
468 }
469
470
471 const wxChar* wxURI::ParsePort(const wxChar* uri)
472 {
473 wxASSERT(uri != NULL);
474
475 // port = *DIGIT
476 if(*uri == ':')
477 {
478 ++uri;
479 while(IsDigit(*uri))
480 {
481 m_port += *uri++;
482 }
483
484 //mark the port as valid
485 m_fields |= wxURI_PORT;
486 }
487
488 return uri;
489 }
490
491 const wxChar* wxURI::ParsePath(const wxChar* uri, bool bReference, bool bNormalize)
492 {
493 wxASSERT(uri != NULL);
494
495 //copy of the uri - used for figuring out
496 //length of each component
497 const wxChar* uricopy = uri;
498
499 /// hier-part = "//" authority path-abempty
500 /// / path-absolute
501 /// / path-rootless
502 /// / path-empty
503 ///
504 /// relative-part = "//" authority path-abempty
505 /// / path-absolute
506 /// / path-noscheme
507 /// / path-empty
508 ///
509 /// path-abempty = *( "/" segment )
510 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
511 /// path-noscheme = segment-nz-nc *( "/" segment )
512 /// path-rootless = segment-nz *( "/" segment )
513 /// path-empty = 0<pchar>
514 ///
515 /// segment = *pchar
516 /// segment-nz = 1*pchar
517 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
518 /// ; non-zero-length segment without any colon ":"
519 ///
520 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
521 if (*uri == '/')
522 {
523 m_path += *uri++;
524
525 while(*uri && *uri != '#' && *uri != '?')
526 {
527 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
528 *uri == ':' || *uri == '@' || *uri == '/')
529 m_path += *uri++;
530 else
531 Escape(m_path, *uri++);
532 }
533
534 if (bNormalize)
535 {
536 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
537 Normalize(theBuffer, true);
538 theBuffer.SetLength(wxStrlen(theBuffer));
539 }
540 //mark the path as valid
541 m_fields |= wxURI_PATH;
542 }
543 else if(*uri) //Relative path
544 {
545 if (bReference)
546 {
547 //no colon allowed
548 while(*uri && *uri != '#' && *uri != '?')
549 {
550 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
551 *uri == '@' || *uri == '/')
552 m_path += *uri++;
553 else
554 Escape(m_path, *uri++);
555 }
556 }
557 else
558 {
559 while(*uri && *uri != '#' && *uri != '?')
560 {
561 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
562 *uri == ':' || *uri == '@' || *uri == '/')
563 m_path += *uri++;
564 else
565 Escape(m_path, *uri++);
566 }
567 }
568
569 if (uri != uricopy)
570 {
571 if (bNormalize)
572 {
573 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
574 Normalize(theBuffer);
575 theBuffer.SetLength(wxStrlen(theBuffer));
576 }
577
578 //mark the path as valid
579 m_fields |= wxURI_PATH;
580 }
581 }
582
583 return uri;
584 }
585
586
587 const wxChar* wxURI::ParseQuery(const wxChar* uri)
588 {
589 wxASSERT(uri != NULL);
590
591 // query = *( pchar / "/" / "?" )
592 if (*uri == '?')
593 {
594 ++uri;
595 while(*uri && *uri != '#')
596 {
597 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
598 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
599 m_query += *uri++;
600 else
601 Escape(m_query, *uri++);
602 }
603
604 //mark the server as valid
605 m_fields |= wxURI_QUERY;
606 }
607
608 return uri;
609 }
610
611
612 const wxChar* wxURI::ParseFragment(const wxChar* uri)
613 {
614 wxASSERT(uri != NULL);
615
616 // fragment = *( pchar / "/" / "?" )
617 if (*uri == '#')
618 {
619 ++uri;
620 while(*uri)
621 {
622 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
623 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
624 m_fragment += *uri++;
625 else
626 Escape(m_fragment, *uri++);
627 }
628
629 //mark the server as valid
630 m_fields |= wxURI_FRAGMENT;
631 }
632
633 return uri;
634 }
635
636 // ---------------------------------------------------------------------------
637 // Resolve URI
638 //
639 // Builds missing components of this uri from a base uri
640 //
641 // A version of the algorithm outlined in the RFC is used here
642 // (it is shown in comments)
643 // ---------------------------------------------------------------------------
644
645 void wxURI::Resolve(const wxURI& base, int flags)
646 {
647 wxASSERT_MSG(!base.IsReference(),
648 wxT("wxURI to inherit from must not be a reference!"));
649
650 // If we arn't being strict, enable the older
651 // loophole that allows this uri to inherit other
652 // properties from the base uri - even if the scheme
653 // is defined
654 if ( !(flags & wxURI_STRICT) &&
655 HasScheme() && base.HasScheme() &&
656 m_scheme == base.m_scheme )
657 {
658 m_fields -= wxURI_SCHEME;
659 }
660
661
662 // Do nothing if this is an absolute wxURI
663 // if defined(R.scheme) then
664 // T.scheme = R.scheme;
665 // T.authority = R.authority;
666 // T.path = remove_dot_segments(R.path);
667 // T.query = R.query;
668 if (HasScheme())
669 {
670 return;
671 }
672
673 //No sheme - inherit
674 m_scheme = base.m_scheme;
675 m_fields |= wxURI_SCHEME;
676
677 // All we need to do for relative URIs with an
678 // authority component is just inherit the scheme
679 // if defined(R.authority) then
680 // T.authority = R.authority;
681 // T.path = remove_dot_segments(R.path);
682 // T.query = R.query;
683 if (HasServer())
684 {
685 return;
686 }
687
688 //No authority - inherit
689 if (base.HasUser())
690 {
691 m_user = base.m_user;
692 m_fields |= wxURI_USER;
693 }
694
695 m_server = base.m_server;
696 m_hostType = base.m_hostType;
697 m_fields |= wxURI_SERVER;
698
699 if (base.HasPort())
700 {
701 m_port = base.m_port;
702 m_fields |= wxURI_PORT;
703 }
704
705
706 // Simple path inheritance from base
707 if (!HasPath())
708 {
709 // T.path = Base.path;
710 m_path = base.m_path;
711 m_fields |= wxURI_PATH;
712
713
714 // if defined(R.query) then
715 // T.query = R.query;
716 // else
717 // T.query = Base.query;
718 // endif;
719 if (!HasQuery())
720 {
721 m_query = base.m_query;
722 m_fields |= wxURI_QUERY;
723 }
724 }
725 else
726 {
727 // if (R.path starts-with "/") then
728 // T.path = remove_dot_segments(R.path);
729 // else
730 // T.path = merge(Base.path, R.path);
731 // T.path = remove_dot_segments(T.path);
732 // endif;
733 // T.query = R.query;
734 if (m_path[(const size_t&)0] != '/')
735 {
736 //Marge paths
737 const wxChar* op = m_path.c_str();
738 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
739
740 //not a ending directory? move up
741 if (base.m_path[0] && *(bp-1) != '/')
742 UpTree(base.m_path, bp);
743
744 //normalize directories
745 while(*op == '.' && *(op+1) == '.' &&
746 (*(op+2) == '\0' || *(op+2) == '/') )
747 {
748 UpTree(base.m_path, bp);
749
750 if (*(op+2) == '\0')
751 op += 2;
752 else
753 op += 3;
754 }
755
756 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
757 m_path.Mid((op - m_path.c_str()), m_path.Length());
758 }
759 }
760 }
761
762 // ---------------------------------------------------------------------------
763 // Directory Normalization (static)
764 //
765 // UpTree goes up one directory in a string, moving the given pointer
766 // accordingly, while Normalize removes duplicate/erroneous directories from
767 // a URI path according to RFC 2396, modified quite a bit to pass the unit
768 // tests given in it.
769 // ---------------------------------------------------------------------------
770
771 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
772 {
773 if (uri != uristart && *(uri-1) == '/')
774 {
775 uri -= 2;
776 }
777
778 for(;uri != uristart; --uri)
779 {
780 if (*uri == '/')
781 {
782 ++uri;
783 break;
784 }
785 }
786
787 //!!!TODO:HACK!!!//
788 if (uri == uristart && *uri == '/')
789 ++uri;
790 //!!!//
791 }
792
793 void wxURI::Normalize(wxChar* s, bool bIgnoreLeads)
794 {
795 wxChar* cp = s;
796 wxChar* bp = s;
797
798 if(s[0] == '/')
799 ++bp;
800
801 while(*cp)
802 {
803 if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0')
804 && (bp == cp || *(cp-1) == '/'))
805 {
806 //. _or_ ./ - ignore
807 if (*(cp+1) == '\0')
808 cp += 1;
809 else
810 cp += 2;
811 }
812 else if (*cp == '.' && *(cp+1) == '.' &&
813 (*(cp+2) == '/' || *(cp+2) == '\0')
814 && (bp == cp || *(cp-1) == '/'))
815 {
816 //.. _or_ ../ - go up the tree
817 if (s != bp)
818 {
819 UpTree((const wxChar*)bp, (const wxChar*&)s);
820
821 if (*(cp+2) == '\0')
822 cp += 2;
823 else
824 cp += 3;
825 }
826 else if (!bIgnoreLeads)
827
828 {
829 *bp++ = *cp++;
830 *bp++ = *cp++;
831 if (*cp)
832 *bp++ = *cp++;
833
834 s = bp;
835 }
836 else
837 {
838 if (*(cp+2) == '\0')
839 cp += 2;
840 else
841 cp += 3;
842 }
843 }
844 else
845 *s++ = *cp++;
846 }
847
848 *s = '\0';
849 }
850
851 // ---------------------------------------------------------------------------
852 // Misc. Parsing Methods
853 // ---------------------------------------------------------------------------
854
855 bool wxURI::ParseIPv4address(const wxChar*& uri)
856 {
857 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
858 //
859 //dec-octet = DIGIT ; 0-9
860 // / %x31-39 DIGIT ; 10-99
861 // / "1" 2DIGIT ; 100-199
862 // / "2" %x30-34 DIGIT ; 200-249
863 // / "25" %x30-35 ; 250-255
864 size_t iIPv4 = 0;
865 if (IsDigit(*uri))
866 {
867 ++iIPv4;
868
869
870 //each ip part must be between 0-255 (dupe of version in for loop)
871 if( IsDigit(*++uri) && IsDigit(*++uri) &&
872 //100 or less (note !)
873 !( (*(uri-2) < '2') ||
874 //240 or less
875 (*(uri-2) == '2' &&
876 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
877 )
878 )
879 )
880 {
881 return false;
882 }
883
884 if(IsDigit(*uri))++uri;
885
886 //compilers should unroll this loop
887 for(; iIPv4 < 4; ++iIPv4)
888 {
889 if (*uri != '.' || !IsDigit(*++uri))
890 break;
891
892 //each ip part must be between 0-255
893 if( IsDigit(*++uri) && IsDigit(*++uri) &&
894 //100 or less (note !)
895 !( (*(uri-2) < '2') ||
896 //240 or less
897 (*(uri-2) == '2' &&
898 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
899 )
900 )
901 )
902 {
903 return false;
904 }
905 if(IsDigit(*uri))++uri;
906 }
907 }
908 return iIPv4 == 4;
909 }
910
911 bool wxURI::ParseH16(const wxChar*& uri)
912 {
913 // h16 = 1*4HEXDIG
914 if(!IsHex(*++uri))
915 return false;
916
917 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
918 ++uri;
919
920 return true;
921 }
922
923 bool wxURI::ParseIPv6address(const wxChar*& uri)
924 {
925 // IPv6address = 6( h16 ":" ) ls32
926 // / "::" 5( h16 ":" ) ls32
927 // / [ h16 ] "::" 4( h16 ":" ) ls32
928 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
929 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
930 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
931 // / [ *4( h16 ":" ) h16 ] "::" ls32
932 // / [ *5( h16 ":" ) h16 ] "::" h16
933 // / [ *6( h16 ":" ) h16 ] "::"
934
935 size_t numPrefix = 0,
936 maxPostfix;
937
938 bool bEndHex = false;
939
940 for( ; numPrefix < 6; ++numPrefix)
941 {
942 if(!ParseH16(uri))
943 {
944 --uri;
945 bEndHex = true;
946 break;
947 }
948
949 if(*uri != ':')
950 {
951 break;
952 }
953 }
954
955 if(!bEndHex && !ParseH16(uri))
956 {
957 --uri;
958
959 if (numPrefix)
960 return false;
961
962 if (*uri == ':')
963 {
964 if (*++uri != ':')
965 return false;
966
967 maxPostfix = 5;
968 }
969 else
970 maxPostfix = 6;
971 }
972 else
973 {
974 if (*uri != ':' || *(uri+1) != ':')
975 {
976 if (numPrefix != 6)
977 return false;
978
979 while (*--uri != ':') {}
980 ++uri;
981
982 const wxChar* uristart = uri;
983 //parse ls32
984 // ls32 = ( h16 ":" h16 ) / IPv4address
985 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
986 return true;
987
988 uri = uristart;
989
990 if (ParseIPv4address(uri))
991 return true;
992 else
993 return false;
994 }
995 else
996 {
997 uri += 2;
998
999 if (numPrefix > 3)
1000 maxPostfix = 0;
1001 else
1002 maxPostfix = 4 - numPrefix;
1003 }
1004 }
1005
1006 bool bAllowAltEnding = maxPostfix == 0;
1007
1008 for(; maxPostfix != 0; --maxPostfix)
1009 {
1010 if(!ParseH16(uri) || *uri != ':')
1011 return false;
1012 }
1013
1014 if(numPrefix <= 4)
1015 {
1016 const wxChar* uristart = uri;
1017 //parse ls32
1018 // ls32 = ( h16 ":" h16 ) / IPv4address
1019 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
1020 return true;
1021
1022 uri = uristart;
1023
1024 if (ParseIPv4address(uri))
1025 return true;
1026
1027 uri = uristart;
1028
1029 if (!bAllowAltEnding)
1030 return false;
1031 }
1032
1033 if(numPrefix <= 5 && ParseH16(uri))
1034 return true;
1035
1036 return true;
1037 }
1038
1039 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1040 {
1041 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1042 if (*++uri != 'v' || !IsHex(*++uri))
1043 return false;
1044
1045 while (IsHex(*++uri)) {}
1046
1047 if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1048 return false;
1049
1050 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1051
1052 return true;
1053 }
1054
1055
1056 // ---------------------------------------------------------------------------
1057 // Misc methods - IsXXX and CharToHex
1058 // ---------------------------------------------------------------------------
1059
1060 int wxURI::CharToHex(const wxChar& c)
1061 {
1062 if ((c >= 'A') && (c <= 'Z')) return c - 'A' + 0x0A;
1063 if ((c >= 'a') && (c <= 'z')) return c - 'a' + 0x0a;
1064 if ((c >= '0') && (c <= '9')) return c - '0' + 0x00;
1065
1066 return 0;
1067 }
1068
1069 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1070 bool wxURI::IsUnreserved (const wxChar& c)
1071 { return IsAlpha(c) || IsDigit(c) ||
1072 c == '-' ||
1073 c == '.' ||
1074 c == '_' ||
1075 c == '~' //tilde
1076 ;
1077 }
1078
1079 bool wxURI::IsReserved (const wxChar& c)
1080 {
1081 return IsGenDelim(c) || IsSubDelim(c);
1082 }
1083
1084 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1085 bool wxURI::IsGenDelim (const wxChar& c)
1086 {
1087 return c == ':' ||
1088 c == '/' ||
1089 c == '?' ||
1090 c == '#' ||
1091 c == '[' ||
1092 c == ']' ||
1093 c == '@';
1094 }
1095
1096 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1097 //! / "*" / "+" / "," / ";" / "="
1098 bool wxURI::IsSubDelim (const wxChar& c)
1099 {
1100 return c == '!' ||
1101 c == '$' ||
1102 c == '&' ||
1103 c == '\'' ||
1104 c == '(' ||
1105 c == ')' ||
1106 c == '*' ||
1107 c == '+' ||
1108 c == ',' ||
1109 c == ';' ||
1110 c == '='
1111 ;
1112 }
1113
1114 bool wxURI::IsHex(const wxChar& c)
1115 { return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
1116
1117 bool wxURI::IsAlpha(const wxChar& c)
1118 { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
1119
1120 bool wxURI::IsDigit(const wxChar& c)
1121 { return c >= '0' && c <= '9'; }
1122
1123
1124 // ---------------------------------------------------------------------------
1125 //
1126 // wxURL Compatibility
1127 //
1128 // TODO: Use wxURI instead here...
1129 // ---------------------------------------------------------------------------
1130
1131 #if wxUSE_URL
1132
1133 #include "wx/url.h"
1134
1135 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1136 {
1137 wxString out_str;
1138 wxString hexa_code;
1139 size_t i;
1140
1141 for (i = 0; i < uri.Len(); i++)
1142 {
1143 wxChar c = uri.GetChar(i);
1144
1145 if (c == wxT(' '))
1146 {
1147 // GRG, Apr/2000: changed to "%20" instead of '+'
1148
1149 out_str += wxT("%20");
1150 }
1151 else
1152 {
1153 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1154 //
1155 // - Alphanumeric characters are never escaped
1156 // - Unreserved marks are never escaped
1157 // - Delimiters must be escaped if they appear within a component
1158 // but not if they are used to separate components. Here we have
1159 // no clear way to distinguish between these two cases, so they
1160 // are escaped unless they are passed in the 'delims' parameter
1161 // (allowed delimiters).
1162
1163 static const wxChar marks[] = wxT("-_.!~*()'");
1164
1165 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1166 {
1167 hexa_code.Printf(wxT("%%%02X"), c);
1168 out_str += hexa_code;
1169 }
1170 else
1171 {
1172 out_str += c;
1173 }
1174 }
1175 }
1176
1177 return out_str;
1178 }
1179
1180 wxString wxURL::ConvertFromURI(const wxString& uri)
1181 {
1182 wxString new_uri;
1183
1184 size_t i = 0;
1185 while (i < uri.Len())
1186 {
1187 int code;
1188 if (uri[i] == wxT('%'))
1189 {
1190 i++;
1191 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1192 code = (uri[i] - wxT('A') + 10) * 16;
1193 else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1194 code = (uri[i] - wxT('a') + 10) * 16;
1195 else
1196 code = (uri[i] - wxT('0')) * 16;
1197
1198 i++;
1199 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1200 code += (uri[i] - wxT('A')) + 10;
1201 else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1202 code += (uri[i] - wxT('a')) + 10;
1203 else
1204 code += (uri[i] - wxT('0'));
1205
1206 i++;
1207 new_uri += (wxChar)code;
1208 continue;
1209 }
1210 new_uri += uri[i];
1211 i++;
1212 }
1213 return new_uri;
1214 }
1215
1216 #endif //wxUSE_URL
1217
1218 //end of uri.cpp
1219
1220
1221