]> git.saurik.com Git - wxWidgets.git/blob - src/common/uri.cpp
c62395617778da6d8a869fd34c2f479c0280f769
[wxWidgets.git] / src / common / uri.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: uri.cpp
3 // Purpose: Implementation of a uri parser
4 // Author: Ryan Norton
5 // Created: 10/26/04
6 // RCS-ID: $Id$
7 // Copyright: (c) 2004 Ryan Norton
8 // Licence: wxWindows
9 /////////////////////////////////////////////////////////////////////////////
10
11 // ===========================================================================
12 // declarations
13 // ===========================================================================
14
15 // ---------------------------------------------------------------------------
16 // headers
17 // ---------------------------------------------------------------------------
18
19 #if defined(__GNUG__) && !defined(NO_GCC_PRAGMA)
20 #pragma implementation "uri.h"
21 #endif
22
23 // For compilers that support precompilation, includes "wx.h".
24 #include "wx/wxprec.h"
25
26 #ifdef __BORLANDC__
27 #pragma hdrstop
28 #endif
29
30 #include "wx/uri.h"
31
32 // ---------------------------------------------------------------------------
33 // definitions
34 // ---------------------------------------------------------------------------
35
36 IMPLEMENT_CLASS(wxURI, wxObject);
37
38 // ===========================================================================
39 // implementation
40 // ===========================================================================
41
42 // ---------------------------------------------------------------------------
43 // utilities
44 // ---------------------------------------------------------------------------
45
46 // ---------------------------------------------------------------------------
47 //
48 // wxURI
49 //
50 // ---------------------------------------------------------------------------
51
52 // ---------------------------------------------------------------------------
53 // Constructors
54 // ---------------------------------------------------------------------------
55
56 wxURI::wxURI() : m_hostType(wxURI_REGNAME), m_fields(0)
57 {
58 }
59
60 wxURI::wxURI(const wxString& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
61 {
62 Create(uri);
63 }
64
65 wxURI::wxURI(const wxURI& uri) : m_hostType(wxURI_REGNAME), m_fields(0)
66 {
67 *this = uri;
68 }
69
70 // ---------------------------------------------------------------------------
71 // Destructor and cleanup
72 // ---------------------------------------------------------------------------
73
74 wxURI::~wxURI()
75 {
76 Clear();
77 }
78
79 void wxURI::Clear()
80 {
81 m_scheme = m_user = m_server = m_port = m_path =
82 m_query = m_fragment = wxT("");
83
84 m_hostType = wxURI_REGNAME;
85
86 m_fields = 0;
87 }
88
89 // ---------------------------------------------------------------------------
90 // Create
91 //
92 // This creates the URI - all we do here is call the main parsing method
93 // ---------------------------------------------------------------------------
94
95 void wxURI::Create(const wxString& uri)
96 {
97 if (m_fields)
98 Clear();
99
100 Parse(uri);
101 }
102
103 // ---------------------------------------------------------------------------
104 // Escape/Unescape/IsEscape
105 //
106 // Unescape unencodes a 3 character URL escape sequence
107 // Escape encodes an invalid URI character into a 3 character sequence
108 // IsEscape determines if the input string contains an escape sequence,
109 // if it does, then it moves the input string past the escape sequence
110 // ---------------------------------------------------------------------------
111
112 void wxURI::Unescape(const wxChar*& s, wxChar& c)
113 {
114 wxASSERT_MSG(IsHex(*s) && IsHex(*(s+1)), wxT("Invalid escape!"));
115 c = CharToHex(*s) * 0x10 + CharToHex(*++s);
116 }
117
118 void wxURI::Escape(wxString& s, const wxChar& c)
119 {
120 const wxChar* hdig = wxT("0123456789abcdef");
121 s += '%';
122 s += hdig[(c >> 4) & 15];
123 s += hdig[c & 15];
124 }
125
126 bool wxURI::IsEscape(const wxChar*& uri)
127 {
128 if(*uri == '%' && IsHex(*(uri+1)) && IsHex(*(uri+2)))
129 {
130 uri += 3;
131 return true;
132 }
133 else
134 return false;
135 }
136
137 // ---------------------------------------------------------------------------
138 // HasXXX
139 // ---------------------------------------------------------------------------
140
141 bool wxURI::HasScheme() const
142 { return (m_fields & wxURI_SCHEME) == wxURI_SCHEME; }
143
144 bool wxURI::HasUser() const
145 { return (m_fields & wxURI_USER) == wxURI_USER; }
146
147 bool wxURI::HasServer() const
148 { return (m_fields & wxURI_SERVER) == wxURI_SERVER; }
149
150 bool wxURI::HasPort() const
151 { return (m_fields & wxURI_PORT) == wxURI_PORT; }
152
153 bool wxURI::HasPath() const
154 { return (m_fields & wxURI_PATH) == wxURI_PATH; }
155
156 bool wxURI::HasQuery() const
157 { return (m_fields & wxURI_QUERY) == wxURI_QUERY; }
158
159 bool wxURI::HasFragment() const
160 { return (m_fields & wxURI_FRAGMENT) == wxURI_FRAGMENT; }
161
162 // ---------------------------------------------------------------------------
163 // GetXXX
164 //
165 // The normal Get() actually builds the entire URI into a useable
166 // representation, including proper identification characters such as slashes
167 // ---------------------------------------------------------------------------
168
169 const wxString& wxURI::GetScheme() const
170 { return m_scheme; }
171
172 const wxString& wxURI::GetPath() const
173 { return m_path; }
174
175 const wxString& wxURI::GetQuery() const
176 { return m_query; }
177
178 const wxString& wxURI::GetFragment() const
179 { return m_fragment; }
180
181 const wxString& wxURI::GetPort() const
182 { return m_port; }
183
184 const wxString& wxURI::GetUser() const
185 { return m_user; }
186
187 const wxString& wxURI::GetServer() const
188 { return m_server; }
189
190 const wxURIHostType& wxURI::GetHostType() const
191 { return m_hostType; }
192
193 wxString wxURI::Get() const
194 {
195 wxString ret;
196
197 if (HasScheme())
198 ret = ret + m_scheme + wxT(":");
199
200 if (HasServer())
201 {
202 ret += wxT("//");
203
204 if (HasUser())
205 ret = ret + m_user + wxT("@");
206
207 ret += m_server;
208
209 if (HasPort())
210 ret = ret + wxT(":") + m_port;
211 }
212
213 ret += m_path;
214
215 if (HasQuery())
216 ret = ret + wxT("?") + m_query;
217
218 if (HasFragment())
219 ret = ret + wxT("#") + m_fragment;
220
221 return ret;
222 }
223
224 // ---------------------------------------------------------------------------
225 // operator = and ==
226 // ---------------------------------------------------------------------------
227
228 wxURI& wxURI::operator = (const wxURI& uri)
229 {
230 if (HasScheme())
231 m_scheme = uri.m_scheme;
232
233
234 if (HasServer())
235 {
236 if (HasUser())
237 m_user = uri.m_user;
238
239 m_server = uri.m_server;
240 m_hostType = uri.m_hostType;
241
242 if (HasPort())
243 m_port = uri.m_port;
244 }
245
246
247 if (HasPath())
248 m_path = uri.m_path;
249
250 if (HasQuery())
251 m_query = uri.m_query;
252
253 if (HasFragment())
254 m_fragment = uri.m_fragment;
255
256 return *this;
257 }
258
259 wxURI& wxURI::operator = (const wxChar* string)
260 {
261 Create(string);
262 return *this;
263 }
264
265 bool wxURI::operator == (const wxURI& uri) const
266 {
267 if (HasScheme())
268 {
269 if(m_scheme != uri.m_scheme)
270 return false;
271 }
272 else if (uri.HasScheme())
273 return false;
274
275
276 if (HasServer())
277 {
278 if (HasUser())
279 {
280 if (m_user != uri.m_user)
281 return false;
282 }
283 else if (uri.HasUser())
284 return false;
285
286 if (m_server != uri.m_server ||
287 m_hostType != uri.m_hostType)
288 return false;
289
290 if (HasPort())
291 {
292 if(m_port != uri.m_port)
293 return false;
294 }
295 else if (uri.HasPort())
296 return false;
297 }
298 else if (uri.HasServer())
299 return false;
300
301
302 if (HasPath())
303 {
304 if(m_path != uri.m_path)
305 return false;
306 }
307 else if (uri.HasPath())
308 return false;
309
310 if (HasQuery())
311 {
312 if (m_query != uri.m_query)
313 return false;
314 }
315 else if (uri.HasQuery())
316 return false;
317
318 if (HasFragment())
319 {
320 if (m_fragment != uri.m_fragment)
321 return false;
322 }
323 else if (uri.HasFragment())
324 return false;
325
326 return true;
327 }
328
329 // ---------------------------------------------------------------------------
330 // IsReference
331 //
332 // if there is no authority or scheme, it is a reference
333 // ---------------------------------------------------------------------------
334
335 bool wxURI::IsReference() const
336 { return !HasScheme() || !HasServer(); }
337
338 // ---------------------------------------------------------------------------
339 // Parse
340 //
341 // Master URI parsing method. Just calls the individual parsing methods
342 //
343 // URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
344 // URI-reference = URI / relative-ref
345 // ---------------------------------------------------------------------------
346
347 const wxChar* wxURI::Parse(const wxChar* uri)
348 {
349 uri = ParseScheme(uri);
350 uri = ParseAuthority(uri);
351 uri = ParsePath(uri);
352 uri = ParseQuery(uri);
353 return ParseFragment(uri);
354 }
355
356 // ---------------------------------------------------------------------------
357 // ParseXXX
358 //
359 // Individual parsers for each URI component
360 // ---------------------------------------------------------------------------
361
362 const wxChar* wxURI::ParseScheme(const wxChar* uri)
363 {
364 wxASSERT(uri != NULL);
365
366 //copy of the uri - used for figuring out
367 //length of each component
368 const wxChar* uricopy = uri;
369
370 //Does the uri have a scheme (first character alpha)?
371 if (IsAlpha(*uri))
372 {
373 m_scheme += *uri++;
374
375 //scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
376 //RN: Scheme can not be escaped
377 while (IsAlpha(*uri) || IsDigit(*uri) ||
378 *uri == '+' ||
379 *uri == '-' ||
380 *uri == '.')
381 {
382 m_scheme += *uri++;
383 }
384
385 //valid scheme?
386 if (*uri == ':')
387 {
388 //mark the scheme as valid
389 m_fields |= wxURI_SCHEME;
390
391 //move reference point up to input buffer
392 uricopy = ++uri;
393 }
394 else
395 //relative uri with relative path reference
396 m_scheme = wxT("");
397 }
398 // else
399 //relative uri with _possible_ relative path reference
400
401 return uricopy;
402 }
403
404 const wxChar* wxURI::ParseAuthority(const wxChar* uri)
405 {
406 // authority = [ userinfo "@" ] host [ ":" port ]
407 if (*uri == '/' && *(uri+1) == '/')
408 {
409 uri += 2;
410
411 uri = ParseUser(uri);
412 uri = ParseServer(uri);
413 return ParsePort(uri);
414 }
415
416 return uri;
417 }
418
419 const wxChar* wxURI::ParseUser(const wxChar* uri)
420 {
421 wxASSERT(uri != NULL);
422
423 //copy of the uri - used for figuring out
424 //length of each component
425 const wxChar* uricopy = uri;
426
427 // userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
428 while(*uri && *uri != '@' && *uri != '/' && *uri != '#' && *uri != '?')
429 {
430 if(IsUnreserved(*uri) || IsEscape(uri) ||
431 IsSubDelim(*uri) || *uri == ':')
432 m_user += *uri++;
433 else
434 Escape(m_user, *uri++);
435 }
436
437 if(*uri == '@')
438 {
439 //valid userinfo
440 m_fields |= wxURI_USER;
441
442 uricopy = ++uri;
443 }
444 else
445 m_user = wxT("");
446
447 return uricopy;
448 }
449
450 const wxChar* wxURI::ParseServer(const wxChar* uri)
451 {
452 wxASSERT(uri != NULL);
453
454 //copy of the uri - used for figuring out
455 //length of each component
456 const wxChar* uricopy = uri;
457
458 // host = IP-literal / IPv4address / reg-name
459 // IP-literal = "[" ( IPv6address / IPvFuture ) "]"
460 if (*uri == '[')
461 {
462 if (ParseIPv6address(++uri) && *uri == ']')
463 {
464 ++uri;
465 m_hostType = wxURI_IPV6ADDRESS;
466
467 wxStringBufferLength theBuffer(m_server, uri - uricopy);
468 wxMemcpy(theBuffer, uricopy, uri-uricopy);
469 theBuffer.SetLength(uri-uricopy);
470 }
471 else
472 {
473 uri = uricopy;
474
475 if (ParseIPvFuture(++uri) && *uri == ']')
476 {
477 ++uri;
478 m_hostType = wxURI_IPVFUTURE;
479
480 wxStringBufferLength theBuffer(m_server, uri - uricopy);
481 wxMemcpy(theBuffer, uricopy, uri-uricopy);
482 theBuffer.SetLength(uri-uricopy);
483 }
484 else
485 uri = uricopy;
486 }
487 }
488 else
489 {
490 if (ParseIPv4address(uri))
491 {
492 m_hostType = wxURI_IPV4ADDRESS;
493
494 wxStringBufferLength theBuffer(m_server, uri - uricopy);
495 wxMemcpy(theBuffer, uricopy, uri-uricopy);
496 theBuffer.SetLength(uri-uricopy);
497 }
498 else
499 uri = uricopy;
500 }
501
502 if(m_hostType == wxURI_REGNAME)
503 {
504 uri = uricopy;
505 // reg-name = *( unreserved / pct-encoded / sub-delims )
506 while(*uri && *uri != '/' && *uri != ':' && *uri != '#' && *uri != '?')
507 {
508 if(IsUnreserved(*uri) || IsEscape(uri) || IsSubDelim(*uri))
509 m_server += *uri++;
510 else
511 Escape(m_server, *uri++);
512 }
513 }
514
515 //mark the server as valid
516 m_fields |= wxURI_SERVER;
517
518 return uri;
519 }
520
521
522 const wxChar* wxURI::ParsePort(const wxChar* uri)
523 {
524 wxASSERT(uri != NULL);
525
526 // port = *DIGIT
527 if(*uri == ':')
528 {
529 ++uri;
530 while(IsDigit(*uri))
531 {
532 m_port += *uri++;
533 }
534
535 //mark the port as valid
536 m_fields |= wxURI_PORT;
537 }
538
539 return uri;
540 }
541
542 const wxChar* wxURI::ParsePath(const wxChar* uri, const bool& bReference, const bool& bNormalize)
543 {
544 wxASSERT(uri != NULL);
545
546 //copy of the uri - used for figuring out
547 //length of each component
548 const wxChar* uricopy = uri;
549
550 /// hier-part = "//" authority path-abempty
551 /// / path-absolute
552 /// / path-rootless
553 /// / path-empty
554 ///
555 /// relative-part = "//" authority path-abempty
556 /// / path-absolute
557 /// / path-noscheme
558 /// / path-empty
559 ///
560 /// path-abempty = *( "/" segment )
561 /// path-absolute = "/" [ segment-nz *( "/" segment ) ]
562 /// path-noscheme = segment-nz-nc *( "/" segment )
563 /// path-rootless = segment-nz *( "/" segment )
564 /// path-empty = 0<pchar>
565 ///
566 /// segment = *pchar
567 /// segment-nz = 1*pchar
568 /// segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
569 /// ; non-zero-length segment without any colon ":"
570 ///
571 /// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
572 if (*uri == '/')
573 {
574 m_path += *uri++;
575
576 while(*uri && *uri != '#' && *uri != '?')
577 {
578 if( IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
579 *uri == ':' || *uri == '@' || *uri == '/')
580 m_path += *uri++;
581 else
582 Escape(m_path, *uri++);
583 }
584
585 if (bNormalize)
586 {
587 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
588 Normalize(theBuffer, true);
589 theBuffer.SetLength(wxStrlen(theBuffer));
590 }
591 //mark the path as valid
592 m_fields |= wxURI_PATH;
593 }
594 else if(*uri) //Relative path
595 {
596 if (bReference)
597 {
598 //no colon allowed
599 while(*uri && *uri != '#' && *uri != '?')
600 {
601 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
602 *uri == '@' || *uri == '/')
603 m_path += *uri++;
604 else
605 Escape(m_path, *uri++);
606 }
607 }
608 else
609 {
610 while(*uri && *uri != '#' && *uri != '?')
611 {
612 if(IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
613 *uri == ':' || *uri == '@' || *uri == '/')
614 m_path += *uri++;
615 else
616 Escape(m_path, *uri++);
617 }
618 }
619
620 if (uri != uricopy)
621 {
622 if (bNormalize)
623 {
624 wxStringBufferLength theBuffer(m_path, m_path.length() + 1);
625 Normalize(theBuffer);
626 theBuffer.SetLength(wxStrlen(theBuffer));
627 }
628
629 //mark the path as valid
630 m_fields |= wxURI_PATH;
631 }
632 }
633
634 return uri;
635 }
636
637
638 const wxChar* wxURI::ParseQuery(const wxChar* uri)
639 {
640 wxASSERT(uri != NULL);
641
642 // query = *( pchar / "/" / "?" )
643 if (*uri == '?')
644 {
645 ++uri;
646 while(*uri && *uri != '#')
647 {
648 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
649 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
650 m_query += *uri++;
651 else
652 Escape(m_query, *uri++);
653 }
654
655 //mark the server as valid
656 m_fields |= wxURI_QUERY;
657 }
658
659 return uri;
660 }
661
662
663 const wxChar* wxURI::ParseFragment(const wxChar* uri)
664 {
665 wxASSERT(uri != NULL);
666
667 // fragment = *( pchar / "/" / "?" )
668 if (*uri == '#')
669 {
670 ++uri;
671 while(*uri)
672 {
673 if (IsUnreserved(*uri) || IsSubDelim(*uri) || IsEscape(uri) ||
674 *uri == ':' || *uri == '@' || *uri == '/' || *uri == '?')
675 m_fragment += *uri++;
676 else
677 Escape(m_fragment, *uri++);
678 }
679
680 //mark the server as valid
681 m_fields |= wxURI_FRAGMENT;
682 }
683
684 return uri;
685 }
686
687 // ---------------------------------------------------------------------------
688 // Resolve URI
689 //
690 // Builds missing components of this uri from a base uri
691 //
692 // A version of the algorithm outlined in the RFC is used here
693 // (it is shown in comments)
694 // ---------------------------------------------------------------------------
695
696 void wxURI::Resolve(const wxURI& base, const bool& bStrict)
697 {
698 wxASSERT_MSG(!base.IsReference(),
699 wxT("wxURI to inherit from must not be a reference!"));
700
701 // If we arn't being strict, enable the older
702 // loophole that allows this uri to inherit other
703 // properties from the base uri - even if the scheme
704 // is defined
705 if (!bStrict &&
706 HasScheme() && base.HasScheme() &&
707 this->m_scheme == base.m_scheme )
708 {
709 m_fields -= wxURI_SCHEME;
710 }
711
712
713 // Do nothing if this is an absolute wxURI
714 // if defined(R.scheme) then
715 // T.scheme = R.scheme;
716 // T.authority = R.authority;
717 // T.path = remove_dot_segments(R.path);
718 // T.query = R.query;
719 if (HasScheme())
720 {
721 return;
722 }
723
724 //No sheme - inherit
725 m_scheme = base.m_scheme;
726 m_fields |= wxURI_SCHEME;
727
728 // All we need to do for relative URIs with an
729 // authority component is just inherit the scheme
730 // if defined(R.authority) then
731 // T.authority = R.authority;
732 // T.path = remove_dot_segments(R.path);
733 // T.query = R.query;
734 if (HasServer())
735 {
736 return;
737 }
738
739 //No authority - inherit
740 if (base.HasUser())
741 {
742 m_user = base.m_user;
743 m_fields |= wxURI_USER;
744 }
745
746 m_server = base.m_server;
747 m_hostType = base.m_hostType;
748 m_fields |= wxURI_SERVER;
749
750 if (base.HasPort())
751 {
752 m_port = base.m_port;
753 m_fields |= wxURI_PORT;
754 }
755
756
757 // Simple path inheritance from base
758 if (!HasPath())
759 {
760 // T.path = Base.path;
761 m_path = base.m_path;
762 m_fields |= wxURI_PATH;
763
764
765 // if defined(R.query) then
766 // T.query = R.query;
767 // else
768 // T.query = Base.query;
769 // endif;
770 if (!HasQuery())
771 {
772 m_query = base.m_query;
773 m_fields |= wxURI_QUERY;
774 }
775 }
776 else
777 {
778 // if (R.path starts-with "/") then
779 // T.path = remove_dot_segments(R.path);
780 // else
781 // T.path = merge(Base.path, R.path);
782 // T.path = remove_dot_segments(T.path);
783 // endif;
784 // T.query = R.query;
785 if (m_path[(const size_t&)0] != '/')
786 {
787 //Marge paths
788 const wxChar* op = m_path.c_str();
789 const wxChar* bp = base.m_path.c_str() + base.m_path.Length();
790
791 //not a ending directory? move up
792 if (base.m_path[0] && *(bp-1) != '/')
793 UpTree(base.m_path, bp);
794
795 //normalize directories
796 while(*op == '.' && *(op+1) == '.' &&
797 (*(op+2) == '\0' || *(op+2) == '/') )
798 {
799 UpTree(base.m_path, bp);
800
801 if (*(op+2) == '\0')
802 op += 2;
803 else
804 op += 3;
805 }
806
807 m_path = base.m_path.substr(0, bp - base.m_path.c_str()) +
808 m_path.Mid((op - m_path.c_str()), m_path.Length());
809 }
810 }
811 }
812
813 // ---------------------------------------------------------------------------
814 // Directory Normalization (static)
815 //
816 // UpTree goes up a directory in a string and moves the pointer as such,
817 // while Normalize gets rid of duplicate/erroneous directories in a URI
818 // according to RFC 2396 and modified quite a bit to meet the unit tests
819 // in it.
820 // ---------------------------------------------------------------------------
821
822 void wxURI::UpTree(const wxChar* uristart, const wxChar*& uri)
823 {
824 if (uri != uristart && *(uri-1) == '/')
825 {
826 uri -= 2;
827 }
828
829 for(;uri != uristart; --uri)
830 {
831 if (*uri == '/')
832 {
833 ++uri;
834 break;
835 }
836 }
837
838 //!!!TODO:HACK!!!//
839 if (uri == uristart && *uri == '/')
840 ++uri;
841 //!!!//
842 }
843
844 void wxURI::Normalize(wxChar* s, const bool& bIgnoreLeads)
845 {
846 wxChar* cp = s;
847 wxChar* bp = s;
848
849 if(s[0] == '/')
850 ++bp;
851
852 while(*cp)
853 {
854 if (*cp == '.' && (*(cp+1) == '/' || *(cp+1) == '\0')
855 && (bp == cp || *(cp-1) == '/'))
856 {
857 //. _or_ ./ - ignore
858 if (*(cp+1) == '\0')
859 cp += 1;
860 else
861 cp += 2;
862 }
863 else if (*cp == '.' && *(cp+1) == '.' &&
864 (*(cp+2) == '/' || *(cp+2) == '\0')
865 && (bp == cp || *(cp-1) == '/'))
866 {
867 //.. _or_ ../ - go up the tree
868 if (s != bp)
869 {
870 UpTree((const wxChar*)bp, (const wxChar*&)s);
871
872 if (*(cp+2) == '\0')
873 cp += 2;
874 else
875 cp += 3;
876 }
877 else if (!bIgnoreLeads)
878
879 {
880 *bp++ = *cp++;
881 *bp++ = *cp++;
882 if (*cp)
883 *bp++ = *cp++;
884
885 s = bp;
886 }
887 else
888 {
889 if (*(cp+2) == '\0')
890 cp += 2;
891 else
892 cp += 3;
893 }
894 }
895 else
896 *s++ = *cp++;
897 }
898
899 *s = '\0';
900 }
901
902 // ---------------------------------------------------------------------------
903 // Misc. Parsing Methods
904 // ---------------------------------------------------------------------------
905
906 bool wxURI::ParseIPv4address(const wxChar*& uri)
907 {
908 //IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
909 //
910 //dec-octet = DIGIT ; 0-9
911 // / %x31-39 DIGIT ; 10-99
912 // / "1" 2DIGIT ; 100-199
913 // / "2" %x30-34 DIGIT ; 200-249
914 // / "25" %x30-35 ; 250-255
915 size_t iIPv4 = 0;
916 if (IsDigit(*uri))
917 {
918 ++iIPv4;
919
920
921 //each ip part must be between 0-255 (dupe of version in for loop)
922 if( IsDigit(*++uri) && IsDigit(*++uri) &&
923 //100 or less (note !)
924 !( (*(uri-2) < '2') ||
925 //240 or less
926 (*(uri-2) == '2' &&
927 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
928 )
929 )
930 )
931 {
932 return false;
933 }
934
935 if(IsDigit(*uri))++uri;
936
937 //compilers should unroll this loop
938 for(; iIPv4 < 4; ++iIPv4)
939 {
940 if (*uri != '.' || !IsDigit(*++uri))
941 break;
942
943 //each ip part must be between 0-255
944 if( IsDigit(*++uri) && IsDigit(*++uri) &&
945 //100 or less (note !)
946 !( (*(uri-2) < '2') ||
947 //240 or less
948 (*(uri-2) == '2' &&
949 (*(uri-1) < '5' || (*(uri-1) == '5' && *uri <= '5'))
950 )
951 )
952 )
953 {
954 return false;
955 }
956 if(IsDigit(*uri))++uri;
957 }
958 }
959 return iIPv4 == 4;
960 }
961
962 bool wxURI::ParseH16(const wxChar*& uri)
963 {
964 // h16 = 1*4HEXDIG
965 if(!IsHex(*++uri))
966 return false;
967
968 if(IsHex(*++uri) && IsHex(*++uri) && IsHex(*++uri))
969 ++uri;
970
971 return true;
972 }
973
974 bool wxURI::ParseIPv6address(const wxChar*& uri)
975 {
976 // IPv6address = 6( h16 ":" ) ls32
977 // / "::" 5( h16 ":" ) ls32
978 // / [ h16 ] "::" 4( h16 ":" ) ls32
979 // / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
980 // / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
981 // / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
982 // / [ *4( h16 ":" ) h16 ] "::" ls32
983 // / [ *5( h16 ":" ) h16 ] "::" h16
984 // / [ *6( h16 ":" ) h16 ] "::"
985
986 size_t numPrefix = 0,
987 maxPostfix;
988
989 bool bEndHex = false;
990
991 for( ; numPrefix < 6; ++numPrefix)
992 {
993 if(!ParseH16(uri))
994 {
995 --uri;
996 bEndHex = true;
997 break;
998 }
999
1000 if(*uri != ':')
1001 {
1002 break;
1003 }
1004 }
1005
1006 if(!bEndHex && !ParseH16(uri))
1007 {
1008 --uri;
1009
1010 if (numPrefix)
1011 return false;
1012
1013 if (*uri == ':')
1014 {
1015 if (*++uri != ':')
1016 return false;
1017
1018 maxPostfix = 5;
1019 }
1020 else
1021 maxPostfix = 6;
1022 }
1023 else
1024 {
1025 if (*uri != ':' || *(uri+1) != ':')
1026 {
1027 if (numPrefix != 6)
1028 return false;
1029
1030 while (*--uri != ':') {}
1031 ++uri;
1032
1033 const wxChar* uristart = uri;
1034 //parse ls32
1035 // ls32 = ( h16 ":" h16 ) / IPv4address
1036 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
1037 return true;
1038
1039 uri = uristart;
1040
1041 if (ParseIPv4address(uri))
1042 return true;
1043 else
1044 return false;
1045 }
1046 else
1047 {
1048 uri += 2;
1049
1050 if (numPrefix > 3)
1051 maxPostfix = 0;
1052 else
1053 maxPostfix = 4 - numPrefix;
1054 }
1055 }
1056
1057 bool bAllowAltEnding = maxPostfix == 0;
1058
1059 for(; maxPostfix != 0; --maxPostfix)
1060 {
1061 if(!ParseH16(uri) || *uri != ':')
1062 return false;
1063 }
1064
1065 if(numPrefix <= 4)
1066 {
1067 const wxChar* uristart = uri;
1068 //parse ls32
1069 // ls32 = ( h16 ":" h16 ) / IPv4address
1070 if (ParseH16(uri) && *uri == ':' && ParseH16(uri))
1071 return true;
1072
1073 uri = uristart;
1074
1075 if (ParseIPv4address(uri))
1076 return true;
1077
1078 uri = uristart;
1079
1080 if (!bAllowAltEnding)
1081 return false;
1082 }
1083
1084 if(numPrefix <= 5 && ParseH16(uri))
1085 return true;
1086
1087 return true;
1088 }
1089
1090 bool wxURI::ParseIPvFuture(const wxChar*& uri)
1091 {
1092 // IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
1093 if (*++uri != 'v' || !IsHex(*++uri))
1094 return false;
1095
1096 while (IsHex(*++uri)) {}
1097
1098 if (*uri != '.' || !(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':'))
1099 return false;
1100
1101 while(IsUnreserved(*++uri) || IsSubDelim(*uri) || *uri == ':') {}
1102
1103 return true;
1104 }
1105
1106
1107 // ---------------------------------------------------------------------------
1108 // Misc methods - IsXXX and CharToHex
1109 // ---------------------------------------------------------------------------
1110
1111 int wxURI::CharToHex(const wxChar& c)
1112 {
1113 if ((c >= 'A') && (c <= 'Z')) return c - 'A' + 0x0A;
1114 if ((c >= 'a') && (c <= 'z')) return c - 'a' + 0x0a;
1115 if ((c >= '0') && (c <= '9')) return c - '0' + 0x00;
1116
1117 return 0;
1118 }
1119
1120 //! unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
1121 bool wxURI::IsUnreserved (const wxChar& c)
1122 { return IsAlpha(c) || IsDigit(c) ||
1123 c == '-' ||
1124 c == '.' ||
1125 c == '_' ||
1126 c == '~' //tilde
1127 ;
1128 }
1129
1130 bool wxURI::IsReserved (const wxChar& c)
1131 {
1132 return IsGenDelim(c) || IsSubDelim(c);
1133 }
1134
1135 //! gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
1136 bool wxURI::IsGenDelim (const wxChar& c)
1137 {
1138 return c == ':' ||
1139 c == '/' ||
1140 c == '?' ||
1141 c == '#' ||
1142 c == '[' ||
1143 c == ']' ||
1144 c == '@';
1145 }
1146
1147 //! sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
1148 //! / "*" / "+" / "," / ";" / "="
1149 bool wxURI::IsSubDelim (const wxChar& c)
1150 {
1151 return c == '!' ||
1152 c == '$' ||
1153 c == '&' ||
1154 c == '\'' ||
1155 c == '(' ||
1156 c == ')' ||
1157 c == '*' ||
1158 c == '+' ||
1159 c == ',' ||
1160 c == ';' ||
1161 c == '='
1162 ;
1163 }
1164
1165 bool wxURI::IsHex(const wxChar& c)
1166 { return IsDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
1167
1168 bool wxURI::IsAlpha(const wxChar& c)
1169 { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); }
1170
1171 bool wxURI::IsDigit(const wxChar& c)
1172 { return c >= '0' && c <= '9'; }
1173
1174
1175 // ---------------------------------------------------------------------------
1176 //
1177 // wxURL Compatibility
1178 //
1179 // TODO: Use wxURI instead here...
1180 // ---------------------------------------------------------------------------
1181
1182 #if wxUSE_URL
1183
1184 #include "wx/url.h"
1185
1186 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
1187 {
1188 wxString out_str;
1189 wxString hexa_code;
1190 size_t i;
1191
1192 for (i = 0; i < uri.Len(); i++)
1193 {
1194 wxChar c = uri.GetChar(i);
1195
1196 if (c == wxT(' '))
1197 {
1198 // GRG, Apr/2000: changed to "%20" instead of '+'
1199
1200 out_str += wxT("%20");
1201 }
1202 else
1203 {
1204 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
1205 //
1206 // - Alphanumeric characters are never escaped
1207 // - Unreserved marks are never escaped
1208 // - Delimiters must be escaped if they appear within a component
1209 // but not if they are used to separate components. Here we have
1210 // no clear way to distinguish between these two cases, so they
1211 // are escaped unless they are passed in the 'delims' parameter
1212 // (allowed delimiters).
1213
1214 static const wxChar marks[] = wxT("-_.!~*()'");
1215
1216 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
1217 {
1218 hexa_code.Printf(wxT("%%%02X"), c);
1219 out_str += hexa_code;
1220 }
1221 else
1222 {
1223 out_str += c;
1224 }
1225 }
1226 }
1227
1228 return out_str;
1229 }
1230
1231 wxString wxURL::ConvertFromURI(const wxString& uri)
1232 {
1233 wxString new_uri;
1234
1235 size_t i = 0;
1236 while (i < uri.Len())
1237 {
1238 int code;
1239 if (uri[i] == wxT('%'))
1240 {
1241 i++;
1242 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1243 code = (uri[i] - wxT('A') + 10) * 16;
1244 else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1245 code = (uri[i] - wxT('a') + 10) * 16;
1246 else
1247 code = (uri[i] - wxT('0')) * 16;
1248
1249 i++;
1250 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
1251 code += (uri[i] - wxT('A')) + 10;
1252 else if (uri[i] >= wxT('a') && uri[i] <= wxT('f'))
1253 code += (uri[i] - wxT('a')) + 10;
1254 else
1255 code += (uri[i] - wxT('0'));
1256
1257 i++;
1258 new_uri += (wxChar)code;
1259 continue;
1260 }
1261 new_uri += uri[i];
1262 i++;
1263 }
1264 return new_uri;
1265 }
1266
1267 #endif //wxUSE_URL
1268
1269 //end of uri.cpp
1270
1271
1272