]> git.saurik.com Git - wxWidgets.git/blob - src/common/url.cpp
fixed handling of HTML tables with empty row(s)
[wxWidgets.git] / src / common / url.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: url.cpp
3 // Purpose: URL parser
4 // Author: Guilhem Lavaux
5 // Modified by:
6 // Created: 20/07/1997
7 // RCS-ID: $Id$
8 // Copyright: (c) 1997, 1998 Guilhem Lavaux
9 // Licence: wxWindows license
10 /////////////////////////////////////////////////////////////////////////////
11
12 #ifdef __GNUG__
13 #pragma implementation "url.h"
14 #endif
15
16 // For compilers that support precompilation, includes "wx.h".
17 #include "wx/wxprec.h"
18
19 #ifdef __BORLANDC__
20 #pragma hdrstop
21 #endif
22
23 #include <string.h>
24 #include <ctype.h>
25
26 #include "wx/string.h"
27 #include "wx/list.h"
28 #include "wx/utils.h"
29 #include "wx/module.h"
30 #include "wx/url.h"
31
// Run-time class information for the classes implemented in this file.
IMPLEMENT_CLASS(wxProtoInfo, wxObject)
IMPLEMENT_CLASS(wxURL, wxObject)

// Protocols list: head of the linked list of wxProtoInfo records that
// wxURL::FetchProtocol() walks (through wxProtoInfo::next) to find the
// handler matching a URL scheme.
wxProtoInfo *wxURL::ms_protocols = NULL;

// Enforce linking of protocol classes:
USE_PROTOCOL(wxFileProto)

#if wxUSE_SOCKETS
USE_PROTOCOL(wxHTTP)
USE_PROTOCOL(wxFTP)

// Default proxy shared by all wxURL objects. It is created on demand by the
// wxURL constructor (from the HTTP_PROXY environment variable) or by
// SetDefaultProxy(), and deleted in wxURLModule::OnExit().
wxHTTP *wxURL::ms_proxyDefault = NULL;

// TRUE while the constructor should still try to create the default proxy;
// set by wxURLModule::OnInit() when HTTP_PROXY is defined and reset by the
// constructor if the proxy could not be created.
bool wxURL::ms_useDefaultProxy = FALSE;
#endif
48
49 // --------------------------------------------------------------
50 // wxURL
51 // --------------------------------------------------------------
52
53 // --------------------------------------------------------------
54 // --------- wxURL CONSTRUCTOR DESTRUCTOR -----------------------
55 // --------------------------------------------------------------
56
57 wxURL::wxURL(const wxString& url)
58 {
59 m_protocol = NULL;
60 m_error = wxURL_NOERR;
61 m_url = url;
62
63 #if wxUSE_SOCKETS
64 if ( ms_useDefaultProxy && !ms_proxyDefault )
65 {
66 SetDefaultProxy(getenv("HTTP_PROXY"));
67
68 if ( !ms_proxyDefault )
69 {
70 // don't try again
71 ms_useDefaultProxy = FALSE;
72 }
73 }
74
75 m_useProxy = ms_proxyDefault != NULL;
76 m_proxy = ms_proxyDefault;
77 #endif // wxUSE_SOCKETS
78
79 ParseURL();
80 }
81
82 bool wxURL::ParseURL()
83 {
84 wxString last_url = m_url;
85
86 // If the URL was already parsed (m_protocol != NULL), pass this section.
87 if (!m_protocol)
88 {
89 // Clean up
90 CleanData();
91
92 // Extract protocol name
93 if (!PrepProto(last_url))
94 {
95 m_error = wxURL_SNTXERR;
96 return FALSE;
97 }
98
99 // Find and create the protocol object
100 if (!FetchProtocol())
101 {
102 m_error = wxURL_NOPROTO;
103 return FALSE;
104 }
105
106 // Do we need a host name ?
107 if (m_protoinfo->m_needhost)
108 {
109 // Extract it
110 if (!PrepHost(last_url))
111 {
112 m_error = wxURL_SNTXERR;
113 return FALSE;
114 }
115 }
116
117 // Extract full path
118 if (!PrepPath(last_url))
119 {
120 m_error = wxURL_NOPATH;
121 return FALSE;
122 }
123 }
124 // URL parse finished.
125
126 #if wxUSE_SOCKETS
127 if (m_useProxy)
128 {
129 // We destroy the newly created protocol.
130 CleanData();
131
132 // Third, we rebuild the URL.
133 m_url = m_protoname + wxT(":");
134 if (m_protoinfo->m_needhost)
135 m_url = m_url + wxT("//") + m_hostname;
136
137 m_url += m_path;
138
139 // We initialize specific variables.
140 m_protocol = m_proxy; // FIXME: we should clone the protocol
141 }
142 #endif
143
144 m_error = wxURL_NOERR;
145 return TRUE;
146 }
147
148 void wxURL::CleanData()
149 {
150 #if wxUSE_SOCKETS
151 if (!m_useProxy)
152 #endif
153 delete m_protocol;
154 }
155
156 wxURL::~wxURL()
157 {
158 CleanData();
159 #if wxUSE_SOCKETS
160 if (m_proxy && m_proxy != ms_proxyDefault)
161 delete m_proxy;
162 #endif
163 }
164
165 // --------------------------------------------------------------
166 // --------- wxURL urls decoders --------------------------------
167 // --------------------------------------------------------------
168
169 bool wxURL::PrepProto(wxString& url)
170 {
171 int pos;
172
173 // Find end
174 pos = url.Find(wxT(':'));
175 if (pos == -1)
176 return FALSE;
177
178 m_protoname = url(0, pos);
179
180 url = url(pos+1, url.Length());
181
182 return TRUE;
183 }
184
185 bool wxURL::PrepHost(wxString& url)
186 {
187 wxString temp_url;
188 int pos, pos2;
189
190 if ((url.GetChar(0) != wxT('/')) || (url.GetChar(1) != wxT('/')))
191 return FALSE;
192
193 url = url(2, url.Length());
194
195 pos = url.Find(wxT('/'));
196 if (pos == -1)
197 pos = url.Length();
198
199 if (pos == 0)
200 return FALSE;
201
202 temp_url = url(0, pos);
203 url = url(url.Find(wxT('/')), url.Length());
204
205 // Retrieve service number
206 pos2 = temp_url.Find(wxT(':'), TRUE);
207 if (pos2 != -1 && pos2 < pos)
208 {
209 m_servname = temp_url(pos2+1, pos);
210 if (!m_servname.IsNumber())
211 return FALSE;
212 temp_url = temp_url(0, pos2);
213 }
214
215 // Retrieve user and password.
216 pos2 = temp_url.Find(wxT('@'));
217 // Even if pos2 equals -1, this code is right.
218 m_hostname = temp_url(pos2+1, temp_url.Length());
219
220 m_user = wxT("");
221 m_password = wxT("");
222
223 if (pos2 == -1)
224 return TRUE;
225
226 temp_url = temp_url(0, pos2);
227 pos2 = temp_url.Find(wxT(':'));
228
229 if (pos2 == -1)
230 return FALSE;
231
232 m_user = temp_url(0, pos2);
233 m_password = temp_url(pos2+1, url.Length());
234
235 return TRUE;
236 }
237
238 bool wxURL::PrepPath(wxString& url)
239 {
240 if (url.Length() != 0)
241 m_path = ConvertToValidURI(url);
242 else
243 m_path = wxT("/");
244 return TRUE;
245 }
246
247 bool wxURL::FetchProtocol()
248 {
249 wxProtoInfo *info = ms_protocols;
250
251 while (info)
252 {
253 if (m_protoname == info->m_protoname)
254 {
255 if (m_servname.IsNull())
256 m_servname = info->m_servname;
257
258 m_protoinfo = info;
259 m_protocol = (wxProtocol *)m_protoinfo->m_cinfo->CreateObject();
260 return TRUE;
261 }
262 info = info->next;
263 }
264 return FALSE;
265 }
266
267 // --------------------------------------------------------------
268 // --------- wxURL get ------------------------------------------
269 // --------------------------------------------------------------
270
271 wxInputStream *wxURL::GetInputStream()
272 {
273 wxInputStream *the_i_stream = NULL;
274
275 if (!m_protocol)
276 {
277 m_error = wxURL_NOPROTO;
278 return NULL;
279 }
280
281 m_error = wxURL_NOERR;
282 if (m_user != wxT(""))
283 {
284 m_protocol->SetUser(m_user);
285 m_protocol->SetPassword(m_password);
286 }
287
288 #if wxUSE_SOCKETS
289 wxIPV4address addr;
290
291 // m_protoinfo is NULL when we use a proxy
292 if (!m_useProxy && m_protoinfo->m_needhost)
293 {
294 if (!addr.Hostname(m_hostname))
295 {
296 m_error = wxURL_NOHOST;
297 return NULL;
298 }
299
300 addr.Service(m_servname);
301
302 if (!m_protocol->Connect(addr, TRUE)) // Watcom needs the 2nd arg for some reason
303 {
304 m_error = wxURL_CONNERR;
305 return NULL;
306 }
307 }
308 #endif
309
310 // When we use a proxy, we have to pass the whole URL to it.
311 if (m_useProxy)
312 the_i_stream = m_protocol->GetInputStream(m_url);
313 else
314 the_i_stream = m_protocol->GetInputStream(m_path);
315
316 if (!the_i_stream)
317 {
318 m_error = wxURL_PROTOERR;
319 return NULL;
320 }
321
322 return the_i_stream;
323 }
324
325 #if wxUSE_SOCKETS
326 void wxURL::SetDefaultProxy(const wxString& url_proxy)
327 {
328 if ( !url_proxy )
329 {
330 if ( ms_proxyDefault )
331 {
332 ms_proxyDefault->Close();
333 delete ms_proxyDefault;
334 ms_proxyDefault = NULL;
335 }
336 }
337 else
338 {
339 wxString tmp_str = url_proxy;
340 int pos = tmp_str.Find(wxT(':'));
341 if (pos == -1)
342 return;
343
344 wxString hostname = tmp_str(0, pos),
345 port = tmp_str(pos+1, tmp_str.Length()-pos);
346 wxIPV4address addr;
347
348 if (!addr.Hostname(hostname))
349 return;
350 if (!addr.Service(port))
351 return;
352
353 if (ms_proxyDefault)
354 // Finally, when all is right, we connect the new proxy.
355 ms_proxyDefault->Close();
356 else
357 ms_proxyDefault = new wxHTTP();
358 ms_proxyDefault->Connect(addr, TRUE); // Watcom needs the 2nd arg for some reason
359 }
360 }
361
362 void wxURL::SetProxy(const wxString& url_proxy)
363 {
364 if ( !url_proxy )
365 {
366 if ( m_proxy && m_proxy != ms_proxyDefault )
367 {
368 m_proxy->Close();
369 delete m_proxy;
370 }
371
372 m_useProxy = FALSE;
373 }
374 else
375 {
376 wxString tmp_str;
377 wxString hostname, port;
378 int pos;
379 wxIPV4address addr;
380
381 tmp_str = url_proxy;
382 pos = tmp_str.Find(wxT(':'));
383 // This is an invalid proxy name.
384 if (pos == -1)
385 return;
386
387 hostname = tmp_str(0, pos);
388 port = tmp_str(pos, tmp_str.Length()-pos);
389
390 addr.Hostname(hostname);
391 addr.Service(port);
392
393 // Finally, create the whole stuff.
394 if (m_proxy && m_proxy != ms_proxyDefault)
395 delete m_proxy;
396 m_proxy = new wxHTTP();
397 m_proxy->Connect(addr, TRUE); // Watcom needs the 2nd arg for some reason
398
399 CleanData();
400 // Reparse url.
401 m_useProxy = TRUE;
402 ParseURL();
403 }
404 }
405 #endif // wxUSE_SOCKETS
406
407 wxString wxURL::ConvertToValidURI(const wxString& uri, const wxChar* delims)
408 {
409 wxString out_str;
410 wxString hexa_code;
411 size_t i;
412
413 for (i = 0; i < uri.Len(); i++)
414 {
415 wxChar c = uri.GetChar(i);
416
417 if (c == wxT(' '))
418 {
419 // GRG, Apr/2000: changed to "%20" instead of '+'
420
421 out_str += wxT("%20");
422 }
423 else
424 {
425 // GRG, Apr/2000: modified according to the URI definition (RFC 2396)
426 //
427 // - Alphanumeric characters are never escaped
428 // - Unreserved marks are never escaped
429 // - Delimiters must be escaped if they appear within a component
430 // but not if they are used to separate components. Here we have
431 // no clear way to distinguish between these two cases, so they
432 // are escaped unless they are passed in the 'delims' parameter
433 // (allowed delimiters).
434
435 static const wxChar marks[] = wxT("-_.!~*()'");
436
437 if ( !wxIsalnum(c) && !wxStrchr(marks, c) && !wxStrchr(delims, c) )
438 {
439 hexa_code.Printf(wxT("%%%02X"), c);
440 out_str += hexa_code;
441 }
442 else
443 {
444 out_str += c;
445 }
446 }
447 }
448
449 return out_str;
450 }
451
452 wxString wxURL::ConvertFromURI(const wxString& uri)
453 {
454 wxString new_uri;
455
456 size_t i = 0;
457 while (i < uri.Len())
458 {
459 int code;
460 if (uri[i] == wxT('%'))
461 {
462 i++;
463 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
464 code = (uri[i] - wxT('A') + 10) * 16;
465 else
466 code = (uri[i] - wxT('0')) * 16;
467
468 i++;
469 if (uri[i] >= wxT('A') && uri[i] <= wxT('F'))
470 code += (uri[i] - wxT('A')) + 10;
471 else
472 code += (uri[i] - wxT('0'));
473
474 i++;
475 new_uri += (wxChar)code;
476 continue;
477 }
478 new_uri += uri[i];
479 i++;
480 }
481 return new_uri;
482 }
483
484 // ----------------------------------------------------------------------
485 // A module which deletes the default proxy if we created it
486 // ----------------------------------------------------------------------
487
488 #if wxUSE_SOCKETS
489
// Module hooking the default proxy handling into the library start up and
// shut down: OnInit() only records whether a default proxy is wanted (the
// proxy itself is created later, by the wxURL constructor) and OnExit()
// deletes the default proxy.
class wxURLModule : public wxModule
{
public:
    // Enables the default proxy if HTTP_PROXY is set; always returns TRUE.
    virtual bool OnInit();

    // Deletes the default proxy, if any.
    virtual void OnExit();

private:
    DECLARE_DYNAMIC_CLASS(wxURLModule)
};

IMPLEMENT_DYNAMIC_CLASS(wxURLModule, wxModule)
501
502 bool wxURLModule::OnInit()
503 {
504 // env var HTTP_PROXY contains the address of the default proxy to use if
505 // set, but don't try to create this proxy right now because it will slow
506 // down the program startup (especially if there is no DNS server
507 // available, in which case it may take up to 1 minute)
508
509 if ( getenv("HTTP_PROXY") )
510 {
511 wxURL::ms_useDefaultProxy = TRUE;
512 }
513
514 return TRUE;
515 }
516
517 void wxURLModule::OnExit()
518 {
519 delete wxURL::ms_proxyDefault;
520 wxURL::ms_proxyDefault = NULL;
521 }
522
523 #endif // wxUSE_SOCKETS