]> git.saurik.com Git - wxWidgets.git/blob - docs/doxygen/overviews/arc.h
use @subpage where possible instead of @ref
[wxWidgets.git] / docs / doxygen / overviews / arc.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: arc
3 // Purpose: topic overview
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /*!
10
11 @page arc_overview Archive formats such as zip
12
13 The archive classes handle archive formats such as zip, tar, rar and cab.
14 Currently #wxZip
15 and #wxTar classes are included.
16 For each archive type, there are the following classes (using zip here
17 as an example):
18
19
20
21
22
23
24 #wxZipInputStream
25
26
27
28
29 Input stream
30
31
32
33
34
35 #wxZipOutputStream
36
37
38
39
40 Output stream
41
42
43
44
45
46 #wxZipEntry
47
48
49
50
51 Holds the meta-data for an
52 entry (e.g. filename, timestamp, etc.)
53
54
55
56
57
58 There are also abstract wxArchive classes that can be used to write code
59 that can handle any of the archive types,
60 see '@ref arcgeneric_overview'.
61 Also see #wxFileSystem for a higher level interface that
62 can handle archive files in a generic way.
63 The classes are designed to handle archives on both seekable streams such
64 as disk files, or non-seekable streams such as pipes and sockets
65 (see '@ref arcnoseek_overview').
66 @b See also
67 #wxFileSystem
68
69 @ref arccreate_overview
70 @ref arcextract_overview
71 @ref arcmodify_overview
72 @ref arcbyname_overview
73 @ref arcgeneric_overview
74 @ref arcnoseek_overview
75
76
77 @section wxarccreate Creating an archive
78
79 @ref arc_overview
80 Call #PutNextEntry() to
81 create each new entry in the archive, then write the entry's data.
82 Another call to PutNextEntry() closes the current entry and begins the next.
83 For example:
84
85 @code
86 wxFFileOutputStream out(_T("test.zip"));
87 wxZipOutputStream zip(out);
88 wxTextOutputStream txt(zip);
89 wxString sep(wxFileName::GetPathSeparator());
90
91 zip.PutNextEntry(_T("entry1.txt"));
92 txt << _T("Some text for entry1.txt\n");
93
94 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
95 txt << _T("Some text for subdir/entry2.txt\n");
96 @endcode
97
98 The name of each entry can be a full path, which makes it possible to
99 store entries in subdirectories.
100
101
102 @section wxarcextract Extracting an archive
103
104 @ref arc_overview
105 #GetNextEntry() returns a pointer
106 to an entry object containing the meta-data for the next entry in the archive
107 (and gives away ownership). Reading from the input stream then returns the
108 entry's data. Eof() becomes @true after an attempt has been made to read past
109 the end of the entry's data.
110 When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
111
112 @code
113 auto_ptr<wxZipEntry> entry;
114
115 wxFFileInputStream in(_T("test.zip"));
116 wxZipInputStream zip(in);
117
118 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
119 {
120 // access meta-data
121 wxString name = entry->GetName();
122 // read 'zip' to access the entry's data
123 }
124 @endcode
125
126
127
128 @section wxarcmodify Modifying an archive
129
130 @ref arc_overview
131 To modify an existing archive, write a new copy of the archive to a new file,
132 making any necessary changes along the way and transferring any unchanged
133 entries using #CopyEntry().
134 For archive types which compress entry data, CopyEntry() is likely to be
135 much more efficient than transferring the data using Read() and Write()
136 since it will copy them without decompressing and recompressing them.
137 In general modifications are not possible without rewriting the archive,
138 though it may be possible in some limited cases. Even then, rewriting the
139 archive is usually a better choice since a failure can be handled without
140 losing the whole
141 archive. #wxTempFileOutputStream can
142 be helpful to do this.
143 For example to delete all entries matching the pattern "*.txt":
144
145 @code
146 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
147 wxTempFileOutputStream out(_T("test.zip"));
148
149 wxZipInputStream inzip(*in);
150 wxZipOutputStream outzip(out);
151
152 auto_ptr<wxZipEntry> entry;
153
154 // transfer any meta-data for the archive as a whole (the zip comment
155 // in the case of zip)
156 outzip.CopyArchiveMetaData(inzip);
157
158 // call CopyEntry for each entry except those matching the pattern
159 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
160 if (!entry->GetName().Matches(_T("*.txt")))
161 if (!outzip.CopyEntry(entry.release(), inzip))
162 break;
163
164 // close the input stream by releasing the pointer to it, do this
165 // before closing the output stream so that the file can be replaced
166 in.reset();
167
168 // you can check for success as follows
169 bool success = inzip.Eof() && outzip.Close() && out.Commit();
170 @endcode
171
172
173
174 @section wxarcbyname Looking up an archive entry by name
175
176 @ref arc_overview
177 Also see #wxFileSystem for a higher level interface that is
178 more convenient for accessing archive entries by name.
179 To open just one entry in an archive, the most efficient way is
180 to simply search for it linearly by calling
181 #GetNextEntry() until the
182 required entry is found. This works both for archives on seekable and
183 non-seekable streams.
184 The format of filenames in the archive is likely to be different
185 from the local filename format. For example zips and tars use
186 unix style names, with forward slashes as the path separator,
187 and absolute paths are not allowed. So if on Windows the file
188 "C:\MYDIR\MYFILE.TXT" is stored, then when reading
189 the entry back #GetName() will return
190 "MYDIR\MYFILE.TXT". The conversion into the internal format
191 and back has lost some information.
192 So to avoid ambiguity when searching for an entry matching a local name,
193 it is better to convert the local name to the archive's internal format
194 and search for that:
195
196 @code
197 auto_ptr<wxZipEntry> entry;
198
199 // convert the local name we are looking for into the internal format
200 wxString name = wxZipEntry::GetInternalName(localname);
201
202 // open the zip
203 wxFFileInputStream in(_T("test.zip"));
204 wxZipInputStream zip(in);
205
206 // call GetNextEntry() until the required internal name is found
207 do {
208 entry.reset(zip.GetNextEntry());
209 }
210 while (entry.get() != NULL && entry->GetInternalName() != name);
211
212 if (entry.get() != NULL) {
213 // read the entry's data...
214 }
215 @endcode
216
217 To access several entries randomly, it is most efficient to transfer the
218 entire catalogue of entries to a container such as a std::map or a
219 #wxHashMap then entries looked up by name can be
220 opened using the #OpenEntry() method.
221
222 @code
223 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
224 ZipCatalog::iterator it;
225 wxZipEntry *entry;
226 ZipCatalog cat;
227
228 // open the zip
229 wxFFileInputStream in(_T("test.zip"));
230 wxZipInputStream zip(in);
231
232 // load the zip catalog
233 while ((entry = zip.GetNextEntry()) != NULL) {
234 wxZipEntry*& current = cat[entry->GetInternalName()];
235 // some archive formats can have multiple entries with the same name
236 // (e.g. tar) though it is an error in the case of zip
237 delete current;
238 current = entry;
239 }
240
241 // open an entry by name
242 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
243 zip.OpenEntry(*it->second);
244 // ... now read entry's data
245 }
246 @endcode
247
248 To open more than one entry simultaneously you need more than one
249 underlying stream on the same archive:
250
251 @code
252 // opening another entry without closing the first requires another
253 // input stream for the same file
254 wxFFileInputStream in2(_T("test.zip"));
255 wxZipInputStream zip2(in2);
256 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
257 zip2.OpenEntry(*it->second);
258 @endcode
259
260
261
262 @section wxarcgeneric Generic archive programming
263
264 @ref arc_overview
265 Also see #wxFileSystem for a higher level interface that
266 can handle archive files in a generic way.
267 The specific archive classes, such as the wxZip classes, inherit from
268 the following abstract classes which can be used to write code that can
269 handle any of the archive types:
270
271
272
273
274
275
276 #wxArchiveInputStream
277
278
279
280
281 Input stream
282
283
284
285
286
287 #wxArchiveOutputStream
288
289
290
291
292 Output stream
293
294
295
296
297
298 #wxArchiveEntry
299
300
301
302
303 Holds the meta-data for an
304 entry (e.g. filename)
305
306
307
308
309
310 In order to be able to write generic code it's necessary to be able to create
311 instances of the classes without knowing which archive type is being used.
312 To allow this there is a class factory for each archive type, derived from
313 #wxArchiveClassFactory, that can create
314 the other classes.
315 For example, given @e wxArchiveClassFactory* factory, streams and
316 entries can be created like this:
317
318 @code
319 // create streams without knowing their type
320 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
321 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
322
323 // create an empty entry object
324 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
325 @endcode
326
327 For the factory itself, the static member
328 wxArchiveClassFactory::Find()
329 can be used to find a class factory that can handle a given file
330 extension or mime type. For example, given @e filename:
331
332 @code
333 const wxArchiveClassFactory *factory;
334 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
335
336 if (factory)
337 stream = factory->NewStream(new wxFFileInputStream(filename));
338 @endcode
339
340 @e Find does not give away ownership of the returned pointer, so it
341 does not need to be deleted.
342 There are similar class factories for the filter streams that handle the
343 compression and decompression of a single stream, such as wxGzipInputStream.
344 These can be found using
345 wxFilterClassFactory::Find().
346 For example, to list the contents of archive @e filename:
347
348 @code
349 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
350
351 if (in->IsOk())
352 {
353 // look for a filter handler, e.g. for '.gz'
354 const wxFilterClassFactory *fcf;
355 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
356 if (fcf) {
357 in.reset(fcf->NewStream(in.release()));
358 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
359 filename = fcf->PopExtension(filename);
360 }
361
362 // look for an archive handler, e.g. for '.zip' or '.tar'
363 const wxArchiveClassFactory *acf;
364 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
365 if (acf) {
366 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
367 auto_ptr<wxArchiveEntry> entry;
368
369 // list the contents of the archive
370 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
371 std::wcout << entry->GetName().c_str() << "\n";
372 }
373 else {
374 wxLogError(_T("can't handle '%s'"), filename.c_str());
375 }
376 }
377 @endcode
378
379
380
381 @section wxarcnoseek Archives on non-seekable streams
382
383 @ref arc_overview
384 In general, handling archives on non-seekable streams is done in the same
385 way as for seekable streams, with a few caveats.
386 The main limitation is that accessing entries randomly using
387 #OpenEntry()
388 is not possible, the entries can only be accessed sequentially in the order
389 they are stored within the archive.
390 For each archive type, there will also be other limitations which will
391 depend on the order the entries' meta-data is stored within the archive.
392 These are not too difficult to deal with, and are outlined below.
393 @b PutNextEntry and the entry size
394 When writing archives, some archive formats store the entry size before
395 the entry's data (tar has this limitation, zip doesn't). In this case
396 the entry's size must be passed to
397 #PutNextEntry() or an error
398 occurs.
399 This is only an issue on non-seekable streams, since otherwise the archive
400 output stream can seek back and fix up the header once the size of the
401 entry is known.
402 For generic programming, one way to handle this is to supply the size
403 whenever it is known, and rely on the error message from the output
404 stream when the operation is not supported.
405 @b GetNextEntry and the weak reference mechanism
406 Some archive formats do not store all an entry's meta-data before the
407 entry's data (zip is an example). In this case, when reading from a
408 non-seekable stream, #GetNextEntry()
409 can only return a partially populated #wxArchiveEntry
410 object - not all the fields are set.
411 The input stream then keeps a weak reference to the entry object and
412 updates it when more meta-data becomes available. A weak reference being
413 one that does not prevent you from deleting the wxArchiveEntry object - the
414 input stream only attempts to update it if it is still around.
415 The documentation for each archive entry type gives the details
416 of what meta-data becomes available and when. For generic programming,
417 when the worst case must be assumed, you can rely on all the fields
418 of wxArchiveEntry being fully populated when GetNextEntry() returns,
419 with the following exceptions:
420
421
422
423
424
425
426 #GetSize()
427
428
429
430
431 Guaranteed to be
432 available after the entry has been read to #Eof(),
433 or #CloseEntry() has been called
434
435
436
437
438
439 #IsReadOnly()
440
441
442
443
444 Guaranteed to
445 be available after the end of the archive has been reached, i.e. after
446 GetNextEntry() returns @NULL and Eof() is @true
447
448
449
450
451
452 This mechanism allows #CopyEntry()
453 to always fully preserve entries' meta-data. No matter what order
454 the meta-data occurs within the archive, the input stream will always
455 have read it before the output stream must write it.
456 @b wxArchiveNotifier
457 Notifier objects can be used to get a notification whenever an input
458 stream updates a #wxArchiveEntry object's data
459 via the weak reference mechanism.
460 Consider the following code which renames an entry in an archive.
461 This is the usual way to modify an entry's meta-data, simply set the
462 required field before writing it with
463 #CopyEntry():
464
465 @code
466 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
467 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
468 auto_ptr<wxArchiveEntry> entry;
469
470 outarc->CopyArchiveMetaData(*arc);
471
472 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
473 if (entry->GetName() == from)
474 entry->SetName(to);
475 if (!outarc->CopyEntry(entry.release(), *arc))
476 break;
477 }
478
479 bool success = arc->Eof() && outarc->Close();
480 @endcode
481
482 However, for non-seekable streams, this technique cannot be used for
483 fields such as #IsReadOnly(),
484 which are not necessarily set when
485 #GetNextEntry() returns. In
486 this case a #wxArchiveNotifier can be used:
487
488 @code
489 class MyNotifier : public wxArchiveNotifier
490 {
491 public:
492 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
493 };
494 @endcode
495
496 The meta-data changes are done in your notifier's
497 #OnEntryUpdated() method,
498 then #SetNotifier() is called before
499 CopyEntry():
500
501 @code
502 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
503 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
504 auto_ptr<wxArchiveEntry> entry;
505 MyNotifier notifier;
506
507 outarc->CopyArchiveMetaData(*arc);
508
509 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
510 entry->SetNotifier(notifier);
511 if (!outarc->CopyEntry(entry.release(), *arc))
512 break;
513 }
514
515 bool success = arc->Eof() && outarc->Close();
516 @endcode
517
518 SetNotifier() calls OnEntryUpdated() immediately, then the input
519 stream calls it again whenever it sets more fields in the entry. Since
520 OnEntryUpdated() will be called at least once, this technique always
521 works even when it is not strictly necessary to use it. For example,
522 changing the entry name can be done this way too and it works on seekable
523 streams as well as non-seekable.
524
525 */
526
527