]> git.saurik.com Git - wxWidgets.git/blame - docs/doxygen/overviews/arc.h
fix some links
[wxWidgets.git] / docs / doxygen / overviews / arc.h
CommitLineData
15b6757b 1/////////////////////////////////////////////////////////////////////////////
e0a47918 2// Name: arc.h
15b6757b
FM
3// Purpose: topic overview
4// Author: wxWidgets team
5// RCS-ID: $Id$
6// Licence: wxWindows license
7/////////////////////////////////////////////////////////////////////////////
8
9/*!
36c9828f 10
e0a47918 11 @page overview_arc Archive formats such as zip
36c9828f 12
15b6757b 13 The archive classes handle archive formats such as zip, tar, rar and cab.
e0a47918
FM
14 Currently #wxZip and #wxTar classes are included.
15
15b6757b
FM
16 For each archive type, there are the following classes (using zip here
17 as an example):
36c9828f 18
e0a47918
FM
19 @li wxZipInputStream: input stream
20 @li wxZipOutputStream: output stream
21 @li wxZipEntry: holds the meta-data for an entry (e.g. filename, timestamp, etc.)
36c9828f 22
15b6757b 23 There are also abstract wxArchive classes that can be used to write code
e0a47918
FM
24 that can handle any of the archive types, see @ref overview_arc_generic.
25
15b6757b
FM
26 Also see #wxFileSystem for a higher level interface that
27 can handle archive files in a generic way.
e0a47918 28
15b6757b
FM
29 The classes are designed to handle archives on both seekable streams such
30 as disk files, or non-seekable streams such as pipes and sockets
e0a47918
FM
31 (see @ref overview_arc_noseek).
32
33 See also #wxFileSystem.
36c9828f 34
e0a47918
FM
35 @li @ref overview_arc_create
36 @li @ref overview_arc_extract
37 @li @ref overview_arc_modify
38 @li @ref overview_arc_byname
39 @li @ref overview_arc_generic
40 @li @ref overview_arc_noseek
36c9828f
FM
41
42
e0a47918 43 <hr>
36c9828f 44
e0a47918
FM
45
46 @section overview_arc_create Creating an archive
47
48 Call #PutNextEntry() to create each new entry in the archive, then write the entry's data.
15b6757b
FM
49 Another call to PutNextEntry() closes the current entry and begins the next.
50 For example:
36c9828f 51
15b6757b 52 @code
e0a47918 53 wxFFileOutputStream out(_T("test.zip"));
15b6757b
FM
54 wxZipOutputStream zip(out);
55 wxTextOutputStream txt(zip);
56 wxString sep(wxFileName::GetPathSeparator());
36c9828f 57
15b6757b
FM
58 zip.PutNextEntry(_T("entry1.txt"));
59 txt << _T("Some text for entry1.txt\n");
36c9828f 60
15b6757b
FM
61 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
62 txt << _T("Some text for subdir/entry2.txt\n");
63 @endcode
36c9828f 64
15b6757b
FM
65 The name of each entry can be a full path, which makes it possible to
66 store entries in subdirectories.
36c9828f
FM
67
68
e0a47918
FM
69 @section overview_arc_extract Extracting an archive
70
71 #GetNextEntry() returns a pointer to an entry object containing the meta-data for
72 the next entry in the archive (and gives away ownership).
73
74 Reading from the input stream then returns the entry's data.
75 Eof() becomes @true after an attempt has been made to read past the end of the entry's data.
36c9828f 76
15b6757b 77 When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
36c9828f 78
15b6757b 79 @code
e0a47918 80 auto_ptr<wxZipEntry> entry;
36c9828f 81
15b6757b
FM
82 wxFFileInputStream in(_T("test.zip"));
83 wxZipInputStream zip(in);
36c9828f 84
15b6757b
FM
85 while (entry.reset(zip.GetNextEntry()), entry.get() != @NULL)
86 {
87 // access meta-data
88 wxString name = entry->GetName();
89 // read 'zip' to access the entry's data
90 }
91 @endcode
36c9828f
FM
92
93
94
e0a47918 95 @section overview_arc_modify Modifying an archive
36c9828f 96
15b6757b
FM
97 To modify an existing archive, write a new copy of the archive to a new file,
98 making any necessary changes along the way and transferring any unchanged
99 entries using #CopyEntry().
e0a47918 100
15b6757b
FM
101 For archive types which compress entry data, CopyEntry() is likely to be
102 much more efficient than transferring the data using Read() and Write()
103 since it will copy them without decompressing and recompressing them.
e0a47918 104
15b6757b
FM
105 In general modifications are not possible without rewriting the archive,
106 though it may be possible in some limited cases. Even then, rewriting the
107 archive is usually a better choice since a failure can be handled without
e0a47918
FM
108 losing the whole archive. #wxTempFileOutputStream can be helpful to do this.
109
15b6757b 110 For example to delete all entries matching the pattern "*.txt":
36c9828f 111
15b6757b 112 @code
e0a47918 113 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
15b6757b 114 wxTempFileOutputStream out(_T("test.zip"));
36c9828f 115
15b6757b
FM
116 wxZipInputStream inzip(*in);
117 wxZipOutputStream outzip(out);
36c9828f 118
e0a47918 119 auto_ptr<wxZipEntry> entry;
36c9828f 120
15b6757b
FM
121 // transfer any meta-data for the archive as a whole (the zip comment
122 // in the case of zip)
123 outzip.CopyArchiveMetaData(inzip);
36c9828f 124
15b6757b
FM
125 // call CopyEntry for each entry except those matching the pattern
126 while (entry.reset(inzip.GetNextEntry()), entry.get() != @NULL)
127 if (!entry->GetName().Matches(_T("*.txt")))
128 if (!outzip.CopyEntry(entry.release(), inzip))
129 break;
36c9828f 130
15b6757b
FM
131 // close the input stream by releasing the pointer to it, do this
132 // before closing the output stream so that the file can be replaced
133 in.reset();
36c9828f 134
15b6757b
FM
135 // you can check for success as follows
136 bool success = inzip.Eof() && outzip.Close() && out.Commit();
137 @endcode
36c9828f
FM
138
139
140
e0a47918 141 @section overview_arc_byname Looking up an archive entry by name
36c9828f 142
15b6757b
FM
143 Also see #wxFileSystem for a higher level interface that is
144 more convenient for accessing archive entries by name.
e0a47918 145
15b6757b 146 To open just one entry in an archive, the most efficient way is
e0a47918 147 to simply search for it linearly by calling #GetNextEntry() until the
15b6757b
FM
148 required entry is found. This works both for archives on seekable and
149 non-seekable streams.
e0a47918 150
15b6757b
FM
151 The format of filenames in the archive is likely to be different
152 from the local filename format. For example zips and tars use
153 unix style names, with forward slashes as the path separator,
154 and absolute paths are not allowed. So if on Windows the file
e0a47918
FM
155 "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry back #GetName()
156 will return "MYDIR\MYFILE.TXT". The conversion into the internal format
15b6757b 157 and back has lost some information.
e0a47918 158
15b6757b
FM
159 So to avoid ambiguity when searching for an entry matching a local name,
160 it is better to convert the local name to the archive's internal format
161 and search for that:
36c9828f 162
15b6757b 163 @code
e0a47918 164 auto_ptr<wxZipEntry> entry;
36c9828f 165
15b6757b
FM
166 // convert the local name we are looking for into the internal format
167 wxString name = wxZipEntry::GetInternalName(localname);
36c9828f 168
15b6757b
FM
169 // open the zip
170 wxFFileInputStream in(_T("test.zip"));
171 wxZipInputStream zip(in);
36c9828f 172
15b6757b
FM
173 // call GetNextEntry() until the required internal name is found
174 do {
175 entry.reset(zip.GetNextEntry());
176 }
177 while (entry.get() != @NULL && entry->GetInternalName() != name);
36c9828f 178
15b6757b
FM
179 if (entry.get() != @NULL) {
180 // read the entry's data...
181 }
182 @endcode
36c9828f 183
15b6757b
FM
184 To access several entries randomly, it is most efficient to transfer the
185 entire catalogue of entries to a container such as a std::map or a
e0a47918 186 #wxHashMap then entries looked up by name can be opened using the #OpenEntry() method.
36c9828f 187
15b6757b 188 @code
e0a47918 189 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
15b6757b
FM
190 ZipCatalog::iterator it;
191 wxZipEntry *entry;
192 ZipCatalog cat;
36c9828f 193
15b6757b
FM
194 // open the zip
195 wxFFileInputStream in(_T("test.zip"));
196 wxZipInputStream zip(in);
36c9828f 197
15b6757b
FM
198 // load the zip catalog
199 while ((entry = zip.GetNextEntry()) != @NULL) {
200 wxZipEntry*& current = cat[entry->GetInternalName()];
201 // some archive formats can have multiple entries with the same name
202 // (e.g. tar) though it is an error in the case of zip
203 delete current;
204 current = entry;
205 }
36c9828f 206
15b6757b
FM
207 // open an entry by name
208 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
209 zip.OpenEntry(*it->second);
210 // ... now read entry's data
211 }
212 @endcode
36c9828f 213
15b6757b
FM
214 To open more than one entry simultaneously you need more than one
215 underlying stream on the same archive:
36c9828f 216
15b6757b 217 @code
e0a47918 218 // opening another entry without closing the first requires another
15b6757b
FM
219 // input stream for the same file
220 wxFFileInputStream in2(_T("test.zip"));
221 wxZipInputStream zip2(in2);
222 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
223 zip2.OpenEntry(*it->second);
224 @endcode
36c9828f
FM
225
226
227
e0a47918 228 @section overview_arc_generic Generic archive programming
36c9828f 229
15b6757b
FM
230 Also see #wxFileSystem for a higher level interface that
231 can handle archive files in a generic way.
e0a47918 232
15b6757b
FM
233 The specific archive classes, such as the wxZip classes, inherit from
234 the following abstract classes which can be used to write code that can
235 handle any of the archive types:
36c9828f 236
e0a47918
FM
237 @li wxArchiveInputStream: input stream
238 @li wxArchiveOutputStream: output stream
239 @li wxArchiveEntry: holds the meta-data for an entry (e.g. filename)
36c9828f 240
15b6757b
FM
241 In order to be able to write generic code it's necessary to be able to create
242 instances of the classes without knowing which archive type is being used.
e0a47918 243
15b6757b 244 To allow this there is a class factory for each archive type, derived from
e0a47918
FM
245 #wxArchiveClassFactory, that can create the other classes.
246
15b6757b
FM
247 For example, given @e wxArchiveClassFactory* factory, streams and
248 entries can be created like this:
36c9828f 249
15b6757b 250 @code
e0a47918
FM
251 // create streams without knowing their type
252 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
253 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
36c9828f 254
15b6757b 255 // create an empty entry object
e0a47918 256 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
15b6757b 257 @endcode
36c9828f 258
e0a47918 259 For the factory itself, the static member wxArchiveClassFactory::Find()
15b6757b
FM
260 can be used to find a class factory that can handle a given file
261 extension or mime type. For example, given @e filename:
36c9828f 262
15b6757b 263 @code
e0a47918 264 const wxArchiveClassFactory *factory;
15b6757b 265 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
36c9828f 266
15b6757b
FM
267 if (factory)
268 stream = factory->NewStream(new wxFFileInputStream(filename));
269 @endcode
36c9828f 270
15b6757b
FM
271 @e Find does not give away ownership of the returned pointer, so it
272 does not need to be deleted.
e0a47918 273
15b6757b
FM
274 There are similar class factories for the filter streams that handle the
275 compression and decompression of a single stream, such as wxGzipInputStream.
e0a47918
FM
276 These can be found using wxFilterClassFactory::Find().
277
15b6757b 278 For example, to list the contents of archive @e filename:
36c9828f 279
15b6757b 280 @code
e0a47918 281 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
36c9828f 282
15b6757b
FM
283 if (in->IsOk())
284 {
285 // look for a filter handler, e.g. for '.gz'
286 const wxFilterClassFactory *fcf;
287 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
288 if (fcf) {
289 in.reset(fcf->NewStream(in.release()));
290 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
291 filename = fcf->PopExtension(filename);
292 }
36c9828f 293
15b6757b
FM
294 // look for an archive handler, e.g. for '.zip' or '.tar'
295 const wxArchiveClassFactory *acf;
296 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
297 if (acf) {
e0a47918
FM
298 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
299 auto_ptr<wxArchiveEntry> entry;
36c9828f 300
15b6757b
FM
301 // list the contents of the archive
302 while ((entry.reset(arc->GetNextEntry())), entry.get() != @NULL)
303 std::wcout << entry->GetName().c_str() << "\n";
304 }
305 else {
306 wxLogError(_T("can't handle '%s'"), filename.c_str());
307 }
308 }
309 @endcode
36c9828f
FM
310
311
312
e0a47918 313 @section overview_arc_noseek Archives on non-seekable streams
36c9828f 314
15b6757b
FM
315 In general, handling archives on non-seekable streams is done in the same
316 way as for seekable streams, with a few caveats.
e0a47918
FM
317
318 The main limitation is that accessing entries randomly using #OpenEntry()
36c9828f 319 is not possible, the entries can only be accessed sequentially in the order
15b6757b 320 they are stored within the archive.
e0a47918 321
15b6757b
FM
322 For each archive type, there will also be other limitations which will
323 depend on the order the entries' meta-data is stored within the archive.
324 These are not too difficult to deal with, and are outlined below.
e0a47918 325
15b6757b
FM
326 @b PutNextEntry and the entry size
327 When writing archives, some archive formats store the entry size before
328 the entry's data (tar has this limitation, zip doesn't). In this case
e0a47918
FM
329 the entry's size must be passed to #PutNextEntry() or an error occurs.
330
15b6757b
FM
331 This is only an issue on non-seekable streams, since otherwise the archive
332 output stream can seek back and fix up the header once the size of the
333 entry is known.
e0a47918 334
15b6757b
FM
335 For generic programming, one way to handle this is to supply the size
336 whenever it is known, and rely on the error message from the output
337 stream when the operation is not supported.
e0a47918 338
15b6757b
FM
339 @b GetNextEntry and the weak reference mechanism
340 Some archive formats do not store all an entry's meta-data before the
341 entry's data (zip is an example). In this case, when reading from a
e0a47918
FM
342 non-seekable stream, #GetNextEntry() can only return a partially populated
343 #wxArchiveEntry object - not all the fields are set.
344
15b6757b
FM
345 The input stream then keeps a weak reference to the entry object and
346 updates it when more meta-data becomes available. A weak reference being
347 one that does not prevent you from deleting the wxArchiveEntry object - the
348 input stream only attempts to update it if it is still around.
e0a47918 349
15b6757b
FM
350 The documentation for each archive entry type gives the details
351 of what meta-data becomes available and when. For generic programming,
352 when the worst case must be assumed, you can rely on all the fields
353 of wxArchiveEntry being fully populated when GetNextEntry() returns,
354 with the following exceptions:
36c9828f 355
e0a47918
FM
356 @li GetSize(): Guaranteed to be available after the entry has been read to #Eof(),
357 or #CloseEntry() has been called
36c9828f 358
e0a47918
FM
359 @li IsReadOnly(): Guaranteed to be available after the end of the archive has been
360 reached, i.e. after GetNextEntry() returns @NULL and Eof() is @true
36c9828f 361
e0a47918
FM
362 This mechanism allows #CopyEntry() to always fully preserve entries' meta-data.
363 No matter what order the meta-data occurs within the archive, the input stream
364 will always have read it before the output stream must write it.
36c9828f 365
15b6757b
FM
366 @b wxArchiveNotifier
367 Notifier objects can be used to get a notification whenever an input
368 stream updates a #wxArchiveEntry object's data
369 via the weak reference mechanism.
e0a47918 370
15b6757b
FM
371 Consider the following code which renames an entry in an archive.
372 This is the usual way to modify an entry's meta-data, simply set the
e0a47918 373 required field before writing it with #CopyEntry():
36c9828f 374
15b6757b 375 @code
e0a47918
FM
376 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
377 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
378 auto_ptr<wxArchiveEntry> entry;
36c9828f 379
15b6757b 380 outarc->CopyArchiveMetaData(*arc);
36c9828f 381
15b6757b
FM
382 while (entry.reset(arc->GetNextEntry()), entry.get() != @NULL) {
383 if (entry->GetName() == from)
384 entry->SetName(to);
385 if (!outarc->CopyEntry(entry.release(), *arc))
386 break;
387 }
36c9828f 388
15b6757b
FM
389 bool success = arc->Eof() && outarc->Close();
390 @endcode
36c9828f 391
15b6757b 392 However, for non-seekable streams, this technique cannot be used for
e0a47918
FM
393 fields such as #IsReadOnly(), which are not necessarily set when
394 #GetNextEntry() returns. In this case a #wxArchiveNotifier can be used:
36c9828f 395
15b6757b
FM
396 @code
397 class MyNotifier : public wxArchiveNotifier
398 {
399 public:
400 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(@false); }
401 };
402 @endcode
36c9828f 403
e0a47918
FM
404 The meta-data changes are done in your notifier's #OnEntryUpdated() method,
405 then #SetNotifier() is called before CopyEntry():
36c9828f 406
15b6757b 407 @code
e0a47918
FM
408 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
409 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
410 auto_ptr<wxArchiveEntry> entry;
15b6757b 411 MyNotifier notifier;
36c9828f 412
15b6757b 413 outarc->CopyArchiveMetaData(*arc);
36c9828f 414
15b6757b
FM
415 while (entry.reset(arc->GetNextEntry()), entry.get() != @NULL) {
416 entry->SetNotifier(notifier);
417 if (!outarc->CopyEntry(entry.release(), *arc))
418 break;
419 }
36c9828f 420
15b6757b
FM
421 bool success = arc->Eof() && outarc->Close();
422 @endcode
36c9828f 423
15b6757b
FM
424 SetNotifier() calls OnEntryUpdated() immediately, then the input
425 stream calls it again whenever it sets more fields in the entry. Since
426 OnEntryUpdated() will be called at least once, this technique always
427 works even when it is not strictly necessary to use it. For example,
428 changing the entry name can be done this way too and it works on seekable
429 streams as well as non-seekable.
36c9828f 430
e0a47918 431*/
36c9828f 432