]> git.saurik.com Git - wxWidgets.git/blame - docs/doxygen/overviews/archive.h
Fix problem with COMDLG_FILTERSPEC declaration with MinGW-w64 4.8.
[wxWidgets.git] / docs / doxygen / overviews / archive.h
CommitLineData
15b6757b 1/////////////////////////////////////////////////////////////////////////////
98ba1eee 2// Name: archive.h
15b6757b
FM
3// Purpose: topic overview
4// Author: wxWidgets team
526954c5 5// Licence: wxWindows licence
15b6757b
FM
6/////////////////////////////////////////////////////////////////////////////
7
880efa2a 8/**
36c9828f 9
4cbfec15 10@page overview_archive Archive Formats
36c9828f 11
e7054054
BP
12@tableofcontents
13
032e27aa 14The archive classes handle archive formats such as zip, tar, rar and cab.
d13dc522 15Currently wxZip, wxTar and wxZlib classes are included.
e0a47918 16
032e27aa
BP
17For each archive type, there are the following classes (using zip here as an
18example):
36c9828f 19
032e27aa
BP
20@li wxZipInputStream: Input stream
21@li wxZipOutputStream: Output stream
22@li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
36c9828f 23
032e27aa 24There are also abstract wxArchive classes that can be used to write code that
4cbfec15 25can handle any of the archive types, see @ref overview_archive_generic.
e0a47918 26
032e27aa
BP
27Also see wxFileSystem for a higher level interface that can handle archive
28files in a generic way.
e0a47918 29
032e27aa
BP
30The classes are designed to handle archives on both seekable streams such as
31disk files, or non-seekable streams such as pipes and sockets (see
4cbfec15 32@ref overview_archive_noseek).
e0a47918 33
36c9828f 34
e0a47918 35
4cbfec15 36@section overview_archive_create Creating an Archive
e0a47918 37
032e27aa
BP
38Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
39archive, then write the entry's data. Another call to PutNextEntry() closes the
40current entry and begins the next. For example:
36c9828f 41
032e27aa 42@code
9a83f860 43wxFFileOutputStream out(wxT("test.zip"));
032e27aa
BP
44wxZipOutputStream zip(out);
45wxTextOutputStream txt(zip);
46wxString sep(wxFileName::GetPathSeparator());
36c9828f 47
9a83f860
VZ
48zip.PutNextEntry(wxT("entry1.txt"));
49txt << wxT("Some text for entry1.txt\n");
36c9828f 50
9a83f860
VZ
51zip.PutNextEntry(wxT("subdir") + sep + wxT("entry2.txt"));
52txt << wxT("Some text for subdir/entry2.txt\n");
032e27aa 53@endcode
36c9828f 54
032e27aa
BP
55The name of each entry can be a full path, which makes it possible to store
56entries in subdirectories.
36c9828f
FM
57
58
4cbfec15 59@section overview_archive_extract Extracting an Archive
e0a47918 60
032e27aa
BP
61wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
62containing the meta-data for the next entry in the archive (and gives away
63ownership).
e0a47918 64
032e27aa
BP
65Reading from the input stream then returns the entry's data. Eof() becomes
66@true after an attempt has been made to read past the end of the entry's data.
36c9828f 67
032e27aa 68When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
36c9828f 69
032e27aa
BP
70@code
71auto_ptr<wxZipEntry> entry;
36c9828f 72
9a83f860 73wxFFileInputStream in(wxT("test.zip"));
032e27aa 74wxZipInputStream zip(in);
36c9828f 75
032e27aa
BP
76while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
77{
78 // access meta-data
79 wxString name = entry->GetName();
80 // read 'zip' to access the entry's data
81}
82@endcode
36c9828f
FM
83
84
85
4cbfec15 86@section overview_archive_modify Modifying an Archive
36c9828f 87
032e27aa
BP
88To modify an existing archive, write a new copy of the archive to a new file,
89making any necessary changes along the way and transferring any unchanged
90entries using wxArchiveOutputStream::CopyEntry().
e0a47918 91
032e27aa
BP
92For archive types which compress entry data, CopyEntry() is likely to be much
93more efficient than transferring the data using Read() and Write() since it
94will copy them without decompressing and recompressing them.
e0a47918 95
032e27aa
BP
96In general modifications are not possible without rewriting the archive, though
97it may be possible in some limited cases. Even then, rewriting the archive is
98usually a better choice since a failure can be handled without losing the whole
99archive. wxTempFileOutputStream can be helpful to do this.
e0a47918 100
032e27aa 101For example, to delete all entries matching the pattern "*.txt":
36c9828f 102
032e27aa 103@code
9a83f860
VZ
104auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(wxT("test.zip")));
105wxTempFileOutputStream out(wxT("test.zip"));
36c9828f 106
032e27aa
BP
107wxZipInputStream inzip(*in);
108wxZipOutputStream outzip(out);
36c9828f 109
032e27aa 110auto_ptr<wxZipEntry> entry;
36c9828f 111
032e27aa
BP
112// transfer any meta-data for the archive as a whole (the zip comment
113// in the case of zip)
114outzip.CopyArchiveMetaData(inzip);
36c9828f 115
032e27aa
BP
116// call CopyEntry for each entry except those matching the pattern
117while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
9a83f860 118 if (!entry->GetName().Matches(wxT("*.txt")))
032e27aa
BP
119 if (!outzip.CopyEntry(entry.release(), inzip))
120 break;
36c9828f 121
032e27aa
BP
122// close the input stream by releasing the pointer to it, do this
123// before closing the output stream so that the file can be replaced
124in.reset();
36c9828f 125
032e27aa
BP
126// you can check for success as follows
127bool success = inzip.Eof() && outzip.Close() && out.Commit();
128@endcode
36c9828f
FM
129
130
131
4cbfec15 132@section overview_archive_byname Looking Up an Archive Entry by Name
36c9828f 133
032e27aa
BP
134Also see wxFileSystem for a higher level interface that is more convenient for
135accessing archive entries by name.
e0a47918 136
032e27aa
BP
137To open just one entry in an archive, the most efficient way is to simply
138search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
139the required entry is found. This works both for archives on seekable and
140non-seekable streams.
e0a47918 141
032e27aa
BP
142The format of filenames in the archive is likely to be different from the local
143filename format. For example, zips and tars use Unix-style names, with forward
144slashes as the path separator, and absolute paths are not allowed. So if on
145Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
146back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
147into the internal format and back has lost some information.
e0a47918 148
032e27aa
BP
149So to avoid ambiguity when searching for an entry matching a local name, it is
150better to convert the local name to the archive's internal format and search
151for that:
36c9828f 152
032e27aa
BP
153@code
154auto_ptr<wxZipEntry> entry;
36c9828f 155
032e27aa
BP
156// convert the local name we are looking for into the internal format
157wxString name = wxZipEntry::GetInternalName(localname);
36c9828f 158
032e27aa 159// open the zip
9a83f860 160wxFFileInputStream in(wxT("test.zip"));
032e27aa 161wxZipInputStream zip(in);
36c9828f 162
032e27aa
BP
163// call GetNextEntry() until the required internal name is found
164do
165{
166 entry.reset(zip.GetNextEntry());
167}
168while (entry.get() != NULL && entry->GetInternalName() != name);
36c9828f 169
032e27aa
BP
170if (entry.get() != NULL)
171{
172 // read the entry's data...
173}
174@endcode
36c9828f 175
032e27aa
BP
176To access several entries randomly, it is most efficient to transfer the entire
177catalogue of entries to a container such as a std::map or a wxHashMap. Entries
178looked up by name can then be opened using the
179wxArchiveInputStream::OpenEntry() method.
36c9828f 180
032e27aa
BP
181@code
182WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
183ZipCatalog::iterator it;
184wxZipEntry *entry;
185ZipCatalog cat;
186
187// open the zip
9a83f860 188wxFFileInputStream in(wxT("test.zip"));
032e27aa
BP
189wxZipInputStream zip(in);
190
191// load the zip catalog
192while ((entry = zip.GetNextEntry()) != NULL)
193{
194 wxZipEntry*& current = cat[entry->GetInternalName()];
195 // some archive formats can have multiple entries with the same name
196 // (e.g. tar) though it is an error in the case of zip
197 delete current;
198 current = entry;
199}
200
201// open an entry by name
202if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
203{
204 zip.OpenEntry(*it->second);
205 // ... now read entry's data
206}
207@endcode
208
209To open more than one entry simultaneously you need more than one underlying
210stream on the same archive:
211
212@code
213// opening another entry without closing the first requires another
214// input stream for the same file
9a83f860 215wxFFileInputStream in2(wxT("test.zip"));
032e27aa
BP
216wxZipInputStream zip2(in2);
217if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
218 zip2.OpenEntry(*it->second);
219@endcode
220
221
222
4cbfec15 223@section overview_archive_generic Generic Archive Programming
032e27aa
BP
224
225Also see wxFileSystem for a higher level interface that can handle archive
226files in a generic way.
227
228The specific archive classes, such as the wxZip classes, inherit from the
229following abstract classes which can be used to write code that can handle any
230of the archive types:
231
232@li wxArchiveInputStream: Input stream
233@li wxArchiveOutputStream: Output stream
234@li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
235
236In order to be able to write generic code it's necessary to be able to create
237instances of the classes without knowing which archive type is being used.
238
239To allow this there is a class factory for each archive type, derived from
240wxArchiveClassFactory, that can create the other classes.
241
242For example, given wxArchiveClassFactory* factory, streams and entries can be
243created like this:
244
245@code
246// create streams without knowing their type
247auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
248auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
249
250// create an empty entry object
251auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
252@endcode
253
254For the factory itself, the static member wxArchiveClassFactory::Find() can be
255used to find a class factory that can handle a given file extension or mime
256type. For example, given @e filename:
257
258@code
259const wxArchiveClassFactory *factory;
260factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
261
262if (factory)
263 stream = factory->NewStream(new wxFFileInputStream(filename));
264@endcode
265
266@e Find() does not give away ownership of the returned pointer, so it does not
267need to be deleted.
268
269There are similar class factories for the filter streams that handle the
270compression and decompression of a single stream, such as wxGzipInputStream.
271These can be found using wxFilterClassFactory::Find().
272
273For example, to list the contents of archive @e filename:
274
275@code
276auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
277
278if (in->IsOk())
279{
280 // look for a filter handler, e.g. for '.gz'
281 const wxFilterClassFactory *fcf;
282 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
283 if (fcf)
284 {
285 in.reset(fcf->NewStream(in.release()));
286 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
287 filename = fcf->PopExtension(filename);
288 }
289
290 // look for an archive handler, e.g. for '.zip' or '.tar'
291 const wxArchiveClassFactory *acf;
292 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
293 if (acf)
294 {
295 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
296 auto_ptr<wxArchiveEntry> entry;
297
298 // list the contents of the archive
299 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
300 std::wcout << entry->GetName().c_str() << "\n";
301 }
302 else
303 {
9a83f860 304 wxLogError(wxT("can't handle '%s'"), filename.c_str());
032e27aa
BP
305 }
306}
307@endcode
36c9828f
FM
308
309
36c9828f 310
4cbfec15 311@section overview_archive_noseek Archives on Non-Seekable Streams
032e27aa
BP
312
313In general, handling archives on non-seekable streams is done in the same way
314as for seekable streams, with a few caveats.
315
316The main limitation is that accessing entries randomly using
317wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
318accessed sequentially in the order they are stored within the archive.
319
320For each archive type, there will also be other limitations which will depend
321on the order the entries' meta-data is stored within the archive. These are not
322too difficult to deal with, and are outlined below.
323
4cbfec15 324@subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
032e27aa
BP
325
326When writing archives, some archive formats store the entry size before the
327entry's data (tar has this limitation, zip doesn't). In this case the entry's
328size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
329occurs.
330
331This is only an issue on non-seekable streams, since otherwise the archive
332output stream can seek back and fix up the header once the size of the entry is
333known.
334
335For generic programming, one way to handle this is to supply the size whenever
336it is known, and rely on the error message from the output stream when the
337operation is not supported.
338
4cbfec15 339@subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
032e27aa
BP
340
341Some archive formats do not store all of an entry's meta-data before the entry's
342data (zip is an example). In this case, when reading from a non-seekable
343stream, wxArchiveInputStream::GetNextEntry() can only return a partially
344populated wxArchiveEntry object - not all the fields are set.
345
346The input stream then keeps a weak reference to the entry object and updates it
347when more meta-data becomes available. A weak reference is one that does not
348prevent you from deleting the wxArchiveEntry object - the input stream only
349attempts to update it if it is still around.
350
351The documentation for each archive entry type gives the details of what
352meta-data becomes available and when. For generic programming, when the worst
353case must be assumed, you can rely on all the fields of wxArchiveEntry being
4c51a665 354fully populated when GetNextEntry() returns, with the following exceptions:
032e27aa
BP
355
356@li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
357 been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
358 has been called.
359@li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
360 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
361 Eof() is @true.
362
363This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
364preserve entries' meta-data. No matter what order the meta-data occurs
365within the archive, the input stream will always have read it before the output
366stream must write it.
367
4cbfec15 368@subsection overview_archive_noseek_notifier wxArchiveNotifier
032e27aa
BP
369
370Notifier objects can be used to get a notification whenever an input stream
371updates a wxArchiveEntry object's data via the weak reference mechanism.
372
373Consider the following code which renames an entry in an archive. This is the
374usual way to modify an entry's meta-data: simply set the required field before
375writing it with wxArchiveOutputStream::CopyEntry():
376
377@code
378auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
379auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
380auto_ptr<wxArchiveEntry> entry;
381
382outarc->CopyArchiveMetaData(*arc);
383
384while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
385{
386 if (entry->GetName() == from)
387 entry->SetName(to);
388 if (!outarc->CopyEntry(entry.release(), *arc))
389 break;
390}
391
392bool success = arc->Eof() && outarc->Close();
393@endcode
394
395However, for non-seekable streams, this technique cannot be used for fields
396such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
397wxArchiveInputStream::GetNextEntry() returns.
398
399In this case a wxArchiveNotifier can be used:
400
401@code
402class MyNotifier : public wxArchiveNotifier
403{
404public:
405 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
406};
407@endcode
408
409The meta-data changes are done in your notifier's
410wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
411is called before CopyEntry():
412
413@code
414auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
415auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
416auto_ptr<wxArchiveEntry> entry;
417MyNotifier notifier;
418
419outarc->CopyArchiveMetaData(*arc);
420
421while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
422{
423 entry->SetNotifier(notifier);
424 if (!outarc->CopyEntry(entry.release(), *arc))
425 break;
426}
427
428bool success = arc->Eof() && outarc->Close();
429@endcode
430
431SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
432it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
433be called at least once, this technique always works even when it is not
434strictly necessary to use it. For example, changing the entry name can be done
435this way too and it works on seekable streams as well as non-seekable.
36c9828f 436
e0a47918 437*/
36c9828f 438