]> git.saurik.com Git - wxWidgets.git/blame - docs/doxygen/overviews/archive.h
Fix background corruption in scrolled wxHtmlWindow.
[wxWidgets.git] / docs / doxygen / overviews / archive.h
CommitLineData
15b6757b 1/////////////////////////////////////////////////////////////////////////////
98ba1eee 2// Name: archive.h
15b6757b
FM
3// Purpose: topic overview
4// Author: wxWidgets team
5// RCS-ID: $Id$
526954c5 6// Licence: wxWindows licence
15b6757b
FM
7/////////////////////////////////////////////////////////////////////////////
8
880efa2a 9/**
36c9828f 10
4cbfec15 11@page overview_archive Archive Formats
36c9828f 12
e7054054
BP
13@tableofcontents
14
032e27aa 15The archive classes handle archive formats such as zip, tar, rar and cab.
d13dc522 16Currently wxZip, wxTar and wxZlib classes are included.
e0a47918 17
032e27aa
BP
18For each archive type, there are the following classes (using zip here as an
19example):
36c9828f 20
032e27aa
BP
21@li wxZipInputStream: Input stream
22@li wxZipOutputStream: Output stream
23@li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
36c9828f 24
032e27aa 25There are also abstract wxArchive classes that can be used to write code that
4cbfec15 26can handle any of the archive types, see @ref overview_archive_generic.
e0a47918 27
032e27aa
BP
28Also see wxFileSystem for a higher level interface that can handle archive
29files in a generic way.
e0a47918 30
032e27aa
BP
31The classes are designed to handle archives on both seekable streams such as
32disk files, and non-seekable streams such as pipes and sockets (see
4cbfec15 33@ref overview_archive_noseek).
e0a47918 34
36c9828f 35
e0a47918 36
4cbfec15 37@section overview_archive_create Creating an Archive
e0a47918 38
032e27aa
BP
39Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
40archive, then write the entry's data. Another call to PutNextEntry() closes the
41current entry and begins the next. For example:
36c9828f 42
032e27aa 43@code
9a83f860 44wxFFileOutputStream out(wxT("test.zip"));
032e27aa
BP
45wxZipOutputStream zip(out);
46wxTextOutputStream txt(zip);
47wxString sep(wxFileName::GetPathSeparator());
36c9828f 48
9a83f860
VZ
49zip.PutNextEntry(wxT("entry1.txt"));
50txt << wxT("Some text for entry1.txt\n");
36c9828f 51
9a83f860
VZ
52zip.PutNextEntry(wxT("subdir") + sep + wxT("entry2.txt"));
53txt << wxT("Some text for subdir/entry2.txt\n");
032e27aa 54@endcode
36c9828f 55
032e27aa
BP
56The name of each entry can be a full path, which makes it possible to store
57entries in subdirectories.
36c9828f
FM
58
59
4cbfec15 60@section overview_archive_extract Extracting an Archive
e0a47918 61
032e27aa
BP
62wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
63containing the meta-data for the next entry in the archive (and gives away
64ownership).
e0a47918 65
032e27aa
BP
66Reading from the input stream then returns the entry's data. Eof() becomes
67@true after an attempt has been made to read past the end of the entry's data.
36c9828f 68
032e27aa 69When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
36c9828f 70
032e27aa
BP
71@code
72auto_ptr<wxZipEntry> entry;
36c9828f 73
9a83f860 74wxFFileInputStream in(wxT("test.zip"));
032e27aa 75wxZipInputStream zip(in);
36c9828f 76
032e27aa
BP
77while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
78{
79 // access meta-data
80 wxString name = entry->GetName();
81 // read 'zip' to access the entry's data
82}
83@endcode
36c9828f
FM
84
85
86
4cbfec15 87@section overview_archive_modify Modifying an Archive
36c9828f 88
032e27aa
BP
89To modify an existing archive, write a new copy of the archive to a new file,
90making any necessary changes along the way and transferring any unchanged
91entries using wxArchiveOutputStream::CopyEntry().
e0a47918 92
032e27aa
BP
93For archive types which compress entry data, CopyEntry() is likely to be much
94more efficient than transferring the data using Read() and Write() since it
95will copy them without decompressing and recompressing them.
e0a47918 96
032e27aa
BP
97In general modifications are not possible without rewriting the archive, though
98it may be possible in some limited cases. Even then, rewriting the archive is
99usually a better choice since a failure can be handled without losing the whole
100archive. wxTempFileOutputStream can be helpful to do this.
e0a47918 101
032e27aa 102For example to delete all entries matching the pattern "*.txt":
36c9828f 103
032e27aa 104@code
9a83f860
VZ
105auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(wxT("test.zip")));
106wxTempFileOutputStream out(wxT("test.zip"));
36c9828f 107
032e27aa
BP
108wxZipInputStream inzip(*in);
109wxZipOutputStream outzip(out);
36c9828f 110
032e27aa 111auto_ptr<wxZipEntry> entry;
36c9828f 112
032e27aa
BP
113// transfer any meta-data for the archive as a whole (the zip comment
114// in the case of zip)
115outzip.CopyArchiveMetaData(inzip);
36c9828f 116
032e27aa
BP
117// call CopyEntry for each entry except those matching the pattern
118while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
9a83f860 119 if (!entry->GetName().Matches(wxT("*.txt")))
032e27aa
BP
120 if (!outzip.CopyEntry(entry.release(), inzip))
121 break;
36c9828f 122
032e27aa
BP
123// close the input stream by releasing the pointer to it, do this
124// before closing the output stream so that the file can be replaced
125in.reset();
36c9828f 126
032e27aa
BP
127// you can check for success as follows
128bool success = inzip.Eof() && outzip.Close() && out.Commit();
129@endcode
36c9828f
FM
130
131
132
4cbfec15 133@section overview_archive_byname Looking Up an Archive Entry by Name
36c9828f 134
032e27aa
BP
135Also see wxFileSystem for a higher level interface that is more convenient for
136accessing archive entries by name.
e0a47918 137
032e27aa
BP
138To open just one entry in an archive, the most efficient way is to simply
139search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
140the required entry is found. This works both for archives on seekable and
141non-seekable streams.
e0a47918 142
032e27aa
BP
143The format of filenames in the archive is likely to be different from the local
144filename format. For example zips and tars use unix style names, with forward
145slashes as the path separator, and absolute paths are not allowed. So if on
146Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
147back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
148into the internal format and back has lost some information.
e0a47918 149
032e27aa
BP
150So to avoid ambiguity when searching for an entry matching a local name, it is
151better to convert the local name to the archive's internal format and search
152for that:
36c9828f 153
032e27aa
BP
154@code
155auto_ptr<wxZipEntry> entry;
36c9828f 156
032e27aa
BP
157// convert the local name we are looking for into the internal format
158wxString name = wxZipEntry::GetInternalName(localname);
36c9828f 159
032e27aa 160// open the zip
9a83f860 161wxFFileInputStream in(wxT("test.zip"));
032e27aa 162wxZipInputStream zip(in);
36c9828f 163
032e27aa
BP
164// call GetNextEntry() until the required internal name is found
165do
166{
167 entry.reset(zip.GetNextEntry());
168}
169while (entry.get() != NULL && entry->GetInternalName() != name);
36c9828f 170
032e27aa
BP
171if (entry.get() != NULL)
172{
173 // read the entry's data...
174}
175@endcode
36c9828f 176
032e27aa
BP
177To access several entries randomly, it is most efficient to transfer the entire
178catalogue of entries to a container such as a std::map or a wxHashMap; the
179entries looked up by name can then be opened using the
180wxArchiveInputStream::OpenEntry() method.
36c9828f 181
032e27aa
BP
182@code
183WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
184ZipCatalog::iterator it;
185wxZipEntry *entry;
186ZipCatalog cat;
187
188// open the zip
9a83f860 189wxFFileInputStream in(wxT("test.zip"));
032e27aa
BP
190wxZipInputStream zip(in);
191
192// load the zip catalog
193while ((entry = zip.GetNextEntry()) != NULL)
194{
195 wxZipEntry*& current = cat[entry->GetInternalName()];
196 // some archive formats can have multiple entries with the same name
197 // (e.g. tar) though it is an error in the case of zip
198 delete current;
199 current = entry;
200}
201
202// open an entry by name
203if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
204{
205 zip.OpenEntry(*it->second);
206 // ... now read entry's data
207}
208@endcode
209
210To open more than one entry simultaneously you need more than one underlying
211stream on the same archive:
212
213@code
214// opening another entry without closing the first requires another
215// input stream for the same file
9a83f860 216wxFFileInputStream in2(wxT("test.zip"));
032e27aa
BP
217wxZipInputStream zip2(in2);
218if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
219 zip2.OpenEntry(*it->second);
220@endcode
221
222
223
4cbfec15 224@section overview_archive_generic Generic Archive Programming
032e27aa
BP
225
226Also see wxFileSystem for a higher level interface that can handle archive
227files in a generic way.
228
229The specific archive classes, such as the wxZip classes, inherit from the
230following abstract classes which can be used to write code that can handle any
231of the archive types:
232
233@li wxArchiveInputStream: Input stream
234@li wxArchiveOutputStream: Output stream
235@li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
236
237In order to be able to write generic code it's necessary to be able to create
238instances of the classes without knowing which archive type is being used.
239
240To allow this there is a class factory for each archive type, derived from
241wxArchiveClassFactory, that can create the other classes.
242
243For example, given wxArchiveClassFactory* factory, streams and entries can be
244created like this:
245
246@code
247// create streams without knowing their type
248auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
249auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
250
251// create an empty entry object
252auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
253@endcode
254
255For the factory itself, the static member wxArchiveClassFactory::Find() can be
256used to find a class factory that can handle a given file extension or mime
257type. For example, given @e filename:
258
259@code
260const wxArchiveClassFactory *factory;
261factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
262
263if (factory)
264 stream = factory->NewStream(new wxFFileInputStream(filename));
265@endcode
266
267@e Find() does not give away ownership of the returned pointer, so it does not
268need to be deleted.
269
270There are similar class factories for the filter streams that handle the
271compression and decompression of a single stream, such as wxGzipInputStream.
272These can be found using wxFilterClassFactory::Find().
273
274For example, to list the contents of archive @e filename:
275
276@code
277auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
278
279if (in->IsOk())
280{
281 // look for a filter handler, e.g. for '.gz'
282 const wxFilterClassFactory *fcf;
283 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
284 if (fcf)
285 {
286 in.reset(fcf->NewStream(in.release()));
287 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
288 filename = fcf->PopExtension(filename);
289 }
290
291 // look for an archive handler, e.g. for '.zip' or '.tar'
292 const wxArchiveClassFactory *acf;
293 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
294 if (acf)
295 {
296 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
297 auto_ptr<wxArchiveEntry> entry;
298
299 // list the contents of the archive
300 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
301 std::wcout << entry->GetName().c_str() << "\n";
302 }
303 else
304 {
9a83f860 305 wxLogError(wxT("can't handle '%s'"), filename.c_str());
032e27aa
BP
306 }
307}
308@endcode
36c9828f
FM
309
310
36c9828f 311
4cbfec15 312@section overview_archive_noseek Archives on Non-Seekable Streams
032e27aa
BP
313
314In general, handling archives on non-seekable streams is done in the same way
315as for seekable streams, with a few caveats.
316
317The main limitation is that accessing entries randomly using
318wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
319accessed sequentially in the order they are stored within the archive.
320
321For each archive type, there will also be other limitations which will depend
322on the order the entries' meta-data is stored within the archive. These are not
323too difficult to deal with, and are outlined below.
324
4cbfec15 325@subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
032e27aa
BP
326
327When writing archives, some archive formats store the entry size before the
328entry's data (tar has this limitation, zip doesn't). In this case the entry's
329size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
330occurs.
331
332This is only an issue on non-seekable streams, since otherwise the archive
333output stream can seek back and fix up the header once the size of the entry is
334known.
335
336For generic programming, one way to handle this is to supply the size whenever
337it is known, and rely on the error message from the output stream when the
338operation is not supported.
339
4cbfec15 340@subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
032e27aa
BP
341
342Some archive formats do not store all an entry's meta-data before the entry's
343data (zip is an example). In this case, when reading from a non-seekable
344stream, wxArchiveInputStream::GetNextEntry() can only return a partially
345populated wxArchiveEntry object - not all the fields are set.
346
347The input stream then keeps a weak reference to the entry object and updates it
348when more meta-data becomes available. A weak reference is one that does not
349prevent you from deleting the wxArchiveEntry object - the input stream only
350attempts to update it if it is still around.
351
352The documentation for each archive entry type gives the details of what
353meta-data becomes available and when. For generic programming, when the worst
354case must be assumed, you can rely on all the fields of wxArchiveEntry being
4c51a665 355fully populated when GetNextEntry() returns, with the following exceptions:
032e27aa
BP
356
357@li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
358 been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
359 has been called.
360@li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
361 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
362 Eof() is @true.
363
364This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
365preserve entries' meta-data. No matter what order the meta-data occurs
366within the archive, the input stream will always have read it before the output
367stream must write it.
368
4cbfec15 369@subsection overview_archive_noseek_notifier wxArchiveNotifier
032e27aa
BP
370
371Notifier objects can be used to get a notification whenever an input stream
372updates a wxArchiveEntry object's data via the weak reference mechanism.
373
374Consider the following code which renames an entry in an archive. This is the
375usual way to modify an entry's meta-data, simply set the required field before
376writing it with wxArchiveOutputStream::CopyEntry():
377
378@code
379auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
380auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
381auto_ptr<wxArchiveEntry> entry;
382
383outarc->CopyArchiveMetaData(*arc);
384
385while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
386{
387 if (entry->GetName() == from)
388 entry->SetName(to);
389 if (!outarc->CopyEntry(entry.release(), *arc))
390 break;
391}
392
393bool success = arc->Eof() && outarc->Close();
394@endcode
395
396However, for non-seekable streams, this technique cannot be used for fields
397such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
398wxArchiveInputStream::GetNextEntry() returns.
399
400In this case a wxArchiveNotifier can be used:
401
402@code
403class MyNotifier : public wxArchiveNotifier
404{
405public:
406 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
407};
408@endcode
409
410The meta-data changes are done in your notifier's
411wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
412is called before CopyEntry():
413
414@code
415auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
416auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
417auto_ptr<wxArchiveEntry> entry;
418MyNotifier notifier;
419
420outarc->CopyArchiveMetaData(*arc);
421
422while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
423{
424 entry->SetNotifier(notifier);
425 if (!outarc->CopyEntry(entry.release(), *arc))
426 break;
427}
428
429bool success = arc->Eof() && outarc->Close();
430@endcode
431
432SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
433it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
434be called at least once, this technique always works even when it is not
435strictly necessary to use it. For example, changing the entry name can be done
436this way too and it works on seekable streams as well as non-seekable.
36c9828f 437
e0a47918 438*/
36c9828f 439