]> git.saurik.com Git - wxWidgets.git/blame_incremental - docs/doxygen/overviews/archive.h
Merge the new wxWebView classes from the SOC2011_WEBVIEW branch.
[wxWidgets.git] / docs / doxygen / overviews / archive.h
... / ...
CommitLineData
1/////////////////////////////////////////////////////////////////////////////
2// Name: archive.h
3// Purpose: topic overview
4// Author: wxWidgets team
5// RCS-ID: $Id$
6// Licence: wxWindows licence
7/////////////////////////////////////////////////////////////////////////////
8
9/**
10
11@page overview_archive Archive Formats
12
13The archive classes handle archive formats such as zip, tar, rar and cab.
14Currently wxZip, wxTar and wxZlib classes are included.
15
16For each archive type, there are the following classes (using zip here as an
17example):
18
19@li wxZipInputStream: Input stream
20@li wxZipOutputStream: Output stream
21@li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
22
23There are also abstract wxArchive classes that can be used to write code that
24can handle any of the archive types, see @ref overview_archive_generic.
25
26Also see wxFileSystem for a higher level interface that can handle archive
27files in a generic way.
28
29The classes are designed to handle archives on both seekable streams such as
30disk files, or non-seekable streams such as pipes and sockets (see
31@ref overview_archive_noseek).
32
33See also wxFileSystem.
34
35@li @ref overview_archive_create
36@li @ref overview_archive_extract
37@li @ref overview_archive_modify
38@li @ref overview_archive_byname
39@li @ref overview_archive_generic
40@li @ref overview_archive_noseek
41
42
43<hr>
44
45
46@section overview_archive_create Creating an Archive
47
48Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
49archive, then write the entry's data. Another call to PutNextEntry() closes the
50current entry and begins the next. For example:
51
52@code
53wxFFileOutputStream out(wxT("test.zip"));
54wxZipOutputStream zip(out);
55wxTextOutputStream txt(zip);
56wxString sep(wxFileName::GetPathSeparator());
57
58zip.PutNextEntry(wxT("entry1.txt"));
59txt << wxT("Some text for entry1.txt\n");
60
61zip.PutNextEntry(wxT("subdir") + sep + wxT("entry2.txt"));
62txt << wxT("Some text for subdir/entry2.txt\n");
63@endcode
64
65The name of each entry can be a full path, which makes it possible to store
66entries in subdirectories.
67
68
69@section overview_archive_extract Extracting an Archive
70
71wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
72containing the meta-data for the next entry in the archive (and gives away
73ownership).
74
75Reading from the input stream then returns the entry's data. Eof() becomes
76@true after an attempt has been made to read past the end of the entry's data.
77
78When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
79
80@code
81auto_ptr<wxZipEntry> entry;
82
83wxFFileInputStream in(wxT("test.zip"));
84wxZipInputStream zip(in);
85
86while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
87{
88 // access meta-data
89 wxString name = entry->GetName();
90 // read 'zip' to access the entry's data
91}
92@endcode
93
94
95
96@section overview_archive_modify Modifying an Archive
97
98To modify an existing archive, write a new copy of the archive to a new file,
99making any necessary changes along the way and transferring any unchanged
100entries using wxArchiveOutputStream::CopyEntry().
101
102For archive types which compress entry data, CopyEntry() is likely to be much
103more efficient than transferring the data using Read() and Write() since it
104will copy them without decompressing and recompressing them.
105
106In general modifications are not possible without rewriting the archive, though
107it may be possible in some limited cases. Even then, rewriting the archive is
108usually a better choice since a failure can be handled without losing the whole
109archive. wxTempFileOutputStream can be helpful to do this.
110
111For example to delete all entries matching the pattern "*.txt":
112
113@code
114auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(wxT("test.zip")));
115wxTempFileOutputStream out(wxT("test.zip"));
116
117wxZipInputStream inzip(*in);
118wxZipOutputStream outzip(out);
119
120auto_ptr<wxZipEntry> entry;
121
122// transfer any meta-data for the archive as a whole (the zip comment
123// in the case of zip)
124outzip.CopyArchiveMetaData(inzip);
125
126// call CopyEntry for each entry except those matching the pattern
127while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
128 if (!entry->GetName().Matches(wxT("*.txt")))
129 if (!outzip.CopyEntry(entry.release(), inzip))
130 break;
131
132// close the input stream by releasing the pointer to it, do this
133// before closing the output stream so that the file can be replaced
134in.reset();
135
136// you can check for success as follows
137bool success = inzip.Eof() && outzip.Close() && out.Commit();
138@endcode
139
140
141
142@section overview_archive_byname Looking Up an Archive Entry by Name
143
144Also see wxFileSystem for a higher level interface that is more convenient for
145accessing archive entries by name.
146
147To open just one entry in an archive, the most efficient way is to simply
148search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
149the required entry is found. This works both for archives on seekable and
150non-seekable streams.
151
152The format of filenames in the archive is likely to be different from the local
153filename format. For example zips and tars use unix style names, with forward
154slashes as the path separator, and absolute paths are not allowed. So if on
155Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
156back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
157into the internal format and back has lost some information.
158
159So to avoid ambiguity when searching for an entry matching a local name, it is
160better to convert the local name to the archive's internal format and search
161for that:
162
163@code
164auto_ptr<wxZipEntry> entry;
165
166// convert the local name we are looking for into the internal format
167wxString name = wxZipEntry::GetInternalName(localname);
168
169// open the zip
170wxFFileInputStream in(wxT("test.zip"));
171wxZipInputStream zip(in);
172
173// call GetNextEntry() until the required internal name is found
174do
175{
176 entry.reset(zip.GetNextEntry());
177}
178while (entry.get() != NULL && entry->GetInternalName() != name);
179
180if (entry.get() != NULL)
181{
182 // read the entry's data...
183}
184@endcode
185
186To access several entries randomly, it is most efficient to transfer the entire
187catalogue of entries to a container such as a std::map or a wxHashMap; entries
188looked up by name can then be opened using the
189wxArchiveInputStream::OpenEntry() method.
190
191@code
192WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
193ZipCatalog::iterator it;
194wxZipEntry *entry;
195ZipCatalog cat;
196
197// open the zip
198wxFFileInputStream in(wxT("test.zip"));
199wxZipInputStream zip(in);
200
201// load the zip catalog
202while ((entry = zip.GetNextEntry()) != NULL)
203{
204 wxZipEntry*& current = cat[entry->GetInternalName()];
205 // some archive formats can have multiple entries with the same name
206 // (e.g. tar) though it is an error in the case of zip
207 delete current;
208 current = entry;
209}
210
211// open an entry by name
212if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
213{
214 zip.OpenEntry(*it->second);
215 // ... now read entry's data
216}
217@endcode
218
219To open more than one entry simultaneously you need more than one underlying
220stream on the same archive:
221
222@code
223// opening another entry without closing the first requires another
224// input stream for the same file
225wxFFileInputStream in2(wxT("test.zip"));
226wxZipInputStream zip2(in2);
227if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
228 zip2.OpenEntry(*it->second);
229@endcode
230
231
232
233@section overview_archive_generic Generic Archive Programming
234
235Also see wxFileSystem for a higher level interface that can handle archive
236files in a generic way.
237
238The specific archive classes, such as the wxZip classes, inherit from the
239following abstract classes which can be used to write code that can handle any
240of the archive types:
241
242@li wxArchiveInputStream: Input stream
243@li wxArchiveOutputStream: Output stream
244@li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
245
246In order to be able to write generic code it's necessary to be able to create
247instances of the classes without knowing which archive type is being used.
248
249To allow this there is a class factory for each archive type, derived from
250wxArchiveClassFactory, that can create the other classes.
251
252For example, given wxArchiveClassFactory* factory, streams and entries can be
253created like this:
254
255@code
256// create streams without knowing their type
257auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
258auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
259
260// create an empty entry object
261auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
262@endcode
263
264For the factory itself, the static member wxArchiveClassFactory::Find() can be
265used to find a class factory that can handle a given file extension or mime
266type. For example, given @e filename:
267
268@code
269const wxArchiveClassFactory *factory;
270factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
271
272if (factory)
273 stream = factory->NewStream(new wxFFileInputStream(filename));
274@endcode
275
276@e Find() does not give away ownership of the returned pointer, so it does not
277need to be deleted.
278
279There are similar class factories for the filter streams that handle the
280compression and decompression of a single stream, such as wxGzipInputStream.
281These can be found using wxFilterClassFactory::Find().
282
283For example, to list the contents of archive @e filename:
284
285@code
286auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
287
288if (in->IsOk())
289{
290 // look for a filter handler, e.g. for '.gz'
291 const wxFilterClassFactory *fcf;
292 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
293 if (fcf)
294 {
295 in.reset(fcf->NewStream(in.release()));
296 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
297 filename = fcf->PopExtension(filename);
298 }
299
300 // look for an archive handler, e.g. for '.zip' or '.tar'
301 const wxArchiveClassFactory *acf;
302 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
303 if (acf)
304 {
305 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
306 auto_ptr<wxArchiveEntry> entry;
307
308 // list the contents of the archive
309 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
310 std::wcout << entry->GetName().c_str() << "\n";
311 }
312 else
313 {
314 wxLogError(wxT("can't handle '%s'"), filename.c_str());
315 }
316}
317@endcode
318
319
320
321@section overview_archive_noseek Archives on Non-Seekable Streams
322
323In general, handling archives on non-seekable streams is done in the same way
324as for seekable streams, with a few caveats.
325
326The main limitation is that accessing entries randomly using
327wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
328accessed sequentially in the order they are stored within the archive.
329
330For each archive type, there will also be other limitations which will depend
331on the order the entries' meta-data is stored within the archive. These are not
332too difficult to deal with, and are outlined below.
333
334@subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
335
336When writing archives, some archive formats store the entry size before the
337entry's data (tar has this limitation, zip doesn't). In this case the entry's
338size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
339occurs.
340
341This is only an issue on non-seekable streams, since otherwise the archive
342output stream can seek back and fix up the header once the size of the entry is
343known.
344
345For generic programming, one way to handle this is to supply the size whenever
346it is known, and rely on the error message from the output stream when the
347operation is not supported.
348
349@subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
350
351Some archive formats do not store all of an entry's meta-data before the entry's
352data (zip is an example). In this case, when reading from a non-seekable
353stream, wxArchiveInputStream::GetNextEntry() can only return a partially
354populated wxArchiveEntry object - not all the fields are set.
355
356The input stream then keeps a weak reference to the entry object and updates it
357when more meta-data becomes available. A weak reference is one that does not
358prevent you from deleting the wxArchiveEntry object - the input stream only
359attempts to update it if it is still around.
360
361The documentation for each archive entry type gives the details of what
362meta-data becomes available and when. For generic programming, when the worst
363case must be assumed, you can rely on all the fields of wxArchiveEntry being
364fully populated when GetNextEntry() returns, with the following exceptions:
365
366@li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
367 been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
368 has been called.
369@li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
370 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
371 Eof() is @true.
372
373This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
374preserve entries' meta-data. No matter what order the meta-data occurs
375within the archive, the input stream will always have read it before the output
376stream must write it.
377
378@subsection overview_archive_noseek_notifier wxArchiveNotifier
379
380Notifier objects can be used to get a notification whenever an input stream
381updates a wxArchiveEntry object's data via the weak reference mechanism.
382
383Consider the following code which renames an entry in an archive. This is the
384usual way to modify an entry's meta-data: simply set the required field before
385writing it with wxArchiveOutputStream::CopyEntry():
386
387@code
388auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
389auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
390auto_ptr<wxArchiveEntry> entry;
391
392outarc->CopyArchiveMetaData(*arc);
393
394while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
395{
396 if (entry->GetName() == from)
397 entry->SetName(to);
398 if (!outarc->CopyEntry(entry.release(), *arc))
399 break;
400}
401
402bool success = arc->Eof() && outarc->Close();
403@endcode
404
405However, for non-seekable streams, this technique cannot be used for fields
406such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
407wxArchiveInputStream::GetNextEntry() returns.
408
409In this case a wxArchiveNotifier can be used:
410
411@code
412class MyNotifier : public wxArchiveNotifier
413{
414public:
415 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
416};
417@endcode
418
419The meta-data changes are done in your notifier's
420wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
421is called before CopyEntry():
422
423@code
424auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
425auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
426auto_ptr<wxArchiveEntry> entry;
427MyNotifier notifier;
428
429outarc->CopyArchiveMetaData(*arc);
430
431while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
432{
433 entry->SetNotifier(notifier);
434 if (!outarc->CopyEntry(entry.release(), *arc))
435 break;
436}
437
438bool success = arc->Eof() && outarc->Close();
439@endcode
440
441SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
442it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
443be called at least once, this technique always works even when it is not
444strictly necessary to use it. For example, changing the entry name can be done
445this way too and it works on seekable streams as well as non-seekable.
446
447*/
448