]> git.saurik.com Git - wxWidgets.git/blob - docs/doxygen/overviews/archive.h
Fix broken and missing DataView interface items for Phoenix
[wxWidgets.git] / docs / doxygen / overviews / archive.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: archive.h
3 // Purpose: topic overview
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows licence
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10
11 @page overview_archive Archive Formats
12
13 @tableofcontents
14
15 The archive classes handle archive formats such as zip, tar, rar and cab.
16 Currently wxZip, wxTar and wxZlib classes are included.
17
18 For each archive type, there are the following classes (using zip here as an
19 example):
20
21 @li wxZipInputStream: Input stream
22 @li wxZipOutputStream: Output stream
23 @li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
24
25 There are also abstract wxArchive classes that can be used to write code that
26 can handle any of the archive types, see @ref overview_archive_generic.
27
28 Also see wxFileSystem for a higher level interface that can handle archive
29 files in a generic way.
30
31 The classes are designed to handle archives on both seekable streams such as
32 disk files, or non-seekable streams such as pipes and sockets (see
33 @ref overview_archive_noseek).
34
35
36
37 @section overview_archive_create Creating an Archive
38
39 Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
40 archive, then write the entry's data. Another call to PutNextEntry() closes the
41 current entry and begins the next. For example:
42
43 @code
44 wxFFileOutputStream out(wxT("test.zip"));
45 wxZipOutputStream zip(out);
46 wxTextOutputStream txt(zip);
47 wxString sep(wxFileName::GetPathSeparator());
48
49 zip.PutNextEntry(wxT("entry1.txt"));
50 txt << wxT("Some text for entry1.txt\n");
51
52 zip.PutNextEntry(wxT("subdir") + sep + wxT("entry2.txt"));
53 txt << wxT("Some text for subdir/entry2.txt\n");
54 @endcode
55
56 The name of each entry can be a full path, which makes it possible to store
57 entries in subdirectories.
58
59
60 @section overview_archive_extract Extracting an Archive
61
62 wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
63 containing the meta-data for the next entry in the archive (and gives away
64 ownership).
65
66 Reading from the input stream then returns the entry's data. Eof() becomes
67 @true after an attempt has been made to read past the end of the entry's data.
68
69 When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
70
71 @code
72 auto_ptr<wxZipEntry> entry;
73
74 wxFFileInputStream in(wxT("test.zip"));
75 wxZipInputStream zip(in);
76
77 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
78 {
79 // access meta-data
80 wxString name = entry->GetName();
81 // read 'zip' to access the entry's data
82 }
83 @endcode
84
85
86
87 @section overview_archive_modify Modifying an Archive
88
89 To modify an existing archive, write a new copy of the archive to a new file,
90 making any necessary changes along the way and transferring any unchanged
91 entries using wxArchiveOutputStream::CopyEntry().
92
93 For archive types which compress entry data, CopyEntry() is likely to be much
94 more efficient than transferring the data using Read() and Write() since it
95 will copy them without decompressing and recompressing them.
96
97 In general modifications are not possible without rewriting the archive, though
98 it may be possible in some limited cases. Even then, rewriting the archive is
99 usually a better choice since a failure can be handled without losing the whole
100 archive. wxTempFileOutputStream can be helpful to do this.
101
102 For example, to delete all entries matching the pattern "*.txt":
103
104 @code
105 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(wxT("test.zip")));
106 wxTempFileOutputStream out(wxT("test.zip"));
107
108 wxZipInputStream inzip(*in);
109 wxZipOutputStream outzip(out);
110
111 auto_ptr<wxZipEntry> entry;
112
113 // transfer any meta-data for the archive as a whole (the zip comment
114 // in the case of zip)
115 outzip.CopyArchiveMetaData(inzip);
116
117 // call CopyEntry for each entry except those matching the pattern
118 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
119 if (!entry->GetName().Matches(wxT("*.txt")))
120 if (!outzip.CopyEntry(entry.release(), inzip))
121 break;
122
123 // close the input stream by releasing the pointer to it, do this
124 // before closing the output stream so that the file can be replaced
125 in.reset();
126
127 // you can check for success as follows
128 bool success = inzip.Eof() && outzip.Close() && out.Commit();
129 @endcode
130
131
132
133 @section overview_archive_byname Looking Up an Archive Entry by Name
134
135 Also see wxFileSystem for a higher level interface that is more convenient for
136 accessing archive entries by name.
137
138 To open just one entry in an archive, the most efficient way is to simply
139 search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
140 the required entry is found. This works both for archives on seekable and
141 non-seekable streams.
142
143 The format of filenames in the archive is likely to be different from the local
144 filename format. For example zips and tars use unix style names, with forward
145 slashes as the path separator, and absolute paths are not allowed. So if on
146 Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
147 back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
148 into the internal format and back has lost some information.
149
150 So to avoid ambiguity when searching for an entry matching a local name, it is
151 better to convert the local name to the archive's internal format and search
152 for that:
153
154 @code
155 auto_ptr<wxZipEntry> entry;
156
157 // convert the local name we are looking for into the internal format
158 wxString name = wxZipEntry::GetInternalName(localname);
159
160 // open the zip
161 wxFFileInputStream in(wxT("test.zip"));
162 wxZipInputStream zip(in);
163
164 // call GetNextEntry() until the required internal name is found
165 do
166 {
167 entry.reset(zip.GetNextEntry());
168 }
169 while (entry.get() != NULL && entry->GetInternalName() != name);
170
171 if (entry.get() != NULL)
172 {
173 // read the entry's data...
174 }
175 @endcode
176
177 To access several entries randomly, it is most efficient to transfer the entire
178 catalogue of entries to a container such as a std::map or a wxHashMap; entries
179 looked up by name can then be opened using the
180 wxArchiveInputStream::OpenEntry() method.
181
182 @code
183 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
184 ZipCatalog::iterator it;
185 wxZipEntry *entry;
186 ZipCatalog cat;
187
188 // open the zip
189 wxFFileInputStream in(wxT("test.zip"));
190 wxZipInputStream zip(in);
191
192 // load the zip catalog
193 while ((entry = zip.GetNextEntry()) != NULL)
194 {
195 wxZipEntry*& current = cat[entry->GetInternalName()];
196 // some archive formats can have multiple entries with the same name
197 // (e.g. tar) though it is an error in the case of zip
198 delete current;
199 current = entry;
200 }
201
202 // open an entry by name
203 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
204 {
205 zip.OpenEntry(*it->second);
206 // ... now read entry's data
207 }
208 @endcode
209
210 To open more than one entry simultaneously you need more than one underlying
211 stream on the same archive:
212
213 @code
214 // opening another entry without closing the first requires another
215 // input stream for the same file
216 wxFFileInputStream in2(wxT("test.zip"));
217 wxZipInputStream zip2(in2);
218 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
219 zip2.OpenEntry(*it->second);
220 @endcode
221
222
223
224 @section overview_archive_generic Generic Archive Programming
225
226 Also see wxFileSystem for a higher level interface that can handle archive
227 files in a generic way.
228
229 The specific archive classes, such as the wxZip classes, inherit from the
230 following abstract classes which can be used to write code that can handle any
231 of the archive types:
232
233 @li wxArchiveInputStream: Input stream
234 @li wxArchiveOutputStream: Output stream
235 @li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
236
237 In order to be able to write generic code it's necessary to be able to create
238 instances of the classes without knowing which archive type is being used.
239
240 To allow this there is a class factory for each archive type, derived from
241 wxArchiveClassFactory, that can create the other classes.
242
243 For example, given wxArchiveClassFactory* factory, streams and entries can be
244 created like this:
245
246 @code
247 // create streams without knowing their type
248 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
249 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
250
251 // create an empty entry object
252 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
253 @endcode
254
255 For the factory itself, the static member wxArchiveClassFactory::Find() can be
256 used to find a class factory that can handle a given file extension or mime
257 type. For example, given @e filename:
258
259 @code
260 const wxArchiveClassFactory *factory;
261 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
262
263 if (factory)
264 stream = factory->NewStream(new wxFFileInputStream(filename));
265 @endcode
266
267 @e Find() does not give away ownership of the returned pointer, so it does not
268 need to be deleted.
269
270 There are similar class factories for the filter streams that handle the
271 compression and decompression of a single stream, such as wxGzipInputStream.
272 These can be found using wxFilterClassFactory::Find().
273
274 For example, to list the contents of archive @e filename:
275
276 @code
277 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
278
279 if (in->IsOk())
280 {
281 // look for a filter handler, e.g. for '.gz'
282 const wxFilterClassFactory *fcf;
283 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
284 if (fcf)
285 {
286 in.reset(fcf->NewStream(in.release()));
287 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
288 filename = fcf->PopExtension(filename);
289 }
290
291 // look for an archive handler, e.g. for '.zip' or '.tar'
292 const wxArchiveClassFactory *acf;
293 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
294 if (acf)
295 {
296 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
297 auto_ptr<wxArchiveEntry> entry;
298
299 // list the contents of the archive
300 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
301 std::wcout << entry->GetName().c_str() << "\n";
302 }
303 else
304 {
305 wxLogError(wxT("can't handle '%s'"), filename.c_str());
306 }
307 }
308 @endcode
309
310
311
312 @section overview_archive_noseek Archives on Non-Seekable Streams
313
314 In general, handling archives on non-seekable streams is done in the same way
315 as for seekable streams, with a few caveats.
316
317 The main limitation is that accessing entries randomly using
318 wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
319 accessed sequentially in the order they are stored within the archive.
320
321 For each archive type, there will also be other limitations which will depend
322 on the order the entries' meta-data is stored within the archive. These are not
323 too difficult to deal with, and are outlined below.
324
325 @subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
326
327 When writing archives, some archive formats store the entry size before the
328 entry's data (tar has this limitation, zip doesn't). In this case the entry's
329 size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
330 occurs.
331
332 This is only an issue on non-seekable streams, since otherwise the archive
333 output stream can seek back and fix up the header once the size of the entry is
334 known.
335
336 For generic programming, one way to handle this is to supply the size whenever
337 it is known, and rely on the error message from the output stream when the
338 operation is not supported.
339
340 @subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
341
342 Some archive formats do not store all of an entry's meta-data before the entry's
343 data (zip is an example). In this case, when reading from a non-seekable
344 stream, wxArchiveInputStream::GetNextEntry() can only return a partially
345 populated wxArchiveEntry object - not all the fields are set.
346
347 The input stream then keeps a weak reference to the entry object and updates it
348 when more meta-data becomes available. A weak reference is one that does not
349 prevent you from deleting the wxArchiveEntry object - the input stream only
350 attempts to update it if it is still around.
351
352 The documentation for each archive entry type gives the details of what
353 meta-data becomes available and when. For generic programming, when the worst
354 case must be assumed, you can rely on all the fields of wxArchiveEntry being
355 fully populated when GetNextEntry() returns, with the following exceptions:
356
357 @li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
358 been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
359 has been called.
360 @li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
361 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
362 Eof() is @true.
363
364 This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
365 preserve entries' meta-data. No matter what order the meta-data occurs
366 within the archive, the input stream will always have read it before the output
367 stream must write it.
368
369 @subsection overview_archive_noseek_notifier wxArchiveNotifier
370
371 Notifier objects can be used to get a notification whenever an input stream
372 updates a wxArchiveEntry object's data via the weak reference mechanism.
373
374 Consider the following code which renames an entry in an archive. This is the
375 usual way to modify an entry's meta-data: simply set the required field before
376 writing it with wxArchiveOutputStream::CopyEntry():
377
378 @code
379 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
380 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
381 auto_ptr<wxArchiveEntry> entry;
382
383 outarc->CopyArchiveMetaData(*arc);
384
385 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
386 {
387 if (entry->GetName() == from)
388 entry->SetName(to);
389 if (!outarc->CopyEntry(entry.release(), *arc))
390 break;
391 }
392
393 bool success = arc->Eof() && outarc->Close();
394 @endcode
395
396 However, for non-seekable streams, this technique cannot be used for fields
397 such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
398 wxArchiveInputStream::GetNextEntry() returns.
399
400 In this case a wxArchiveNotifier can be used:
401
402 @code
403 class MyNotifier : public wxArchiveNotifier
404 {
405 public:
406 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
407 };
408 @endcode
409
410 The meta-data changes are done in your notifier's
411 wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
412 is called before CopyEntry():
413
414 @code
415 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
416 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
417 auto_ptr<wxArchiveEntry> entry;
418 MyNotifier notifier;
419
420 outarc->CopyArchiveMetaData(*arc);
421
422 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
423 {
424 entry->SetNotifier(notifier);
425 if (!outarc->CopyEntry(entry.release(), *arc))
426 break;
427 }
428
429 bool success = arc->Eof() && outarc->Close();
430 @endcode
431
432 SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
433 it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
434 be called at least once, this technique always works even when it is not
435 strictly necessary to use it. For example, changing the entry name can be done
436 this way too and it works on seekable streams as well as non-seekable.
437
438 */
439