]> git.saurik.com Git - wxWidgets.git/blob - docs/doxygen/overviews/archive.h
document standard IDs only in one place, not two; bring the list up to date
[wxWidgets.git] / docs / doxygen / overviews / archive.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: archive.h
3 // Purpose: topic overview
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /**
10
11 @page overview_archive Archive Formats
12
13 The archive classes handle archive formats such as zip, tar, rar and cab.
14 Currently wxZip and wxTar classes are included.
15
16 For each archive type, there are the following classes (using zip here as an
17 example):
18
19 @li wxZipInputStream: Input stream
20 @li wxZipOutputStream: Output stream
21 @li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
22
23 There are also abstract wxArchive classes that can be used to write code that
24 can handle any of the archive types, see @ref overview_archive_generic.
25
26 Also see wxFileSystem for a higher level interface that can handle archive
27 files in a generic way.
28
29 The classes are designed to handle archives on both seekable streams such as
30 disk files, and non-seekable streams such as pipes and sockets (see
31 @ref overview_archive_noseek).
32
33 See also wxFileSystem.
34
35 @li @ref overview_archive_create
36 @li @ref overview_archive_extract
37 @li @ref overview_archive_modify
38 @li @ref overview_archive_byname
39 @li @ref overview_archive_generic
40 @li @ref overview_archive_noseek
41
42
43 <hr>
44
45
46 @section overview_archive_create Creating an Archive
47
48 Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
49 archive, then write the entry's data. Another call to PutNextEntry() closes the
50 current entry and begins the next. For example:
51
52 @code
53 wxFFileOutputStream out(_T("test.zip"));
54 wxZipOutputStream zip(out);
55 wxTextOutputStream txt(zip);
56 wxString sep(wxFileName::GetPathSeparator());
57
58 zip.PutNextEntry(_T("entry1.txt"));
59 txt << _T("Some text for entry1.txt\n");
60
61 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
62 txt << _T("Some text for subdir/entry2.txt\n");
63 @endcode
64
65 The name of each entry can be a full path, which makes it possible to store
66 entries in subdirectories.
67
68
69 @section overview_archive_extract Extracting an Archive
70
71 wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
72 containing the meta-data for the next entry in the archive (and gives away
73 ownership).
74
75 Reading from the input stream then returns the entry's data. Eof() becomes
76 @true after an attempt has been made to read past the end of the entry's data.
77
78 When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
79
80 @code
81 auto_ptr<wxZipEntry> entry;
82
83 wxFFileInputStream in(_T("test.zip"));
84 wxZipInputStream zip(in);
85
86 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
87 {
88 // access meta-data
89 wxString name = entry->GetName();
90 // read 'zip' to access the entry's data
91 }
92 @endcode
93
94
95
96 @section overview_archive_modify Modifying an Archive
97
98 To modify an existing archive, write a new copy of the archive to a new file,
99 making any necessary changes along the way and transferring any unchanged
100 entries using wxArchiveOutputStream::CopyEntry().
101
102 For archive types which compress entry data, CopyEntry() is likely to be much
103 more efficient than transferring the data using Read() and Write() since it
104 will copy them without decompressing and recompressing them.
105
106 In general modifications are not possible without rewriting the archive, though
107 it may be possible in some limited cases. Even then, rewriting the archive is
108 usually a better choice since a failure can be handled without losing the whole
109 archive. wxTempFileOutputStream can be helpful to do this.
110
111 For example to delete all entries matching the pattern "*.txt":
112
113 @code
114 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
115 wxTempFileOutputStream out(_T("test.zip"));
116
117 wxZipInputStream inzip(*in);
118 wxZipOutputStream outzip(out);
119
120 auto_ptr<wxZipEntry> entry;
121
122 // transfer any meta-data for the archive as a whole (the zip comment
123 // in the case of zip)
124 outzip.CopyArchiveMetaData(inzip);
125
126 // call CopyEntry for each entry except those matching the pattern
127 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
128 if (!entry->GetName().Matches(_T("*.txt")))
129 if (!outzip.CopyEntry(entry.release(), inzip))
130 break;
131
132 // close the input stream by releasing the pointer to it, do this
133 // before closing the output stream so that the file can be replaced
134 in.reset();
135
136 // you can check for success as follows
137 bool success = inzip.Eof() && outzip.Close() && out.Commit();
138 @endcode
139
140
141
142 @section overview_archive_byname Looking Up an Archive Entry by Name
143
144 Also see wxFileSystem for a higher level interface that is more convenient for
145 accessing archive entries by name.
146
147 To open just one entry in an archive, the most efficient way is to simply
148 search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
149 the required entry is found. This works both for archives on seekable and
150 non-seekable streams.
151
152 The format of filenames in the archive is likely to be different from the local
153 filename format. For example zips and tars use unix style names, with forward
154 slashes as the path separator, and absolute paths are not allowed. So if on
155 Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
156 back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
157 into the internal format and back has lost some information.
158
159 So to avoid ambiguity when searching for an entry matching a local name, it is
160 better to convert the local name to the archive's internal format and search
161 for that:
162
163 @code
164 auto_ptr<wxZipEntry> entry;
165
166 // convert the local name we are looking for into the internal format
167 wxString name = wxZipEntry::GetInternalName(localname);
168
169 // open the zip
170 wxFFileInputStream in(_T("test.zip"));
171 wxZipInputStream zip(in);
172
173 // call GetNextEntry() until the required internal name is found
174 do
175 {
176 entry.reset(zip.GetNextEntry());
177 }
178 while (entry.get() != NULL && entry->GetInternalName() != name);
179
180 if (entry.get() != NULL)
181 {
182 // read the entry's data...
183 }
184 @endcode
185
186 To access several entries randomly, it is most efficient to transfer the entire
187 catalogue of entries to a container such as a std::map or a wxHashMap; entries
188 looked up by name can then be opened using the
189 wxArchiveInputStream::OpenEntry() method.
190
191 @code
192 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
193 ZipCatalog::iterator it;
194 wxZipEntry *entry;
195 ZipCatalog cat;
196
197 // open the zip
198 wxFFileInputStream in(_T("test.zip"));
199 wxZipInputStream zip(in);
200
201 // load the zip catalog
202 while ((entry = zip.GetNextEntry()) != NULL)
203 {
204 wxZipEntry*& current = cat[entry->GetInternalName()];
205 // some archive formats can have multiple entries with the same name
206 // (e.g. tar) though it is an error in the case of zip
207 delete current;
208 current = entry;
209 }
210
211 // open an entry by name
212 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
213 {
214 zip.OpenEntry(*it->second);
215 // ... now read entry's data
216 }
217 @endcode
218
219 To open more than one entry simultaneously you need more than one underlying
220 stream on the same archive:
221
222 @code
223 // opening another entry without closing the first requires another
224 // input stream for the same file
225 wxFFileInputStream in2(_T("test.zip"));
226 wxZipInputStream zip2(in2);
227 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
228 zip2.OpenEntry(*it->second);
229 @endcode
230
231
232
233 @section overview_archive_generic Generic Archive Programming
234
235 Also see wxFileSystem for a higher level interface that can handle archive
236 files in a generic way.
237
238 The specific archive classes, such as the wxZip classes, inherit from the
239 following abstract classes which can be used to write code that can handle any
240 of the archive types:
241
242 @li wxArchiveInputStream: Input stream
243 @li wxArchiveOutputStream: Output stream
244 @li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
245
246 In order to be able to write generic code it's necessary to be able to create
247 instances of the classes without knowing which archive type is being used.
248
249 To allow this there is a class factory for each archive type, derived from
250 wxArchiveClassFactory, that can create the other classes.
251
252 For example, given wxArchiveClassFactory* factory, streams and entries can be
253 created like this:
254
255 @code
256 // create streams without knowing their type
257 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
258 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
259
260 // create an empty entry object
261 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
262 @endcode
263
264 For the factory itself, the static member wxArchiveClassFactory::Find() can be
265 used to find a class factory that can handle a given file extension or mime
266 type. For example, given @e filename:
267
268 @code
269 const wxArchiveClassFactory *factory;
270 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
271
272 if (factory)
273 stream = factory->NewStream(new wxFFileInputStream(filename));
274 @endcode
275
276 @e Find() does not give away ownership of the returned pointer, so it does not
277 need to be deleted.
278
279 There are similar class factories for the filter streams that handle the
280 compression and decompression of a single stream, such as wxGzipInputStream.
281 These can be found using wxFilterClassFactory::Find().
282
283 For example, to list the contents of archive @e filename:
284
285 @code
286 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
287
288 if (in->IsOk())
289 {
290 // look for a filter handler, e.g. for '.gz'
291 const wxFilterClassFactory *fcf;
292 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
293 if (fcf)
294 {
295 in.reset(fcf->NewStream(in.release()));
296 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
297 filename = fcf->PopExtension(filename);
298 }
299
300 // look for an archive handler, e.g. for '.zip' or '.tar'
301 const wxArchiveClassFactory *acf;
302 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
303 if (acf)
304 {
305 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
306 auto_ptr<wxArchiveEntry> entry;
307
308 // list the contents of the archive
309 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
310 std::wcout << entry->GetName().c_str() << "\n";
311 }
312 else
313 {
314 wxLogError(_T("can't handle '%s'"), filename.c_str());
315 }
316 }
317 @endcode
318
319
320
321 @section overview_archive_noseek Archives on Non-Seekable Streams
322
323 In general, handling archives on non-seekable streams is done in the same way
324 as for seekable streams, with a few caveats.
325
326 The main limitation is that accessing entries randomly using
327 wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
328 accessed sequentially in the order they are stored within the archive.
329
330 For each archive type, there will also be other limitations which will depend
331 on the order the entries' meta-data is stored within the archive. These are not
332 too difficult to deal with, and are outlined below.
333
334 @subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
335
336 When writing archives, some archive formats store the entry size before the
337 entry's data (tar has this limitation, zip doesn't). In this case the entry's
338 size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
339 occurs.
340
341 This is only an issue on non-seekable streams, since otherwise the archive
342 output stream can seek back and fix up the header once the size of the entry is
343 known.
344
345 For generic programming, one way to handle this is to supply the size whenever
346 it is known, and rely on the error message from the output stream when the
347 operation is not supported.
348
349 @subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
350
351 Some archive formats do not store all of an entry's meta-data before the entry's
352 data (zip is an example). In this case, when reading from a non-seekable
353 stream, wxArchiveInputStream::GetNextEntry() can only return a partially
354 populated wxArchiveEntry object - not all the fields are set.
355
356 The input stream then keeps a weak reference to the entry object and updates it
357 when more meta-data becomes available. A weak reference is one that does not
358 prevent you from deleting the wxArchiveEntry object - the input stream only
359 attempts to update it if it is still around.
360
361 The documentation for each archive entry type gives the details of what
362 meta-data becomes available and when. For generic programming, when the worst
363 case must be assumed, you can rely on all the fields of wxArchiveEntry being
364 fully populated when GetNextEntry() returns, with the following exceptions:
365
366 @li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
367 been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
368 has been called.
369 @li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
370 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
371 Eof() is @true.
372
373 This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
374 preserve entries' meta-data. No matter what order the meta-data occurs
375 within the archive, the input stream will always have read it before the output
376 stream must write it.
377
378 @subsection overview_archive_noseek_notifier wxArchiveNotifier
379
380 Notifier objects can be used to get a notification whenever an input stream
381 updates a wxArchiveEntry object's data via the weak reference mechanism.
382
383 Consider the following code which renames an entry in an archive. This is the
384 usual way to modify an entry's meta-data: simply set the required field before
385 writing it with wxArchiveOutputStream::CopyEntry():
386
387 @code
388 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
389 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
390 auto_ptr<wxArchiveEntry> entry;
391
392 outarc->CopyArchiveMetaData(*arc);
393
394 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
395 {
396 if (entry->GetName() == from)
397 entry->SetName(to);
398 if (!outarc->CopyEntry(entry.release(), *arc))
399 break;
400 }
401
402 bool success = arc->Eof() && outarc->Close();
403 @endcode
404
405 However, for non-seekable streams, this technique cannot be used for fields
406 such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
407 wxArchiveInputStream::GetNextEntry() returns.
408
409 In this case a wxArchiveNotifier can be used:
410
411 @code
412 class MyNotifier : public wxArchiveNotifier
413 {
414 public:
415 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
416 };
417 @endcode
418
419 The meta-data changes are done in your notifier's
420 wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
421 is called before CopyEntry():
422
423 @code
424 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
425 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
426 auto_ptr<wxArchiveEntry> entry;
427 MyNotifier notifier;
428
429 outarc->CopyArchiveMetaData(*arc);
430
431 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
432 {
433 entry->SetNotifier(notifier);
434 if (!outarc->CopyEntry(entry.release(), *arc))
435 break;
436 }
437
438 bool success = arc->Eof() && outarc->Close();
439 @endcode
440
441 SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
442 it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
443 be called at least once, this technique always works even when it is not
444 strictly necessary to use it. For example, changing the entry name can be done
445 this way too and it works on seekable streams as well as non-seekable.
446
447 */
448