]> git.saurik.com Git - wxWidgets.git/blame - docs/doxygen/overviews/archive.h
Removed some remaining '#' references.
[wxWidgets.git] / docs / doxygen / overviews / archive.h
CommitLineData
15b6757b 1/////////////////////////////////////////////////////////////////////////////
98ba1eee 2// Name: archive.h
15b6757b
FM
3// Purpose: topic overview
4// Author: wxWidgets team
5// RCS-ID: $Id$
6// Licence: wxWindows license
7/////////////////////////////////////////////////////////////////////////////
8
9/*!
36c9828f 10
e0a47918 11 @page overview_arc Archive formats such as zip
36c9828f 12
15b6757b 13 The archive classes handle archive formats such as zip, tar, rar and cab.
98ba1eee 14 Currently wxZip and wxTar classes are included.
e0a47918 15
15b6757b
FM
16 For each archive type, there are the following classes (using zip here
17 as an example):
36c9828f 18
e0a47918
FM
19 @li wxZipInputStream: input stream
20 @li wxZipOutputStream: output stream
21 @li wxZipEntry: holds the meta-data for an entry (e.g. filename, timestamp, etc.)
36c9828f 22
15b6757b 23 There are also abstract wxArchive classes that can be used to write code
e0a47918
FM
24 that can handle any of the archive types, see @ref overview_arc_generic.
25
98ba1eee 26 Also see wxFileSystem for a higher level interface that
15b6757b 27 can handle archive files in a generic way.
e0a47918 28
15b6757b
FM
29 The classes are designed to handle archives on both seekable streams such
30 as disk files, and non-seekable streams such as pipes and sockets
e0a47918
FM
31 (see @ref overview_arc_noseek).
32
98ba1eee 33 See also wxFileSystem.
36c9828f 34
e0a47918
FM
35 @li @ref overview_arc_create
36 @li @ref overview_arc_extract
37 @li @ref overview_arc_modify
38 @li @ref overview_arc_byname
39 @li @ref overview_arc_generic
40 @li @ref overview_arc_noseek
36c9828f
FM
41
42
e0a47918 43 <hr>
36c9828f 44
e0a47918
FM
45
46 @section overview_arc_create Creating an archive
47
98ba1eee
FM
48 Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the archive,
49 then write the entry's data.
15b6757b
FM
50 Another call to PutNextEntry() closes the current entry and begins the next.
51 For example:
36c9828f 52
15b6757b 53 @code
e0a47918 54 wxFFileOutputStream out(_T("test.zip"));
15b6757b
FM
55 wxZipOutputStream zip(out);
56 wxTextOutputStream txt(zip);
57 wxString sep(wxFileName::GetPathSeparator());
36c9828f 58
15b6757b 59 zip.PutNextEntry(_T("entry1.txt"));
98ba1eee 60 txt << _T("Some text for entry1.txt\n");
36c9828f 61
15b6757b 62 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
98ba1eee 63 txt << _T("Some text for subdir/entry2.txt\n");
15b6757b 64 @endcode
36c9828f 65
15b6757b
FM
66 The name of each entry can be a full path, which makes it possible to
67 store entries in subdirectories.
36c9828f
FM
68
69
e0a47918
FM
70 @section overview_arc_extract Extracting an archive
71
98ba1eee
FM
72 wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object containing the
73 meta-data for the next entry in the archive (and gives away ownership).
e0a47918
FM
74
75 Reading from the input stream then returns the entry's data.
76 Eof() becomes @true after an attempt has been made to read past the end of the entry's data.
36c9828f 77
15b6757b 78 When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
36c9828f 79
15b6757b 80 @code
e0a47918 81 auto_ptr<wxZipEntry> entry;
36c9828f 82
15b6757b
FM
83 wxFFileInputStream in(_T("test.zip"));
84 wxZipInputStream zip(in);
36c9828f 85
98ba1eee 86 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
15b6757b
FM
87 {
88 // access meta-data
98ba1eee 89 wxString name = entry->GetName();
15b6757b
FM
90 // read 'zip' to access the entry's data
91 }
92 @endcode
36c9828f
FM
93
94
95
e0a47918 96 @section overview_arc_modify Modifying an archive
36c9828f 97
15b6757b
FM
98 To modify an existing archive, write a new copy of the archive to a new file,
99 making any necessary changes along the way and transferring any unchanged
98ba1eee 100 entries using wxArchiveOutputStream::CopyEntry().
e0a47918 101
15b6757b
FM
102 For archive types which compress entry data, CopyEntry() is likely to be
103 much more efficient than transferring the data using Read() and Write()
104 since it will copy them without decompressing and recompressing them.
e0a47918 105
15b6757b
FM
106 In general modifications are not possible without rewriting the archive,
107 though it may be possible in some limited cases. Even then, rewriting the
108 archive is usually a better choice since a failure can be handled without
98ba1eee 109 losing the whole archive. wxTempFileOutputStream can be helpful to do this.
e0a47918 110
15b6757b 111 For example to delete all entries matching the pattern "*.txt":
36c9828f 112
15b6757b 113 @code
e0a47918 114 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
15b6757b 115 wxTempFileOutputStream out(_T("test.zip"));
36c9828f 116
15b6757b
FM
117 wxZipInputStream inzip(*in);
118 wxZipOutputStream outzip(out);
36c9828f 119
e0a47918 120 auto_ptr<wxZipEntry> entry;
36c9828f 121
15b6757b
FM
122 // transfer any meta-data for the archive as a whole (the zip comment
123 // in the case of zip)
124 outzip.CopyArchiveMetaData(inzip);
36c9828f 125
15b6757b 126 // call CopyEntry for each entry except those matching the pattern
98ba1eee
FM
127 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
128 if (!entry->GetName().Matches(_T("*.txt")))
15b6757b
FM
129 if (!outzip.CopyEntry(entry.release(), inzip))
130 break;
36c9828f 131
15b6757b
FM
132 // close the input stream by releasing the pointer to it, do this
133 // before closing the output stream so that the file can be replaced
134 in.reset();
36c9828f 135
15b6757b
FM
136 // you can check for success as follows
137 bool success = inzip.Eof() && outzip.Close() && out.Commit();
138 @endcode
36c9828f
FM
139
140
141
e0a47918 142 @section overview_arc_byname Looking up an archive entry by name
36c9828f 143
98ba1eee 144 Also see wxFileSystem for a higher level interface that is
15b6757b 145 more convenient for accessing archive entries by name.
e0a47918 146
15b6757b 147 To open just one entry in an archive, the most efficient way is
98ba1eee
FM
148 to simply search for it linearly by calling wxArchiveInputStream::GetNextEntry()
149 until the required entry is found. This works both for archives on seekable and
15b6757b 150 non-seekable streams.
e0a47918 151
15b6757b
FM
152 The format of filenames in the archive is likely to be different
153 from the local filename format. For example zips and tars use
154 unix style names, with forward slashes as the path separator,
155 and absolute paths are not allowed. So if on Windows the file
98ba1eee
FM
156 "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry back
157 wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT".
158 The conversion into the internal format and back has lost some information.
e0a47918 159
15b6757b
FM
160 So to avoid ambiguity when searching for an entry matching a local name,
161 it is better to convert the local name to the archive's internal format
162 and search for that:
36c9828f 163
15b6757b 164 @code
e0a47918 165 auto_ptr<wxZipEntry> entry;
36c9828f 166
15b6757b
FM
167 // convert the local name we are looking for into the internal format
168 wxString name = wxZipEntry::GetInternalName(localname);
36c9828f 169
15b6757b
FM
170 // open the zip
171 wxFFileInputStream in(_T("test.zip"));
172 wxZipInputStream zip(in);
36c9828f 173
15b6757b
FM
174 // call GetNextEntry() until the required internal name is found
175 do {
176 entry.reset(zip.GetNextEntry());
177 }
98ba1eee 178 while (entry.get() != NULL && entry->GetInternalName() != name);
36c9828f 179
98ba1eee 180 if (entry.get() != NULL) {
15b6757b
FM
181 // read the entry's data...
182 }
183 @endcode
36c9828f 184
15b6757b
FM
185 To access several entries randomly, it is most efficient to transfer the
186 entire catalogue of entries to a container such as a std::map or a
98ba1eee
FM
187 wxHashMap; then entries looked up by name can be opened using the
188 wxArchiveInputStream::OpenEntry() method.
36c9828f 189
15b6757b 190 @code
e0a47918 191 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
15b6757b
FM
192 ZipCatalog::iterator it;
193 wxZipEntry *entry;
194 ZipCatalog cat;
36c9828f 195
15b6757b
FM
196 // open the zip
197 wxFFileInputStream in(_T("test.zip"));
198 wxZipInputStream zip(in);
36c9828f 199
15b6757b 200 // load the zip catalog
98ba1eee
FM
201 while ((entry = zip.GetNextEntry()) != NULL) {
202 wxZipEntry*& current = cat[entry->GetInternalName()];
15b6757b
FM
203 // some archive formats can have multiple entries with the same name
204 // (e.g. tar) though it is an error in the case of zip
205 delete current;
206 current = entry;
207 }
36c9828f 208
15b6757b
FM
209 // open an entry by name
210 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
98ba1eee 211 zip.OpenEntry(*it->second);
15b6757b
FM
212 // ... now read entry's data
213 }
214 @endcode
36c9828f 215
15b6757b
FM
216 To open more than one entry simultaneously you need more than one
217 underlying stream on the same archive:
36c9828f 218
15b6757b 219 @code
e0a47918 220 // opening another entry without closing the first requires another
15b6757b
FM
221 // input stream for the same file
222 wxFFileInputStream in2(_T("test.zip"));
223 wxZipInputStream zip2(in2);
224 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
98ba1eee 225 zip2.OpenEntry(*it->second);
15b6757b 226 @endcode
36c9828f
FM
227
228
229
e0a47918 230 @section overview_arc_generic Generic archive programming
36c9828f 231
98ba1eee 232 Also see wxFileSystem for a higher level interface that
15b6757b 233 can handle archive files in a generic way.
e0a47918 234
15b6757b
FM
235 The specific archive classes, such as the wxZip classes, inherit from
236 the following abstract classes which can be used to write code that can
237 handle any of the archive types:
36c9828f 238
e0a47918
FM
239 @li wxArchiveInputStream: input stream
240 @li wxArchiveOutputStream: output stream
241 @li wxArchiveEntry: holds the meta-data for an entry (e.g. filename)
36c9828f 242
15b6757b
FM
243 In order to be able to write generic code it's necessary to be able to create
244 instances of the classes without knowing which archive type is being used.
e0a47918 245
15b6757b 246 To allow this there is a class factory for each archive type, derived from
98ba1eee 247 wxArchiveClassFactory, that can create the other classes.
e0a47918 248
15b6757b
FM
249 For example, given @e wxArchiveClassFactory* factory, streams and
250 entries can be created like this:
36c9828f 251
15b6757b 252 @code
e0a47918 253 // create streams without knowing their type
98ba1eee
FM
254 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
255 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
36c9828f 256
15b6757b 257 // create an empty entry object
98ba1eee 258 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
15b6757b 259 @endcode
36c9828f 260
e0a47918 261 For the factory itself, the static member wxArchiveClassFactory::Find()
15b6757b
FM
262 can be used to find a class factory that can handle a given file
263 extension or mime type. For example, given @e filename:
36c9828f 264
15b6757b 265 @code
e0a47918 266 const wxArchiveClassFactory *factory;
15b6757b 267 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
36c9828f 268
15b6757b 269 if (factory)
98ba1eee 270 stream = factory->NewStream(new wxFFileInputStream(filename));
15b6757b 271 @endcode
36c9828f 272
15b6757b
FM
273 @e Find does not give away ownership of the returned pointer, so it
274 does not need to be deleted.
e0a47918 275
15b6757b
FM
276 There are similar class factories for the filter streams that handle the
277 compression and decompression of a single stream, such as wxGzipInputStream.
e0a47918
FM
278 These can be found using wxFilterClassFactory::Find().
279
15b6757b 280 For example, to list the contents of archive @e filename:
36c9828f 281
15b6757b 282 @code
e0a47918 283 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
36c9828f 284
98ba1eee 285 if (in->IsOk())
15b6757b
FM
286 {
287 // look for a filter handler, e.g. for '.gz'
288 const wxFilterClassFactory *fcf;
289 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
290 if (fcf) {
98ba1eee 291 in.reset(fcf->NewStream(in.release()));
15b6757b 292 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
98ba1eee 293 filename = fcf->PopExtension(filename);
15b6757b 294 }
36c9828f 295
15b6757b
FM
296 // look for an archive handler, e.g. for '.zip' or '.tar'
297 const wxArchiveClassFactory *acf;
298 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
299 if (acf) {
98ba1eee 300 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
e0a47918 301 auto_ptr<wxArchiveEntry> entry;
36c9828f 302
15b6757b 303 // list the contents of the archive
98ba1eee
FM
304 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
305 std::wcout << entry->GetName().c_str() << "\n";
15b6757b
FM
306 }
307 else {
308 wxLogError(_T("can't handle '%s'"), filename.c_str());
309 }
310 }
311 @endcode
36c9828f
FM
312
313
314
e0a47918 315 @section overview_arc_noseek Archives on non-seekable streams
36c9828f 316
15b6757b
FM
317 In general, handling archives on non-seekable streams is done in the same
318 way as for seekable streams, with a few caveats.
e0a47918 319
98ba1eee
FM
320 The main limitation is that accessing entries randomly using
321 wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
322 accessed sequentially in the order they are stored within the archive.
e0a47918 323
15b6757b
FM
324 For each archive type, there will also be other limitations which will
325 depend on the order the entries' meta-data is stored within the archive.
326 These are not too difficult to deal with, and are outlined below.
e0a47918 327
98ba1eee
FM
328 @subsection overview_arc_noseek_entrysize PutNextEntry and the entry size
329
15b6757b
FM
330 When writing archives, some archive formats store the entry size before
331 the entry's data (tar has this limitation, zip doesn't). In this case
98ba1eee
FM
332 the entry's size must be passed to wxArchiveOutputStream::PutNextEntry()
333 or an error occurs.
e0a47918 334
15b6757b
FM
335 This is only an issue on non-seekable streams, since otherwise the archive
336 output stream can seek back and fix up the header once the size of the
337 entry is known.
e0a47918 338
15b6757b
FM
339 For generic programming, one way to handle this is to supply the size
340 whenever it is known, and rely on the error message from the output
341 stream when the operation is not supported.
e0a47918 342
98ba1eee
FM
343 @subsection overview_arc_noseek_weak GetNextEntry and the weak reference mechanism
344
15b6757b
FM
345 Some archive formats do not store all of an entry's meta-data before the
346 entry's data (zip is an example). In this case, when reading from a
98ba1eee
FM
347 non-seekable stream, wxArchiveInputStream::GetNextEntry() can only return
348 a partially populated wxArchiveEntry object - not all the fields are set.
e0a47918 349
15b6757b
FM
350 The input stream then keeps a weak reference to the entry object and
351 updates it when more meta-data becomes available. A weak reference being
352 one that does not prevent you from deleting the wxArchiveEntry object - the
353 input stream only attempts to update it if it is still around.
e0a47918 354
15b6757b
FM
355 The documentation for each archive entry type gives the details
356 of what meta-data becomes available and when. For generic programming,
357 when the worst case must be assumed, you can rely on all the fields
358 of wxArchiveEntry being fully populated when GetNextEntry() returns,
359 with the following exceptions:
36c9828f 360
98ba1eee
FM
361 @li wxArchiveEntry::GetSize(): guaranteed to be available after the
362 entry has been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
363 has been called
36c9828f 364
98ba1eee
FM
365 @li wxArchiveEntry::IsReadOnly(): guaranteed to be available after the end of
366 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
367 Eof() is @true
36c9828f 368
98ba1eee
FM
369 This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
370 preserve entries' meta-data. No matter what order the meta-data occurs
371 within the archive, the input stream will always have read it before the output
372 stream must write it.
373
374 @subsection overview_arc_noseek_notifier wxArchiveNotifier
36c9828f 375
15b6757b 376 Notifier objects can be used to get a notification whenever an input
98ba1eee 377 stream updates a wxArchiveEntry object's data via the weak reference mechanism.
e0a47918 378
15b6757b
FM
379 Consider the following code which renames an entry in an archive.
380 This is the usual way to modify an entry's meta-data: simply set the
98ba1eee 381 required field before writing it with wxArchiveOutputStream::CopyEntry():
36c9828f 382
15b6757b 383 @code
98ba1eee
FM
384 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
385 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
e0a47918 386 auto_ptr<wxArchiveEntry> entry;
36c9828f 387
98ba1eee 388 outarc->CopyArchiveMetaData(*arc);
36c9828f 389
98ba1eee
FM
390 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
391 if (entry->GetName() == from)
392 entry->SetName(to);
393 if (!outarc->CopyEntry(entry.release(), *arc))
15b6757b
FM
394 break;
395 }
36c9828f 396
98ba1eee 397 bool success = arc->Eof() && outarc->Close();
15b6757b 398 @endcode
36c9828f 399
15b6757b 400 However, for non-seekable streams, this technique cannot be used for
98ba1eee
FM
401 fields such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
402 wxArchiveInputStream::GetNextEntry() returns.
403
404 In this case a wxArchiveNotifier can be used:
36c9828f 405
15b6757b
FM
406 @code
407 class MyNotifier : public wxArchiveNotifier
408 {
409 public:
98ba1eee 410 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
15b6757b
FM
411 };
412 @endcode
36c9828f 413
98ba1eee
FM
414 The meta-data changes are done in your notifier's wxArchiveNotifier::OnEntryUpdated()
415 method, then wxArchiveEntry::SetNotifier() is called before CopyEntry():
36c9828f 416
15b6757b 417 @code
98ba1eee
FM
418 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
419 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
e0a47918 420 auto_ptr<wxArchiveEntry> entry;
15b6757b 421 MyNotifier notifier;
36c9828f 422
98ba1eee 423 outarc->CopyArchiveMetaData(*arc);
36c9828f 424
98ba1eee
FM
425 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
426 entry->SetNotifier(notifier);
427 if (!outarc->CopyEntry(entry.release(), *arc))
15b6757b
FM
428 break;
429 }
36c9828f 430
98ba1eee 431 bool success = arc->Eof() && outarc->Close();
15b6757b 432 @endcode
36c9828f 433
15b6757b
FM
434 SetNotifier() calls OnEntryUpdated() immediately, then the input
435 stream calls it again whenever it sets more fields in the entry. Since
436 OnEntryUpdated() will be called at least once, this technique always
437 works even when it is not strictly necessary to use it. For example,
438 changing the entry name can be done this way too and it works on seekable
439 streams as well as non-seekable.
36c9828f 440
e0a47918 441*/
36c9828f 442