]> git.saurik.com Git - wxWidgets.git/blob - docs/doxygen/overviews/archive.h
fixed all warnings for topic overviews (letters a,h)
[wxWidgets.git] / docs / doxygen / overviews / archive.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: archive.h
3 // Purpose: topic overview
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
9 /*!
10
11 @page overview_arc Archive formats such as zip
12
13 The archive classes handle archive formats such as zip, tar, rar and cab.
14 Currently wxZip and wxTar classes are included.
15
16 For each archive type, there are the following classes (using zip here
17 as an example):
18
19 @li wxZipInputStream: input stream
20 @li wxZipOutputStream: output stream
21 @li wxZipEntry: holds the meta-data for an entry (e.g. filename, timestamp, etc.)
22
23 There are also abstract wxArchive classes that can be used to write code
24 that can handle any of the archive types, see @ref overview_arc_generic.
25
26 Also see wxFileSystem for a higher level interface that
27 can handle archive files in a generic way.
28
29 The classes are designed to handle archives on both seekable streams such
30 as disk files, and non-seekable streams such as pipes and sockets
31 (see @ref overview_arc_noseek).
32
33 See also wxFileSystem.
34
35 @li @ref overview_arc_create
36 @li @ref overview_arc_extract
37 @li @ref overview_arc_modify
38 @li @ref overview_arc_byname
39 @li @ref overview_arc_generic
40 @li @ref overview_arc_noseek
41
42
43 <hr>
44
45
46 @section overview_arc_create Creating an archive
47
48 Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the archive,
49 then write the entry's data.
50 Another call to PutNextEntry() closes the current entry and begins the next.
51 For example:
52
53 @code
54 wxFFileOutputStream out(_T("test.zip"));
55 wxZipOutputStream zip(out);
56 wxTextOutputStream txt(zip);
57 wxString sep(wxFileName::GetPathSeparator());
58
59 zip.PutNextEntry(_T("entry1.txt"));
60 txt << _T("Some text for entry1.txt\n");
61
62 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
63 txt << _T("Some text for subdir/entry2.txt\n");
64 @endcode
65
66 The name of each entry can be a full path, which makes it possible to
67 store entries in subdirectories.
68
69
70 @section overview_arc_extract Extracting an archive
71
72 wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object containing the
73 meta-data for the next entry in the archive (and gives away ownership).
74
75 Reading from the input stream then returns the entry's data.
76 Eof() becomes @true after an attempt has been made to read past the end of the entry's data.
77
78 When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
79
80 @code
81 auto_ptr<wxZipEntry> entry;
82
83 wxFFileInputStream in(_T("test.zip"));
84 wxZipInputStream zip(in);
85
86 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
87 {
88 // access meta-data
89 wxString name = entry->GetName();
90 // read 'zip' to access the entry's data
91 }
92 @endcode
93
94
95
96 @section overview_arc_modify Modifying an archive
97
98 To modify an existing archive, write a new copy of the archive to a new file,
99 making any necessary changes along the way and transferring any unchanged
100 entries using wxArchiveOutputStream::CopyEntry().
101
102 For archive types which compress entry data, CopyEntry() is likely to be
103 much more efficient than transferring the data using Read() and Write()
104 since it will copy them without decompressing and recompressing them.
105
106 In general modifications are not possible without rewriting the archive,
107 though it may be possible in some limited cases. Even then, rewriting the
108 archive is usually a better choice since a failure can be handled without
109 losing the whole archive. wxTempFileOutputStream can be helpful to do this.
110
111 For example to delete all entries matching the pattern "*.txt":
112
113 @code
114 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
115 wxTempFileOutputStream out(_T("test.zip"));
116
117 wxZipInputStream inzip(*in);
118 wxZipOutputStream outzip(out);
119
120 auto_ptr<wxZipEntry> entry;
121
122 // transfer any meta-data for the archive as a whole (the zip comment
123 // in the case of zip)
124 outzip.CopyArchiveMetaData(inzip);
125
126 // call CopyEntry for each entry except those matching the pattern
127 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
128 if (!entry->GetName().Matches(_T("*.txt")))
129 if (!outzip.CopyEntry(entry.release(), inzip))
130 break;
131
132 // close the input stream by releasing the pointer to it, do this
133 // before closing the output stream so that the file can be replaced
134 in.reset();
135
136 // you can check for success as follows
137 bool success = inzip.Eof() && outzip.Close() && out.Commit();
138 @endcode
139
140
141
142 @section overview_arc_byname Looking up an archive entry by name
143
144 Also see wxFileSystem for a higher level interface that is
145 more convenient for accessing archive entries by name.
146
147 To open just one entry in an archive, the most efficient way is
148 to simply search for it linearly by calling wxArchiveInputStream::GetNextEntry()
149 until the required entry is found. This works both for archives on seekable and
150 non-seekable streams.
151
152 The format of filenames in the archive is likely to be different
153 from the local filename format. For example zips and tars use
154 unix style names, with forward slashes as the path separator,
155 and absolute paths are not allowed. So if on Windows the file
156 "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry back
157 wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT".
158 The conversion into the internal format and back has lost some information.
159
160 So to avoid ambiguity when searching for an entry matching a local name,
161 it is better to convert the local name to the archive's internal format
162 and search for that:
163
164 @code
165 auto_ptr<wxZipEntry> entry;
166
167 // convert the local name we are looking for into the internal format
168 wxString name = wxZipEntry::GetInternalName(localname);
169
170 // open the zip
171 wxFFileInputStream in(_T("test.zip"));
172 wxZipInputStream zip(in);
173
174 // call GetNextEntry() until the required internal name is found
175 do {
176 entry.reset(zip.GetNextEntry());
177 }
178 while (entry.get() != NULL && entry->GetInternalName() != name);
179
180 if (entry.get() != NULL) {
181 // read the entry's data...
182 }
183 @endcode
184
185 To access several entries randomly, it is most efficient to transfer the
186 entire catalogue of entries to a container such as a std::map or a
187 wxHashMap; entries looked up by name can then be opened using the
188 wxArchiveInputStream::OpenEntry() method.
189
190 @code
191 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
192 ZipCatalog::iterator it;
193 wxZipEntry *entry;
194 ZipCatalog cat;
195
196 // open the zip
197 wxFFileInputStream in(_T("test.zip"));
198 wxZipInputStream zip(in);
199
200 // load the zip catalog
201 while ((entry = zip.GetNextEntry()) != NULL) {
202 wxZipEntry*& current = cat[entry->GetInternalName()];
203 // some archive formats can have multiple entries with the same name
204 // (e.g. tar) though it is an error in the case of zip
205 delete current;
206 current = entry;
207 }
208
209 // open an entry by name
210 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
211 zip.OpenEntry(*it->second);
212 // ... now read entry's data
213 }
214 @endcode
215
216 To open more than one entry simultaneously you need more than one
217 underlying stream on the same archive:
218
219 @code
220 // opening another entry without closing the first requires another
221 // input stream for the same file
222 wxFFileInputStream in2(_T("test.zip"));
223 wxZipInputStream zip2(in2);
224 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
225 zip2.OpenEntry(*it->second);
226 @endcode
227
228
229
230 @section overview_arc_generic Generic archive programming
231
232 Also see wxFileSystem for a higher level interface that
233 can handle archive files in a generic way.
234
235 The specific archive classes, such as the wxZip classes, inherit from
236 the following abstract classes which can be used to write code that can
237 handle any of the archive types:
238
239 @li wxArchiveInputStream: input stream
240 @li wxArchiveOutputStream: output stream
241 @li wxArchiveEntry: holds the meta-data for an entry (e.g. filename)
242
243 In order to be able to write generic code it's necessary to be able to create
244 instances of the classes without knowing which archive type is being used.
245
246 To allow this there is a class factory for each archive type, derived from
247 wxArchiveClassFactory, that can create the other classes.
248
249 For example, given @e wxArchiveClassFactory* factory, streams and
250 entries can be created like this:
251
252 @code
253 // create streams without knowing their type
254 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
255 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
256
257 // create an empty entry object
258 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
259 @endcode
260
261 For the factory itself, the static member wxArchiveClassFactory::Find()
262 can be used to find a class factory that can handle a given file
263 extension or mime type. For example, given @e filename:
264
265 @code
266 const wxArchiveClassFactory *factory;
267 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
268
269 if (factory)
270 stream = factory->NewStream(new wxFFileInputStream(filename));
271 @endcode
272
273 @e Find does not give away ownership of the returned pointer, so it
274 does not need to be deleted.
275
276 There are similar class factories for the filter streams that handle the
277 compression and decompression of a single stream, such as wxGzipInputStream.
278 These can be found using wxFilterClassFactory::Find().
279
280 For example, to list the contents of archive @e filename:
281
282 @code
283 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
284
285 if (in->IsOk())
286 {
287 // look for a filter handler, e.g. for '.gz'
288 const wxFilterClassFactory *fcf;
289 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
290 if (fcf) {
291 in.reset(fcf->NewStream(in.release()));
292 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
293 filename = fcf->PopExtension(filename);
294 }
295
296 // look for an archive handler, e.g. for '.zip' or '.tar'
297 const wxArchiveClassFactory *acf;
298 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
299 if (acf) {
300 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
301 auto_ptr<wxArchiveEntry> entry;
302
303 // list the contents of the archive
304 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
305 std::wcout << entry->GetName().c_str() << "\n";
306 }
307 else {
308 wxLogError(_T("can't handle '%s'"), filename.c_str());
309 }
310 }
311 @endcode
312
313
314
315 @section overview_arc_noseek Archives on non-seekable streams
316
317 In general, handling archives on non-seekable streams is done in the same
318 way as for seekable streams, with a few caveats.
319
320 The main limitation is that accessing entries randomly using
321 wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
322 accessed sequentially in the order they are stored within the archive.
323
324 For each archive type, there will also be other limitations which will
325 depend on the order the entries' meta-data is stored within the archive.
326 These are not too difficult to deal with, and are outlined below.
327
328 @subsection overview_arc_noseek_entrysize PutNextEntry and the entry size
329
330 When writing archives, some archive formats store the entry size before
331 the entry's data (tar has this limitation, zip doesn't). In this case
332 the entry's size must be passed to wxArchiveOutputStream::PutNextEntry()
333 or an error occurs.
334
335 This is only an issue on non-seekable streams, since otherwise the archive
336 output stream can seek back and fix up the header once the size of the
337 entry is known.
338
339 For generic programming, one way to handle this is to supply the size
340 whenever it is known, and rely on the error message from the output
341 stream when the operation is not supported.
342
343 @subsection overview_arc_noseek_weak GetNextEntry and the weak reference mechanism
344
345 Some archive formats do not store all an entry's meta-data before the
346 entry's data (zip is an example). In this case, when reading from a
347 non-seekable stream, wxArchiveInputStream::GetNextEntry() can only return
348 a partially populated wxArchiveEntry object - not all the fields are set.
349
350 The input stream then keeps a weak reference to the entry object and
351 updates it when more meta-data becomes available. A weak reference is
352 one that does not prevent you from deleting the wxArchiveEntry object - the
353 input stream only attempts to update it if it is still around.
354
355 The documentation for each archive entry type gives the details
356 of what meta-data becomes available and when. For generic programming,
357 when the worst case must be assumed, you can rely on all the fields
358 of wxArchiveEntry being fully populated when GetNextEntry() returns,
359 with the following exceptions:
360
361 @li wxArchiveEntry::GetSize(): guaranteed to be available after the
362 entry has been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
363 has been called
364
365 @li wxArchiveEntry::IsReadOnly(): guaranteed to be available after the end of
366 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
367 Eof() is @true
368
369 This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
370 preserve entries' meta-data. No matter what order the meta-data occurs
371 within the archive, the input stream will always have read it before the output
372 stream must write it.
373
374 @subsection overview_arc_noseek_notifier wxArchiveNotifier
375
376 Notifier objects can be used to get a notification whenever an input
377 stream updates a wxArchiveEntry object's data via the weak reference mechanism.
378
379 Consider the following code which renames an entry in an archive.
380 This is the usual way to modify an entry's meta-data: simply set the
381 required field before writing it with wxArchiveOutputStream::CopyEntry():
382
383 @code
384 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
385 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
386 auto_ptr<wxArchiveEntry> entry;
387
388 outarc->CopyArchiveMetaData(*arc);
389
390 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
391 if (entry->GetName() == from)
392 entry->SetName(to);
393 if (!outarc->CopyEntry(entry.release(), *arc))
394 break;
395 }
396
397 bool success = arc->Eof() && outarc->Close();
398 @endcode
399
400 However, for non-seekable streams, this technique cannot be used for
401 fields such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
402 wxArchiveInputStream::GetNextEntry() returns.
403
404 In this case a wxArchiveNotifier can be used:
405
406 @code
407 class MyNotifier : public wxArchiveNotifier
408 {
409 public:
410 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
411 };
412 @endcode
413
414 The meta-data changes are done in your notifier's wxArchiveNotifier::OnEntryUpdated()
415 method, then wxArchiveEntry::SetNotifier() is called before CopyEntry():
416
417 @code
418 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
419 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
420 auto_ptr<wxArchiveEntry> entry;
421 MyNotifier notifier;
422
423 outarc->CopyArchiveMetaData(*arc);
424
425 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
426 entry->SetNotifier(notifier);
427 if (!outarc->CopyEntry(entry.release(), *arc))
428 break;
429 }
430
431 bool success = arc->Eof() && outarc->Close();
432 @endcode
433
434 SetNotifier() calls OnEntryUpdated() immediately, then the input
435 stream calls it again whenever it sets more fields in the entry. Since
436 OnEntryUpdated() will be called at least once, this technique always
437 works even when it is not strictly necessary to use it. For example,
438 changing the entry name can be done this way too and it works on seekable
439 streams as well as non-seekable.
440
441 */
442