]>
Commit | Line | Data |
---|---|---|
15b6757b | 1 | ///////////////////////////////////////////////////////////////////////////// |
98ba1eee | 2 | // Name: archive.h |
15b6757b FM |
3 | // Purpose: topic overview |
4 | // Author: wxWidgets team | |
5 | // RCS-ID: $Id$ | |
6 | // Licence: wxWindows license | |
7 | ///////////////////////////////////////////////////////////////////////////// | |
8 | ||
9 | /*! | |
36c9828f | 10 | |
e0a47918 | 11 | @page overview_arc Archive formats such as zip |
36c9828f | 12 | |
15b6757b | 13 | The archive classes handle archive formats such as zip, tar, rar and cab. |
98ba1eee | 14 | Currently wxZip and wxTar classes are included. |
e0a47918 | 15 | |
15b6757b FM |
16 | For each archive type, there are the following classes (using zip here |
17 | as an example): | |
36c9828f | 18 | |
e0a47918 FM |
19 | @li wxZipInputStream: input stream |
20 | @li wxZipOutputStream: output stream | |
21 | @li wxZipEntry: holds the meta-data for an entry (e.g. filename, timestamp, etc.) | |
36c9828f | 22 | |
15b6757b | 23 | There are also abstract wxArchive classes that can be used to write code |
e0a47918 FM |
24 | that can handle any of the archive types, see @ref overview_arc_generic. |
25 | ||
98ba1eee | 26 | Also see wxFileSystem for a higher level interface that |
15b6757b | 27 | can handle archive files in a generic way. |
e0a47918 | 28 | |
15b6757b FM |
29 | The classes are designed to handle archives on both seekable streams such |
30 | as disk files, or non-seekable streams such as pipes and sockets | |
e0a47918 FM |
31 | (see @ref overview_arc_noseek). |
32 | ||
98ba1eee | 33 | See also wxFileSystem. |
36c9828f | 34 | |
e0a47918 FM |
35 | @li @ref overview_arc_create |
36 | @li @ref overview_arc_extract | |
37 | @li @ref overview_arc_modify | |
38 | @li @ref overview_arc_byname | |
39 | @li @ref overview_arc_generic | |
40 | @li @ref overview_arc_noseek | |
36c9828f FM |
41 | |
42 | ||
e0a47918 | 43 | <hr> |
36c9828f | 44 | |
e0a47918 FM |
45 | |
46 | @section overview_arc_create Creating an archive | |
47 | ||
98ba1eee FM |
48 | Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the archive, |
49 | then write the entry's data. | |
15b6757b FM |
50 | Another call to PutNextEntry() closes the current entry and begins the next. |
51 | For example: | |
36c9828f | 52 | |
15b6757b | 53 | @code |
e0a47918 | 54 | wxFFileOutputStream out(_T("test.zip")); |
15b6757b FM |
55 | wxZipOutputStream zip(out); |
56 | wxTextOutputStream txt(zip); | |
57 | wxString sep(wxFileName::GetPathSeparator()); | |
36c9828f | 58 | |
15b6757b | 59 | zip.PutNextEntry(_T("entry1.txt")); |
98ba1eee | 60 | txt << _T("Some text for entry1.txt\n"); |
36c9828f | 61 | |
15b6757b | 62 | zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt")); |
98ba1eee | 63 | txt << _T("Some text for subdir/entry2.txt\n"); |
15b6757b | 64 | @endcode |
36c9828f | 65 | |
15b6757b FM |
66 | The name of each entry can be a full path, which makes it possible to |
67 | store entries in subdirectories. | |
36c9828f FM |
68 | |
69 | ||
e0a47918 FM |
70 | @section overview_arc_extract Extracting an archive |
71 | ||
98ba1eee FM |
72 | wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object containing the
73 | meta-data for the next entry in the archive (and gives away ownership). | |
e0a47918 FM |
74 | |
75 | Reading from the input stream then returns the entry's data. | |
76 | Eof() becomes @true after an attempt has been made to read past the end of the entry's data. | |
36c9828f | 77 | |
15b6757b | 78 | When there are no more entries, GetNextEntry() returns @NULL and sets Eof(). |
36c9828f | 79 | |
15b6757b | 80 | @code |
e0a47918 | 81 | auto_ptr<wxZipEntry> entry; |
36c9828f | 82 | |
15b6757b FM |
83 | wxFFileInputStream in(_T("test.zip")); |
84 | wxZipInputStream zip(in); | |
36c9828f | 85 | |
98ba1eee | 86 | while (entry.reset(zip.GetNextEntry()), entry.get() != NULL) |
15b6757b FM |
87 | { |
88 | // access meta-data | |
98ba1eee | 89 | wxString name = entry->GetName(); |
15b6757b FM |
90 | // read 'zip' to access the entry's data |
91 | } | |
92 | @endcode | |
36c9828f FM |
93 | |
94 | ||
95 | ||
e0a47918 | 96 | @section overview_arc_modify Modifying an archive |
36c9828f | 97 | |
15b6757b FM |
98 | To modify an existing archive, write a new copy of the archive to a new file, |
99 | making any necessary changes along the way and transferring any unchanged | |
98ba1eee | 100 | entries using wxArchiveOutputStream::CopyEntry(). |
e0a47918 | 101 | |
15b6757b FM |
102 | For archive types which compress entry data, CopyEntry() is likely to be |
103 | much more efficient than transferring the data using Read() and Write() | |
104 | since it will copy them without decompressing and recompressing them. | |
e0a47918 | 105 | |
15b6757b FM |
106 | In general modifications are not possible without rewriting the archive, |
107 | though it may be possible in some limited cases. Even then, rewriting the | |
108 | archive is usually a better choice since a failure can be handled without | |
98ba1eee | 109 | losing the whole archive. wxTempFileOutputStream can be helpful to do this. |
e0a47918 | 110 | |
15b6757b | 111 | For example to delete all entries matching the pattern "*.txt": |
36c9828f | 112 | |
15b6757b | 113 | @code |
e0a47918 | 114 | auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip"))); |
15b6757b | 115 | wxTempFileOutputStream out(_T("test.zip")); |
36c9828f | 116 | |
15b6757b FM |
117 | wxZipInputStream inzip(*in); |
118 | wxZipOutputStream outzip(out); | |
36c9828f | 119 | |
e0a47918 | 120 | auto_ptr<wxZipEntry> entry; |
36c9828f | 121 | |
15b6757b FM |
122 | // transfer any meta-data for the archive as a whole (the zip comment |
123 | // in the case of zip) | |
124 | outzip.CopyArchiveMetaData(inzip); | |
36c9828f | 125 | |
15b6757b | 126 | // call CopyEntry for each entry except those matching the pattern |
98ba1eee FM |
127 | while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL) |
128 | if (!entry->GetName().Matches(_T("*.txt"))) | |
15b6757b FM |
129 | if (!outzip.CopyEntry(entry.release(), inzip)) |
130 | break; | |
36c9828f | 131 | |
15b6757b FM |
132 | // close the input stream by releasing the pointer to it, do this |
133 | // before closing the output stream so that the file can be replaced | |
134 | in.reset(); | |
36c9828f | 135 | |
15b6757b FM |
136 | // you can check for success as follows |
137 | bool success = inzip.Eof() && outzip.Close() && out.Commit(); | |
138 | @endcode | |
36c9828f FM |
139 | |
140 | ||
141 | ||
e0a47918 | 142 | @section overview_arc_byname Looking up an archive entry by name |
36c9828f | 143 | |
98ba1eee | 144 | Also see wxFileSystem for a higher level interface that is |
15b6757b | 145 | more convenient for accessing archive entries by name. |
e0a47918 | 146 | |
15b6757b | 147 | To open just one entry in an archive, the most efficient way is |
98ba1eee FM |
148 | to simply search for it linearly by calling wxArchiveInputStream::GetNextEntry() |
149 | until the required entry is found. This works both for archives on seekable and | |
15b6757b | 150 | non-seekable streams. |
e0a47918 | 151 | |
15b6757b FM |
152 | The format of filenames in the archive is likely to be different |
153 | from the local filename format. For example zips and tars use | |
154 | unix style names, with forward slashes as the path separator, | |
155 | and absolute paths are not allowed. So if on Windows the file | |
98ba1eee FM |
156 | "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry back |
157 | wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". | |
158 | The conversion into the internal format and back has lost some information. | |
e0a47918 | 159 | |
15b6757b FM |
160 | So to avoid ambiguity when searching for an entry matching a local name, |
161 | it is better to convert the local name to the archive's internal format | |
162 | and search for that: | |
36c9828f | 163 | |
15b6757b | 164 | @code |
e0a47918 | 165 | auto_ptr<wxZipEntry> entry; |
36c9828f | 166 | |
15b6757b FM |
167 | // convert the local name we are looking for into the internal format |
168 | wxString name = wxZipEntry::GetInternalName(localname); | |
36c9828f | 169 | |
15b6757b FM |
170 | // open the zip |
171 | wxFFileInputStream in(_T("test.zip")); | |
172 | wxZipInputStream zip(in); | |
36c9828f | 173 | |
15b6757b FM |
174 | // call GetNextEntry() until the required internal name is found |
175 | do { | |
176 | entry.reset(zip.GetNextEntry()); | |
177 | } | |
98ba1eee | 178 | while (entry.get() != NULL && entry->GetInternalName() != name); |
36c9828f | 179 | |
98ba1eee | 180 | if (entry.get() != NULL) { |
15b6757b FM |
181 | // read the entry's data... |
182 | } | |
183 | @endcode | |
36c9828f | 184 | |
15b6757b FM |
185 | To access several entries randomly, it is most efficient to transfer the |
186 | entire catalogue of entries to a container such as a std::map or a | |
98ba1eee FM |
187 | wxHashMap; entries looked up by name can then be opened using the
188 | wxArchiveInputStream::OpenEntry() method. | |
36c9828f | 189 | |
15b6757b | 190 | @code |
e0a47918 | 191 | WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog); |
15b6757b FM |
192 | ZipCatalog::iterator it; |
193 | wxZipEntry *entry; | |
194 | ZipCatalog cat; | |
36c9828f | 195 | |
15b6757b FM |
196 | // open the zip |
197 | wxFFileInputStream in(_T("test.zip")); | |
198 | wxZipInputStream zip(in); | |
36c9828f | 199 | |
15b6757b | 200 | // load the zip catalog |
98ba1eee FM |
201 | while ((entry = zip.GetNextEntry()) != NULL) { |
202 | wxZipEntry*& current = cat[entry->GetInternalName()]; | |
15b6757b FM |
203 | // some archive formats can have multiple entries with the same name |
204 | // (e.g. tar) though it is an error in the case of zip | |
205 | delete current; | |
206 | current = entry; | |
207 | } | |
36c9828f | 208 | |
15b6757b FM |
209 | // open an entry by name |
210 | if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) { | |
98ba1eee | 211 | zip.OpenEntry(*it->second); |
15b6757b FM |
212 | // ... now read entry's data |
213 | } | |
214 | @endcode | |
36c9828f | 215 | |
15b6757b FM |
216 | To open more than one entry simultaneously you need more than one |
217 | underlying stream on the same archive: | |
36c9828f | 218 | |
15b6757b | 219 | @code |
e0a47918 | 220 | // opening another entry without closing the first requires another |
15b6757b FM |
221 | // input stream for the same file |
222 | wxFFileInputStream in2(_T("test.zip")); | |
223 | wxZipInputStream zip2(in2); | |
224 | if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end()) | |
98ba1eee | 225 | zip2.OpenEntry(*it->second); |
15b6757b | 226 | @endcode |
36c9828f FM |
227 | |
228 | ||
229 | ||
e0a47918 | 230 | @section overview_arc_generic Generic archive programming |
36c9828f | 231 | |
98ba1eee | 232 | Also see wxFileSystem for a higher level interface that |
15b6757b | 233 | can handle archive files in a generic way. |
e0a47918 | 234 | |
15b6757b FM |
235 | The specific archive classes, such as the wxZip classes, inherit from |
236 | the following abstract classes which can be used to write code that can | |
237 | handle any of the archive types: | |
36c9828f | 238 | |
e0a47918 FM |
239 | @li wxArchiveInputStream: input stream |
240 | @li wxArchiveOutputStream: output stream | |
241 | @li wxArchiveEntry: holds the meta-data for an entry (e.g. filename) | |
36c9828f | 242 | |
15b6757b FM |
243 | In order to be able to write generic code it's necessary to be able to create
244 | instances of the classes without knowing which archive type is being used. | |
e0a47918 | 245 | |
15b6757b | 246 | To allow this there is a class factory for each archive type, derived from |
98ba1eee | 247 | wxArchiveClassFactory, that can create the other classes. |
e0a47918 | 248 | |
15b6757b FM |
249 | For example, given @e wxArchiveClassFactory* factory, streams and |
250 | entries can be created like this: | |
36c9828f | 251 | |
15b6757b | 252 | @code |
e0a47918 | 253 | // create streams without knowing their type |
98ba1eee FM |
254 | auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in)); |
255 | auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out)); | |
36c9828f | 256 | |
15b6757b | 257 | // create an empty entry object |
98ba1eee | 258 | auto_ptr<wxArchiveEntry> entry(factory->NewEntry()); |
15b6757b | 259 | @endcode |
36c9828f | 260 | |
e0a47918 | 261 | For the factory itself, the static member wxArchiveClassFactory::Find()
15b6757b FM |
262 | can be used to find a class factory that can handle a given file |
263 | extension or mime type. For example, given @e filename: | |
36c9828f | 264 | |
15b6757b | 265 | @code |
e0a47918 | 266 | const wxArchiveClassFactory *factory; |
15b6757b | 267 | factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT); |
36c9828f | 268 | |
15b6757b | 269 | if (factory) |
98ba1eee | 270 | stream = factory->NewStream(new wxFFileInputStream(filename)); |
15b6757b | 271 | @endcode |
36c9828f | 272 | |
15b6757b FM |
273 | @e Find does not give away ownership of the returned pointer, so it |
274 | does not need to be deleted. | |
e0a47918 | 275 | |
15b6757b FM |
276 | There are similar class factories for the filter streams that handle the |
277 | compression and decompression of a single stream, such as wxGzipInputStream. | |
e0a47918 FM |
278 | These can be found using wxFilterClassFactory::Find(). |
279 | ||
15b6757b | 280 | For example, to list the contents of archive @e filename: |
36c9828f | 281 | |
15b6757b | 282 | @code |
e0a47918 | 283 | auto_ptr<wxInputStream> in(new wxFFileInputStream(filename)); |
36c9828f | 284 | |
98ba1eee | 285 | if (in->IsOk()) |
15b6757b FM |
286 | { |
287 | // look for a filter handler, e.g. for '.gz' | |
288 | const wxFilterClassFactory *fcf; | |
289 | fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT); | |
290 | if (fcf) { | |
98ba1eee | 291 | in.reset(fcf->NewStream(in.release())); |
15b6757b | 292 | // pop the extension, so if it was '.tar.gz' it is now just '.tar' |
98ba1eee | 293 | filename = fcf->PopExtension(filename); |
15b6757b | 294 | } |
36c9828f | 295 | |
15b6757b FM |
296 | // look for an archive handler, e.g. for '.zip' or '.tar'
297 | const wxArchiveClassFactory *acf; | |
298 | acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT); | |
299 | if (acf) { | |
98ba1eee | 300 | auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release())); |
e0a47918 | 301 | auto_ptr<wxArchiveEntry> entry; |
36c9828f | 302 | |
15b6757b | 303 | // list the contents of the archive |
98ba1eee FM |
304 | while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL) |
305 | std::wcout << entry->GetName().c_str() << "\n"; | |
15b6757b FM |
306 | } |
307 | else { | |
308 | wxLogError(_T("can't handle '%s'"), filename.c_str()); | |
309 | } | |
310 | } | |
311 | @endcode | |
36c9828f FM |
312 | |
313 | ||
314 | ||
e0a47918 | 315 | @section overview_arc_noseek Archives on non-seekable streams |
36c9828f | 316 | |
15b6757b FM |
317 | In general, handling archives on non-seekable streams is done in the same |
318 | way as for seekable streams, with a few caveats. | |
e0a47918 | 319 | |
98ba1eee FM |
320 | The main limitation is that accessing entries randomly using |
321 | wxArchiveInputStream::OpenEntry() is not possible; the entries can only be | |
322 | accessed sequentially in the order they are stored within the archive. | |
e0a47918 | 323 | |
15b6757b FM |
324 | For each archive type, there will also be other limitations which will |
325 | depend on the order the entries' meta-data is stored within the archive. | |
326 | These are not too difficult to deal with, and are outlined below. | |
e0a47918 | 327 | |
98ba1eee FM |
328 | @subsection overview_arc_noseek_entrysize PutNextEntry and the entry size |
329 | ||
15b6757b FM |
330 | When writing archives, some archive formats store the entry size before |
331 | the entry's data (tar has this limitation, zip doesn't). In this case | |
98ba1eee FM |
332 | the entry's size must be passed to wxArchiveOutputStream::PutNextEntry() |
333 | or an error occurs. | |
e0a47918 | 334 | |
15b6757b FM |
335 | This is only an issue on non-seekable streams, since otherwise the archive |
336 | output stream can seek back and fix up the header once the size of the | |
337 | entry is known. | |
e0a47918 | 338 | |
15b6757b FM |
339 | For generic programming, one way to handle this is to supply the size |
340 | whenever it is known, and rely on the error message from the output | |
341 | stream when the operation is not supported. | |
e0a47918 | 342 | |
98ba1eee FM |
343 | @subsection overview_arc_noseek_weak GetNextEntry and the weak reference mechanism |
344 | ||
15b6757b FM |
345 | Some archive formats do not store all an entry's meta-data before the |
346 | entry's data (zip is an example). In this case, when reading from a | |
98ba1eee FM |
347 | non-seekable stream, wxArchiveInputStream::GetNextEntry() can only return |
348 | a partially populated wxArchiveEntry object - not all the fields are set. | |
e0a47918 | 349 | |
15b6757b FM |
350 | The input stream then keeps a weak reference to the entry object and |
351 | updates it when more meta-data becomes available. A weak reference is | |
352 | one that does not prevent you from deleting the wxArchiveEntry object - the | |
353 | input stream only attempts to update it if it is still around. | |
e0a47918 | 354 | |
15b6757b FM |
355 | The documentation for each archive entry type gives the details |
356 | of what meta-data becomes available and when. For generic programming, | |
357 | when the worst case must be assumed, you can rely on all the fields | |
358 | of wxArchiveEntry being fully populated when GetNextEntry() returns, | |
359 | with the following exceptions: | |
36c9828f | 360 | |
98ba1eee FM |
361 | @li wxArchiveEntry::GetSize(): guaranteed to be available after the |
362 | entry has been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry() | |
363 | has been called | |
36c9828f | 364 | |
98ba1eee FM |
365 | @li wxArchiveEntry::IsReadOnly(): guaranteed to be available after the end of |
366 | the archive has been reached, i.e. after GetNextEntry() returns @NULL and | |
367 | Eof() is @true | |
36c9828f | 368 | |
98ba1eee FM |
369 | This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully |
370 | preserve entries' meta-data. No matter what order the meta-data occurs | |
371 | within the archive, the input stream will always have read it before the output | |
372 | stream must write it. | |
373 | ||
374 | @subsection overview_arc_noseek_notifier wxArchiveNotifier | |
36c9828f | 375 | |
15b6757b | 376 | Notifier objects can be used to get a notification whenever an input |
98ba1eee | 377 | stream updates a wxArchiveEntry object's data via the weak reference mechanism. |
e0a47918 | 378 | |
15b6757b FM |
379 | Consider the following code which renames an entry in an archive. |
380 | This is the usual way to modify an entry's meta-data: simply set the | |
98ba1eee | 381 | required field before writing it with wxArchiveOutputStream::CopyEntry(): |
36c9828f | 382 | |
15b6757b | 383 | @code |
98ba1eee FM |
384 | auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in)); |
385 | auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out)); | |
e0a47918 | 386 | auto_ptr<wxArchiveEntry> entry; |
36c9828f | 387 | |
98ba1eee | 388 | outarc->CopyArchiveMetaData(*arc); |
36c9828f | 389 | |
98ba1eee FM |
390 | while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) { |
391 | if (entry->GetName() == from) | |
392 | entry->SetName(to); | |
393 | if (!outarc->CopyEntry(entry.release(), *arc)) | |
15b6757b FM |
394 | break; |
395 | } | |
36c9828f | 396 | |
98ba1eee | 397 | bool success = arc->Eof() && outarc->Close(); |
15b6757b | 398 | @endcode |
36c9828f | 399 | |
15b6757b | 400 | However, for non-seekable streams, this technique cannot be used for |
98ba1eee FM |
401 | fields such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when |
402 | wxArchiveInputStream::GetNextEntry() returns. | |
403 | ||
404 | In this case a wxArchiveNotifier can be used: | |
36c9828f | 405 | |
15b6757b FM |
406 | @code |
407 | class MyNotifier : public wxArchiveNotifier | |
408 | { | |
409 | public: | |
98ba1eee | 410 | void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); } |
15b6757b FM |
411 | }; |
412 | @endcode | |
36c9828f | 413 | |
98ba1eee FM |
414 | The meta-data changes are done in your notifier's wxArchiveNotifier::OnEntryUpdated() |
415 | method, then wxArchiveEntry::SetNotifier() is called before CopyEntry(): | |
36c9828f | 416 | |
15b6757b | 417 | @code |
98ba1eee FM |
418 | auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in)); |
419 | auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out)); | |
e0a47918 | 420 | auto_ptr<wxArchiveEntry> entry; |
15b6757b | 421 | MyNotifier notifier; |
36c9828f | 422 | |
98ba1eee | 423 | outarc->CopyArchiveMetaData(*arc); |
36c9828f | 424 | |
98ba1eee FM |
425 | while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) { |
426 | entry->SetNotifier(notifier); | |
427 | if (!outarc->CopyEntry(entry.release(), *arc)) | |
15b6757b FM |
428 | break; |
429 | } | |
36c9828f | 430 | |
98ba1eee | 431 | bool success = arc->Eof() && outarc->Close(); |
15b6757b | 432 | @endcode |
36c9828f | 433 | |
15b6757b FM |
434 | SetNotifier() calls OnEntryUpdated() immediately, then the input |
435 | stream calls it again whenever it sets more fields in the entry. Since | |
436 | OnEntryUpdated() will be called at least once, this technique always | |
437 | works even when it is not strictly necessary to use it. For example, | |
438 | changing the entry name can be done this way too and it works on seekable | |
439 | streams as well as non-seekable. | |
36c9828f | 440 | |
e0a47918 | 441 | */ |
36c9828f | 442 |