docs/doxygen/overviews/archive.h

   1 /////////////////////////////////////////////////////////////////////////////
   2 // Name:        archive.h
   3 // Purpose:     topic overview
   4 // Author:      wxWidgets team
   5 // RCS-ID:      $Id$
   6 // Licence:     wxWindows licence
   7 /////////////////////////////////////////////////////////////////////////////
   8
   9 /**
  10
  11 @page overview_archive Archive Formats
  12
  13 @tableofcontents
  14
  15 The archive classes handle archive formats such as zip, tar, rar and cab.
  16 Currently wxZip, wxTar and wxZlib classes are included.
  17
  18 For each archive type, there are the following classes (using zip here as an
  19 example):
  20
  21 @li wxZipInputStream: Input stream
  22 @li wxZipOutputStream: Output stream
  23 @li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
  24
  25 There are also abstract wxArchive classes that can be used to write code that
  26 can handle any of the archive types, see @ref overview_archive_generic.
  27
  28 Also see wxFileSystem for a higher level interface that can handle archive
  29 files in a generic way.
  30
  31 The classes are designed to handle archives on both seekable streams such as
  32 disk files, and non-seekable streams such as pipes and sockets (see
  33 @ref overview_archive_noseek).
  34
  35
  36
  37 @section overview_archive_create Creating an Archive
  38
  39 Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
  40 archive, then write the entry's data. Another call to PutNextEntry() closes the
  41 current entry and begins the next. For example:
  42
  43 @code
  44 wxFFileOutputStream out(wxT("test.zip"));
  45 wxZipOutputStream zip(out);
  46 wxTextOutputStream txt(zip);
  47 wxString sep(wxFileName::GetPathSeparator());
  48
  49 zip.PutNextEntry(wxT("entry1.txt"));
  50 txt << wxT("Some text for entry1.txt\n");
  51
  52 zip.PutNextEntry(wxT("subdir") + sep + wxT("entry2.txt"));
  53 txt << wxT("Some text for subdir/entry2.txt\n");
  54 @endcode
  55
  56 The name of each entry can be a full path, which makes it possible to store
  57 entries in subdirectories.
  58
  59
  60 @section overview_archive_extract Extracting an Archive
  61
  62 wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
  63 containing the meta-data for the next entry in the archive (and gives away
  64 ownership).
  65
  66 Reading from the input stream then returns the entry's data. Eof() becomes
  67 @true after an attempt has been made to read past the end of the entry's data.
  68
  69 When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
  70
  71 @code
  72 auto_ptr<wxZipEntry> entry;
  73
  74 wxFFileInputStream in(wxT("test.zip"));
  75 wxZipInputStream zip(in);
  76
  77 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
  78 {
  79     // access meta-data
  80     wxString name = entry->GetName();
  81     // read 'zip' to access the entry's data
  82 }
  83 @endcode
  84
  85
  86
  87 @section overview_archive_modify Modifying an Archive
  88
  89 To modify an existing archive, write a new copy of the archive to a new file,
  90 making any necessary changes along the way and transferring any unchanged
  91 entries using wxArchiveOutputStream::CopyEntry().
  92
  93 For archive types which compress entry data, CopyEntry() is likely to be much
  94 more efficient than transferring the data using Read() and Write() since it
  95 will copy them without decompressing and recompressing them.
  96
  97 In general modifications are not possible without rewriting the archive, though
  98 it may be possible in some limited cases. Even then, rewriting the archive is
  99 usually a better choice since a failure can be handled without losing the whole
 100 archive. wxTempFileOutputStream can be helpful to do this.
 101
 102 For example, to delete all entries matching the pattern "*.txt":
 103
 104 @code
 105 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(wxT("test.zip")));
 106 wxTempFileOutputStream out(wxT("test.zip"));
 107
 108 wxZipInputStream inzip(*in);
 109 wxZipOutputStream outzip(out);
 110
 111 auto_ptr<wxZipEntry> entry;
 112
 113 // transfer any meta-data for the archive as a whole (the zip comment
 114 // in the case of zip)
 115 outzip.CopyArchiveMetaData(inzip);
 116
 117 // call CopyEntry for each entry except those matching the pattern
 118 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
 119     if (!entry->GetName().Matches(wxT("*.txt")))
 120         if (!outzip.CopyEntry(entry.release(), inzip))
 121             break;
 122
 123 // close the input stream by releasing the pointer to it, do this
 124 // before closing the output stream so that the file can be replaced
 125 in.reset();
 126
 127 // you can check for success as follows
 128 bool success = inzip.Eof() && outzip.Close() && out.Commit();
 129 @endcode
 130
 131
 132
 133 @section overview_archive_byname Looking Up an Archive Entry by Name
 134
 135 Also see wxFileSystem for a higher level interface that is more convenient for
 136 accessing archive entries by name.
 137
 138 To open just one entry in an archive, the most efficient way is to simply
 139 search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
 140 the required entry is found. This works both for archives on seekable and
 141 non-seekable streams.
 142
 143 The format of filenames in the archive is likely to be different from the local
 144 filename format. For example zips and tars use unix style names, with forward
 145 slashes as the path separator, and absolute paths are not allowed. So if on
 146 Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
 147 back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
 148 into the internal format and back has lost some information.
 149
 150 So to avoid ambiguity when searching for an entry matching a local name, it is
 151 better to convert the local name to the archive's internal format and search
 152 for that:
 153
 154 @code
 155 auto_ptr<wxZipEntry> entry;
 156
 157 // convert the local name we are looking for into the internal format
 158 wxString name = wxZipEntry::GetInternalName(localname);
 159
 160 // open the zip
 161 wxFFileInputStream in(wxT("test.zip"));
 162 wxZipInputStream zip(in);
 163
 164 // call GetNextEntry() until the required internal name is found
 165 do
 166 {
 167     entry.reset(zip.GetNextEntry());
 168 }
 169 while (entry.get() != NULL && entry->GetInternalName() != name);
 170
 171 if (entry.get() != NULL)
 172 {
 173     // read the entry's data...
 174 }
 175 @endcode
 176
 177 To access several entries randomly, it is most efficient to transfer the entire
 178 catalogue of entries to a container such as a std::map or a wxHashMap; entries
 179 looked up by name can then be opened using the
 180 wxArchiveInputStream::OpenEntry() method.
 181
 182 @code
 183 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
 184 ZipCatalog::iterator it;
 185 wxZipEntry *entry;
 186 ZipCatalog cat;
 187
 188 // open the zip
 189 wxFFileInputStream in(wxT("test.zip"));
 190 wxZipInputStream zip(in);
 191
 192 // load the zip catalog
 193 while ((entry = zip.GetNextEntry()) != NULL)
 194 {
 195     wxZipEntry*& current = cat[entry->GetInternalName()];
 196     // some archive formats can have multiple entries with the same name
 197     // (e.g. tar) though it is an error in the case of zip
 198     delete current;
 199     current = entry;
 200 }
 201
 202 // open an entry by name
 203 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
 204 {
 205     zip.OpenEntry(*it->second);
 206     // ... now read entry's data
 207 }
 208 @endcode
 209
 210 To open more than one entry simultaneously you need more than one underlying
 211 stream on the same archive:
 212
 213 @code
 214 // opening another entry without closing the first requires another
 215 // input stream for the same file
 216 wxFFileInputStream in2(wxT("test.zip"));
 217 wxZipInputStream zip2(in2);
 218 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
 219     zip2.OpenEntry(*it->second);
 220 @endcode
 221
 222
 223
 224 @section overview_archive_generic Generic Archive Programming
 225
 226 Also see wxFileSystem for a higher level interface that can handle archive
 227 files in a generic way.
 228
 229 The specific archive classes, such as the wxZip classes, inherit from the
 230 following abstract classes which can be used to write code that can handle any
 231 of the archive types:
 232
 233 @li wxArchiveInputStream: Input stream
 234 @li wxArchiveOutputStream: Output stream
 235 @li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
 236
 237 In order to be able to write generic code it's necessary to be able to create
 238 instances of the classes without knowing which archive type is being used.
 239
 240 To allow this there is a class factory for each archive type, derived from
 241 wxArchiveClassFactory, that can create the other classes.
 242
 243 For example, given wxArchiveClassFactory* factory, streams and entries can be
 244 created like this:
 245
 246 @code
 247 // create streams without knowing their type
 248 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
 249 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 250
 251 // create an empty entry object
 252 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
 253 @endcode
 254
 255 For the factory itself, the static member wxArchiveClassFactory::Find() can be
 256 used to find a class factory that can handle a given file extension or mime
 257 type. For example, given @e filename:
 258
 259 @code
 260 const wxArchiveClassFactory *factory;
 261 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
 262
 263 if (factory)
 264     stream = factory->NewStream(new wxFFileInputStream(filename));
 265 @endcode
 266
 267 @e Find() does not give away ownership of the returned pointer, so it does not
 268 need to be deleted.
 269
 270 There are similar class factories for the filter streams that handle the
 271 compression and decompression of a single stream, such as wxGzipInputStream.
 272 These can be found using wxFilterClassFactory::Find().
 273
 274 For example, to list the contents of archive @e filename:
 275
 276 @code
 277 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
 278
 279 if (in->IsOk())
 280 {
 281     // look for a filter handler, e.g. for '.gz'
 282     const wxFilterClassFactory *fcf;
 283     fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
 284     if (fcf)
 285     {
 286         in.reset(fcf->NewStream(in.release()));
 287         // pop the extension, so if it was '.tar.gz' it is now just '.tar'
 288         filename = fcf->PopExtension(filename);
 289     }
 290
 291     // look for an archive handler, e.g. for '.zip' or '.tar'
 292     const wxArchiveClassFactory *acf;
 293     acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
 294     if (acf)
 295     {
 296         auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
 297         auto_ptr<wxArchiveEntry> entry;
 298
 299         // list the contents of the archive
 300         while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
 301             std::wcout << entry->GetName().c_str() << "\n";
 302     }
 303     else
 304     {
 305         wxLogError(wxT("can't handle '%s'"), filename.c_str());
 306     }
 307 }
 308 @endcode
 309
 310
 311
 312 @section overview_archive_noseek Archives on Non-Seekable Streams
 313
 314 In general, handling archives on non-seekable streams is done in the same way
 315 as for seekable streams, with a few caveats.
 316
 317 The main limitation is that accessing entries randomly using
 318 wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
 319 accessed sequentially in the order they are stored within the archive.
 320
 321 For each archive type, there will also be other limitations which will depend
 322 on the order the entries' meta-data is stored within the archive. These are not
 323 too difficult to deal with, and are outlined below.
 324
 325 @subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
 326
 327 When writing archives, some archive formats store the entry size before the
 328 entry's data (tar has this limitation, zip doesn't). In this case the entry's
 329 size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
 330 occurs.
 331
 332 This is only an issue on non-seekable streams, since otherwise the archive
 333 output stream can seek back and fix up the header once the size of the entry is
 334 known.
 335
 336 For generic programming, one way to handle this is to supply the size whenever
 337 it is known, and rely on the error message from the output stream when the
 338 operation is not supported.
 339
 340 @subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
 341
 342 Some archive formats do not store all an entry's meta-data before the entry's
 343 data (zip is an example). In this case, when reading from a non-seekable
 344 stream, wxArchiveInputStream::GetNextEntry() can only return a partially
 345 populated wxArchiveEntry object - not all the fields are set.
 346
 347 The input stream then keeps a weak reference to the entry object and updates it
 348 when more meta-data becomes available. A weak reference is one that does not
 349 prevent you from deleting the wxArchiveEntry object - the input stream only
 350 attempts to update it if it is still around.
 351
 352 The documentation for each archive entry type gives the details of what
 353 meta-data becomes available and when. For generic programming, when the worst
 354 case must be assumed, you can rely on all the fields of wxArchiveEntry being
 355 fully populated when GetNextEntry() returns, with the following exceptions:
 356
 357 @li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
 358     been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
 359     has been called.
 360 @li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
 361     the archive has been reached, i.e. after GetNextEntry() returns @NULL and
 362     Eof() is @true.
 363
 364 This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
 365 preserve entries' meta-data. No matter what order the meta-data occurs
 366 within the archive, the input stream will always have read it before the output
 367 stream must write it.
 368
 369 @subsection overview_archive_noseek_notifier wxArchiveNotifier
 370
 371 Notifier objects can be used to get a notification whenever an input stream
 372 updates a wxArchiveEntry object's data via the weak reference mechanism.
 373
 374 Consider the following code which renames an entry in an archive. This is the
 375 usual way to modify an entry's meta-data: simply set the required field before
 376 writing it with wxArchiveOutputStream::CopyEntry():
 377
 378 @code
 379 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
 380 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 381 auto_ptr<wxArchiveEntry> entry;
 382
 383 outarc->CopyArchiveMetaData(*arc);
 384
 385 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
 386 {
 387     if (entry->GetName() == from)
 388         entry->SetName(to);
 389     if (!outarc->CopyEntry(entry.release(), *arc))
 390         break;
 391 }
 392
 393 bool success = arc->Eof() && outarc->Close();
 394 @endcode
 395
 396 However, for non-seekable streams, this technique cannot be used for fields
 397 such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
 398 wxArchiveInputStream::GetNextEntry() returns.
 399
 400 In this case a wxArchiveNotifier can be used:
 401
 402 @code
 403 class MyNotifier : public wxArchiveNotifier
 404 {
 405 public:
 406     void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
 407 };
 408 @endcode
 409
 410 The meta-data changes are done in your notifier's
 411 wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
 412 is called before CopyEntry():
 413
 414 @code
 415 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
 416 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 417 auto_ptr<wxArchiveEntry> entry;
 418 MyNotifier notifier;
 419
 420 outarc->CopyArchiveMetaData(*arc);
 421
 422 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
 423 {
 424     entry->SetNotifier(notifier);
 425     if (!outarc->CopyEntry(entry.release(), *arc))
 426         break;
 427 }
 428
 429 bool success = arc->Eof() && outarc->Close();
 430 @endcode
 431
 432 SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
 433 it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
 434 be called at least once, this technique always works even when it is not
 435 strictly necessary to use it. For example, changing the entry name can be done
 436 this way too and it works on seekable streams as well as non-seekable.
 437
 438 */
 439