docs/latex/wx/arc.tex

   1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2 %% Name:        arc.tex
   3 %% Purpose:     Overview of the archive classes
   4 %% Author:      M.J.Wetherell
   5 %% RCS-ID:      $Id$
   6 %% Copyright:   2004 M.J.Wetherell
   7 %% License:     wxWindows license
   8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   9
  10 \section{Archive formats such as zip}\label{wxarc}
  11
  12 The archive classes handle archive formats such as zip, tar, rar and cab.
  13 Currently \helpref{wxZip}{wxzipinputstream}
  14 and \helpref{wxTar}{wxtarinputstream} classes are included.
  15
  16 For each archive type, there are the following classes (using zip here
  17 as an example):
  18
  19 \begin{twocollist}\twocolwidtha{4cm}
  20 \twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
  21 \twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
  22 \twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
  23 entry (e.g. filename, timestamp, etc.)}
  24 \end{twocollist}
  25
  26 There are also abstract wxArchive classes that can be used to write code
  27 that can handle any of the archive types,
  28 see '\helpref{Generic archive programming}{wxarcgeneric}'.
  29 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
  30 can handle archive files in a generic way.
  31
  32 The classes are designed to handle archives on both seekable streams such
  33 as disk files, and non-seekable streams such as pipes and sockets
  34 (see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
  35
  36 \wxheading{See also}
  37
  38 \helpref{wxFileSystem}{fs}
  39
  40
  41 \subsection{Creating an archive}\label{wxarccreate}
  42
  43 \helpref{Archive formats such as zip}{wxarc}
  44
  45 Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
  46 create each new entry in the archive, then write the entry's data.
  47 Another call to PutNextEntry() closes the current entry and begins the next.
  48
  49 For example:
  50
  51 \begin{verbatim}
  52     wxFFileOutputStream out(_T("test.zip"));
  53     wxZipOutputStream zip(out);
  54     wxTextOutputStream txt(zip);
  55     wxString sep(wxFileName::GetPathSeparator());
  56
  57     zip.PutNextEntry(_T("entry1.txt"));
  58     txt << _T("Some text for entry1.txt\n");
  59
  60     zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
  61     txt << _T("Some text for subdir/entry2.txt\n");
  62
  63 \end{verbatim}
  64
  65 The name of each entry can be a full path, which makes it possible to
  66 store entries in subdirectories.
  67
  68
  69 \subsection{Extracting an archive}\label{wxarcextract}
  70
  71 \helpref{Archive formats such as zip}{wxarc}
  72
  73 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
  74 to an entry object containing the meta-data for the next entry in the archive
  75 (and gives away ownership). Reading from the input stream then returns the
  76 entry's data. Eof() becomes true after an attempt has been made to read past
  77 the end of the entry's data.
  78
  79 When there are no more entries, GetNextEntry() returns NULL and sets Eof().
  80
  81 \begin{verbatim}
  82     auto_ptr<wxZipEntry> entry;
  83
  84     wxFFileInputStream in(_T("test.zip"));
  85     wxZipInputStream zip(in);
  86
  87     while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
  88     {
  89         // access meta-data
  90         wxString name = entry->GetName();
  91         // read 'zip' to access the entry's data
  92     }
  93
  94 \end{verbatim}
  95
  96
  97 \subsection{Modifying an archive}\label{wxarcmodify}
  98
  99 \helpref{Archive formats such as zip}{wxarc}
 100
 101 To modify an existing archive, write a new copy of the archive to a new file,
 102 making any necessary changes along the way and transferring any unchanged
 103 entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
 104 For archive types which compress entry data, CopyEntry() is likely to be
 105 much more efficient than transferring the data using Read() and Write()
 106 since it will copy them without decompressing and recompressing them.
 107
 108 In general modifications are not possible without rewriting the archive,
 109 though it may be possible in some limited cases. Even then, rewriting the
 110 archive is usually a better choice since a failure can be handled without
 111 losing the whole
 112 archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
 113 be helpful to do this.
 114
 115 For example to delete all entries matching the pattern "*.txt":
 116
 117 \begin{verbatim}
 118     auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
 119     wxTempFileOutputStream out(_T("test.zip"));
 120
 121     wxZipInputStream inzip(*in);
 122     wxZipOutputStream outzip(out);
 123
 124     auto_ptr<wxZipEntry> entry;
 125
 126     // transfer any meta-data for the archive as a whole (the zip comment
 127     // in the case of zip)
 128     outzip.CopyArchiveMetaData(inzip);
 129
 130     // call CopyEntry for each entry except those matching the pattern
 131     while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
 132         if (!entry->GetName().Matches(_T("*.txt")))
 133             if (!outzip.CopyEntry(entry.release(), inzip))
 134                 break;
 135
 136     // close the input stream by releasing the pointer to it, do this
 137     // before closing the output stream so that the file can be replaced
 138     in.reset();
 139
 140     // you can check for success as follows
 141     bool success = inzip.Eof() && outzip.Close() && out.Commit();
 142
 143 \end{verbatim}
 144
 145
 146 \subsection{Looking up an archive entry by name}\label{wxarcbyname}
 147
 148 \helpref{Archive formats such as zip}{wxarc}
 149
 150 Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
 151 more convenient for accessing archive entries by name.
 152
 153 To open just one entry in an archive, the most efficient way is
 154 to simply search for it linearly by calling
 155  \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
 156 required entry is found. This works both for archives on seekable and
 157 non-seekable streams.
 158
 159 The format of filenames in the archive is likely to be different
 160 from the local filename format. For example zips and tars use
 161 unix style names, with forward slashes as the path separator,
 162 and absolute paths are not allowed. So if on Windows the file
 163 "C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
 164 the entry back \helpref{GetName()}{wxarchiveentryname} will return
 165 "MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
 166 and back has lost some information.
 167
 168 So to avoid ambiguity when searching for an entry matching a local name,
 169 it is better to convert the local name to the archive's internal format
 170 and search for that:
 171
 172 \begin{verbatim}
 173     auto_ptr<wxZipEntry> entry;
 174
 175     // convert the local name we are looking for into the internal format
 176     wxString name = wxZipEntry::GetInternalName(localname);
 177
 178     // open the zip
 179     wxFFileInputStream in(_T("test.zip"));
 180     wxZipInputStream zip(in);
 181
 182     // call GetNextEntry() until the required internal name is found
 183     do {
 184         entry.reset(zip.GetNextEntry());
 185     }
 186     while (entry.get() != NULL && entry->GetInternalName() != name);
 187
 188     if (entry.get() != NULL) {
 189         // read the entry's data...
 190     }
 191
 192 \end{verbatim}
 193
 194 To access several entries randomly, it is most efficient to transfer the
 195 entire catalogue of entries to a container such as a std::map or a
 196  \helpref{wxHashMap}{wxhashmap}, then entries looked up by name can be
 197 opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
 198
 199 \begin{verbatim}
 200     WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
 201     ZipCatalog::iterator it;
 202     wxZipEntry *entry;
 203     ZipCatalog cat;
 204
 205     // open the zip
 206     wxFFileInputStream in(_T("test.zip"));
 207     wxZipInputStream zip(in);
 208
 209     // load the zip catalog
 210     while ((entry = zip.GetNextEntry()) != NULL) {
 211         wxZipEntry*& current = cat[entry->GetInternalName()];
 212         // some archive formats can have multiple entries with the same name
 213         // (e.g. tar) though it is an error in the case of zip
 214         delete current;
 215         current = entry;
 216     }
 217
 218     // open an entry by name
 219     if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
 220         zip.OpenEntry(*it->second);
 221         // ... now read entry's data
 222     }
 223
 224 \end{verbatim}
 225
 226 To open more than one entry simultaneously you need more than one
 227 underlying stream on the same archive:
 228
 229 \begin{verbatim}
 230     // opening another entry without closing the first requires another
 231     // input stream for the same file
 232     wxFFileInputStream in2(_T("test.zip"));
 233     wxZipInputStream zip2(in2);
 234     if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
 235         zip2.OpenEntry(*it->second);
 236
 237 \end{verbatim}
 238
 239
 240 \subsection{Generic archive programming}\label{wxarcgeneric}
 241
 242 \helpref{Archive formats such as zip}{wxarc}
 243
 244 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
 245 can handle archive files in a generic way.
 246
 247 The specific archive classes, such as the wxZip classes, inherit from
 248 the following abstract classes which can be used to write code that can
 249 handle any of the archive types:
 250
 251 \begin{twocollist}\twocolwidtha{5cm}
 252 \twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
 253 \twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
 254 \twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
 255 entry (e.g. filename)}
 256 \end{twocollist}
 257
 258 In order to be able to write generic code it's necessary to be able to create
 259 instances of the classes without knowing which archive type is being used.
 260 To allow this there is a class factory for each archive type, derived from
 261  \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, that can create
 262 the other classes.
 263
 264 For example, given {\it wxArchiveClassFactory* factory}, streams and
 265 entries can be created like this:
 266
 267 \begin{verbatim}
 268     // create streams without knowing their type
 269     auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
 270     auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 271
 272     // create an empty entry object
 273     auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
 274
 275 \end{verbatim}
 276
 277 For the factory itself, the static member
 278  \helpref{wxArchiveClassFactory::Find()}{wxarchiveclassfactoryfind}
 279 can be used to find a class factory that can handle a given file
 280 extension or mime type. For example, given {\it filename}:
 281
 282 \begin{verbatim}
 283     const wxArchiveClassFactory *factory;
 284     factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
 285
 286     if (factory)
 287         stream = factory->NewStream(new wxFFileInputStream(filename));
 288
 289 \end{verbatim}
 290
 291 {\it Find} does not give away ownership of the returned pointer, so it
 292 does not need to be deleted.
 293
 294 There are similar class factories for the filter streams that handle the
 295 compression and decompression of a single stream, such as wxGzipInputStream.
 296 These can be found using
 297  \helpref{wxFilterClassFactory::Find()}{wxfilterclassfactoryfind}.
 298
 299 For example, to list the contents of archive {\it filename}:
 300
 301 \begin{verbatim}
 302     auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
 303
 304     if (in->IsOk())
 305     {
 306         // look for a filter handler, e.g. for '.gz'
 307         const wxFilterClassFactory *fcf;
 308         fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
 309         if (fcf) {
 310             in.reset(fcf->NewStream(in.release()));
 311             // pop the extension, so if it was '.tar.gz' it is now just '.tar'
 312             filename = fcf->PopExtension(filename);
 313         }
 314
 315         // look for an archive handler, e.g. for '.zip' or '.tar'
 316         const wxArchiveClassFactory *acf;
 317         acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
 318         if (acf) {
 319             auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
 320             auto_ptr<wxArchiveEntry> entry;
 321
 322             // list the contents of the archive
 323             while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
 324                 std::wcout << entry->GetName().c_str() << "\n";
 325         }
 326         else {
 327             wxLogError(_T("can't handle '%s'"), filename.c_str());
 328         }
 329     }
 330
 331 \end{verbatim}
 332
 333
 334 \subsection{Archives on non-seekable streams}\label{wxarcnoseek}
 335
 336 \helpref{Archive formats such as zip}{wxarc}
 337
 338 In general, handling archives on non-seekable streams is done in the same
 339 way as for seekable streams, with a few caveats.
 340
 341 The main limitation is that accessing entries randomly using
 342  \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
 343 is not possible; the entries can only be accessed sequentially in the order
 344 they are stored within the archive.
 345
 346 For each archive type, there will also be other limitations which will
 347 depend on the order the entries' meta-data is stored within the archive.
 348 These are not too difficult to deal with, and are outlined below.
 349
 350 \wxheading{PutNextEntry and the entry size}
 351
 352 When writing archives, some archive formats store the entry size before
 353 the entry's data (tar has this limitation, zip doesn't). In this case
 354 the entry's size must be passed to
 355  \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
 356 occurs.
 357
 358 This is only an issue on non-seekable streams, since otherwise the archive
 359 output stream can seek back and fix up the header once the size of the
 360 entry is known.
 361
 362 For generic programming, one way to handle this is to supply the size
 363 whenever it is known, and rely on the error message from the output
 364 stream when the operation is not supported.
 365
 366 \wxheading{GetNextEntry and the weak reference mechanism}
 367
 368 Some archive formats do not store all of an entry's meta-data before the
 369 entry's data (zip is an example). In this case, when reading from a
 370 non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
 371 can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
 372 object - not all the fields are set.
 373
 374 The input stream then keeps a weak reference to the entry object and
 375 updates it when more meta-data becomes available. A weak reference is
 376 one that does not prevent you from deleting the wxArchiveEntry object - the
 377 input stream only attempts to update it if it is still around.
 378
 379 The documentation for each archive entry type gives the details
 380 of what meta-data becomes available and when. For generic programming,
 381 when the worst case must be assumed, you can rely on all the fields
 382 of wxArchiveEntry being fully populated when GetNextEntry() returns,
 383 with the following exceptions:
 384
 385 \begin{twocollist}\twocolwidtha{3cm}
 386 \twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
 387 available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
 388 or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
 389 \twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
 390 be available after the end of the archive has been reached, i.e. after
 391 GetNextEntry() returns NULL and Eof() is true}
 392 \end{twocollist}
 393
 394 This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
 395 to always fully preserve entries' meta-data. No matter what order
 396 the meta-data occurs within the archive, the input stream will always
 397 have read it before the output stream must write it.
 398
 399 \wxheading{wxArchiveNotifier}
 400
 401 Notifier objects can be used to get a notification whenever an input
 402 stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
 403 via the weak reference mechanism.
 404
 405 Consider the following code which renames an entry in an archive.
 406 This is the usual way to modify an entry's meta-data: simply set the
 407 required field before writing it with
 408  \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
 409
 410 \begin{verbatim}
 411     auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
 412     auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 413     auto_ptr<wxArchiveEntry> entry;
 414
 415     outarc->CopyArchiveMetaData(*arc);
 416
 417     while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
 418         if (entry->GetName() == from)
 419             entry->SetName(to);
 420         if (!outarc->CopyEntry(entry.release(), *arc))
 421             break;
 422     }
 423
 424     bool success = arc->Eof() && outarc->Close();
 425
 426 \end{verbatim}
 427
 428 However, for non-seekable streams, this technique cannot be used for
 429 fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
 430 which are not necessarily set when
 431  \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
 432 this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
 433
 434 \begin{verbatim}
 435 class MyNotifier : public wxArchiveNotifier
 436 {
 437 public:
 438     void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
 439 };
 440
 441 \end{verbatim}
 442
 443 The meta-data changes are done in your notifier's
 444  \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
 445 then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
 446 CopyEntry():
 447
 448 \begin{verbatim}
 449     auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
 450     auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 451     auto_ptr<wxArchiveEntry> entry;
 452     MyNotifier notifier;
 453
 454     outarc->CopyArchiveMetaData(*arc);
 455
 456     while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
 457         entry->SetNotifier(notifier);
 458         if (!outarc->CopyEntry(entry.release(), *arc))
 459             break;
 460     }
 461
 462     bool success = arc->Eof() && outarc->Close();
 463
 464 \end{verbatim}
 465
 466 SetNotifier() calls OnEntryUpdated() immediately, then the input
 467 stream calls it again whenever it sets more fields in the entry. Since
 468 OnEntryUpdated() will be called at least once, this technique always
 469 works even when it is not strictly necessary to use it. For example,
 470 changing the entry name can be done this way too and it works on seekable
 471 streams as well as non-seekable.
 472