docs/latex/wx/arc.tex

   1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   2 %% Name:        arc.tex
   3 %% Purpose:     Overview of the archive classes
   4 %% Author:      M.J.Wetherell
   5 %% RCS-ID:      $Id$
   6 %% Copyright:   2004 M.J.Wetherell
   7 %% License:     wxWindows license
   8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   9
  10 \section{Archive formats such as zip}\label{wxarc}
  11
  12 The archive classes handle archive formats such as zip, tar, rar and cab.
  13 Currently wxZip and wxTar classes are included.
  14
  15 For each archive type, there are the following classes (using zip here
  16 as an example):
  17
  18 \begin{twocollist}\twocolwidtha{4cm}
  19 \twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
  20 \twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
  21 \twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
  22 entry (e.g. filename, timestamp, etc.)}
  23 \end{twocollist}
  24
  25 There are also abstract wxArchive classes that can be used to write code
  26 that can handle any of the archive types,
  27 see '\helpref{Generic archive programming}{wxarcgeneric}'.
  28 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
  29 can handle archive files in a generic way.
  30
  31 The classes are designed to handle archives on both seekable streams such
  32 as disk files, and non-seekable streams such as pipes and sockets
  33 (see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
  34
  35 \wxheading{See also}
  36
  37 \helpref{wxFileSystem}{fs}
  38
  39
  40 \subsection{Creating an archive}\label{wxarccreate}
  41
  42 \helpref{Archive formats such as zip}{wxarc}
  43
  44 Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
  45 create each new entry in the archive, then write the entry's data.
  46 Another call to PutNextEntry() closes the current entry and begins the next.
  47
  48 For example:
  49
  50 \begin{verbatim}
  51     wxFFileOutputStream out(_T("test.zip"));
  52     wxZipOutputStream zip(out);
  53     wxTextOutputStream txt(zip);
  54     wxString sep(wxFileName::GetPathSeparator());
  55
  56     zip.PutNextEntry(_T("entry1.txt"));
  57     txt << _T("Some text for entry1.txt\n");
  58
  59     zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
  60     txt << _T("Some text for subdir/entry2.txt\n");
  61
  62 \end{verbatim}
  63
  64 The name of each entry can be a full path, which makes it possible to
  65 store entries in subdirectories.
  66
  67
  68 \subsection{Extracting an archive}\label{wxarcextract}
  69
  70 \helpref{Archive formats such as zip}{wxarc}
  71
  72 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
  73 to an entry object containing the meta-data for the next entry in the archive
  74 (and gives away ownership). Reading from the input stream then returns the
  75 entry's data. Eof() becomes true after an attempt has been made to read past
  76 the end of the entry's data.
  77
  78 When there are no more entries, GetNextEntry() returns NULL and sets Eof().
  79
  80 \begin{verbatim}
  81     auto_ptr<wxZipEntry> entry;
  82
  83     wxFFileInputStream in(_T("test.zip"));
  84     wxZipInputStream zip(in);
  85
  86     while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
  87     {
  88         // access meta-data
  89         wxString name = entry->GetName();
  90         // read 'zip' to access the entry's data
  91     }
  92
  93 \end{verbatim}
  94
  95
  96 \subsection{Modifying an archive}\label{wxarcmodify}
  97
  98 \helpref{Archive formats such as zip}{wxarc}
  99
 100 To modify an existing archive, write a new copy of the archive to a new file,
 101 making any necessary changes along the way and transferring any unchanged
 102 entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
 103 For archive types which compress entry data, CopyEntry() is likely to be
 104 much more efficient than transferring the data using Read() and Write()
 105 since it will copy them without decompressing and recompressing them.
 106
 107 In general modifications are not possible without rewriting the archive,
 108 though it may be possible in some limited cases. Even then, rewriting the
 109 archive is usually a better choice since a failure can be handled without
 110 losing the whole
 111 archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
 112 be helpful to do this.
 113
 114 For example to delete all entries matching the pattern "*.txt":
 115
 116 \begin{verbatim}
 117     auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
 118     wxTempFileOutputStream out(_T("test.zip"));
 119
 120     wxZipInputStream inzip(*in);
 121     wxZipOutputStream outzip(out);
 122
 123     auto_ptr<wxZipEntry> entry;
 124
 125     // transfer any meta-data for the archive as a whole (the zip comment
 126     // in the case of zip)
 127     outzip.CopyArchiveMetaData(inzip);
 128
 129     // call CopyEntry for each entry except those matching the pattern
 130     while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
 131         if (!entry->GetName().Matches(_T("*.txt")))
 132             if (!outzip.CopyEntry(entry.release(), inzip))
 133                 break;
 134
 135     // close the input stream by releasing the pointer to it, do this
 136     // before closing the output stream so that the file can be replaced
 137     in.reset();
 138
 139     // you can check for success as follows
 140     bool success = inzip.Eof() && outzip.Close() && out.Commit();
 141
 142 \end{verbatim}
 143
 144
 145 \subsection{Looking up an archive entry by name}\label{wxarcbyname}
 146
 147 \helpref{Archive formats such as zip}{wxarc}
 148
 149 Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
 150 more convenient for accessing archive entries by name.
 151
 152 To open just one entry in an archive, the most efficient way is
 153 to simply search for it linearly by calling
 154  \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
 155 required entry is found. This works both for archives on seekable and
 156 non-seekable streams.
 157
 158 The format of filenames in the archive is likely to be different
 159 from the local filename format. For example zips and tars use
 160 unix style names, with forward slashes as the path separator,
 161 and absolute paths are not allowed. So if on Windows the file
 162 "C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
 163 the entry back \helpref{GetName()}{wxarchiveentryname} will return
 164 "MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
 165 and back has lost some information.
 166
 167 So to avoid ambiguity when searching for an entry matching a local name,
 168 it is better to convert the local name to the archive's internal format
 169 and search for that:
 170
 171 \begin{verbatim}
 172     auto_ptr<wxZipEntry> entry;
 173
 174     // convert the local name we are looking for into the internal format
 175     wxString name = wxZipEntry::GetInternalName(localname);
 176
 177     // open the zip
 178     wxFFileInputStream in(_T("test.zip"));
 179     wxZipInputStream zip(in);
 180
 181     // call GetNextEntry() until the required internal name is found
 182     do {
 183         entry.reset(zip.GetNextEntry());
 184     }
 185     while (entry.get() != NULL && entry->GetInternalName() != name);
 186
 187     if (entry.get() != NULL) {
 188         // read the entry's data...
 189     }
 190
 191 \end{verbatim}
 192
 193 To access several entries randomly, it is most efficient to transfer the
 194 entire catalogue of entries to a container such as a std::map or a
 195  \helpref{wxHashMap}{wxhashmap}; entries looked up by name can then be
 196 opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
 197
 198 \begin{verbatim}
 199     WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
 200     ZipCatalog::iterator it;
 201     wxZipEntry *entry;
 202     ZipCatalog cat;
 203
 204     // open the zip
 205     wxFFileInputStream in(_T("test.zip"));
 206     wxZipInputStream zip(in);
 207
 208     // load the zip catalog
 209     while ((entry = zip.GetNextEntry()) != NULL) {
 210         wxZipEntry*& current = cat[entry->GetInternalName()];
 211         // some archive formats can have multiple entries with the same name
 212         // (e.g. tar) though it is an error in the case of zip
 213         delete current;
 214         current = entry;
 215     }
 216
 217     // open an entry by name
 218     if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
 219         zip.OpenEntry(*it->second);
 220         // ... now read entry's data
 221     }
 222
 223 \end{verbatim}
 224
 225 To open more than one entry simultaneously you need more than one
 226 underlying stream on the same archive:
 227
 228 \begin{verbatim}
 229     // opening another entry without closing the first requires another
 230     // input stream for the same file
 231     wxFFileInputStream in2(_T("test.zip"));
 232     wxZipInputStream zip2(in2);
 233     if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
 234         zip2.OpenEntry(*it->second);
 235
 236 \end{verbatim}
 237
 238
 239 \subsection{Generic archive programming}\label{wxarcgeneric}
 240
 241 \helpref{Archive formats such as zip}{wxarc}
 242
 243 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
 244 can handle archive files in a generic way.
 245
 246 The specific archive classes, such as the wxZip classes, inherit from
 247 the following abstract classes which can be used to write code that can
 248 handle any of the archive types:
 249
 250 \begin{twocollist}\twocolwidtha{5cm}
 251 \twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
 252 \twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
 253 \twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
 254 entry (e.g. filename)}
 255 \end{twocollist}
 256
 257 In order to be able to write generic code it's necessary to be able to create
 258 instances of the classes without knowing which archive type is being used.
 259 To allow this there is a class factory for each archive type, derived from
 260  \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, that can create
 261 the other classes.
 262
 263 For example, given {\it wxArchiveClassFactory* factory}, streams and
 264 entries can be created like this:
 265
 266 \begin{verbatim}
 267     // create streams without knowing their type
 268     auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
 269     auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 270
 271     // create an empty entry object
 272     auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
 273
 274 \end{verbatim}
 275
 276 For the factory itself, the static member
 277  \helpref{wxArchiveClassFactory::Find()}{wxarchiveclassfactoryfind}
 278 can be used to find a class factory that can handle a given file
 279 extension or mime type. For example, given {\it filename}:
 280
 281 \begin{verbatim}
 282     const wxArchiveClassFactory *factory;
 283     factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
 284
 285     if (factory)
 286         stream = factory->NewStream(new wxFFileInputStream(filename));
 287
 288 \end{verbatim}
 289
 290 {\it Find} does not give away ownership of the returned pointer, so it
 291 does not need to be deleted.
 292
 293 There are similar class factories for the filter streams that handle the
 294 compression and decompression of a single stream, such as wxGzipInputStream.
 295 These can be found using
 296  \helpref{wxFilterClassFactory::Find()}{wxfilterclassfactoryfind}.
 297
 298 For example, to list the contents of archive {\it filename}:
 299
 300 \begin{verbatim}
 301     auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
 302
 303     if (in->IsOk())
 304     {
 305         // look for a filter handler, e.g. for '.gz'
 306         const wxFilterClassFactory *fcf;
 307         fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
 308         if (fcf) {
 309             in.reset(fcf->NewStream(in.release()));
 310             // pop the extension, so if it was '.tar.gz' it is now just '.tar'
 311             filename = fcf->PopExtension(filename);
 312         }
 313
 314         // look for an archive handler, e.g. for '.zip' or '.tar'
 315         const wxArchiveClassFactory *acf;
 316         acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
 317         if (acf) {
 318             auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
 319             auto_ptr<wxArchiveEntry> entry;
 320
 321             // list the contents of the archive
 322             while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
 323                 std::wcout << entry->GetName().c_str() << "\n";
 324         }
 325         else {
 326             wxLogError(_T("can't handle '%s'"), filename.c_str());
 327         }
 328     }
 329
 330 \end{verbatim}
 331
 332
 333 \subsection{Archives on non-seekable streams}\label{wxarcnoseek}
 334
 335 \helpref{Archive formats such as zip}{wxarc}
 336
 337 In general, handling archives on non-seekable streams is done in the same
 338 way as for seekable streams, with a few caveats.
 339
 340 The main limitation is that accessing entries randomly using
 341  \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
 342 is not possible; the entries can only be accessed sequentially in the order
 343 they are stored within the archive.
 344
 345 For each archive type, there will also be other limitations which will
 346 depend on the order the entries' meta-data is stored within the archive.
 347 These are not too difficult to deal with, and are outlined below.
 348
 349 \wxheading{PutNextEntry and the entry size}
 350
 351 When writing archives, some archive formats store the entry size before
 352 the entry's data (tar has this limitation, zip doesn't). In this case
 353 the entry's size must be passed to
 354  \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
 355 occurs.
 356
 357 This is only an issue on non-seekable streams, since otherwise the archive
 358 output stream can seek back and fix up the header once the size of the
 359 entry is known.
 360
 361 For generic programming, one way to handle this is to supply the size
 362 whenever it is known, and rely on the error message from the output
 363 stream when the operation is not supported.
 364
 365 \wxheading{GetNextEntry and the weak reference mechanism}
 366
 367 Some archive formats do not store all of an entry's meta-data before the
 368 entry's data (zip is an example). In this case, when reading from a
 369 non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
 370 can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
 371 object - not all the fields are set.
 372
 373 The input stream then keeps a weak reference to the entry object and
 374 updates it when more meta-data becomes available. A weak reference is
 375 one that does not prevent you from deleting the wxArchiveEntry object - the
 376 input stream only attempts to update it if it is still around.
 377
 378 The documentation for each archive entry type gives the details
 379 of what meta-data becomes available and when. For generic programming,
 380 when the worst case must be assumed, you can rely on all the fields
 381 of wxArchiveEntry being fully populated when GetNextEntry() returns,
 382 with the following exceptions:
 383
 384 \begin{twocollist}\twocolwidtha{3cm}
 385 \twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
 386 available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
 387 or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
 388 \twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
 389 be available after the end of the archive has been reached, i.e. after
 390 GetNextEntry() returns NULL and Eof() is true}
 391 \end{twocollist}
 392
 393 This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
 394 to always fully preserve entries' meta-data. No matter what order
 395 the meta-data occurs within the archive, the input stream will always
 396 have read it before the output stream must write it.
 397
 398 \wxheading{wxArchiveNotifier}
 399
 400 Notifier objects can be used to get a notification whenever an input
 401 stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
 402 via the weak reference mechanism.
 403
 404 Consider the following code which renames an entry in an archive.
 405 This is the usual way to modify an entry's meta-data: simply set the
 406 required field before writing it with
 407  \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
 408
 409 \begin{verbatim}
 410     auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
 411     auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 412     auto_ptr<wxArchiveEntry> entry;
 413
 414     outarc->CopyArchiveMetaData(*arc);
 415
 416     while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
 417         if (entry->GetName() == from)
 418             entry->SetName(to);
 419         if (!outarc->CopyEntry(entry.release(), *arc))
 420             break;
 421     }
 422
 423     bool success = arc->Eof() && outarc->Close();
 424
 425 \end{verbatim}
 426
 427 However, for non-seekable streams, this technique cannot be used for
 428 fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
 429 which are not necessarily set when
 430  \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
 431 this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
 432
 433 \begin{verbatim}
 434 class MyNotifier : public wxArchiveNotifier
 435 {
 436 public:
 437     void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
 438 };
 439
 440 \end{verbatim}
 441
 442 The meta-data changes are done in your notifier's
 443  \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
 444 then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
 445 CopyEntry():
 446
 447 \begin{verbatim}
 448     auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
 449     auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
 450     auto_ptr<wxArchiveEntry> entry;
 451     MyNotifier notifier;
 452
 453     outarc->CopyArchiveMetaData(*arc);
 454
 455     while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
 456         entry->SetNotifier(notifier);
 457         if (!outarc->CopyEntry(entry.release(), *arc))
 458             break;
 459     }
 460
 461     bool success = arc->Eof() && outarc->Close();
 462
 463 \end{verbatim}
 464
 465 SetNotifier() calls OnEntryUpdated() immediately, then the input
 466 stream calls it again whenever it sets more fields in the entry. Since
 467 OnEntryUpdated() will be called at least once, this technique always
 468 works even when it is not strictly necessary to use it. For example,
 469 changing the entry name can be done this way too and it works on seekable
 470 streams as well as non-seekable.
 471