1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %% Purpose: Overview of the archive classes
4 %% Author: M.J.Wetherell
6 %% Copyright: 2004 M.J.Wetherell
7 %% License: wxWindows license
8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
10 \section{Archive formats such as zip}\label{wxarc}
12 The archive classes handle archive formats such as zip, tar, rar and cab.
13 Currently only the wxZip classes are included. wxTar classes are under
14 development at \urlref{wxCode}{http://wxcode.sf.net}.
16 For each archive type, there are the following classes (using zip here as an example):
19 \begin{twocollist}\twocolwidtha{4cm}
20 \twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
21 \twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
22 \twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
23 entry (e.g. filename, timestamp, etc.)}
26 There are also abstract wxArchive classes that can be used to write code
27 that can handle any of the archive types,
28 see '\helpref{Generic archive programming}{wxarcgeneric}'.
29 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
30 can handle archive files in a generic way.
32 The classes are designed to handle archives on both seekable streams such
33 as disk files, and non-seekable streams such as pipes and sockets
34 (see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
38 \helpref{wxFileSystem}{fs}
41 \subsection{Creating an archive}\label{wxarccreate}
43 \helpref{Archive formats such as zip}{wxarc}
45 Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
46 create each new entry in the archive, then write the entry's data.
47 Another call to PutNextEntry() closes the current entry and begins the next.
52 wxFFileOutputStream out(_T("test.zip"));
53 wxZipOutputStream zip(out);
54 wxTextOutputStream txt(zip);
56 zip.PutNextEntry(_T("entry1.txt"));
57 txt << _T("Some text for entry1\n");
59 zip.PutNextEntry(_T("entry2.txt"));
60 txt << _T("Some text for entry2\n");
65 \subsection{Extracting an archive}\label{wxarcextract}
67 \helpref{Archive formats such as zip}{wxarc}
69 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
70 to an entry object containing the meta-data for the next entry in the archive
71 (and gives away ownership). Reading from the input stream then returns the
72 entry's data. Eof() becomes true after an attempt has been made to read past
73 the end of the entry's data.
75 When there are no more entries, GetNextEntry() returns NULL and sets Eof().
78 // 'smart pointer' type created with wxDEFINE_SCOPED_PTR_TYPE
81 wxFFileInputStream in(_T("test.zip"));
82 wxZipInputStream zip(in);
84 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
87 wxString name = entry->GetName();
88 // read 'zip' to access the entry's data
93 The \helpref{smart pointer}{wxscopedptr} type {\em wxZipEntryPtr}
94 can be created like this:
97 #include <wx/ptr_scpd.h>
98 wxDEFINE_SCOPED_PTR_TYPE(wxZipEntry);
103 \subsection{Modifying an archive}\label{wxarcmodify}
105 \helpref{Archive formats such as zip}{wxarc}
107 To modify an existing archive, write a new copy of the archive to a new file,
108 making any necessary changes along the way and transferring any unchanged
109 entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
110 For archive types which compress entry data, CopyEntry() is likely to be
111 much more efficient than transferring the data using Read() and Write()
112 since it will copy them without decompressing and recompressing them.
114 In general modifications are not possible without rewriting the archive,
115 though it may be possible in some limited cases. Even then, rewriting the
116 archive is usually a better choice since a failure can be handled without losing the whole
118 archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
119 be helpful to do this.
121 For example to delete all entries matching the pattern "*.txt":
124 wxFFileInputStreamPtr in(new wxFFileInputStream(_T("test.zip")));
125 wxTempFileOutputStream out(_T("test.zip"));
127 wxZipInputStream inzip(*in);
128 wxZipOutputStream outzip(out);
130 // 'smart pointer' type created with wxDEFINE_SCOPED_PTR_TYPE
133 // transfer any meta-data for the archive as a whole (the zip comment
134 // in the case of zip)
135 outzip.CopyArchiveMetaData(inzip);
137 // call CopyEntry for each entry except those matching the pattern
138 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
139 if (!entry->GetName().Matches(_T("*.txt")))
140 if (!outzip.CopyEntry(entry.release(), inzip))
143 // close the input stream by releasing the pointer to it, do this
144 // before closing the output stream so that the file can be replaced
147 // you can check for success as follows
148 bool success = inzip.Eof() && outzip.Close() && out.Commit();
152 The \helpref{smart pointer}{wxscopedptr} types {\em wxZipEntryPtr}
153 and {\em wxFFileInputStreamPtr} can be created like this:
156 #include <wx/ptr_scpd.h>
157 wxDEFINE_SCOPED_PTR_TYPE(wxZipEntry);
158 wxDEFINE_SCOPED_PTR_TYPE(wxFFileInputStream);
163 \subsection{Looking up an archive entry by name}\label{wxarcbyname}
165 \helpref{Archive formats such as zip}{wxarc}
167 Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
168 more convenient for accessing archive entries by name.
170 To open just one entry in an archive, the most efficient way is
171 to simply search for it linearly by calling
172 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
173 required entry is found. This works both for archives on seekable and
174 non-seekable streams.
176 The format of filenames in the archive is likely to be different
177 from the local filename format. For example zips and tars use
178 unix style names, with forward slashes as the path separator,
179 and absolute paths are not allowed. So if on Windows the file
180 "C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
181 the entry back \helpref{GetName()}{wxarchiveentryname} will return
182 "MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
183 and back has lost some information.
185 So to avoid ambiguity when searching for an entry matching a local name,
186 it is better to convert the local name to the archive's internal format and search for that:
190 // 'smart pointer' type created with wxDEFINE_SCOPED_PTR_TYPE
193 // convert the local name we are looking for into the internal format
194 wxString name = wxZipEntry::GetInternalName(localname);
197 wxFFileInputStream in(_T("test.zip"));
198 wxZipInputStream zip(in);
200 // call GetNextEntry() until the required internal name is found
202 entry.reset(zip.GetNextEntry());
204 while (entry.get() != NULL && entry->GetInternalName() != name);
206 if (entry.get() != NULL) {
207 // read the entry's data...
212 To access several entries randomly, it is most efficient to transfer the
213 entire catalogue of entries to a container such as a std::map or a
214 \helpref{wxHashMap}{wxhashmap} then entries looked up by name can be
215 opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
218 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
219 ZipCatalog::iterator it;
224 wxFFileInputStream in(_T("test.zip"));
225 wxZipInputStream zip(in);
227 // load the zip catalog
228 while ((entry = zip.GetNextEntry()) != NULL) {
229 wxZipEntry*& current = cat[entry->GetInternalName()];
230 // some archive formats can have multiple entries with the same name
231 // (e.g. tar) though it is an error in the case of zip
236 // open an entry by name
237 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
238 zip.OpenEntry(*it->second);
239 // ... now read entry's data
244 To open more than one entry simultaneously you need more than one
245 underlying stream on the same archive:
248 // opening another entry without closing the first requires another
249 // input stream for the same file
250 wxFFileInputStream in2(_T("test.zip"));
251 wxZipInputStream zip2(in2);
252 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
253 zip2.OpenEntry(*it->second);
258 \subsection{Generic archive programming}\label{wxarcgeneric}
260 \helpref{Archive formats such as zip}{wxarc}
262 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
263 can handle archive files in a generic way.
265 The specific archive classes, such as the wxZip classes, inherit from
266 the following abstract classes which can be used to write code that can
267 handle any of the archive types:
269 \begin{twocollist}\twocolwidtha{5cm}
270 \twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
271 \twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
272 \twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
273 entry (e.g. filename)}
276 In order to be able to write generic code it's necessary to be able to create
277 instances of the classes without knowing which archive type is being used.
278 So there is a class factory for each archive type, derived from
279 \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, which can create the other classes.
282 For example, given {\it wxArchiveClassFactory* factory}, streams and
283 entries can be created like this:
286 // create streams without knowing their type
287 wxArchiveInputStreamPtr inarc(factory->NewStream(in));
288 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
290 // create an empty entry object
291 wxArchiveEntryPtr entry(factory->NewEntry());
295 The \helpref{smart pointer}{wxscopedptr} types {\em wxArchiveInputStreamPtr},
296 {\em wxArchiveOutputStreamPtr} and {\em wxArchiveEntryPtr} would need to
297 have already been defined, which could be done like this:
300 #include <wx/ptr_scpd.h>
301 wxDEFINE_SCOPED_PTR_TYPE(wxArchiveInputStream);
302 wxDEFINE_SCOPED_PTR_TYPE(wxArchiveOutputStream);
303 wxDEFINE_SCOPED_PTR_TYPE(wxArchiveEntry);
307 The class factory itself can either be created explicitly:
310 wxArchiveClassFactory *factory = new wxZipClassFactory;
314 or using wxWidgets' \helpref{RTTI}{runtimeclassoverview}:
317 wxArchiveClassFactory *MakeFactory(const wxString& type)
319 wxString name = _T("wx") + type.Left(1).Upper() +
320 type.Mid(1).Lower() + _T("ClassFactory");
322 wxObject *pObj = wxCreateDynamicObject(name);
323 wxArchiveClassFactory *pcf = wxDynamicCast(pObj, wxArchiveClassFactory);
326 wxLogError(_T("can't handle '%s' archives"), type.c_str());
336 \subsection{Archives on non-seekable streams}\label{wxarcnoseek}
338 \helpref{Archive formats such as zip}{wxarc}
340 In general, handling archives on non-seekable streams is done in the same
341 way as for seekable streams, with a few caveats.
343 The main limitation is that accessing entries randomly using
344 \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
345 is not possible; the entries can only be accessed sequentially in the order
346 they are stored within the archive.
348 For each archive type, there will also be other limitations which will
349 depend on the order the entries' meta-data is stored within the archive.
350 These are not too difficult to deal with, and are outlined below.
352 \wxheading{PutNextEntry and the entry size}
354 When writing archives, some archive formats store the entry size before
355 the entry's data (tar has this limitation, zip doesn't). In this case
356 the entry's size must be passed to
357 \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error occurs.
360 This is only an issue on non-seekable streams, since otherwise the archive
361 output stream can seek back and fix up the header once the size of the entry is known.
364 For generic programming, one way to handle this is to supply the size
365 whenever it is known, and rely on the error message from the output
366 stream when the operation is not supported.
368 \wxheading{GetNextEntry and the weak reference mechanism}
370 Some archive formats do not store all an entry's meta-data before the
371 entry's data (zip is an example). In this case, when reading from a
372 non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
373 can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
374 object - not all the fields are set.
376 The input stream then keeps a weak reference to the entry object and
377 updates it when more meta-data becomes available. A weak reference is
378 one that does not prevent you from deleting the wxArchiveEntry object - the
379 input stream only attempts to update it if it is still around.
381 The documentation for each archive entry type gives the details
382 of what meta-data becomes available and when. For generic programming,
383 when the worst case must be assumed, you can rely on all the fields
384 of wxArchiveEntry being fully populated when GetNextEntry() returns,
385 with the following exceptions:
387 \begin{twocollist}\twocolwidtha{3cm}
388 \twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
389 available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
390 or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
391 \twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
392 be available after the end of the archive has been reached, i.e. after
393 GetNextEntry() returns NULL and Eof() is true}
396 This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
397 to always fully preserve entries' meta-data. No matter what order
398 the meta-data occurs within the archive, the input stream will always
399 have read it before the output stream must write it.
401 \wxheading{wxArchiveNotifier}
403 Notifier objects can be used to get a notification whenever an input
404 stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
405 via the weak reference mechanism.
407 Consider the following code which renames an entry in an archive.
408 This is the usual way to modify an entry's meta-data, simply set the
409 required field before writing it with
410 \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
413 wxArchiveInputStreamPtr arc(factory->NewStream(in));
414 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
415 wxArchiveEntryPtr entry;
417 outarc->CopyArchiveMetaData(*arc);
419 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
420 if (entry->GetName() == from)
422 if (!outarc->CopyEntry(entry.release(), *arc))
426 bool success = arc->Eof() && outarc->Close();
430 However, for non-seekable streams, this technique cannot be used for
431 fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
432 which are not necessarily set when
433 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
434 this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
437 class MyNotifier : public wxArchiveNotifier
440 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
445 The meta-data changes are done in your notifier's
446 \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
447 then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before CopyEntry():
451 wxArchiveInputStreamPtr arc(factory->NewStream(in));
452 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
453 wxArchiveEntryPtr entry;
456 outarc->CopyArchiveMetaData(*arc);
458 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
459 entry->SetNotifier(notifier);
460 if (!outarc->CopyEntry(entry.release(), *arc))
464 bool success = arc->Eof() && outarc->Close();
468 SetNotifier() calls OnEntryUpdated() immediately, then the input
469 stream calls it again whenever it sets more fields in the entry. Since
470 OnEntryUpdated() will be called at least once, this technique always
471 works even when it is not strictly necessary to use it. For example,
472 changing the entry name can be done this way too and it works on seekable
473 streams as well as non-seekable.