1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %% Purpose: Overview of the archive classes
4 %% Author: M.J.Wetherell
6 %% Copyright: 2004 M.J.Wetherell
7 %% License: wxWindows license
8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
10 \section{Archive formats such as zip}\label{wxarc}
12 The archive classes handle archive formats such as zip, tar, rar and cab.
13 Currently only the wxZip classes are included. wxTar classes are under
14 development at \urlref{wxCode}{http://wxcode.sf.net}.
16 For each archive type, there are the following classes (using zip here
17 as an example):
19 \begin{twocollist}\twocolwidtha{4cm}
20 \twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
21 \twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
22 \twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
23 entry (e.g. filename, timestamp, etc.)}
24 \end{twocollist}
26 There are also abstract wxArchive classes that can be used to write code
27 that can handle any of the archive types,
28 see '\helpref{Generic archive programming}{wxarcgeneric}'.
29 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
30 can handle archive files in a generic way.
32 The classes are designed to handle archives on both seekable streams, such
33 as disk files, and non-seekable streams, such as pipes and sockets
34 (see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
38 \helpref{wxFileSystem}{fs}
41 \subsection{Creating an archive}\label{wxarccreate}
43 \helpref{Archive formats such as zip}{wxarc}
45 Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
46 create each new entry in the archive, then write the entry's data.
47 Another call to PutNextEntry() closes the current entry and begins the next.
52 wxFFileOutputStream out(_T("test.zip"));
53 wxZipOutputStream zip(out);
54 wxTextOutputStream txt(zip);
55 wxString sep(wxFileName::GetPathSeparator());
57 zip.PutNextEntry(_T("entry1.txt"));
58 txt << _T("Some text for entry1.txt
\n");
60 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
61 txt << _T("Some text for subdir/entry2.txt
\n");
65 The name of each entry can be a full path, which makes it possible to
66 store entries in subdirectories.
69 \subsection{Extracting an archive}\label{wxarcextract}
71 \helpref{Archive formats such as zip}{wxarc}
73 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
74 to an entry object containing the meta-data for the next entry in the archive
75 (and gives away ownership). Reading from the input stream then returns the
76 entry's data. Eof() becomes true after an attempt has been made to read past
77 the end of the entry's data.
79 When there are no more entries, GetNextEntry() returns NULL and sets Eof().
82 // 'smart pointer' type created with wxDEFINE_SCOPED_PTR_TYPE
85 wxFFileInputStream in(_T("test.zip"));
86 wxZipInputStream zip(in);
88 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
91 wxString name = entry->GetName();
92 // read 'zip' to access the entry's data
97 The \helpref{smart pointer}{wxscopedptr} type {\em wxZipEntryPtr}
98 can be created like this:
101 #include <wx/ptr_scpd.h>
102 wxDEFINE_SCOPED_PTR_TYPE(wxZipEntry);
107 \subsection{Modifying an archive}\label{wxarcmodify}
109 \helpref{Archive formats such as zip}{wxarc}
111 To modify an existing archive, write a new copy of the archive to a new file,
112 making any necessary changes along the way and transferring any unchanged
113 entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
114 For archive types which compress entry data, CopyEntry() is likely to be
115 much more efficient than transferring the data using Read() and Write()
116 since it will copy them without decompressing and recompressing them.
118 In general modifications are not possible without rewriting the archive,
119 though it may be possible in some limited cases. Even then, rewriting the
120 archive is usually a better choice since a failure can be handled without
121 losing the whole
122 archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
123 be helpful to do this.
125 For example to delete all entries matching the pattern "*.txt":
128 wxFFileInputStreamPtr in(new wxFFileInputStream(_T("test.zip")));
129 wxTempFileOutputStream out(_T("test.zip"));
131 wxZipInputStream inzip
(*in);
132 wxZipOutputStream outzip(out);
134 // 'smart pointer' type created with wxDEFINE_SCOPED_PTR_TYPE
137 // transfer any meta-data for the archive as a whole (the zip comment
138 // in the case of zip)
139 outzip.CopyArchiveMetaData(inzip);
141 // call CopyEntry for each entry except those matching the pattern
142 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
143 if (!entry->GetName().Matches(_T("*.txt")))
144 if (!outzip.CopyEntry(entry.release(), inzip))
147 // close the input stream by releasing the pointer to it, do this
148 // before closing the output stream so that the file can be replaced
151 // you can check for success as follows
152 bool success = inzip.Eof() && outzip.Close() && out.Commit();
156 The \helpref{smart pointer}{wxscopedptr} types {\em wxZipEntryPtr}
157 and {\em wxFFileInputStreamPtr} can be created like this:
160 #include <wx/ptr_scpd.h>
161 wxDEFINE_SCOPED_PTR_TYPE(wxZipEntry);
162 wxDEFINE_SCOPED_PTR_TYPE(wxFFileInputStream);
167 \subsection{Looking up an archive entry by name}\label{wxarcbyname}
169 \helpref{Archive formats such as zip}{wxarc}
171 Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
172 more convenient for accessing archive entries by name.
174 To open just one entry in an archive, the most efficient way is
175 to simply search for it linearly by calling
176 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
177 required entry is found. This works both for archives on seekable and
178 non-seekable streams.
180 The format of filenames in the archive is likely to be different
181 from the local filename format. For example zips and tars use
182 unix style names, with forward slashes as the path separator,
183 and absolute paths are not allowed. So if on Windows the file
184 "C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
185 the entry back \helpref{GetName()}{wxarchiveentryname} will return
186 "MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
187 and back has lost some information.
189 So to avoid ambiguity when searching for an entry matching a local name,
190 it is better to convert the local name to the archive's internal format
191 and search for that:
194 // 'smart pointer' type created with wxDEFINE_SCOPED_PTR_TYPE
197 // convert the local name we are looking for into the internal format
198 wxString name = wxZipEntry::GetInternalName(localname);
201 wxFFileInputStream in(_T("test.zip"));
202 wxZipInputStream zip(in);
204 // call GetNextEntry() until the required internal name is found
206 entry.reset(zip.GetNextEntry());
208 while (entry.get() != NULL && entry->GetInternalName() != name);
210 if (entry.get() != NULL) {
211 // read the entry's data...
216 To access several entries randomly, it is most efficient to transfer the
217 entire catalogue of entries to a container such as a std::map or a
218 \helpref{wxHashMap}{wxhashmap} then entries looked up by name can be
219 opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
222 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
223 ZipCatalog::iterator it;
228 wxFFileInputStream in(_T("test.zip"));
229 wxZipInputStream zip(in);
231 // load the zip catalog
232 while ((entry = zip.GetNextEntry()) != NULL) {
233 wxZipEntry*& current = cat[entry->GetInternalName()];
234 // some archive formats can have multiple entries with the same name
235 // (e.g. tar) though it is an error in the case of zip
240 // open an entry by name
241 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
242 zip.OpenEntry(*it->second);
243 // ... now read entry's data
248 To open more than one entry simultaneously you need more than one
249 underlying stream on the same archive:
252 // opening another entry without closing the first requires another
253 // input stream for the same file
254 wxFFileInputStream in2(_T("test.zip"));
255 wxZipInputStream zip2(in2);
256 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
257 zip2.OpenEntry(*it->second);
262 \subsection{Generic archive programming}\label{wxarcgeneric}
264 \helpref{Archive formats such as zip}{wxarc}
266 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
267 can handle archive files in a generic way.
269 The specific archive classes, such as the wxZip classes, inherit from
270 the following abstract classes which can be used to write code that can
271 handle any of the archive types:
273 \begin{twocollist}\twocolwidtha{5cm}
274 \twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
275 \twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
276 \twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
277 entry (e.g. filename)}
278 \end{twocollist}
280 In order to be able to write generic code it's necessary to be able to create
281 instances of the classes without knowing which archive type is being used.
282 So there is a class factory for each archive type, derived from
283 \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, which can create
284 the other classes.
286 For example, given {\it wxArchiveClassFactory* factory}, streams and
287 entries can be created like this:
290 // create streams without knowing their type
291 wxArchiveInputStreamPtr inarc(factory->NewStream(in));
292 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
294 // create an empty entry object
295 wxArchiveEntryPtr entry(factory->NewEntry());
299 The \helpref{smart pointer}{wxscopedptr} types {\em wxArchiveInputStreamPtr},
300 {\em wxArchiveOutputStreamPtr} and {\em wxArchiveEntryPtr} would need to
301 have already been defined, which could be done like this:
304 #include <wx/ptr_scpd.h>
305 wxDEFINE_SCOPED_PTR_TYPE(wxArchiveInputStream);
306 wxDEFINE_SCOPED_PTR_TYPE(wxArchiveOutputStream);
307 wxDEFINE_SCOPED_PTR_TYPE(wxArchiveEntry);
311 The class factory itself can either be created explicitly:
314 wxArchiveClassFactory *factory = new wxZipClassFactory;
318 or using wxWidgets' \helpref{RTTI}{runtimeclassoverview}:
321 wxArchiveClassFactory *MakeFactory(const wxString& type)
323 wxString name = _T("wx") + type.Left(1).Upper() +
324 type.Mid(1).Lower() + _T("ClassFactory");
326 wxObject *pObj = wxCreateDynamicObject(name);
327 wxArchiveClassFactory *pcf = wxDynamicCast(pObj, wxArchiveClassFactory);
330 wxLogError(_T("can't handle '%s' archives"), type.c_str());
340 \subsection{Archives on non-seekable streams}\label{wxarcnoseek}
342 \helpref{Archive formats such as zip}{wxarc}
344 In general, handling archives on non-seekable streams is done in the same
345 way as for seekable streams, with a few caveats.
347 The main limitation is that accessing entries randomly using
348 \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
349 is not possible; the entries can only be accessed sequentially in the order
350 they are stored within the archive.
352 For each archive type, there will also be other limitations which will
353 depend on the order the entries' meta-data is stored within the archive.
354 These are not too difficult to deal with, and are outlined below.
356 \wxheading{PutNextEntry and the entry size}
358 When writing archives, some archive formats store the entry size before
359 the entry's data (tar has this limitation, zip doesn't). In this case
360 the entry's size must be passed to
361 \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
362 occurs.
364 This is only an issue on non-seekable streams, since otherwise the archive
365 output stream can seek back and fix up the header once the size of the
366 entry is known.
368 For generic programming, one way to handle this is to supply the size
369 whenever it is known, and rely on the error message from the output
370 stream when the operation is not supported.
372 \wxheading{GetNextEntry and the weak reference mechanism}
374 Some archive formats do not store all of an entry's meta-data before the
375 entry's data (zip is an example). In this case, when reading from a
376 non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
377 can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
378 object - not all the fields are set.
380 The input stream then keeps a weak reference to the entry object and
381 updates it when more meta-data becomes available. A weak reference is
382 one that does not prevent you from deleting the wxArchiveEntry object - the
383 input stream only attempts to update it if it is still around.
385 The documentation for each archive entry type gives the details
386 of what meta-data becomes available and when. For generic programming,
387 when the worst case must be assumed, you can rely on all the fields
388 of wxArchiveEntry being fully populated when GetNextEntry() returns,
389 with the following exceptions:
391 \begin{twocollist}\twocolwidtha{3cm}
392 \twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
393 available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
394 or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
395 \twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
396 be available after the end of the archive has been reached, i.e. after
397 GetNextEntry() returns NULL and Eof() is true}
398 \end{twocollist}
400 This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
401 to always fully preserve entries' meta-data. No matter what order
402 the meta-data occurs within the archive, the input stream will always
403 have read it before the output stream must write it.
405 \wxheading{wxArchiveNotifier}
407 Notifier objects can be used to get a notification whenever an input
408 stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
409 via the weak reference mechanism.
411 Consider the following code which renames an entry in an archive.
412 This is the usual way to modify an entry's meta-data: simply set the
413 required field before writing it with
414 \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
417 wxArchiveInputStreamPtr arc(factory->NewStream(in));
418 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
419 wxArchiveEntryPtr entry;
421 outarc->CopyArchiveMetaData(*arc);
423 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
424 if (entry->GetName() == from)
426 if (!outarc->CopyEntry(entry.release(), *arc))
430 bool success = arc->Eof() && outarc->Close();
434 However, for non-seekable streams, this technique cannot be used for
435 fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
436 which are not necessarily set when
437 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
438 this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
441 class MyNotifier : public wxArchiveNotifier
444 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
449 The meta-data changes are done in your notifier's
450 \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
451 then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
452 CopyEntry():
455 wxArchiveInputStreamPtr arc(factory->NewStream(in));
456 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
457 wxArchiveEntryPtr entry;
460 outarc->CopyArchiveMetaData(*arc);
462 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
463 entry->SetNotifier(notifier);
464 if (!outarc->CopyEntry(entry.release(), *arc))
468 bool success = arc->Eof() && outarc->Close();
472 SetNotifier() calls OnEntryUpdated() immediately, then the input
473 stream calls it again whenever it sets more fields in the entry. Since
474 OnEntryUpdated() will be called at least once, this technique always
475 works even when it is not strictly necessary to use it. For example,
476 changing the entry name can be done this way too and it works on seekable
477 streams as well as non-seekable.