1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %% Purpose: Overview of the archive classes
4 %% Author: M.J.Wetherell
6 %% Copyright: 2004 M.J.Wetherell
7 %% License: wxWindows license
8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
10 \section{Archive formats such as zip}\label{wxarc}
12 The archive classes handle archive formats such as zip, tar, rar and cab.
13 Currently only the wxZip classes are included.
15 For each archive type, there are the following classes (using zip here
16 as an example):
18 \begin{twocollist}\twocolwidtha{4cm}
19 \twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
20 \twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
21 \twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
22 entry (e.g. filename, timestamp, etc.)}
23 \end{twocollist}
25 There are also abstract wxArchive classes that can be used to write code
26 that can handle any of the archive types,
27 see '\helpref{Generic archive programming}{wxarcgeneric}'.
28 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
29 can handle archive files in a generic way.
31 The classes are designed to handle archives on both seekable streams such
32 as disk files, and non-seekable streams such as pipes and sockets
33 (see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
37 \helpref{wxFileSystem}{fs}
40 \subsection{Creating an archive}\label{wxarccreate}
42 \helpref{Archive formats such as zip}{wxarc}
44 Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
45 create each new entry in the archive, then write the entry's data.
46 Another call to PutNextEntry() closes the current entry and begins the next.
51 wxFFileOutputStream out(_T("test.zip"));
52 wxZipOutputStream zip(out);
53 wxTextOutputStream txt(zip);
55 zip.PutNextEntry(_T("entry1.txt"));
56 txt << _T("Some text for entry1\n");
58 zip.PutNextEntry(_T("entry2.txt"));
59 txt << _T("Some text for entry2\n");
64 \subsection{Extracting an archive}\label{wxarcextract}
66 \helpref{Archive formats such as zip}{wxarc}
68 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns an
69 entry object containing the meta-data for the next entry in the archive
70 (and gives away ownership). Reading from the input stream then returns
71 the entry's data. Eof() becomes true after an attempt has been made to
72 read past the end of the entry's data.
74 When there are no more entries, GetNextEntry() returns NULL and sets Eof().
77 wxDEFINE_SCOPED_PTR_TYPE(wxZipEntry);
80 wxFFileInputStream in(_T("test.zip"));
81 wxZipInputStream zip(in);
82 wxTextInputStream txt(zip);
85 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
87 wxString name = entry->GetName(); // access meta-data
88 txt >> data; // access data
94 \subsection{Modifying an archive}\label{wxarcmodify}
96 \helpref{Archive formats such as zip}{wxarc}
98 To modify an existing archive, write a new copy of the archive to a new file,
99 making any necessary changes along the way and transferring any unchanged
100 entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
101 For archive types which compress entry data, CopyEntry() is likely to be
102 much more efficient than transferring the data using Read() and Write()
103 since it will copy them without decompressing and recompressing them.
105 In general modifications are not possible without rewriting the archive,
106 though it may be possible in some limited cases. Even then, rewriting
107 the archive is usually a better choice since a failure can be handled
108 without losing the whole archive.
110 For example to delete all entries matching the pattern "*.txt":
113 wxFFileInputStream in(_T("in.zip"));
114 wxFFileOutputStream out(_T("out.zip"));
116 wxZipInputStream inzip(in);
117 wxZipOutputStream outzip(out);
120 // transfer any meta-data for the archive as a whole (the zip comment
121 // in the case of zip)
122 outzip.CopyArchiveMetaData(inzip);
124 // call CopyEntry for each entry except those matching the pattern
125 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
126 if (!entry->GetName().Matches(_T("*.txt")))
127 if (!outzip.CopyEntry(entry.release(), inzip))
130 bool success = inzip.Eof() && outzip.Close();
135 \subsection{Looking up an archive entry by name}\label{wxarcbyname}
137 \helpref{Archive formats such as zip}{wxarc}
139 Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
140 more convenient for accessing archive entries by name.
142 To open just one entry in an archive, the most efficient way is
143 to simply search for it linearly by calling
144 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
145 required entry is found. This works both for archives on seekable and
146 non-seekable streams.
148 The format of filenames in the archive is likely to be different
149 from the local filename format. For example zips and tars use
150 unix style names, with forward slashes as the path separator,
151 and absolute paths are not allowed. So if on Windows the file
152 "C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
153 the entry back \helpref{GetName()}{wxarchiveentryname} will return
154 "MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
155 and back has lost some information.
157 So to avoid ambiguity when searching for an entry matching a local name,
158 it is better to convert the local name to the archive's internal format
159 and search for that:
162 wxDEFINE_SCOPED_PTR_TYPE(wxZipEntry);
165 // convert the local name we are looking for into the internal format
166 wxString name = wxZipEntry::GetInternalName(localname);
169 wxFFileInputStream in(_T("test.zip"));
170 wxZipInputStream zip(in);
172 // call GetNextEntry() until the required internal name is found
174 entry.reset(zip.GetNextEntry());
176 while (entry.get() != NULL && entry->GetInternalName() != name);
178 if (entry.get() != NULL)
{
179 // read the entry's data...
184 To access several entries randomly, it is most efficient to transfer the
185 entire catalogue of entries to a container such as a std::map or a
186 \helpref{wxHashMap}{wxhashmap}; then entries looked up by name can be
187 opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
190 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
191 ZipCatalog::iterator it;
196 wxFFileInputStream in(_T("test.zip"));
197 wxZipInputStream zip(in);
199 // load the zip catalog
200 while ((entry = zip.GetNextEntry()) != NULL)
{
201 wxZipEntry*& current = cat
[entry->GetInternalName()
];
202 // some archive formats can have multiple entries with the same name
203 // (e.g. tar) though it is an error in the case of zip
208 // open an entry by name
209 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
{
210 zip.OpenEntry
(*it->second);
211 // ... now read entry's data
216 To open more than one entry simultaneously you need more than one
217 underlying stream on the same archive:
220 // opening another entry without closing the first requires another
221 // input stream for the same file
222 wxFFileInputStream in2(_T("test.zip"));
223 wxZipInputStream zip2(in2);
224 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
225 zip2.OpenEntry(*it->second);
230 \subsection{Generic archive programming}\label{wxarcgeneric}
232 \helpref{Archive formats such as zip}{wxarc}
234 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
235 can handle archive files in a generic way.
237 The specific archive classes, such as the wxZip classes, inherit from
238 the following abstract classes which can be used to write code that can
239 handle any of the archive types:
241 \begin{twocollist}\twocolwidtha{5cm}
242 \twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
243 \twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
244 \twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
245 entry (e.g. filename)}
246 \end{twocollist}
248 In order to be able to write generic code it's necessary to be able to create
249 instances of the classes without knowing which archive type is being used.
250 So there is a class factory for each archive type, derived from
251 \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, which can create
252 the other classes.
254 For example, given {\it wxArchiveClassFactory* factory}:
257 // create streams without knowing their type
258 wxArchiveInputStreamPtr inarc(factory->NewStream(in));
259 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
261 // create an empty entry object
262 wxArchiveEntryPtr entry(factory->NewEntry());
266 The class factory itself can either be created explicitly:
269 wxArchiveClassFactory *factory = new wxZipClassFactory;
273 or using wxWidgets' \helpref{RTTI}{runtimeclassoverview}:
276 wxArchiveClassFactory *MakeFactory(const wxString& type)
278 wxString name = _T("wx") + type.Left(1).Upper() +
279 type.Mid(1).Lower() + _T("ClassFactory");
281 wxObject *pObj = wxCreateDynamicObject(name);
282 wxArchiveClassFactory *pcf = wxDynamicCast(pObj, wxArchiveClassFactory);
285 wxLogError(_T("can't handle '%s' archives"), type.c_str());
295 \subsection{Archives on non-seekable streams}\label{wxarcnoseek}
297 \helpref{Archive formats such as zip}{wxarc}
299 In general, handling archives on non-seekable streams is done in the same
300 way as for seekable streams, with a few caveats.
302 The main limitation is that accessing entries randomly using
303 \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
304 is not possible; the entries can only be accessed sequentially in the order
305 they are stored within the archive.
307 For each archive type, there will also be other limitations which will
308 depend on the order the entries' meta-data is stored within the archive.
309 These are not too difficult to deal with, and are outlined below.
311 \wxheading{PutNextEntry and the entry size}
313 When writing archives, some archive formats store the entry size before
314 the entry's data (tar has this limitation, zip doesn't). In this case
315 the entry's size must be passed to
316 \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
317 occurs.
319 This is only an issue on non-seekable streams, since otherwise the archive
320 output stream can seek back and fix up the header once the size of the
321 entry is known.
323 For generic programming, one way to handle this is to supply the size
324 whenever it is known, and rely on the error message from the output
325 stream when the operation is not supported.
327 \wxheading{GetNextEntry and the weak reference mechanism}
329 Some archive formats do not store all an entry's meta-data before the
330 entry's data (zip is an example). In this case, when reading from a
331 non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
332 can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
333 object - not all the fields are set.
335 The input stream then keeps a weak reference to the entry object and
336 updates it when more meta-data becomes available. A weak reference is
337 one that does not prevent you from deleting the wxArchiveEntry object - the
338 input stream only attempts to update it if it is still around.
340 The documentation for each archive entry type gives the details
341 of what meta-data becomes available and when. For generic programming,
342 when the worst case must be assumed, you can rely on all the fields
343 of wxArchiveEntry being fully populated when GetNextEntry() returns,
344 with the following exceptions:
346 \begin{twocollist}\twocolwidtha{3cm}
347 \twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
348 available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
349 or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
350 \twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
351 be available after the end of the archive has been reached, i.e. after
352 GetNextEntry() returns NULL and Eof() is true}
353 \end{twocollist}
355 This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
356 to always fully preserve entries' meta-data. No matter what order
357 the meta-data occurs within the archive, the input stream will always
358 have read it before the output stream must write it.
360 \wxheading{wxArchiveNotifier}
362 Notifier objects can be used to get a notification whenever an input
363 stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
364 via the weak reference mechanism.
366 Consider the following code which renames an entry in an archive.
367 This is the usual way to modify an entry's meta-data: simply set the
368 required field before writing it with
369 \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
372 wxArchiveInputStreamPtr arc(factory->NewStream(in));
373 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
374 wxArchiveEntryPtr entry;
376 outarc->CopyArchiveMetaData(*arc);
378 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
379 if (entry->GetName() == from)
381 if (!outarc->CopyEntry(entry.release(), *arc))
385 bool success = arc->Eof() && outarc->Close();
389 However, for non-seekable streams, this technique cannot be used for
390 fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
391 which are not necessarily set when
392 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
393 this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
396 class MyNotifier : public wxArchiveNotifier
399 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
404 The meta-data changes are done in your notifier's
405 \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
406 then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
407 CopyEntry():
410 wxArchiveInputStreamPtr arc(factory->NewStream(in));
411 wxArchiveOutputStreamPtr outarc(factory->NewStream(out));
412 wxArchiveEntryPtr entry;
415 outarc->CopyArchiveMetaData(*arc);
417 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
418 entry->SetNotifier(notifier);
419 if (!outarc->CopyEntry(entry.release(), *arc))
423 bool success = arc->Eof() && outarc->Close();
427 SetNotifier() calls OnEntryUpdated() immediately, then the input
428 stream calls it again whenever it sets more fields in the entry. Since
429 OnEntryUpdated() will be called at least once, this technique always
430 works even when it is not strictly necessary to use it. For example,
431 changing the entry name can be done this way too and it works on seekable
432 streams as well as non-seekable.