]> git.saurik.com Git - wxWidgets.git/blame_incremental - docs/latex/wx/arc.tex
added support for binary data to wxConfig (slightly modified patch 1736788)
[wxWidgets.git] / docs / latex / wx / arc.tex
... / ...
CommitLineData
1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%% Name: arc.tex
3%% Purpose: Overview of the archive classes
4%% Author: M.J.Wetherell
5%% RCS-ID: $Id$
6%% Copyright: 2004 M.J.Wetherell
7%% License: wxWindows license
8%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
9
10\section{Archive formats such as zip}\label{wxarc}
11
12The archive classes handle archive formats such as zip, tar, rar and cab.
13Currently \helpref{wxZip}{wxzipinputstream}
14and \helpref{wxTar}{wxtarinputstream} classes are included.
15
16For each archive type, there are the following classes (using zip here
17as an example):
18
19\begin{twocollist}\twocolwidtha{4cm}
20\twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
21\twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
22\twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
23entry (e.g. filename, timestamp, etc.)}
24\end{twocollist}
25
26There are also abstract wxArchive classes that can be used to write code
27that can handle any of the archive types,
28see '\helpref{Generic archive programming}{wxarcgeneric}'.
29Also see \helpref{wxFileSystem}{fs} for a higher level interface that
30can handle archive files in a generic way.
31
32The classes are designed to handle archives on both seekable streams such
33as disk files, and non-seekable streams such as pipes and sockets
34(see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
35
36\wxheading{See also}
37
38\helpref{wxFileSystem}{fs}
39
40
41\subsection{Creating an archive}\label{wxarccreate}
42
43\helpref{Archive formats such as zip}{wxarc}
44
45Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
46create each new entry in the archive, then write the entry's data.
47Another call to PutNextEntry() closes the current entry and begins the next.
48
49For example:
50
51\begin{verbatim}
52 wxFFileOutputStream out(_T("test.zip"));
53 wxZipOutputStream zip(out);
54 wxTextOutputStream txt(zip);
55 wxString sep(wxFileName::GetPathSeparator());
56
57 zip.PutNextEntry(_T("entry1.txt"));
58 txt << _T("Some text for entry1.txt\n");
59
60 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
61 txt << _T("Some text for subdir/entry2.txt\n");
62
63\end{verbatim}
64
65The name of each entry can be a full path, which makes it possible to
66store entries in subdirectories.
67
68
69\subsection{Extracting an archive}\label{wxarcextract}
70
71\helpref{Archive formats such as zip}{wxarc}
72
73\helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
74to an entry object containing the meta-data for the next entry in the archive
75(and gives away ownership). Reading from the input stream then returns the
76entry's data. Eof() becomes true after an attempt has been made to read past
77the end of the entry's data.
78
79When there are no more entries, GetNextEntry() returns NULL and sets Eof().
80
81\begin{verbatim}
82 auto_ptr<wxZipEntry> entry;
83
84 wxFFileInputStream in(_T("test.zip"));
85 wxZipInputStream zip(in);
86
87 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
88 {
89 // access meta-data
90 wxString name = entry->GetName();
91 // read 'zip' to access the entry's data
92 }
93
94\end{verbatim}
95
96
97\subsection{Modifying an archive}\label{wxarcmodify}
98
99\helpref{Archive formats such as zip}{wxarc}
100
101To modify an existing archive, write a new copy of the archive to a new file,
102making any necessary changes along the way and transferring any unchanged
103entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
104For archive types which compress entry data, CopyEntry() is likely to be
105much more efficient than transferring the data using Read() and Write()
106since it will copy them without decompressing and recompressing them.
107
108In general modifications are not possible without rewriting the archive,
109though it may be possible in some limited cases. Even then, rewriting the
110archive is usually a better choice since a failure can be handled without
111losing the whole
112archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
113be helpful to do this.
114
115For example to delete all entries matching the pattern "*.txt":
116
117\begin{verbatim}
118 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
119 wxTempFileOutputStream out(_T("test.zip"));
120
121 wxZipInputStream inzip(*in);
122 wxZipOutputStream outzip(out);
123
124 auto_ptr<wxZipEntry> entry;
125
126 // transfer any meta-data for the archive as a whole (the zip comment
127 // in the case of zip)
128 outzip.CopyArchiveMetaData(inzip);
129
130 // call CopyEntry for each entry except those matching the pattern
131 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
132 if (!entry->GetName().Matches(_T("*.txt")))
133 if (!outzip.CopyEntry(entry.release(), inzip))
134 break;
135
136 // close the input stream by releasing the pointer to it, do this
137 // before closing the output stream so that the file can be replaced
138 in.reset();
139
140 // you can check for success as follows
141 bool success = inzip.Eof() && outzip.Close() && out.Commit();
142
143\end{verbatim}
144
145
146\subsection{Looking up an archive entry by name}\label{wxarcbyname}
147
148\helpref{Archive formats such as zip}{wxarc}
149
150Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
151more convenient for accessing archive entries by name.
152
153To open just one entry in an archive, the most efficient way is
154to simply search for it linearly by calling
155 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
156required entry is found. This works both for archives on seekable and
157non-seekable streams.
158
159The format of filenames in the archive is likely to be different
160from the local filename format. For example zips and tars use
161unix style names, with forward slashes as the path separator,
162and absolute paths are not allowed. So if on Windows the file
163"C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
164the entry back \helpref{GetName()}{wxarchiveentryname} will return
165"MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
166and back has lost some information.
167
168So to avoid ambiguity when searching for an entry matching a local name,
169it is better to convert the local name to the archive's internal format
170and search for that:
171
172\begin{verbatim}
173 auto_ptr<wxZipEntry> entry;
174
175 // convert the local name we are looking for into the internal format
176 wxString name = wxZipEntry::GetInternalName(localname);
177
178 // open the zip
179 wxFFileInputStream in(_T("test.zip"));
180 wxZipInputStream zip(in);
181
182 // call GetNextEntry() until the required internal name is found
183 do {
184 entry.reset(zip.GetNextEntry());
185 }
186 while (entry.get() != NULL && entry->GetInternalName() != name);
187
188 if (entry.get() != NULL) {
189 // read the entry's data...
190 }
191
192\end{verbatim}
193
194To access several entries randomly, it is most efficient to transfer the
195entire catalogue of entries to a container such as a std::map or a
196 \helpref{wxHashMap}{wxhashmap}; entries looked up by name can then be
197opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
198
199\begin{verbatim}
200 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
201 ZipCatalog::iterator it;
202 wxZipEntry *entry;
203 ZipCatalog cat;
204
205 // open the zip
206 wxFFileInputStream in(_T("test.zip"));
207 wxZipInputStream zip(in);
208
209 // load the zip catalog
210 while ((entry = zip.GetNextEntry()) != NULL) {
211 wxZipEntry*& current = cat[entry->GetInternalName()];
212 // some archive formats can have multiple entries with the same name
213 // (e.g. tar) though it is an error in the case of zip
214 delete current;
215 current = entry;
216 }
217
218 // open an entry by name
219 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
220 zip.OpenEntry(*it->second);
221 // ... now read entry's data
222 }
223
224\end{verbatim}
225
226To open more than one entry simultaneously you need more than one
227underlying stream on the same archive:
228
229\begin{verbatim}
230 // opening another entry without closing the first requires another
231 // input stream for the same file
232 wxFFileInputStream in2(_T("test.zip"));
233 wxZipInputStream zip2(in2);
234 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
235 zip2.OpenEntry(*it->second);
236
237\end{verbatim}
238
239
240\subsection{Generic archive programming}\label{wxarcgeneric}
241
242\helpref{Archive formats such as zip}{wxarc}
243
244Also see \helpref{wxFileSystem}{fs} for a higher level interface that
245can handle archive files in a generic way.
246
247The specific archive classes, such as the wxZip classes, inherit from
248the following abstract classes which can be used to write code that can
249handle any of the archive types:
250
251\begin{twocollist}\twocolwidtha{5cm}
252\twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
253\twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
254\twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
255entry (e.g. filename)}
256\end{twocollist}
257
258In order to be able to write generic code it's necessary to be able to create
259instances of the classes without knowing which archive type is being used.
260To allow this there is a class factory for each archive type, derived from
261 \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, that can create
262the other classes.
263
264For example, given {\it wxArchiveClassFactory* factory}, streams and
265entries can be created like this:
266
267\begin{verbatim}
268 // create streams without knowing their type
269 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
270 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
271
272 // create an empty entry object
273 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
274
275\end{verbatim}
276
277For the factory itself, the static member
278 \helpref{wxArchiveClassFactory::Find()}{wxarchiveclassfactoryfind}
279can be used to find a class factory that can handle a given file
280extension or mime type. For example, given {\it filename}:
281
282\begin{verbatim}
283 const wxArchiveClassFactory *factory;
284 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
285
286 if (factory)
287 stream = factory->NewStream(new wxFFileInputStream(filename));
288
289\end{verbatim}
290
291{\it Find} does not give away ownership of the returned pointer, so it
292does not need to be deleted.
293
294There are similar class factories for the filter streams that handle the
295compression and decompression of a single stream, such as wxGzipInputStream.
296These can be found using
297 \helpref{wxFilterClassFactory::Find()}{wxfilterclassfactoryfind}.
298
299For example, to list the contents of archive {\it filename}:
300
301\begin{verbatim}
302 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
303
304 if (in->IsOk())
305 {
306 // look for a filter handler, e.g. for '.gz'
307 const wxFilterClassFactory *fcf;
308 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
309 if (fcf) {
310 in.reset(fcf->NewStream(in.release()));
311 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
312 filename = fcf->PopExtension(filename);
313 }
314
315 // look for an archive handler, e.g. for '.zip' or '.tar'
316 const wxArchiveClassFactory *acf;
317 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
318 if (acf) {
319 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
320 auto_ptr<wxArchiveEntry> entry;
321
322 // list the contents of the archive
323 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
324 std::wcout << entry->GetName().c_str() << "\n";
325 }
326 else {
327 wxLogError(_T("can't handle '%s'"), filename.c_str());
328 }
329 }
330
331\end{verbatim}
332
333
334\subsection{Archives on non-seekable streams}\label{wxarcnoseek}
335
336\helpref{Archive formats such as zip}{wxarc}
337
338In general, handling archives on non-seekable streams is done in the same
339way as for seekable streams, with a few caveats.
340
341The main limitation is that accessing entries randomly using
342 \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
343is not possible; the entries can only be accessed sequentially in the order
344they are stored within the archive.
345
346For each archive type, there will also be other limitations which will
347depend on the order the entries' meta-data is stored within the archive.
348These are not too difficult to deal with, and are outlined below.
349
350\wxheading{PutNextEntry and the entry size}
351
352When writing archives, some archive formats store the entry size before
353the entry's data (tar has this limitation, zip doesn't). In this case
354the entry's size must be passed to
355 \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
356occurs.
357
358This is only an issue on non-seekable streams, since otherwise the archive
359output stream can seek back and fix up the header once the size of the
360entry is known.
361
362For generic programming, one way to handle this is to supply the size
363whenever it is known, and rely on the error message from the output
364stream when the operation is not supported.
365
366\wxheading{GetNextEntry and the weak reference mechanism}
367
368Some archive formats do not store all an entry's meta-data before the
369entry's data (zip is an example). In this case, when reading from a
370non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
371can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
372object - not all the fields are set.
373
374The input stream then keeps a weak reference to the entry object and
375updates it when more meta-data becomes available. A weak reference is
376one that does not prevent you from deleting the wxArchiveEntry object - the
377input stream only attempts to update it if it is still around.
378
379The documentation for each archive entry type gives the details
380of what meta-data becomes available and when. For generic programming,
381when the worst case must be assumed, you can rely on all the fields
382of wxArchiveEntry being fully populated when GetNextEntry() returns,
383with the following exceptions:
384
385\begin{twocollist}\twocolwidtha{3cm}
386\twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
387available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
388or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
389\twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
390be available after the end of the archive has been reached, i.e. after
391GetNextEntry() returns NULL and Eof() is true}
392\end{twocollist}
393
394This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
395to always fully preserve entries' meta-data. No matter what order
396the meta-data occurs within the archive, the input stream will always
397have read it before the output stream must write it.
398
399\wxheading{wxArchiveNotifier}
400
401Notifier objects can be used to get a notification whenever an input
402stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
403via the weak reference mechanism.
404
405Consider the following code which renames an entry in an archive.
406This is the usual way to modify an entry's meta-data: simply set the
407required field before writing it with
408 \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
409
410\begin{verbatim}
411 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
412 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
413 auto_ptr<wxArchiveEntry> entry;
414
415 outarc->CopyArchiveMetaData(*arc);
416
417 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
418 if (entry->GetName() == from)
419 entry->SetName(to);
420 if (!outarc->CopyEntry(entry.release(), *arc))
421 break;
422 }
423
424 bool success = arc->Eof() && outarc->Close();
425
426\end{verbatim}
427
428However, for non-seekable streams, this technique cannot be used for
429fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
430which are not necessarily set when
431 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
432this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
433
434\begin{verbatim}
435class MyNotifier : public wxArchiveNotifier
436{
437public:
438 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
439};
440
441\end{verbatim}
442
443The meta-data changes are done in your notifier's
444 \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
445then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
446CopyEntry():
447
448\begin{verbatim}
449 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
450 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
451 auto_ptr<wxArchiveEntry> entry;
452 MyNotifier notifier;
453
454 outarc->CopyArchiveMetaData(*arc);
455
456 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
457 entry->SetNotifier(notifier);
458 if (!outarc->CopyEntry(entry.release(), *arc))
459 break;
460 }
461
462 bool success = arc->Eof() && outarc->Close();
463
464\end{verbatim}
465
466SetNotifier() calls OnEntryUpdated() immediately, then the input
467stream calls it again whenever it sets more fields in the entry. Since
468OnEntryUpdated() will be called at least once, this technique always
469works even when it is not strictly necessary to use it. For example,
470changing the entry name can be done this way too and it works on seekable
471streams as well as non-seekable.
472