]> git.saurik.com Git - wxWidgets.git/blame - docs/latex/wx/arc.tex
Small fix.
[wxWidgets.git] / docs / latex / wx / arc.tex
CommitLineData
00375592
VZ
1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2%% Name: arc.tex
3%% Purpose: Overview of the archive classes
4%% Author: M.J.Wetherell
5%% RCS-ID: $Id$
6%% Copyright: 2004 M.J.Wetherell
8795498c 7%% License: wxWindows license
00375592
VZ
8%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
9
10\section{Archive formats such as zip}\label{wxarc}
11
12The archive classes handle archive formats such as zip, tar, rar and cab.
d7d47fe2
MW
13Currently \helpref{wxZip}{wxzipinputstream}
14and \helpref{wxTar}{wxtarinputstream} classes are included.
00375592
VZ
15
16For each archive type, there are the following classes (using zip here
17as an example):
18
19\begin{twocollist}\twocolwidtha{4cm}
20\twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
21\twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
22\twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
23entry (e.g. filename, timestamp, etc.)}
24\end{twocollist}
25
26There are also abstract wxArchive classes that can be used to write code
27that can handle any of the archive types,
28see '\helpref{Generic archive programming}{wxarcgeneric}'.
29Also see \helpref{wxFileSystem}{fs} for a higher level interface that
30can handle archive files in a generic way.
31
32The classes are designed to handle archives on both seekable streams such
33as disk files, and non-seekable streams such as pipes and sockets
34(see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
35
36\wxheading{See also}
37
38\helpref{wxFileSystem}{fs}
39
40
41\subsection{Creating an archive}\label{wxarccreate}
42
43\helpref{Archive formats such as zip}{wxarc}
44
45Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
46create each new entry in the archive, then write the entry's data.
47Another call to PutNextEntry() closes the current entry and begins the next.
48
49For example:
50
51\begin{verbatim}
52 wxFFileOutputStream out(_T("test.zip"));
53 wxZipOutputStream zip(out);
54 wxTextOutputStream txt(zip);
76ad7b36 55 wxString sep(wxFileName::GetPathSeparator());
00375592
VZ
56
57 zip.PutNextEntry(_T("entry1.txt"));
76ad7b36 58 txt << _T("Some text for entry1.txt\n");
00375592 59
76ad7b36
MW
60 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
61 txt << _T("Some text for subdir/entry2.txt\n");
00375592
VZ
62
63\end{verbatim}
64
76ad7b36
MW
65The name of each entry can be a full path, which makes it possible to
66store entries in subdirectories.
67
00375592
VZ
68
69\subsection{Extracting an archive}\label{wxarcextract}
70
71\helpref{Archive formats such as zip}{wxarc}
72
df467a9d
MW
73\helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
74to an entry object containing the meta-data for the next entry in the archive
75(and gives away ownership). Reading from the input stream then returns the
76entry's data. Eof() becomes true after an attempt has been made to read past
77the end of the entry's data.
00375592
VZ
78
79When there are no more entries, GetNextEntry() returns NULL and sets Eof().
80
81\begin{verbatim}
601bee5c 82 auto_ptr<wxZipEntry> entry;
00375592
VZ
83
84 wxFFileInputStream in(_T("test.zip"));
85 wxZipInputStream zip(in);
00375592
VZ
86
87 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
88 {
df467a9d
MW
89 // access meta-data
90 wxString name = entry->GetName();
91 // read 'zip' to access the entry's data
00375592
VZ
92 }
93
94\end{verbatim}
95
96
97\subsection{Modifying an archive}\label{wxarcmodify}
98
99\helpref{Archive formats such as zip}{wxarc}
100
101To modify an existing archive, write a new copy of the archive to a new file,
102making any necessary changes along the way and transferring any unchanged
103entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
104For archive types which compress entry data, CopyEntry() is likely to be
105much more efficient than transferring the data using Read() and Write()
106since it will copy them without decompressing and recompressing them.
107
108In general modifications are not possible without rewriting the archive,
df467a9d
MW
109though it may be possible in some limited cases. Even then, rewriting the
110archive is usually a better choice since a failure can be handled without
111losing the whole
112archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
113be helpful to do this.
00375592
VZ
114
115For example to delete all entries matching the pattern "*.txt":
116
117\begin{verbatim}
601bee5c 118 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
df467a9d 119 wxTempFileOutputStream out(_T("test.zip"));
00375592 120
df467a9d 121 wxZipInputStream inzip(*in);
00375592 122 wxZipOutputStream outzip(out);
df467a9d 123
601bee5c 124 auto_ptr<wxZipEntry> entry;
00375592
VZ
125
126 // transfer any meta-data for the archive as a whole (the zip comment
127 // in the case of zip)
128 outzip.CopyArchiveMetaData(inzip);
129
130 // call CopyEntry for each entry except those matching the pattern
131 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
132 if (!entry->GetName().Matches(_T("*.txt")))
133 if (!outzip.CopyEntry(entry.release(), inzip))
134 break;
135
df467a9d
MW
136 // close the input stream by releasing the pointer to it, do this
137 // before closing the output stream so that the file can be replaced
138 in.reset();
139
140 // you can check for success as follows
141 bool success = inzip.Eof() && outzip.Close() && out.Commit();
142
143\end{verbatim}
144
00375592
VZ
145
146\subsection{Looking up an archive entry by name}\label{wxarcbyname}
147
148\helpref{Archive formats such as zip}{wxarc}
149
150Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
151more convenient for accessing archive entries by name.
152
153To open just one entry in an archive, the most efficient way is
154to simply search for it linearly by calling
155 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
156required entry is found. This works both for archives on seekable and
157non-seekable streams.
158
159The format of filenames in the archive is likely to be different
160from the local filename format. For example zips and tars use
161unix style names, with forward slashes as the path separator,
162and absolute paths are not allowed. So if on Windows the file
163"C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
164the entry back \helpref{GetName()}{wxarchiveentryname} will return
165"MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
166and back has lost some information.
167
168So to avoid ambiguity when searching for an entry matching a local name,
169it is better to convert the local name to the archive's internal format
170and search for that:
171
172\begin{verbatim}
601bee5c 173 auto_ptr<wxZipEntry> entry;
00375592
VZ
174
175 // convert the local name we are looking for into the internal format
176 wxString name = wxZipEntry::GetInternalName(localname);
177
178 // open the zip
179 wxFFileInputStream in(_T("test.zip"));
180 wxZipInputStream zip(in);
181
182 // call GetNextEntry() until the required internal name is found
183 do {
184 entry.reset(zip.GetNextEntry());
185 }
186 while (entry.get() != NULL && entry->GetInternalName() != name);
187
188 if (entry.get() != NULL) {
189 // read the entry's data...
190 }
191
192\end{verbatim}
193
194To access several entries randomly, it is most efficient to transfer the
195entire catalogue of entries to a container such as a std::map or a
196 \helpref{wxHashMap}{wxhashmap} then entries looked up by name can be
197opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
198
199\begin{verbatim}
200 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
201 ZipCatalog::iterator it;
202 wxZipEntry *entry;
203 ZipCatalog cat;
204
205 // open the zip
206 wxFFileInputStream in(_T("test.zip"));
207 wxZipInputStream zip(in);
208
209 // load the zip catalog
210 while ((entry = zip.GetNextEntry()) != NULL) {
211 wxZipEntry*& current = cat[entry->GetInternalName()];
212 // some archive formats can have multiple entries with the same name
213 // (e.g. tar) though it is an error in the case of zip
214 delete current;
215 current = entry;
216 }
217
218 // open an entry by name
219 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
220 zip.OpenEntry(*it->second);
221 // ... now read entry's data
222 }
223
224\end{verbatim}
225
226To open more than one entry simultaneously you need more than one
227underlying stream on the same archive:
228
229\begin{verbatim}
230 // opening another entry without closing the first requires another
231 // input stream for the same file
232 wxFFileInputStream in2(_T("test.zip"));
233 wxZipInputStream zip2(in2);
234 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
235 zip2.OpenEntry(*it->second);
236
237\end{verbatim}
238
239
240\subsection{Generic archive programming}\label{wxarcgeneric}
241
242\helpref{Archive formats such as zip}{wxarc}
243
244Also see \helpref{wxFileSystem}{fs} for a higher level interface that
245can handle archive files in a generic way.
246
247The specific archive classes, such as the wxZip classes, inherit from
248the following abstract classes which can be used to write code that can
249handle any of the archive types:
250
251\begin{twocollist}\twocolwidtha{5cm}
252\twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
253\twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
254\twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
255entry (e.g. filename)}
256\end{twocollist}
257
258In order to be able to write generic code it's necessary to be able to create
259instances of the classes without knowing which archive type is being used.
601bee5c
MW
260To allow this there is a class factory for each archive type, derived from
261 \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, that can create
00375592
VZ
262the other classes.
263
df467a9d
MW
264For example, given {\it wxArchiveClassFactory* factory}, streams and
265entries can be created like this:
00375592
VZ
266
267\begin{verbatim}
268 // create streams without knowing their type
601bee5c
MW
269 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
270 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
00375592
VZ
271
272 // create an empty entry object
601bee5c 273 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
00375592
VZ
274
275\end{verbatim}
276
601bee5c
MW
277For the factory itself, the static member
278 \helpref{wxArchiveClassFactory::Find()}{wxarchiveclassfactoryfind}
279can be used to find a class factory that can handle a given file
280extension or mime type. For example, given {\it filename}:
df467a9d
MW
281
282\begin{verbatim}
601bee5c
MW
283 const wxArchiveClassFactory *factory;
284 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
285
286 if (factory)
287 stream = factory->NewStream(new wxFFileInputStream(filename));
df467a9d
MW
288
289\end{verbatim}
290
601bee5c
MW
291{\it Find} does not give away ownership of the returned pointer, so it
292does not need to be deleted.
00375592 293
601bee5c
MW
294There are similar class factories for the filter streams that handle the
295compression and decompression of a single stream, such as wxGzipInputStream.
296These can be found using
297 \helpref{wxFilterClassFactory::Find()}{wxfilterclassfactoryfind}.
00375592 298
601bee5c 299For example, to list the contents of archive {\it filename}:
00375592
VZ
300
301\begin{verbatim}
601bee5c 302 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
00375592 303
601bee5c
MW
304 if (in->IsOk())
305 {
306 // look for a filter handler, e.g. for '.gz'
307 const wxFilterClassFactory *fcf;
308 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
309 if (fcf) {
310 in.reset(fcf->NewStream(in.release()));
311 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
312 filename = fcf->PopExtension(filename);
313 }
314
315 // look for an archive handler, e.g. for '.zip' or '.tar'
316 const wxArchiveClassFactory *acf;
317 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
318 if (acf) {
319 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
320 auto_ptr<wxArchiveEntry> entry;
321
322 // list the contents of the archive
323 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
324 std::wcout << entry->GetName().c_str() << "\n";
325 }
326 else {
327 wxLogError(_T("can't handle '%s'"), filename.c_str());
328 }
00375592
VZ
329 }
330
00375592
VZ
331\end{verbatim}
332
333
334\subsection{Archives on non-seekable streams}\label{wxarcnoseek}
335
336\helpref{Archive formats such as zip}{wxarc}
337
338In general, handling archives on non-seekable streams is done in the same
339way as for seekable streams, with a few caveats.
340
341The main limitation is that accessing entries randomly using
342 \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
343is not possible; the entries can only be accessed sequentially in the order
344they are stored within the archive.
345
346For each archive type, there will also be other limitations which will
347depend on the order the entries' meta-data is stored within the archive.
348These are not too difficult to deal with, and are outlined below.
349
350\wxheading{PutNextEntry and the entry size}
351
352When writing archives, some archive formats store the entry size before
353the entry's data (tar has this limitation, zip doesn't). In this case
354the entry's size must be passed to
355 \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
356occurs.
357
358This is only an issue on non-seekable streams, since otherwise the archive
359output stream can seek back and fix up the header once the size of the
360entry is known.
361
362For generic programming, one way to handle this is to supply the size
363whenever it is known, and rely on the error message from the output
364stream when the operation is not supported.
365
366\wxheading{GetNextEntry and the weak reference mechanism}
367
368Some archive formats do not store all an entry's meta-data before the
369entry's data (zip is an example). In this case, when reading from a
370non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
371can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
372object - not all the fields are set.
373
374The input stream then keeps a weak reference to the entry object and
375updates it when more meta-data becomes available. A weak reference being
376one that does not prevent you from deleting the wxArchiveEntry object - the
377input stream only attempts to update it if it is still around.
378
379The documentation for each archive entry type gives the details
380of what meta-data becomes available and when. For generic programming,
381when the worst case must be assumed, you can rely on all the fields
382of wxArchiveEntry being fully populated when GetNextEntry() returns,
383with the following exceptions:
384
385\begin{twocollist}\twocolwidtha{3cm}
386\twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
387available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
388or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
389\twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
390be available after the end of the archive has been reached, i.e. after
391GetNextEntry() returns NULL and Eof() is true}
392\end{twocollist}
393
394This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
395to always fully preserve entries' meta-data. No matter what order
396the meta-data occurs within the archive, the input stream will always
397have read it before the output stream must write it.
398
399\wxheading{wxArchiveNotifier}
400
401Notifier objects can be used to get a notification whenever an input
402stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
403via the weak reference mechanism.
404
405Consider the following code which renames an entry in an archive.
406This is the usual way to modify an entry's meta-data, simply set the
407required field before writing it with
408 \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
409
410\begin{verbatim}
601bee5c
MW
411 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
412 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
413 auto_ptr<wxArchiveEntry> entry;
00375592
VZ
414
415 outarc->CopyArchiveMetaData(*arc);
416
417 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
418 if (entry->GetName() == from)
419 entry->SetName(to);
420 if (!outarc->CopyEntry(entry.release(), *arc))
421 break;
422 }
423
424 bool success = arc->Eof() && outarc->Close();
425
426\end{verbatim}
427
428However, for non-seekable streams, this technique cannot be used for
429fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
430which are not necessarily set when
431 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
432this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
433
434\begin{verbatim}
435class MyNotifier : public wxArchiveNotifier
436{
437public:
438 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
439};
440
441\end{verbatim}
442
443The meta-data changes are done in your notifier's
444 \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
445then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
446CopyEntry():
447
448\begin{verbatim}
601bee5c
MW
449 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
450 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
451 auto_ptr<wxArchiveEntry> entry;
452 MyNotifier notifier;
00375592
VZ
453
454 outarc->CopyArchiveMetaData(*arc);
455
456 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
457 entry->SetNotifier(notifier);
458 if (!outarc->CopyEntry(entry.release(), *arc))
459 break;
460 }
461
462 bool success = arc->Eof() && outarc->Close();
463
464\end{verbatim}
465
466SetNotifier() calls OnEntryUpdated() immediately, then the input
467stream calls it again whenever it sets more fields in the entry. Since
468OnEntryUpdated() will be called at least once, this technique always
469works even when it is not strictly necessary to use it. For example,
470changing the entry name can be done this way too and it works on seekable
471streams as well as non-seekable.
472