]> git.saurik.com Git - wxWidgets.git/blob - docs/latex/wx/arc.tex
Update archive overview for class factory changes.
[wxWidgets.git] / docs / latex / wx / arc.tex
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 %% Name: arc.tex
3 %% Purpose: Overview of the archive classes
4 %% Author: M.J.Wetherell
5 %% RCS-ID: $Id$
6 %% Copyright: 2004 M.J.Wetherell
7 %% License: wxWindows license
8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
9
10 \section{Archive formats such as zip}\label{wxarc}
11
12 The archive classes handle archive formats such as zip, tar, rar and cab.
13 Currently wxZip and wxTar classes are included.
14
15 For each archive type, there are the following classes (using zip here
16 as an example):
17
18 \begin{twocollist}\twocolwidtha{4cm}
19 \twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
20 \twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
21 \twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
22 entry (e.g. filename, timestamp, etc.)}
23 \end{twocollist}
24
25 There are also abstract wxArchive classes that can be used to write code
26 that can handle any of the archive types,
27 see '\helpref{Generic archive programming}{wxarcgeneric}'.
28 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
29 can handle archive files in a generic way.
30
31 The classes are designed to handle archives on both seekable streams such
32 as disk files, and non-seekable streams such as pipes and sockets
33 (see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
34
35 \wxheading{See also}
36
37 \helpref{wxFileSystem}{fs}
38
39
40 \subsection{Creating an archive}\label{wxarccreate}
41
42 \helpref{Archive formats such as zip}{wxarc}
43
44 Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
45 create each new entry in the archive, then write the entry's data.
46 Another call to PutNextEntry() closes the current entry and begins the next.
47
48 For example:
49
50 \begin{verbatim}
51 wxFFileOutputStream out(_T("test.zip"));
52 wxZipOutputStream zip(out);
53 wxTextOutputStream txt(zip);
54 wxString sep(wxFileName::GetPathSeparator());
55
56 zip.PutNextEntry(_T("entry1.txt"));
57 txt << _T("Some text for entry1.txt\n");
58
59 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
60 txt << _T("Some text for subdir/entry2.txt\n");
61
62 \end{verbatim}
63
64 The name of each entry can be a full path, which makes it possible to
65 store entries in subdirectories.
66
67
68 \subsection{Extracting an archive}\label{wxarcextract}
69
70 \helpref{Archive formats such as zip}{wxarc}
71
72 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
73 to an entry object containing the meta-data for the next entry in the archive
74 (and gives away ownership). Reading from the input stream then returns the
75 entry's data. Eof() becomes true after an attempt has been made to read past
76 the end of the entry's data.
77
78 When there are no more entries, GetNextEntry() returns NULL and sets Eof().
79
80 \begin{verbatim}
81 auto_ptr<wxZipEntry> entry;
82
83 wxFFileInputStream in(_T("test.zip"));
84 wxZipInputStream zip(in);
85
86 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
87 {
88 // access meta-data
89 wxString name = entry->GetName();
90 // read 'zip' to access the entry's data
91 }
92
93 \end{verbatim}
94
95
96 \subsection{Modifying an archive}\label{wxarcmodify}
97
98 \helpref{Archive formats such as zip}{wxarc}
99
100 To modify an existing archive, write a new copy of the archive to a new file,
101 making any necessary changes along the way and transferring any unchanged
102 entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
103 For archive types which compress entry data, CopyEntry() is likely to be
104 much more efficient than transferring the data using Read() and Write()
105 since it will copy them without decompressing and recompressing them.
106
107 In general modifications are not possible without rewriting the archive,
108 though it may be possible in some limited cases. Even then, rewriting the
109 archive is usually a better choice since a failure can be handled without
110 losing the whole
111 archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
112 be helpful to do this.
113
114 For example to delete all entries matching the pattern "*.txt":
115
116 \begin{verbatim}
117 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
118 wxTempFileOutputStream out(_T("test.zip"));
119
120 wxZipInputStream inzip(*in);
121 wxZipOutputStream outzip(out);
122
123 auto_ptr<wxZipEntry> entry;
124
125 // transfer any meta-data for the archive as a whole (the zip comment
126 // in the case of zip)
127 outzip.CopyArchiveMetaData(inzip);
128
129 // call CopyEntry for each entry except those matching the pattern
130 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
131 if (!entry->GetName().Matches(_T("*.txt")))
132 if (!outzip.CopyEntry(entry.release(), inzip))
133 break;
134
135 // close the input stream by releasing the pointer to it, do this
136 // before closing the output stream so that the file can be replaced
137 in.reset();
138
139 // you can check for success as follows
140 bool success = inzip.Eof() && outzip.Close() && out.Commit();
141
142 \end{verbatim}
143
144
145 \subsection{Looking up an archive entry by name}\label{wxarcbyname}
146
147 \helpref{Archive formats such as zip}{wxarc}
148
149 Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
150 more convenient for accessing archive entries by name.
151
152 To open just one entry in an archive, the most efficient way is
153 to simply search for it linearly by calling
154 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
155 required entry is found. This works both for archives on seekable and
156 non-seekable streams.
157
158 The format of filenames in the archive is likely to be different
159 from the local filename format. For example zips and tars use
160 unix style names, with forward slashes as the path separator,
161 and absolute paths are not allowed. So if on Windows the file
162 "C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
163 the entry back \helpref{GetName()}{wxarchiveentryname} will return
164 "MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
165 and back has lost some information.
166
167 So to avoid ambiguity when searching for an entry matching a local name,
168 it is better to convert the local name to the archive's internal format
169 and search for that:
170
171 \begin{verbatim}
172 auto_ptr<wxZipEntry> entry;
173
174 // convert the local name we are looking for into the internal format
175 wxString name = wxZipEntry::GetInternalName(localname);
176
177 // open the zip
178 wxFFileInputStream in(_T("test.zip"));
179 wxZipInputStream zip(in);
180
181 // call GetNextEntry() until the required internal name is found
182 do {
183 entry.reset(zip.GetNextEntry());
184 }
185 while (entry.get() != NULL && entry->GetInternalName() != name);
186
187 if (entry.get() != NULL) {
188 // read the entry's data...
189 }
190
191 \end{verbatim}
192
193 To access several entries randomly, it is most efficient to transfer the
194 entire catalogue of entries to a container such as a std::map or a
195 \helpref{wxHashMap}{wxhashmap}. Entries looked up by name can then be
196 opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
197
198 \begin{verbatim}
199 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
200 ZipCatalog::iterator it;
201 wxZipEntry *entry;
202 ZipCatalog cat;
203
204 // open the zip
205 wxFFileInputStream in(_T("test.zip"));
206 wxZipInputStream zip(in);
207
208 // load the zip catalog
209 while ((entry = zip.GetNextEntry()) != NULL) {
210 wxZipEntry*& current = cat[entry->GetInternalName()];
211 // some archive formats can have multiple entries with the same name
212 // (e.g. tar) though it is an error in the case of zip
213 delete current;
214 current = entry;
215 }
216
217 // open an entry by name
218 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
219 zip.OpenEntry(*it->second);
220 // ... now read entry's data
221 }
222
223 \end{verbatim}
224
225 To open more than one entry simultaneously you need more than one
226 underlying stream on the same archive:
227
228 \begin{verbatim}
229 // opening another entry without closing the first requires another
230 // input stream for the same file
231 wxFFileInputStream in2(_T("test.zip"));
232 wxZipInputStream zip2(in2);
233 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
234 zip2.OpenEntry(*it->second);
235
236 \end{verbatim}
237
238
239 \subsection{Generic archive programming}\label{wxarcgeneric}
240
241 \helpref{Archive formats such as zip}{wxarc}
242
243 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
244 can handle archive files in a generic way.
245
246 The specific archive classes, such as the wxZip classes, inherit from
247 the following abstract classes which can be used to write code that can
248 handle any of the archive types:
249
250 \begin{twocollist}\twocolwidtha{5cm}
251 \twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
252 \twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
253 \twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
254 entry (e.g. filename)}
255 \end{twocollist}
256
257 In order to be able to write generic code it's necessary to be able to create
258 instances of the classes without knowing which archive type is being used.
259 To allow this there is a class factory for each archive type, derived from
260 \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, that can create
261 the other classes.
262
263 For example, given {\it wxArchiveClassFactory* factory}, streams and
264 entries can be created like this:
265
266 \begin{verbatim}
267 // create streams without knowing their type
268 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
269 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
270
271 // create an empty entry object
272 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
273
274 \end{verbatim}
275
276 For the factory itself, the static member
277 \helpref{wxArchiveClassFactory::Find()}{wxarchiveclassfactoryfind}
278 can be used to find a class factory that can handle a given file
279 extension or mime type. For example, given {\it filename}:
280
281 \begin{verbatim}
282 const wxArchiveClassFactory *factory;
283 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
284
285 if (factory)
286 stream = factory->NewStream(new wxFFileInputStream(filename));
287
288 \end{verbatim}
289
290 {\it Find} does not give away ownership of the returned pointer, so it
291 does not need to be deleted.
292
293 There are similar class factories for the filter streams that handle the
294 compression and decompression of a single stream, such as wxGzipInputStream.
295 These can be found using
296 \helpref{wxFilterClassFactory::Find()}{wxfilterclassfactoryfind}.
297
298 For example, to list the contents of archive {\it filename}:
299
300 \begin{verbatim}
301 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
302
303 if (in->IsOk())
304 {
305 // look for a filter handler, e.g. for '.gz'
306 const wxFilterClassFactory *fcf;
307 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
308 if (fcf) {
309 in.reset(fcf->NewStream(in.release()));
310 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
311 filename = fcf->PopExtension(filename);
312 }
313
314 // look for an archive handler, e.g. for '.zip' or '.tar'
315 const wxArchiveClassFactory *acf;
316 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
317 if (acf) {
318 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
319 auto_ptr<wxArchiveEntry> entry;
320
321 // list the contents of the archive
322 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
323 std::wcout << entry->GetName().c_str() << "\n";
324 }
325 else {
326 wxLogError(_T("can't handle '%s'"), filename.c_str());
327 }
328 }
329
330 \end{verbatim}
331
332
333 \subsection{Archives on non-seekable streams}\label{wxarcnoseek}
334
335 \helpref{Archive formats such as zip}{wxarc}
336
337 In general, handling archives on non-seekable streams is done in the same
338 way as for seekable streams, with a few caveats.
339
340 The main limitation is that accessing entries randomly using
341 \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
342 is not possible; the entries can only be accessed sequentially in the order
343 they are stored within the archive.
344
345 For each archive type, there will also be other limitations which will
346 depend on the order the entries' meta-data is stored within the archive.
347 These are not too difficult to deal with, and are outlined below.
348
349 \wxheading{PutNextEntry and the entry size}
350
351 When writing archives, some archive formats store the entry size before
352 the entry's data (tar has this limitation, zip doesn't). In this case
353 the entry's size must be passed to
354 \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
355 occurs.
356
357 This is only an issue on non-seekable streams, since otherwise the archive
358 output stream can seek back and fix up the header once the size of the
359 entry is known.
360
361 For generic programming, one way to handle this is to supply the size
362 whenever it is known, and rely on the error message from the output
363 stream when the operation is not supported.
364
365 \wxheading{GetNextEntry and the weak reference mechanism}
366
367 Some archive formats do not store all of an entry's meta-data before the
368 entry's data (zip is an example). In this case, when reading from a
369 non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
370 can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
371 object - not all the fields are set.
372
373 The input stream then keeps a weak reference to the entry object and
374 updates it when more meta-data becomes available. A weak reference being
375 one that does not prevent you from deleting the wxArchiveEntry object - the
376 input stream only attempts to update it if it is still around.
377
378 The documentation for each archive entry type gives the details
379 of what meta-data becomes available and when. For generic programming,
380 when the worst case must be assumed, you can rely on all the fields
381 of wxArchiveEntry being fully populated when GetNextEntry() returns,
382 with the following exceptions:
383
384 \begin{twocollist}\twocolwidtha{3cm}
385 \twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
386 available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
387 or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
388 \twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
389 be available after the end of the archive has been reached, i.e. after
390 GetNextEntry() returns NULL and Eof() is true}
391 \end{twocollist}
392
393 This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
394 to always fully preserve entries' meta-data. No matter what order
395 the meta-data occurs within the archive, the input stream will always
396 have read it before the output stream must write it.
397
398 \wxheading{wxArchiveNotifier}
399
400 Notifier objects can be used to get a notification whenever an input
401 stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
402 via the weak reference mechanism.
403
404 Consider the following code which renames an entry in an archive.
405 This is the usual way to modify an entry's meta-data: simply set the
406 required field before writing it with
407 \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
408
409 \begin{verbatim}
410 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
411 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
412 auto_ptr<wxArchiveEntry> entry;
413
414 outarc->CopyArchiveMetaData(*arc);
415
416 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
417 if (entry->GetName() == from)
418 entry->SetName(to);
419 if (!outarc->CopyEntry(entry.release(), *arc))
420 break;
421 }
422
423 bool success = arc->Eof() && outarc->Close();
424
425 \end{verbatim}
426
427 However, for non-seekable streams, this technique cannot be used for
428 fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
429 which are not necessarily set when
430 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
431 this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
432
433 \begin{verbatim}
434 class MyNotifier : public wxArchiveNotifier
435 {
436 public:
437 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
438 };
439
440 \end{verbatim}
441
442 The meta-data changes are done in your notifier's
443 \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
444 then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
445 CopyEntry():
446
447 \begin{verbatim}
448 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
449 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
450 auto_ptr<wxArchiveEntry> entry;
451 MyNotifier notifier;
452
453 outarc->CopyArchiveMetaData(*arc);
454
455 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
456 entry->SetNotifier(notifier);
457 if (!outarc->CopyEntry(entry.release(), *arc))
458 break;
459 }
460
461 bool success = arc->Eof() && outarc->Close();
462
463 \end{verbatim}
464
465 SetNotifier() calls OnEntryUpdated() immediately, then the input
466 stream calls it again whenever it sets more fields in the entry. Since
467 OnEntryUpdated() will be called at least once, this technique always
468 works even when it is not strictly necessary to use it. For example,
469 changing the entry name can be done this way too and it works on seekable
470 streams as well as non-seekable.
471