]> git.saurik.com Git - wxWidgets.git/blob - docs/latex/wx/arc.tex
don't misinterpret the time after the date as a weekday (patch 1836708)
[wxWidgets.git] / docs / latex / wx / arc.tex
1 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2 %% Name: arc.tex
3 %% Purpose: Overview of the archive classes
4 %% Author: M.J.Wetherell
5 %% RCS-ID: $Id$
6 %% Copyright: 2004 M.J.Wetherell
7 %% License: wxWindows license
8 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
9
10 \section{Archive formats such as zip}\label{wxarc}
11
12 The archive classes handle archive formats such as zip, tar, rar and cab.
13 Currently \helpref{wxZip}{wxzipinputstream}
14 and \helpref{wxTar}{wxtarinputstream} classes are included.
15
16 For each archive type, there are the following classes (using zip here
17 as an example):
18
19 \begin{twocollist}\twocolwidtha{4cm}
20 \twocolitem{\helpref{wxZipInputStream}{wxzipinputstream}}{Input stream}
21 \twocolitem{\helpref{wxZipOutputStream}{wxzipoutputstream}}{Output stream}
22 \twocolitem{\helpref{wxZipEntry}{wxzipentry}}{Holds the meta-data for an
23 entry (e.g. filename, timestamp, etc.)}
24 \end{twocollist}
25
26 There are also abstract wxArchive classes that can be used to write code
27 that can handle any of the archive types,
28 see '\helpref{Generic archive programming}{wxarcgeneric}'.
29 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
30 can handle archive files in a generic way.
31
32 The classes are designed to handle archives on both seekable streams such
33 as disk files, or non-seekable streams such as pipes and sockets
34 (see '\helpref{Archives on non-seekable streams}{wxarcnoseek}').
35
36 \wxheading{See also}
37
38 \helpref{wxFileSystem}{fs}
39
40
41 \subsection{Creating an archive}\label{wxarccreate}
42
43 \helpref{Archive formats such as zip}{wxarc}
44
45 Call \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} to
46 create each new entry in the archive, then write the entry's data.
47 Another call to PutNextEntry() closes the current entry and begins the next.
48
49 For example:
50
51 \begin{verbatim}
52 wxFFileOutputStream out(_T("test.zip"));
53 wxZipOutputStream zip(out);
54 wxTextOutputStream txt(zip);
55 wxString sep(wxFileName::GetPathSeparator());
56
57 zip.PutNextEntry(_T("entry1.txt"));
58 txt << _T("Some text for entry1.txt\n");
59
60 zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
61 txt << _T("Some text for subdir/entry2.txt\n");
62
63 \end{verbatim}
64
65 The name of each entry can be a full path, which makes it possible to
66 store entries in subdirectories.
67
68
69 \subsection{Extracting an archive}\label{wxarcextract}
70
71 \helpref{Archive formats such as zip}{wxarc}
72
73 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns a pointer
74 to an entry object containing the meta-data for the next entry in the archive
75 (and gives away ownership). Reading from the input stream then returns the
76 entry's data. Eof() becomes true after an attempt has been made to read past
77 the end of the entry's data.
78
79 When there are no more entries, GetNextEntry() returns NULL and sets Eof().
80
81 \begin{verbatim}
82 auto_ptr<wxZipEntry> entry;
83
84 wxFFileInputStream in(_T("test.zip"));
85 wxZipInputStream zip(in);
86
87 while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
88 {
89 // access meta-data
90 wxString name = entry->GetName();
91 // read 'zip' to access the entry's data
92 }
93
94 \end{verbatim}
95
96
97 \subsection{Modifying an archive}\label{wxarcmodify}
98
99 \helpref{Archive formats such as zip}{wxarc}
100
101 To modify an existing archive, write a new copy of the archive to a new file,
102 making any necessary changes along the way and transferring any unchanged
103 entries using \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}.
104 For archive types which compress entry data, CopyEntry() is likely to be
105 much more efficient than transferring the data using Read() and Write()
106 since it will copy them without decompressing and recompressing them.
107
108 In general modifications are not possible without rewriting the archive,
109 though it may be possible in some limited cases. Even then, rewriting the
110 archive is usually a better choice since a failure can be handled without
111 losing the whole
112 archive. \helpref{wxTempFileOutputStream}{wxtempfileoutputstream} can
113 be helpful to do this.
114
115 For example, to delete all entries matching the pattern "*.txt":
116
117 \begin{verbatim}
118 auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
119 wxTempFileOutputStream out(_T("test.zip"));
120
121 wxZipInputStream inzip(*in);
122 wxZipOutputStream outzip(out);
123
124 auto_ptr<wxZipEntry> entry;
125
126 // transfer any meta-data for the archive as a whole (the zip comment
127 // in the case of zip)
128 outzip.CopyArchiveMetaData(inzip);
129
130 // call CopyEntry for each entry except those matching the pattern
131 while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
132 if (!entry->GetName().Matches(_T("*.txt")))
133 if (!outzip.CopyEntry(entry.release(), inzip))
134 break;
135
136 // close the input stream by releasing the pointer to it, do this
137 // before closing the output stream so that the file can be replaced
138 in.reset();
139
140 // you can check for success as follows
141 bool success = inzip.Eof() && outzip.Close() && out.Commit();
142
143 \end{verbatim}
144
145
146 \subsection{Looking up an archive entry by name}\label{wxarcbyname}
147
148 \helpref{Archive formats such as zip}{wxarc}
149
150 Also see \helpref{wxFileSystem}{fs} for a higher level interface that is
151 more convenient for accessing archive entries by name.
152
153 To open just one entry in an archive, the most efficient way is
154 to simply search for it linearly by calling
155 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} until the
156 required entry is found. This works both for archives on seekable and
157 non-seekable streams.
158
159 The format of filenames in the archive is likely to be different
160 from the local filename format. For example zips and tars use
161 unix style names, with forward slashes as the path separator,
162 and absolute paths are not allowed. So if on Windows the file
163 "C:$\backslash$MYDIR$\backslash$MYFILE.TXT" is stored, then when reading
164 the entry back \helpref{GetName()}{wxarchiveentryname} will return
165 "MYDIR$\backslash$MYFILE.TXT". The conversion into the internal format
166 and back has lost some information.
167
168 So to avoid ambiguity when searching for an entry matching a local name,
169 it is better to convert the local name to the archive's internal format
170 and search for that:
171
172 \begin{verbatim}
173 auto_ptr<wxZipEntry> entry;
174
175 // convert the local name we are looking for into the internal format
176 wxString name = wxZipEntry::GetInternalName(localname);
177
178 // open the zip
179 wxFFileInputStream in(_T("test.zip"));
180 wxZipInputStream zip(in);
181
182 // call GetNextEntry() until the required internal name is found
183 do {
184 entry.reset(zip.GetNextEntry());
185 }
186 while (entry.get() != NULL && entry->GetInternalName() != name);
187
188 if (entry.get() != NULL) {
189 // read the entry's data...
190 }
191
192 \end{verbatim}
193
194 To access several entries randomly, it is most efficient to transfer the
195 entire catalogue of entries to a container such as a std::map or a
196 \helpref{wxHashMap}{wxhashmap}; entries looked up by name can then be
197 opened using the \helpref{OpenEntry()}{wxarchiveinputstreamopenentry} method.
198
199 \begin{verbatim}
200 WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
201 ZipCatalog::iterator it;
202 wxZipEntry *entry;
203 ZipCatalog cat;
204
205 // open the zip
206 wxFFileInputStream in(_T("test.zip"));
207 wxZipInputStream zip(in);
208
209 // load the zip catalog
210 while ((entry = zip.GetNextEntry()) != NULL) {
211 wxZipEntry*& current = cat[entry->GetInternalName()];
212 // some archive formats can have multiple entries with the same name
213 // (e.g. tar) though it is an error in the case of zip
214 delete current;
215 current = entry;
216 }
217
218 // open an entry by name
219 if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end()) {
220 zip.OpenEntry(*it->second);
221 // ... now read entry's data
222 }
223
224 \end{verbatim}
225
226 To open more than one entry simultaneously you need more than one
227 underlying stream on the same archive:
228
229 \begin{verbatim}
230 // opening another entry without closing the first requires another
231 // input stream for the same file
232 wxFFileInputStream in2(_T("test.zip"));
233 wxZipInputStream zip2(in2);
234 if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
235 zip2.OpenEntry(*it->second);
236
237 \end{verbatim}
238
239
240 \subsection{Generic archive programming}\label{wxarcgeneric}
241
242 \helpref{Archive formats such as zip}{wxarc}
243
244 Also see \helpref{wxFileSystem}{fs} for a higher level interface that
245 can handle archive files in a generic way.
246
247 The specific archive classes, such as the wxZip classes, inherit from
248 the following abstract classes which can be used to write code that can
249 handle any of the archive types:
250
251 \begin{twocollist}\twocolwidtha{5cm}
252 \twocolitem{\helpref{wxArchiveInputStream}{wxarchiveinputstream}}{Input stream}
253 \twocolitem{\helpref{wxArchiveOutputStream}{wxarchiveoutputstream}}{Output stream}
254 \twocolitem{\helpref{wxArchiveEntry}{wxarchiveentry}}{Holds the meta-data for an
255 entry (e.g. filename)}
256 \end{twocollist}
257
258 In order to be able to write generic code it's necessary to be able to create
259 instances of the classes without knowing which archive type is being used.
260 To allow this there is a class factory for each archive type, derived from
261 \helpref{wxArchiveClassFactory}{wxarchiveclassfactory}, that can create
262 the other classes.
263
264 For example, given {\it wxArchiveClassFactory* factory}, streams and
265 entries can be created like this:
266
267 \begin{verbatim}
268 // create streams without knowing their type
269 auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
270 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
271
272 // create an empty entry object
273 auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
274
275 \end{verbatim}
276
277 For the factory itself, the static member
278 \helpref{wxArchiveClassFactory::Find()}{wxarchiveclassfactoryfind}
279 can be used to find a class factory that can handle a given file
280 extension or mime type. For example, given {\it filename}:
281
282 \begin{verbatim}
283 const wxArchiveClassFactory *factory;
284 factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
285
286 if (factory)
287 stream = factory->NewStream(new wxFFileInputStream(filename));
288
289 \end{verbatim}
290
291 {\it Find} does not give away ownership of the returned pointer, so it
292 does not need to be deleted.
293
294 There are similar class factories for the filter streams that handle the
295 compression and decompression of a single stream, such as wxGzipInputStream.
296 These can be found using
297 \helpref{wxFilterClassFactory::Find()}{wxfilterclassfactoryfind}.
298
299 For example, to list the contents of archive {\it filename}:
300
301 \begin{verbatim}
302 auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
303
304 if (in->IsOk())
305 {
306 // look for a filter handler, e.g. for '.gz'
307 const wxFilterClassFactory *fcf;
308 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
309 if (fcf) {
310 in.reset(fcf->NewStream(in.release()));
311 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
312 filename = fcf->PopExtension(filename);
313 }
314
315 // look for an archive handler, e.g. for '.zip' or '.tar'
316 const wxArchiveClassFactory *acf;
317 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
318 if (acf) {
319 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
320 auto_ptr<wxArchiveEntry> entry;
321
322 // list the contents of the archive
323 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
324 std::wcout << entry->GetName().c_str() << "\n";
325 }
326 else {
327 wxLogError(_T("can't handle '%s'"), filename.c_str());
328 }
329 }
330
331 \end{verbatim}
332
333
334 \subsection{Archives on non-seekable streams}\label{wxarcnoseek}
335
336 \helpref{Archive formats such as zip}{wxarc}
337
338 In general, handling archives on non-seekable streams is done in the same
339 way as for seekable streams, with a few caveats.
340
341 The main limitation is that accessing entries randomly using
342 \helpref{OpenEntry()}{wxarchiveinputstreamopenentry}
343 is not possible; the entries can only be accessed sequentially in the order
344 they are stored within the archive.
345
346 For each archive type, there will also be other limitations which will
347 depend on the order the entries' meta-data is stored within the archive.
348 These are not too difficult to deal with, and are outlined below.
349
350 \wxheading{PutNextEntry and the entry size}
351
352 When writing archives, some archive formats store the entry size before
353 the entry's data (tar has this limitation, zip doesn't). In this case
354 the entry's size must be passed to
355 \helpref{PutNextEntry()}{wxarchiveoutputstreamputnextentry} or an error
356 occurs.
357
358 This is only an issue on non-seekable streams, since otherwise the archive
359 output stream can seek back and fix up the header once the size of the
360 entry is known.
361
362 For generic programming, one way to handle this is to supply the size
363 whenever it is known, and rely on the error message from the output
364 stream when the operation is not supported.
365
366 \wxheading{GetNextEntry and the weak reference mechanism}
367
368 Some archive formats do not store all of an entry's meta-data before the
369 entry's data (zip is an example). In this case, when reading from a
370 non-seekable stream, \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry}
371 can only return a partially populated \helpref{wxArchiveEntry}{wxarchiveentry}
372 object - not all the fields are set.
373
374 The input stream then keeps a weak reference to the entry object and
375 updates it when more meta-data becomes available. A weak reference is
376 one that does not prevent you from deleting the wxArchiveEntry object - the
377 input stream only attempts to update it if it is still around.
378
379 The documentation for each archive entry type gives the details
380 of what meta-data becomes available and when. For generic programming,
381 when the worst case must be assumed, you can rely on all the fields
382 of wxArchiveEntry being fully populated when GetNextEntry() returns,
383 with the following exceptions:
384
385 \begin{twocollist}\twocolwidtha{3cm}
386 \twocolitem{\helpref{GetSize()}{wxarchiveentrysize}}{Guaranteed to be
387 available after the entry has been read to \helpref{Eof()}{wxinputstreameof},
388 or \helpref{CloseEntry()}{wxarchiveinputstreamcloseentry} has been called}
389 \twocolitem{\helpref{IsReadOnly()}{wxarchiveentryisreadonly}}{Guaranteed to
390 be available after the end of the archive has been reached, i.e. after
391 GetNextEntry() returns NULL and Eof() is true}
392 \end{twocollist}
393
394 This mechanism allows \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}
395 to always fully preserve entries' meta-data. No matter what order
396 the meta-data occurs within the archive, the input stream will always
397 have read it before the output stream must write it.
398
399 \wxheading{wxArchiveNotifier}
400
401 Notifier objects can be used to get a notification whenever an input
402 stream updates a \helpref{wxArchiveEntry}{wxarchiveentry} object's data
403 via the weak reference mechanism.
404
405 Consider the following code which renames an entry in an archive.
406 This is the usual way to modify an entry's meta-data: simply set the
407 required field before writing it with
408 \helpref{CopyEntry()}{wxarchiveoutputstreamcopyentry}:
409
410 \begin{verbatim}
411 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
412 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
413 auto_ptr<wxArchiveEntry> entry;
414
415 outarc->CopyArchiveMetaData(*arc);
416
417 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
418 if (entry->GetName() == from)
419 entry->SetName(to);
420 if (!outarc->CopyEntry(entry.release(), *arc))
421 break;
422 }
423
424 bool success = arc->Eof() && outarc->Close();
425
426 \end{verbatim}
427
428 However, for non-seekable streams, this technique cannot be used for
429 fields such as \helpref{IsReadOnly()}{wxarchiveentryisreadonly},
430 which are not necessarily set when
431 \helpref{GetNextEntry()}{wxarchiveinputstreamgetnextentry} returns. In
432 this case a \helpref{wxArchiveNotifier}{wxarchivenotifier} can be used:
433
434 \begin{verbatim}
435 class MyNotifier : public wxArchiveNotifier
436 {
437 public:
438 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
439 };
440
441 \end{verbatim}
442
443 The meta-data changes are done in your notifier's
444 \helpref{OnEntryUpdated()}{wxarchivenotifieronentryupdated} method,
445 then \helpref{SetNotifier()}{wxarchiveentrynotifier} is called before
446 CopyEntry():
447
448 \begin{verbatim}
449 auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
450 auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
451 auto_ptr<wxArchiveEntry> entry;
452 MyNotifier notifier;
453
454 outarc->CopyArchiveMetaData(*arc);
455
456 while (entry.reset(arc->GetNextEntry()), entry.get() != NULL) {
457 entry->SetNotifier(notifier);
458 if (!outarc->CopyEntry(entry.release(), *arc))
459 break;
460 }
461
462 bool success = arc->Eof() && outarc->Close();
463
464 \end{verbatim}
465
466 SetNotifier() calls OnEntryUpdated() immediately, then the input
467 stream calls it again whenever it sets more fields in the entry. Since
468 OnEntryUpdated() will be called at least once, this technique always
469 works even when it is not strictly necessary to use it. For example,
470 changing the entry name can be done this way too and it works on seekable
471 streams as well as non-seekable.
472