]> git.saurik.com Git - wxWidgets.git/blame - docs/doxygen/overviews/archive.h
Mention wxString caching in UTF-8 ode
[wxWidgets.git] / docs / doxygen / overviews / archive.h
CommitLineData
15b6757b 1/////////////////////////////////////////////////////////////////////////////
98ba1eee 2// Name: archive.h
15b6757b
FM
3// Purpose: topic overview
4// Author: wxWidgets team
5// RCS-ID: $Id$
6// Licence: wxWindows license
7/////////////////////////////////////////////////////////////////////////////
8
880efa2a 9/**
36c9828f 10
4cbfec15 11@page overview_archive Archive Formats
36c9828f 12
032e27aa
BP
13The archive classes handle archive formats such as zip, tar, rar and cab.
14Currently wxZip and wxTar classes are included.
e0a47918 15
032e27aa
BP
16For each archive type, there are the following classes (using zip here as an
17example):
36c9828f 18
032e27aa
BP
19@li wxZipInputStream: Input stream
20@li wxZipOutputStream: Output stream
21@li wxZipEntry: Holds meta-data for an entry (e.g. filename, timestamp, etc.)
36c9828f 22
032e27aa 23There are also abstract wxArchive classes that can be used to write code that
4cbfec15 24can handle any of the archive types, see @ref overview_archive_generic.
e0a47918 25
032e27aa
BP
26Also see wxFileSystem for a higher level interface that can handle archive
27files in a generic way.
e0a47918 28
032e27aa
BP
29The classes are designed to handle archives on both seekable streams such as
30disk files, and non-seekable streams such as pipes and sockets (see
4cbfec15 31@ref overview_archive_noseek).
e0a47918 32
032e27aa 33See also wxFileSystem.
36c9828f 34
4cbfec15
FM
35@li @ref overview_archive_create
36@li @ref overview_archive_extract
37@li @ref overview_archive_modify
38@li @ref overview_archive_byname
39@li @ref overview_archive_generic
40@li @ref overview_archive_noseek
36c9828f
FM
41
42
032e27aa 43<hr>
36c9828f 44
e0a47918 45
4cbfec15 46@section overview_archive_create Creating an Archive
e0a47918 47
032e27aa
BP
48Call wxArchiveOutputStream::PutNextEntry() to create each new entry in the
49archive, then write the entry's data. Another call to PutNextEntry() closes the
50current entry and begins the next. For example:
36c9828f 51
032e27aa
BP
52@code
53wxFFileOutputStream out(_T("test.zip"));
54wxZipOutputStream zip(out);
55wxTextOutputStream txt(zip);
56wxString sep(wxFileName::GetPathSeparator());
36c9828f 57
032e27aa
BP
58zip.PutNextEntry(_T("entry1.txt"));
59txt << _T("Some text for entry1.txt\n");
36c9828f 60
032e27aa
BP
61zip.PutNextEntry(_T("subdir") + sep + _T("entry2.txt"));
62txt << _T("Some text for subdir/entry2.txt\n");
63@endcode
36c9828f 64
032e27aa
BP
65The name of each entry can be a full path, which makes it possible to store
66entries in subdirectories.
36c9828f
FM
67
68
4cbfec15 69@section overview_archive_extract Extracting an Archive
e0a47918 70
032e27aa
BP
71wxArchiveInputStream::GetNextEntry() returns a pointer to an entry object
72containing the meta-data for the next entry in the archive (and gives away
73ownership).
e0a47918 74
032e27aa
BP
75Reading from the input stream then returns the entry's data. Eof() becomes
76@true after an attempt has been made to read past the end of the entry's data.
36c9828f 77
032e27aa 78When there are no more entries, GetNextEntry() returns @NULL and sets Eof().
36c9828f 79
032e27aa
BP
80@code
81auto_ptr<wxZipEntry> entry;
36c9828f 82
032e27aa
BP
83wxFFileInputStream in(_T("test.zip"));
84wxZipInputStream zip(in);
36c9828f 85
032e27aa
BP
86while (entry.reset(zip.GetNextEntry()), entry.get() != NULL)
87{
88 // access meta-data
89 wxString name = entry->GetName();
90 // read 'zip' to access the entry's data
91}
92@endcode
36c9828f
FM
93
94
95
4cbfec15 96@section overview_archive_modify Modifying an Archive
36c9828f 97
032e27aa
BP
98To modify an existing archive, write a new copy of the archive to a new file,
99making any necessary changes along the way and transferring any unchanged
100entries using wxArchiveOutputStream::CopyEntry().
e0a47918 101
032e27aa
BP
102For archive types which compress entry data, CopyEntry() is likely to be much
103more efficient than transferring the data using Read() and Write() since it
104will copy them without decompressing and recompressing them.
e0a47918 105
032e27aa
BP
106In general modifications are not possible without rewriting the archive, though
107it may be possible in some limited cases. Even then, rewriting the archive is
108usually a better choice since a failure can be handled without losing the whole
109archive. wxTempFileOutputStream can be helpful to do this.
e0a47918 110
032e27aa 111For example, to delete all entries matching the pattern "*.txt":
36c9828f 112
032e27aa
BP
113@code
114auto_ptr<wxFFileInputStream> in(new wxFFileInputStream(_T("test.zip")));
115wxTempFileOutputStream out(_T("test.zip"));
36c9828f 116
032e27aa
BP
117wxZipInputStream inzip(*in);
118wxZipOutputStream outzip(out);
36c9828f 119
032e27aa 120auto_ptr<wxZipEntry> entry;
36c9828f 121
032e27aa
BP
122// transfer any meta-data for the archive as a whole (the zip comment
123// in the case of zip)
124outzip.CopyArchiveMetaData(inzip);
36c9828f 125
032e27aa
BP
126// call CopyEntry for each entry except those matching the pattern
127while (entry.reset(inzip.GetNextEntry()), entry.get() != NULL)
128 if (!entry->GetName().Matches(_T("*.txt")))
129 if (!outzip.CopyEntry(entry.release(), inzip))
130 break;
36c9828f 131
032e27aa
BP
132// close the input stream by releasing the pointer to it, do this
133// before closing the output stream so that the file can be replaced
134in.reset();
36c9828f 135
032e27aa
BP
136// you can check for success as follows
137bool success = inzip.Eof() && outzip.Close() && out.Commit();
138@endcode
36c9828f
FM
139
140
141
4cbfec15 142@section overview_archive_byname Looking Up an Archive Entry by Name
36c9828f 143
032e27aa
BP
144Also see wxFileSystem for a higher level interface that is more convenient for
145accessing archive entries by name.
e0a47918 146
032e27aa
BP
147To open just one entry in an archive, the most efficient way is to simply
148search for it linearly by calling wxArchiveInputStream::GetNextEntry() until
149the required entry is found. This works both for archives on seekable and
150non-seekable streams.
e0a47918 151
032e27aa
BP
152The format of filenames in the archive is likely to be different from the local
153filename format. For example zips and tars use unix style names, with forward
154slashes as the path separator, and absolute paths are not allowed. So if on
155Windows the file "C:\MYDIR\MYFILE.TXT" is stored, then when reading the entry
156back wxArchiveEntry::GetName() will return "MYDIR\MYFILE.TXT". The conversion
157into the internal format and back has lost some information.
e0a47918 158
032e27aa
BP
159So to avoid ambiguity when searching for an entry matching a local name, it is
160better to convert the local name to the archive's internal format and search
161for that:
36c9828f 162
032e27aa
BP
163@code
164auto_ptr<wxZipEntry> entry;
36c9828f 165
032e27aa
BP
166// convert the local name we are looking for into the internal format
167wxString name = wxZipEntry::GetInternalName(localname);
36c9828f 168
032e27aa
BP
169// open the zip
170wxFFileInputStream in(_T("test.zip"));
171wxZipInputStream zip(in);
36c9828f 172
032e27aa
BP
173// call GetNextEntry() until the required internal name is found
174do
175{
176 entry.reset(zip.GetNextEntry());
177}
178while (entry.get() != NULL && entry->GetInternalName() != name);
36c9828f 179
032e27aa
BP
180if (entry.get() != NULL)
181{
182 // read the entry's data...
183}
184@endcode
36c9828f 185
032e27aa
BP
186To access several entries randomly, it is most efficient to transfer the entire
187catalogue of entries to a container such as a std::map or a wxHashMap. Entries
188looked up by name can then be opened using the
189wxArchiveInputStream::OpenEntry() method.
36c9828f 190
032e27aa
BP
191@code
192WX_DECLARE_STRING_HASH_MAP(wxZipEntry*, ZipCatalog);
193ZipCatalog::iterator it;
194wxZipEntry *entry;
195ZipCatalog cat;
196
197// open the zip
198wxFFileInputStream in(_T("test.zip"));
199wxZipInputStream zip(in);
200
201// load the zip catalog
202while ((entry = zip.GetNextEntry()) != NULL)
203{
204 wxZipEntry*& current = cat[entry->GetInternalName()];
205 // some archive formats can have multiple entries with the same name
206 // (e.g. tar) though it is an error in the case of zip
207 delete current;
208 current = entry;
209}
210
211// open an entry by name
212if ((it = cat.find(wxZipEntry::GetInternalName(localname))) != cat.end())
213{
214 zip.OpenEntry(*it->second);
215 // ... now read entry's data
216}
217@endcode
218
219To open more than one entry simultaneously you need more than one underlying
220stream on the same archive:
221
222@code
223// opening another entry without closing the first requires another
224// input stream for the same file
225wxFFileInputStream in2(_T("test.zip"));
226wxZipInputStream zip2(in2);
227if ((it = cat.find(wxZipEntry::GetInternalName(local2))) != cat.end())
228 zip2.OpenEntry(*it->second);
229@endcode
230
231
232
4cbfec15 233@section overview_archive_generic Generic Archive Programming
032e27aa
BP
234
235Also see wxFileSystem for a higher level interface that can handle archive
236files in a generic way.
237
238The specific archive classes, such as the wxZip classes, inherit from the
239following abstract classes which can be used to write code that can handle any
240of the archive types:
241
242@li wxArchiveInputStream: Input stream
243@li wxArchiveOutputStream: Output stream
244@li wxArchiveEntry: Holds the meta-data for an entry (e.g. filename)
245
246In order to be able to write generic code it's necessary to be able to create
247instances of the classes without knowing which archive type is being used.
248
249To allow this there is a class factory for each archive type, derived from
250wxArchiveClassFactory, that can create the other classes.
251
252For example, given wxArchiveClassFactory* factory, streams and entries can be
253created like this:
254
255@code
256// create streams without knowing their type
257auto_ptr<wxArchiveInputStream> inarc(factory->NewStream(in));
258auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
259
260// create an empty entry object
261auto_ptr<wxArchiveEntry> entry(factory->NewEntry());
262@endcode
263
264For the factory itself, the static member wxArchiveClassFactory::Find() can be
265used to find a class factory that can handle a given file extension or mime
266type. For example, given @e filename:
267
268@code
269const wxArchiveClassFactory *factory;
270factory = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
271
272if (factory)
273 stream = factory->NewStream(new wxFFileInputStream(filename));
274@endcode
275
276@e Find() does not give away ownership of the returned pointer, so it does not
277need to be deleted.
278
279There are similar class factories for the filter streams that handle the
280compression and decompression of a single stream, such as wxGzipInputStream.
281These can be found using wxFilterClassFactory::Find().
282
283For example, to list the contents of archive @e filename:
284
285@code
286auto_ptr<wxInputStream> in(new wxFFileInputStream(filename));
287
288if (in->IsOk())
289{
290 // look for a filter handler, e.g. for '.gz'
291 const wxFilterClassFactory *fcf;
292 fcf = wxFilterClassFactory::Find(filename, wxSTREAM_FILEEXT);
293 if (fcf)
294 {
295 in.reset(fcf->NewStream(in.release()));
296 // pop the extension, so if it was '.tar.gz' it is now just '.tar'
297 filename = fcf->PopExtension(filename);
298 }
299
300 // look for an archive handler, e.g. for '.zip' or '.tar'
301 const wxArchiveClassFactory *acf;
302 acf = wxArchiveClassFactory::Find(filename, wxSTREAM_FILEEXT);
303 if (acf)
304 {
305 auto_ptr<wxArchiveInputStream> arc(acf->NewStream(in.release()));
306 auto_ptr<wxArchiveEntry> entry;
307
308 // list the contents of the archive
309 while ((entry.reset(arc->GetNextEntry())), entry.get() != NULL)
310 std::wcout << entry->GetName().c_str() << "\n";
311 }
312 else
313 {
314 wxLogError(_T("can't handle '%s'"), filename.c_str());
315 }
316}
317@endcode
36c9828f
FM
318
319
36c9828f 320
4cbfec15 321@section overview_archive_noseek Archives on Non-Seekable Streams
032e27aa
BP
322
323In general, handling archives on non-seekable streams is done in the same way
324as for seekable streams, with a few caveats.
325
326The main limitation is that accessing entries randomly using
327wxArchiveInputStream::OpenEntry() is not possible; the entries can only be
328accessed sequentially in the order they are stored within the archive.
329
330For each archive type, there will also be other limitations which will depend
331on the order the entries' meta-data is stored within the archive. These are not
332too difficult to deal with, and are outlined below.
333
4cbfec15 334@subsection overview_archive_noseek_entrysize PutNextEntry and the Entry Size
032e27aa
BP
335
336When writing archives, some archive formats store the entry size before the
337entry's data (tar has this limitation, zip doesn't). In this case the entry's
338size must be passed to wxArchiveOutputStream::PutNextEntry() or an error
339occurs.
340
341This is only an issue on non-seekable streams, since otherwise the archive
342output stream can seek back and fix up the header once the size of the entry is
343known.
344
345For generic programming, one way to handle this is to supply the size whenever
346it is known, and rely on the error message from the output stream when the
347operation is not supported.
348
4cbfec15 349@subsection overview_archive_noseek_weak GetNextEntry and the Weak Reference Mechanism
032e27aa
BP
350
351Some archive formats do not store all of an entry's meta-data before the entry's
352data (zip is an example). In this case, when reading from a non-seekable
353stream, wxArchiveInputStream::GetNextEntry() can only return a partially
354populated wxArchiveEntry object - not all the fields are set.
355
356The input stream then keeps a weak reference to the entry object and updates it
357when more meta-data becomes available. A weak reference is one that does not
358prevent you from deleting the wxArchiveEntry object - the input stream only
359attempts to update it if it is still around.
360
361The documentation for each archive entry type gives the details of what
362meta-data becomes available and when. For generic programming, when the worst
363case must be assumed, you can rely on all the fields of wxArchiveEntry being
364fully populated when GetNextEntry() returns, with the following exceptions:
365
366@li wxArchiveEntry::GetSize(): Guaranteed to be available after the entry has
367 been read to wxInputStream::Eof(), or wxArchiveInputStream::CloseEntry()
368 has been called.
369@li wxArchiveEntry::IsReadOnly(): Guaranteed to be available after the end of
370 the archive has been reached, i.e. after GetNextEntry() returns @NULL and
371 Eof() is @true.
372
373This mechanism allows wxArchiveOutputStream::CopyEntry() to always fully
374preserve entries' meta-data. No matter what order the meta-data occurs
375within the archive, the input stream will always have read it before the output
376stream must write it.
377
4cbfec15 378@subsection overview_archive_noseek_notifier wxArchiveNotifier
032e27aa
BP
379
380Notifier objects can be used to get a notification whenever an input stream
381updates a wxArchiveEntry object's data via the weak reference mechanism.
382
383Consider the following code which renames an entry in an archive. This is the
384usual way to modify an entry's meta-data: simply set the required field before
385writing it with wxArchiveOutputStream::CopyEntry():
386
387@code
388auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
389auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
390auto_ptr<wxArchiveEntry> entry;
391
392outarc->CopyArchiveMetaData(*arc);
393
394while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
395{
396 if (entry->GetName() == from)
397 entry->SetName(to);
398 if (!outarc->CopyEntry(entry.release(), *arc))
399 break;
400}
401
402bool success = arc->Eof() && outarc->Close();
403@endcode
404
405However, for non-seekable streams, this technique cannot be used for fields
406such as wxArchiveEntry::IsReadOnly(), which are not necessarily set when
407wxArchiveInputStream::GetNextEntry() returns.
408
409In this case a wxArchiveNotifier can be used:
410
411@code
412class MyNotifier : public wxArchiveNotifier
413{
414public:
415 void OnEntryUpdated(wxArchiveEntry& entry) { entry.SetIsReadOnly(false); }
416};
417@endcode
418
419The meta-data changes are done in your notifier's
420wxArchiveNotifier::OnEntryUpdated() method, then wxArchiveEntry::SetNotifier()
421is called before CopyEntry():
422
423@code
424auto_ptr<wxArchiveInputStream> arc(factory->NewStream(in));
425auto_ptr<wxArchiveOutputStream> outarc(factory->NewStream(out));
426auto_ptr<wxArchiveEntry> entry;
427MyNotifier notifier;
428
429outarc->CopyArchiveMetaData(*arc);
430
431while (entry.reset(arc->GetNextEntry()), entry.get() != NULL)
432{
433 entry->SetNotifier(notifier);
434 if (!outarc->CopyEntry(entry.release(), *arc))
435 break;
436}
437
438bool success = arc->Eof() && outarc->Close();
439@endcode
440
441SetNotifier() calls OnEntryUpdated() immediately, then the input stream calls
442it again whenever it sets more fields in the entry. Since OnEntryUpdated() will
443be called at least once, this technique always works even when it is not
444strictly necessary to use it. For example, changing the entry name can be done
445this way too and it works on seekable streams as well as non-seekable.
36c9828f 446
e0a47918 447*/
36c9828f 448