]> git.saurik.com Git - apt.git/blob - apt-pkg/acquire.h
debian/apt.cron.daily: cleanups in the cron script
[apt.git] / apt-pkg / acquire.h
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: acquire.h,v 1.29.2.1 2003/12/24 23:09:17 mdz Exp $
4 /* ######################################################################
5
6 Acquire - File Acquiration
7
8 This module contains the Acquire system. It is responsible for bringing
9 files into the local pathname space. It deals with URIs for files and
10 URI handlers responsible for downloading or finding the URIs.
11
12 Each file to download is represented by an Acquire::Item class subclassed
13 into a specialization. The Item class can add itself to several URI
14 acquire queues each prioritized by the download scheduler. When the
15 system is run the proper URI handlers are spawned and the acquire
16 queues are fed into the handlers by the scheduler until the queues are
17 empty. This allows for an Item to be downloaded from an alternate source
18 if the first try turns out to fail. It also allows concurrent downloading
19 of multiple items from multiple sources as well as dynamic balancing
20 of load between the sources.
21
22 Scheduling of downloads is done on a first ask first get basis. This
23 preserves the order of the download as much as possible. And means the
24 fastest source will tend to process the largest number of files.
25
26 Internal methods and queues for performing gzip decompression,
27 md5sum hashing and file copying are provided to allow items to apply
28 a number of transformations to the data files they are working with.
29
30 ##################################################################### */
31 /*}}}*/
32
33 /** \defgroup acquire Acquire system
34 *
35 * \brief The Acquire system is responsible for retrieving files from
36 * local or remote URIs and postprocessing them (for instance,
37 * verifying their authenticity). The core class in this system is
38 * pkgAcquire, which is responsible for managing the download queues
39 * during the download. There is at least one download queue for
40 * each supported protocol; protocols such as http may provide one
41 * queue per host.
42 *
43 * Each file to download is represented by a subclass of
44 * pkgAcquire::Item. The files add themselves to the download
45 * queue(s) by providing their URI information to
46 * pkgAcquire::Item::QueueURI, which calls pkgAcquire::Enqueue.
47 *
48 * Once the system is set up, the Run method will spawn subprocesses
49 * to handle the enqueued URIs; the scheduler will then take items
50 * from the queues and feed them into the handlers until the queues
51 * are empty.
52 *
53 * \todo Acquire supports inserting an object into several queues at
54 * once, but it is not clear what its behavior in this case is, and
55 * no subclass of pkgAcquire::Item seems to actually use this
56 * capability.
57 */
58
59 /** \addtogroup acquire
60 *
61 * @{
62 *
63 * \file acquire.h
64 */
65
66 #ifndef PKGLIB_ACQUIRE_H
67 #define PKGLIB_ACQUIRE_H
68
69 #include <vector>
70 #include <string>
71
72 using std::vector;
73 using std::string;
74
75
76 #include <sys/time.h>
77 #include <unistd.h>
78
79 class pkgAcquireStatus;
80
81 /** \brief The core download scheduler.
82 *
83 * This class represents an ongoing download. It manages the lists
84 * of active and pending downloads and handles setting up and tearing
85 * down download-related structures.
86 *
87 * \todo Why all the protected data items and methods?
88 */
89 class pkgAcquire
90 {
91 public:
92
93 class Item;
94 class Queue;
95 class Worker;
96 struct MethodConfig;
97 struct ItemDesc;
98 friend class Item;
99 friend class Queue;
100
101 typedef vector<Item *>::iterator ItemIterator;
102 typedef vector<Item *>::const_iterator ItemCIterator;
103
104 protected:
105
106 /** \brief A list of items to download.
107 *
108 * This is built monotonically as items are created and only
109 * emptied when the download shuts down.
110 */
111 vector<Item *> Items;
112
113 /** \brief The head of the list of active queues.
114 *
115 * \todo why a hand-managed list of queues instead of std::list or
116 * std::set?
117 */
118 Queue *Queues;
119
120 /** \brief The head of the list of active workers.
121 *
122 * \todo why a hand-managed list of workers instead of std::list
123 * or std::set?
124 */
125 Worker *Workers;
126
127 /** \brief The head of the list of acquire method configurations.
128 *
129 * Each protocol (http, ftp, gzip, etc) via which files can be
130 * fetched can have a representation in this list. The
131 * configuration data is filled in by parsing the 100 Capabilities
132 * string output by a method on startup (see
133 * pkgAcqMethod::pkgAcqMethod and pkgAcquire::GetConfig).
134 *
135 * \todo why a hand-managed config dictionary instead of std::map?
136 */
137 MethodConfig *Configs;
138
139 /** \brief The progress indicator for this download. */
140 pkgAcquireStatus *Log;
141
142 /** \brief The total size of the files which are to be fetched.
143 *
144 * This is not necessarily the total number of bytes to download
145 * when, e.g., download resumption and list updates via patches
146 * are taken into account.
147 */
148 unsigned long ToFetch;
149
150 // Configurable parameters for the scheduler
151
152 /** \brief Represents the queuing strategy for remote URIs. */
153 enum QueueStrategy {
154 /** \brief Generate one queue for each protocol/host combination; downloads from
155 * multiple hosts can proceed in parallel.
156 */
157 QueueHost,
158 /** \brief Generate a single queue for each protocol; serialize
159 * downloads from multiple hosts.
160 */
161 QueueAccess} QueueMode;
162
163 /** \brief If \b true, debugging information will be dumped to std::clog. */
164 bool Debug;
165 /** \brief If \b true, a download is currently in progress. */
166 bool Running;
167
168 /** \brief Add the given item to the list of items. */
169 void Add(Item *Item);
170
171 /** \brief Remove the given item from the list of items. */
172 void Remove(Item *Item);
173
174 /** \brief Add the given worker to the list of workers. */
175 void Add(Worker *Work);
176
177 /** \brief Remove the given worker from the list of workers. */
178 void Remove(Worker *Work);
179
180 /** \brief Insert the given fetch request into the appropriate queue.
181 *
182 * \param Item The URI to download and the item to download it
183 * for. Copied by value into the queue; no reference to Item is
184 * retained.
185 */
186 void Enqueue(ItemDesc &Item);
187
188 /** \brief Remove all fetch requests for this item from all queues. */
189 void Dequeue(Item *Item);
190
191 /** \brief Determine the fetch method and queue of a URI.
192 *
193 * \param URI The URI to fetch.
194 *
195 * \param[out] Config A location in which to place the method via
196 * which the URI is to be fetched.
197 *
198 * \return the string-name of the queue in which a fetch request
199 * for the given URI should be placed.
200 */
201 string QueueName(string URI,MethodConfig const *&Config);
202
203 /** \brief Build up the set of file descriptors upon which select() should
204 * block.
205 *
206 * The default implementation inserts the file descriptors
207 * corresponding to active downloads.
208 *
209 * \param[out] Fd The largest file descriptor in the generated sets.
210 *
211 * \param[out] RSet The set of file descriptors that should be
212 * watched for input.
213 *
214 * \param[out] WSet The set of file descriptors that should be
215 * watched for output.
216 */
217 virtual void SetFds(int &Fd,fd_set *RSet,fd_set *WSet);
218
219 /** Handle input from and output to file descriptors which select()
220 * has determined are ready. The default implementation
221 * dispatches to all active downloads.
222 *
223 * \param RSet The set of file descriptors that are ready for
224 * input.
225 *
226 * \param WSet The set of file descriptors that are ready for
227 * output.
228 */
229 virtual void RunFds(fd_set *RSet,fd_set *WSet);
230
231 /** \brief Check for idle queues with ready-to-fetch items.
232 *
233 * Called by pkgAcquire::Queue::Done each time an item is dequeued
234 * but remains on some queues; i.e., another queue should start
235 * fetching it.
236 */
237 void Bump();
238
239 public:
240
241 /** \brief Retrieve information about a fetch method by name.
242 *
243 * \param Access The name of the method to look up.
244 *
245 * \return the method whose name is Access, or \b NULL if no such method exists.
246 */
247 MethodConfig *GetConfig(string Access);
248
249 /** \brief Provides information on how a download terminated. */
250 enum RunResult {
251 /** \brief All files were fetched successfully. */
252 Continue,
253
254 /** \brief Some files failed to download. */
255 Failed,
256
257 /** \brief The download was cancelled by the user (i.e., #Log's
258 * pkgAcquireStatus::Pulse() method returned \b false).
259 */
260 Cancelled};
261
262 /** \brief Download all the items that have been Add()ed to this
263 * download process.
264 *
265 * This method will block until the download completes, invoking
266 * methods on #Log to report on the progress of the download.
267 *
268 * \param PulseInterval The method pkgAcquireStatus::Pulse will be
269 * invoked on #Log at intervals of PulseInterval milliseconds.
270 *
271 * \return the result of the download.
272 */
273 RunResult Run(int PulseInterval=500000);
274
275 /** \brief Remove all items from this download process, terminate
276 * all download workers, and empty all queues.
277 */
278 void Shutdown();
279
280 /** \brief Get the first #Worker object.
281 *
282 * \return the first active worker in this download process.
283 */
284 inline Worker *WorkersBegin() {return Workers;};
285
286 /** \brief Advance to the next #Worker object.
287 *
288 * \return the worker immediately following I, or \b NULL if none
289 * exists.
290 */
291 Worker *WorkerStep(Worker *I);
292
293 /** \brief Get the head of the list of items. */
294 inline ItemIterator ItemsBegin() {return Items.begin();};
295
296 /** \brief Get the end iterator of the list of items. */
297 inline ItemIterator ItemsEnd() {return Items.end();};
298
299 // Iterate over queued Item URIs
300 class UriIterator;
301 /** \brief Get the head of the list of enqueued item URIs.
302 *
303 * This iterator will step over every element of every active
304 * queue.
305 */
306 UriIterator UriBegin();
307 /** \brief Get the end iterator of the list of enqueued item URIs. */
308 UriIterator UriEnd();
309
310 /** Deletes each entry in the given directory that is not being
311 * downloaded by this object. For instance, when downloading new
312 * list files, calling Clean() will delete the old ones.
313 *
314 * \param Dir The directory to be cleaned out.
315 *
316 * \return \b true if the directory exists and is readable.
317 */
318 bool Clean(string Dir);
319
320 /** \return the total size in bytes of all the items included in
321 * this download.
322 */
323 double TotalNeeded();
324
325 /** \return the size in bytes of all non-local items included in
326 * this download.
327 */
328 double FetchNeeded();
329
330 /** \return the amount of data to be fetched that is already
331 * present on the filesystem.
332 */
333 double PartialPresent();
334
335 /** \brief Construct a new pkgAcquire.
336 *
337 * \param Log The progress indicator associated with this
338 * download, or \b NULL for none. This object is not owned by the
339 * download process and will not be deleted when the pkgAcquire
340 * object is destroyed. Naturally, it should live for at least as
341 * long as the pkgAcquire object does.
342 */
343 pkgAcquire(pkgAcquireStatus *Log = 0);
344
345 /** \brief Destroy this pkgAcquire object.
346 *
347 * Destroys all queue, method, and item objects associated with
348 * this download.
349 */
350 virtual ~pkgAcquire();
351 };
352
353 /** \brief Represents a single download source from which an item
354 * should be downloaded.
355 *
356 * An item may have several assocated ItemDescs over its lifetime.
357 */
358 struct pkgAcquire::ItemDesc
359 {
360 /** \brief The URI from which to download this item. */
361 string URI;
362 /** brief A description of this item. */
363 string Description;
364 /** brief A shorter description of this item. */
365 string ShortDesc;
366 /** brief The underlying item which is to be downloaded. */
367 Item *Owner;
368 };
369
370 /** \brief A single download queue in a pkgAcquire object.
371 *
372 * \todo Why so many protected values?
373 */
374 class pkgAcquire::Queue
375 {
376 friend class pkgAcquire;
377 friend class pkgAcquire::UriIterator;
378 friend class pkgAcquire::Worker;
379
380 /** \brief The next queue in the pkgAcquire object's list of queues. */
381 Queue *Next;
382
383 protected:
384
385 /** \brief A single item placed in this queue. */
386 struct QItem : pkgAcquire::ItemDesc
387 {
388 /** \brief The next item in the queue. */
389 QItem *Next;
390 /** \brief The worker associated with this item, if any. */
391 pkgAcquire::Worker *Worker;
392
393 /** \brief Assign the ItemDesc portion of this QItem from
394 * another ItemDesc
395 */
396 void operator =(pkgAcquire::ItemDesc const &I)
397 {
398 URI = I.URI;
399 Description = I.Description;
400 ShortDesc = I.ShortDesc;
401 Owner = I.Owner;
402 };
403 };
404
405 /** \brief The name of this queue. */
406 string Name;
407
408 /** \brief The head of the list of items contained in this queue.
409 *
410 * \todo why a by-hand list instead of an STL structure?
411 */
412 QItem *Items;
413
414 /** \brief The head of the list of workers associated with this queue.
415 *
416 * \todo This is plural because support exists in Queue for
417 * multiple workers. However, it does not appear that there is
418 * any way to actually associate more than one worker with a
419 * queue.
420 *
421 * \todo Why not just use a std::set?
422 */
423 pkgAcquire::Worker *Workers;
424
425 /** \brief the download scheduler with which this queue is associated. */
426 pkgAcquire *Owner;
427
428 /** \brief The number of entries in this queue that are currently
429 * being downloaded.
430 */
431 signed long PipeDepth;
432
433 /** \brief The maximum number of entries that this queue will
434 * attempt to download at once.
435 */
436 unsigned long MaxPipeDepth;
437
438 public:
439
440 /** \brief Insert the given fetch request into this queue.
441 *
442 * \return \b true if the queuing was successful. May return
443 * \b false if the Item is already in the queue
444 */
445 bool Enqueue(ItemDesc &Item);
446
447 /** \brief Remove all fetch requests for the given item from this queue.
448 *
449 * \return \b true if at least one request was removed from the queue.
450 */
451 bool Dequeue(Item *Owner);
452
453 /** \brief Locate an item in this queue.
454 *
455 * \param URI A URI to match against.
456 * \param Owner A pkgAcquire::Worker to match against.
457 *
458 * \return the first item in the queue whose URI is #URI and that
459 * is being downloaded by #Owner.
460 */
461 QItem *FindItem(string URI,pkgAcquire::Worker *Owner);
462
463 /** Presumably this should start downloading an item?
464 *
465 * \todo Unimplemented. Implement it or remove?
466 */
467 bool ItemStart(QItem *Itm,unsigned long Size);
468
469 /** \brief Remove the given item from this queue and set its state
470 * to pkgAcquire::Item::StatDone.
471 *
472 * If this is the only queue containing the item, the item is also
473 * removed from the main queue by calling pkgAcquire::Dequeue.
474 *
475 * \param Itm The item to remove.
476 *
477 * \return \b true if no errors are encountered.
478 */
479 bool ItemDone(QItem *Itm);
480
481 /** \brief Start the worker process associated with this queue.
482 *
483 * If a worker process is already associated with this queue,
484 * this is equivalent to calling Cycle().
485 *
486 * \return \b true if the startup was successful.
487 */
488 bool Startup();
489
490 /** \brief Shut down the worker process associated with this queue.
491 *
492 * \param Final If \b true, then the process is stopped unconditionally.
493 * Otherwise, it is only stopped if it does not need cleanup
494 * as indicated by the pkgAcqMethod::NeedsCleanup member of
495 * its configuration.
496 *
497 * \return \b true.
498 */
499 bool Shutdown(bool Final);
500
501 /** \brief Send idle items to the worker process.
502 *
503 * Fills up the pipeline by inserting idle items into the worker's queue.
504 */
505 bool Cycle();
506
507 /** \brief Check for items that could be enqueued.
508 *
509 * Call this after an item placed in multiple queues has gone from
510 * the pkgAcquire::Item::StatFetching state to the
511 * pkgAcquire::Item::StatIdle state, to possibly refill an empty queue.
512 * This is an alias for Cycle().
513 *
514 * \todo Why both this and Cycle()? Are they expected to be
515 * different someday?
516 */
517 void Bump();
518
519 /** \brief Create a new Queue.
520 *
521 * \param Name The name of the new queue.
522 * \param Owner The download process that owns the new queue.
523 */
524 Queue(string Name,pkgAcquire *Owner);
525
526 /** Shut down all the worker processes associated with this queue
527 * and empty the queue.
528 */
529 ~Queue();
530 };
531
532 /** \brief Iterates over all the URIs being fetched by a pkgAcquire object. */
533 class pkgAcquire::UriIterator
534 {
535 /** The next queue to iterate over. */
536 pkgAcquire::Queue *CurQ;
537 /** The item that we currently point at. */
538 pkgAcquire::Queue::QItem *CurItem;
539
540 public:
541
542 inline void operator ++() {operator ++();};
543
544 void operator ++(int)
545 {
546 CurItem = CurItem->Next;
547 while (CurItem == 0 && CurQ != 0)
548 {
549 CurItem = CurQ->Items;
550 CurQ = CurQ->Next;
551 }
552 };
553
554 inline pkgAcquire::ItemDesc const *operator ->() const {return CurItem;};
555 inline bool operator !=(UriIterator const &rhs) const {return rhs.CurQ != CurQ || rhs.CurItem != CurItem;};
556 inline bool operator ==(UriIterator const &rhs) const {return rhs.CurQ == CurQ && rhs.CurItem == CurItem;};
557
558 /** \brief Create a new UriIterator.
559 *
560 * \param Q The queue over which this UriIterator should iterate.
561 */
562 UriIterator(pkgAcquire::Queue *Q) : CurQ(Q), CurItem(0)
563 {
564 while (CurItem == 0 && CurQ != 0)
565 {
566 CurItem = CurQ->Items;
567 CurQ = CurQ->Next;
568 }
569 }
570 };
571
572 /** \brief Information about the properties of a single acquire method. */
573 struct pkgAcquire::MethodConfig
574 {
575 /** \brief The next link on the acquire method list.
576 *
577 * \todo Why not an STL container?
578 */
579 MethodConfig *Next;
580
581 /** \brief The name of this acquire method (e.g., http). */
582 string Access;
583
584 /** \brief The implementation version of this acquire method. */
585 string Version;
586
587 /** \brief If \b true, only one download queue should be created for this
588 * method.
589 */
590 bool SingleInstance;
591
592 /** \brief If \b true, this method supports pipelined downloading. */
593 bool Pipeline;
594
595 /** \brief If \b true, the worker process should send the entire
596 * APT configuration tree to the fetch subprocess when it starts
597 * up.
598 */
599 bool SendConfig;
600
601 /** \brief If \b true, this fetch method does not require network access;
602 * all files are to be acquired from the local disk.
603 */
604 bool LocalOnly;
605
606 /** \brief If \b true, the subprocess has to carry out some cleanup
607 * actions before shutting down.
608 *
609 * For instance, the cdrom method needs to unmount the CD after it
610 * finishes.
611 */
612 bool NeedsCleanup;
613
614 /** \brief If \b true, this fetch method acquires files from removable media. */
615 bool Removable;
616
617 /** \brief Set up the default method parameters.
618 *
619 * All fields are initialized to NULL, "", or \b false as
620 * appropriate.
621 */
622 MethodConfig();
623 };
624
625 /** \brief A monitor object for downloads controlled by the pkgAcquire class.
626 *
627 * \todo Why protected members?
628 *
629 * \todo Should the double members be uint64_t?
630 */
631 class pkgAcquireStatus
632 {
633 protected:
634
635 /** \brief The last time at which this monitor object was updated. */
636 struct timeval Time;
637
638 /** \brief The time at which the download started. */
639 struct timeval StartTime;
640
641 /** \brief The number of bytes fetched as of the previous call to
642 * pkgAcquireStatus::Pulse, including local items.
643 */
644 double LastBytes;
645
646 /** \brief The current rate of download as of the most recent call
647 * to pkgAcquireStatus::Pulse, in bytes per second.
648 */
649 double CurrentCPS;
650
651 /** \brief The number of bytes fetched as of the most recent call
652 * to pkgAcquireStatus::Pulse, including local items.
653 */
654 double CurrentBytes;
655
656 /** \brief The total number of bytes that need to be fetched.
657 *
658 * \warning This member is inaccurate, as new items might be
659 * enqueued while the download is in progress!
660 */
661 double TotalBytes;
662
663 /** \brief The total number of bytes accounted for by items that
664 * were successfully fetched.
665 */
666 double FetchedBytes;
667
668 /** \brief The amount of time that has elapsed since the download
669 * started.
670 */
671 unsigned long ElapsedTime;
672
673 /** \brief The total number of items that need to be fetched.
674 *
675 * \warning This member is inaccurate, as new items might be
676 * enqueued while the download is in progress!
677 */
678 unsigned long TotalItems;
679
680 /** \brief The number of items that have been successfully downloaded. */
681 unsigned long CurrentItems;
682
683 public:
684
685 /** \brief If \b true, the download scheduler should call Pulse()
686 * at the next available opportunity.
687 */
688 bool Update;
689
690 /** \brief If \b true, extra Pulse() invocations will be performed.
691 *
692 * With this option set, Pulse() will be called every time that a
693 * download item starts downloading, finishes downloading, or
694 * terminates with an error.
695 */
696 bool MorePulses;
697
698 /** \brief Invoked when a local or remote file has been completely fetched.
699 *
700 * \param Size The size of the file fetched.
701 *
702 * \param ResumePoint How much of the file was already fetched.
703 */
704 virtual void Fetched(unsigned long Size,unsigned long ResumePoint);
705
706 /** \brief Invoked when the user should be prompted to change the
707 * inserted removable media.
708 *
709 * This method should not return until the user has confirmed to
710 * the user interface that the media change is complete.
711 *
712 * \param Media The name of the media type that should be changed.
713 *
714 * \param Drive The identifying name of the drive whose media
715 * should be changed.
716 *
717 * \return \b true if the user confirms the media change, \b
718 * false if it is cancelled.
719 *
720 * \todo This is a horrible blocking monster; it should be CPSed
721 * with prejudice.
722 */
723 virtual bool MediaChange(string Media,string Drive) = 0;
724
725 /** \brief Invoked when an item is confirmed to be up-to-date.
726
727 * For instance, when an HTTP download is informed that the file on
728 * the server was not modified.
729 */
730 virtual void IMSHit(pkgAcquire::ItemDesc &/*Itm*/) {};
731
732 /** \brief Invoked when some of an item's data is fetched. */
733 virtual void Fetch(pkgAcquire::ItemDesc &/*Itm*/) {};
734
735 /** \brief Invoked when an item is successfully and completely fetched. */
736 virtual void Done(pkgAcquire::ItemDesc &/*Itm*/) {};
737
738 /** \brief Invoked when the process of fetching an item encounters
739 * a fatal error.
740 */
741 virtual void Fail(pkgAcquire::ItemDesc &/*Itm*/) {};
742
743 /** \brief Periodically invoked while the Acquire process is underway.
744 *
745 * Subclasses should first call pkgAcquireStatus::Pulse(), then
746 * update their status output. The download process is blocked
747 * while Pulse() is being called.
748 *
749 * \return \b false if the user asked to cancel the whole Acquire process.
750 *
751 * \see pkgAcquire::Run
752 */
753 virtual bool Pulse(pkgAcquire *Owner);
754
755 /** \brief Invoked when the Acquire process starts running. */
756 virtual void Start();
757
758 /** \brief Invoked when the Acquire process stops running. */
759 virtual void Stop();
760
761 /** \brief Initialize all counters to 0 and the time to the current time. */
762 pkgAcquireStatus();
763 virtual ~pkgAcquireStatus() {};
764 };
765
766 /** @} */
767
768 #endif