1 // -*- mode: cpp; mode: fold -*-
3 /* ######################################################################
5 Acquire - File Acquisition
7 This module contains the Acquire system. It is responsible for bringing
8 files into the local pathname space. It deals with URIs for files and
9 URI handlers responsible for downloading or finding the URIs.
11 Each file to download is represented by an Acquire::Item class subclassed
12 into a specialization. The Item class can add itself to several URI
13 acquire queues each prioritized by the download scheduler. When the
14 system is run the proper URI handlers are spawned and the acquire
15 queues are fed into the handlers by the scheduler until the queues are
16 empty. This allows for an Item to be downloaded from an alternate source
17 if the first try turns out to fail. It also allows concurrent downloading
18 of multiple items from multiple sources as well as dynamic balancing
19 of load between the sources.
21 Scheduling of downloads is done on a first ask first get basis. This
22 preserves the order of the download as much as possible and means the
23 fastest source will tend to process the largest number of files.
25 Internal methods and queues for performing gzip decompression,
26 md5sum hashing and file copying are provided to allow items to apply
27 a number of transformations to the data files they are working with.
29 ##################################################################### */
32 /** \defgroup acquire Acquire system {{{
34 * \brief The Acquire system is responsible for retrieving files from
35 * local or remote URIs and postprocessing them (for instance,
36 * verifying their authenticity). The core class in this system is
37 * pkgAcquire, which is responsible for managing the download queues
38 * during the download. There is at least one download queue for
39 * each supported protocol; protocols such as http may provide one
42 * Each file to download is represented by a subclass of
43 * pkgAcquire::Item. The files add themselves to the download
44 * queue(s) by providing their URI information to
45 * pkgAcquire::Item::QueueURI, which calls pkgAcquire::Enqueue.
47 * Once the system is set up, the Run method will spawn subprocesses
48 * to handle the enqueued URIs; the scheduler will then take items
49 * from the queues and feed them into the handlers until the queues
52 * \todo Acquire supports inserting an object into several queues at
53 * once, but it is not clear what its behavior in this case is, and
54 * no subclass of pkgAcquire::Item seems to actually use this
58 /** \addtogroup acquire
65 #ifndef PKGLIB_ACQUIRE_H
66 #define PKGLIB_ACQUIRE_H
68 #include <apt-pkg/macros.h>
69 #include <apt-pkg/weakptr.h>
70 #include <apt-pkg/hashes.h>
77 #include <sys/select.h>
79 #ifndef APT_10_CLEANER_HEADERS
83 #ifndef APT_8_CLEANER_HEADERS
88 class pkgAcquireStatus
;
90 /** \brief The core download scheduler. {{{
92 * This class represents an ongoing download. It manages the lists
93 * of active and pending downloads and handles setting up and tearing
94 * down download-related structures.
96 * \todo Why all the protected data items and methods?
101 /** \brief FD of the Lock file we acquire in Setup (if any) */
103 /** \brief dpointer placeholder (for later in case we need it) */
114 friend class pkgAcqMetaBase
;
117 typedef std::vector
<Item
*>::iterator ItemIterator
;
118 typedef std::vector
<Item
*>::const_iterator ItemCIterator
;
122 /** \brief A list of items to download.
124 * This is built monotonically as items are created and only
125 * emptied when the download shuts down.
127 std::vector
<Item
*> Items
;
129 /** \brief The head of the list of active queues.
131 * \todo why a hand-managed list of queues instead of std::list or
136 /** \brief The head of the list of active workers.
138 * \todo why a hand-managed list of workers instead of std::list
143 /** \brief The head of the list of acquire method configurations.
145 * Each protocol (http, ftp, gzip, etc) via which files can be
146 * fetched can have a representation in this list. The
147 * configuration data is filled in by parsing the 100 Capabilities
148 * string output by a method on startup (see
149 * pkgAcqMethod::pkgAcqMethod and pkgAcquire::GetConfig).
151 * \todo why a hand-managed config dictionary instead of std::map?
153 MethodConfig
*Configs
;
155 /** \brief The progress indicator for this download. */
156 pkgAcquireStatus
*Log
;
158 /** \brief The number of files which are to be fetched. */
159 unsigned long ToFetch
;
161 // Configurable parameters for the scheduler
163 /** \brief Represents the queuing strategy for remote URIs. */
165 /** \brief Generate one queue for each protocol/host combination; downloads from
166 * multiple hosts can proceed in parallel.
169 /** \brief Generate a single queue for each protocol; serialize
170 * downloads from multiple hosts.
172 QueueAccess
} QueueMode
;
174 /** \brief If \b true, debugging information will be dumped to std::clog. */
176 /** \brief If \b true, a download is currently in progress. */
179 /** \brief Add the given item to the list of items. */
180 void Add(Item
*Item
);
182 /** \brief Remove the given item from the list of items. */
183 void Remove(Item
*Item
);
185 /** \brief Add the given worker to the list of workers. */
186 void Add(Worker
*Work
);
188 /** \brief Remove the given worker from the list of workers. */
189 void Remove(Worker
*Work
);
191 /** \brief Insert the given fetch request into the appropriate queue.
193 * \param Item The URI to download and the item to download it
194 * for. Copied by value into the queue; no reference to Item is
197 void Enqueue(ItemDesc
&Item
);
199 /** \brief Remove all fetch requests for this item from all queues. */
200 void Dequeue(Item
*Item
);
202 /** \brief Determine the fetch method and queue of a URI.
204 * \param URI The URI to fetch.
206 * \param[out] Config A location in which to place the method via
207 * which the URI is to be fetched.
209 * \return the string-name of the queue in which a fetch request
210 * for the given URI should be placed.
212 std::string
QueueName(std::string URI
,MethodConfig
const *&Config
);
214 /** \brief Build up the set of file descriptors upon which select() should
217 * The default implementation inserts the file descriptors
218 * corresponding to active downloads.
220 * \param[out] Fd The largest file descriptor in the generated sets.
222 * \param[out] RSet The set of file descriptors that should be
225 * \param[out] WSet The set of file descriptors that should be
226 * watched for output.
228 virtual void SetFds(int &Fd
,fd_set
*RSet
,fd_set
*WSet
);
230 /** Handle input from and output to file descriptors which select()
231 * has determined are ready. The default implementation
232 * dispatches to all active downloads.
234 * \param RSet The set of file descriptors that are ready for
237 * \param WSet The set of file descriptors that are ready for
240 * \return false if there is an error condition on one of the fds
242 bool RunFdsSane(fd_set
*RSet
,fd_set
*WSet
);
244 // just here for compatibility, needs to be removed on the next
245 // ABI/API break. RunFdsSane() is what should be used as it
246 // returns if there is an error condition on one of the fds
247 virtual void RunFds(fd_set
*RSet
,fd_set
*WSet
);
249 /** \brief Check for idle queues with ready-to-fetch items.
251 * Called by pkgAcquire::Queue::Done each time an item is dequeued
252 * but remains on some queues; i.e., another queue should start
259 /** \brief Retrieve information about a fetch method by name.
261 * \param Access The name of the method to look up.
263 * \return the method whose name is Access, or \b NULL if no such method exists.
265 MethodConfig
*GetConfig(std::string Access
);
267 /** \brief Provides information on how a download terminated. */
269 /** \brief All files were fetched successfully. */
272 /** \brief Some files failed to download. */
275 /** \brief The download was cancelled by the user (i.e., #Log's
276 * pkgAcquireStatus::Pulse() method returned \b false).
280 /** \brief Download all the items that have been Add()ed to this
283 * This method will block until the download completes, invoking
284 * methods on #Log to report on the progress of the download.
286 * \param PulseInterval The method pkgAcquireStatus::Pulse will be
287 * invoked on #Log at intervals of PulseInterval microseconds.
289 * \return the result of the download.
291 RunResult
Run(int PulseInterval
=500000);
293 /** \brief Remove all items from this download process, terminate
294 * all download workers, and empty all queues.
298 /** \brief Get the first Worker object.
300 * \return the first active worker in this download process.
302 inline Worker
*WorkersBegin() {return Workers
;};
304 /** \brief Advance to the next Worker object.
306 * \return the worker immediately following I, or \b NULL if none
309 Worker
*WorkerStep(Worker
*I
) APT_PURE
;
311 /** \brief Get the head of the list of items. */
312 inline ItemIterator
ItemsBegin() {return Items
.begin();};
313 inline ItemCIterator
ItemsBegin() const {return Items
.begin();};
315 /** \brief Get the end iterator of the list of items. */
316 inline ItemIterator
ItemsEnd() {return Items
.end();};
317 inline ItemCIterator
ItemsEnd() const {return Items
.end();};
319 // Iterate over queued Item URIs
321 /** \brief Get the head of the list of enqueued item URIs.
323 * This iterator will step over every element of every active
326 UriIterator
UriBegin();
327 /** \brief Get the end iterator of the list of enqueued item URIs. */
328 UriIterator
UriEnd();
330 /** Deletes each entry in the given directory that is not being
331 * downloaded by this object. For instance, when downloading new
332 * list files, calling Clean() will delete the old ones.
334 * \param Dir The directory to be cleaned out.
336 * \return \b true if the directory exists and is readable.
338 bool Clean(std::string Dir
);
340 /** \return the total size in bytes of all the items included in
343 unsigned long long TotalNeeded();
345 /** \return the size in bytes of all non-local items included in
348 unsigned long long FetchNeeded();
350 /** \return the amount of data to be fetched that is already
351 * present on the filesystem.
353 unsigned long long PartialPresent();
355 /** \brief Delayed constructor
357 * \param Progress indicator associated with this download or
358 * \b NULL for none. This object is not owned by the
359 * download process and will not be deleted when the pkgAcquire
360 * object is destroyed. Naturally, it should live for at least as
361 * long as the pkgAcquire object does.
362 * \param Lock defines a lock file that should be acquired to ensure
363 * only one Acquire class is in action at the time or an empty string
364 * if no lock file should be used. If set also all needed directories
367 APT_DEPRECATED_MSG("Use constructors, .SetLog and .GetLock as needed") bool Setup(pkgAcquireStatus
*Progress
= NULL
, std::string
const &Lock
= "");
/** \brief Set the progress indicator for this download.
 *
 * \param Progress The pointer stored into #Log, replacing the previous value.
 */
369 void SetLog(pkgAcquireStatus
*Progress
) { Log
= Progress
; }
371 /** \brief acquire lock and perform directory setup
373 * \param Lock defines a lock file that should be acquired to ensure
374 * only one Acquire class is in action at the time or an empty string
375 * if no lock file should be used. If set also all needed directories
376 * will be created and setup.
378 bool GetLock(std::string
const &Lock
);
380 /** \brief Construct a new pkgAcquire. */
381 explicit pkgAcquire(pkgAcquireStatus
*Log
);
384 /** \brief Destroy this pkgAcquire object.
386 * Destroys all queue, method, and item objects associated with
389 virtual ~pkgAcquire();
392 APT_HIDDEN
void Initialize();
395 /** \brief Represents a single download source from which an item
396 * should be downloaded.
398 * An item may have several associated ItemDescs over its lifetime.
400 struct pkgAcquire::ItemDesc
: public WeakPointable
402 /** \brief URI from which to download this item. */
404 /** \brief description of this item. */
405 std::string Description
;
406 /** \brief shorter description of this item. */
407 std::string ShortDesc
;
408 /** \brief underlying item which is to be downloaded. */
412 /** \brief A single download queue in a pkgAcquire object. {{{
414 * \todo Why so many protected values?
416 class pkgAcquire::Queue
418 friend class pkgAcquire
;
419 friend class pkgAcquire::UriIterator
;
420 friend class pkgAcquire::Worker
;
422 /** \brief dpointer placeholder (for later in case we need it) */
425 /** \brief The next queue in the pkgAcquire object's list of queues. */
430 /** \brief A single item placed in this queue. */
431 struct QItem
: public ItemDesc
433 /** \brief The next item in the queue. */
435 /** \brief The worker associated with this item, if any. */
436 pkgAcquire::Worker
*Worker
;
438 /** \brief The underlying items interested in the download */
439 std::vector
<Item
*> Owners
;
441 typedef std::vector
<Item
*>::const_iterator owner_iterator
;
443 /** \brief Assign the ItemDesc portion of this QItem from
446 void operator =(pkgAcquire::ItemDesc
const &I
)
449 Description
= I
.Description
;
450 ShortDesc
= I
.ShortDesc
;
452 Owners
.push_back(I
.Owner
);
456 /** @return the sum of all expected hashes by all owners */
457 HashStringList
GetExpectedHashes() const;
459 /** @return smallest maximum size of all owners */
460 unsigned long long GetMaximumSize() const;
462 /** \brief get partial files in order */
463 void SyncDestinationFiles() const;
465 /** @return the custom headers to use for this item */
466 std::string
Custom600Headers() const;
467 /** @return the maximum priority of this item */
468 int APT_HIDDEN
GetPriority() const;
471 /** \brief The name of this queue. */
474 /** \brief The head of the list of items contained in this queue.
476 * \todo why a by-hand list instead of an STL structure?
480 /** \brief The head of the list of workers associated with this queue.
482 * \todo This is plural because support exists in Queue for
483 * multiple workers. However, it does not appear that there is
484 * any way to actually associate more than one worker with a
487 * \todo Why not just use a std::set?
489 pkgAcquire::Worker
*Workers
;
491 /** \brief the download scheduler with which this queue is associated. */
494 /** \brief The number of entries in this queue that are currently
497 signed long PipeDepth
;
499 /** \brief The maximum number of entries that this queue will
500 * attempt to download at once.
502 unsigned long MaxPipeDepth
;
506 /** \brief Insert the given fetch request into this queue.
508 * \return \b true if the queuing was successful. May return
509 * \b false if the Item is already in the queue
511 bool Enqueue(ItemDesc
&Item
);
513 /** \brief Remove all fetch requests for the given item from this queue.
515 * \return \b true if at least one request was removed from the queue.
517 bool Dequeue(Item
*Owner
);
519 /** \brief Locate an item in this queue.
521 * \param URI A URI to match against.
522 * \param Owner A pkgAcquire::Worker to match against.
524 * \return the first item in the queue whose URI is #URI and that
525 * is being downloaded by #Owner.
527 QItem
*FindItem(std::string URI
,pkgAcquire::Worker
*Owner
) APT_PURE
;
529 /** Presumably this should start downloading an item?
531 * \todo Unimplemented. Implement it or remove?
533 bool ItemStart(QItem
*Itm
,unsigned long long Size
);
535 /** \brief Remove the given item from this queue and set its state
536 * to pkgAcquire::Item::StatDone.
538 * If this is the only queue containing the item, the item is also
539 * removed from the main queue by calling pkgAcquire::Dequeue.
541 * \param Itm The item to remove.
543 * \return \b true if no errors are encountered.
545 bool ItemDone(QItem
*Itm
);
547 /** \brief Start the worker process associated with this queue.
549 * If a worker process is already associated with this queue,
550 * this is equivalent to calling Cycle().
552 * \return \b true if the startup was successful.
556 /** \brief Shut down the worker process associated with this queue.
558 * \param Final If \b true, then the process is stopped unconditionally.
559 * Otherwise, it is only stopped if it does not need cleanup
560 * as indicated by the pkgAcqMethod::NeedsCleanup member of
565 bool Shutdown(bool Final
);
567 /** \brief Send idle items to the worker process.
569 * Fills up the pipeline by inserting idle items into the worker's queue.
573 /** \brief Check for items that could be enqueued.
575 * Call this after an item placed in multiple queues has gone from
576 * the pkgAcquire::Item::StatFetching state to the
577 * pkgAcquire::Item::StatIdle state, to possibly refill an empty queue.
578 * This is an alias for Cycle().
580 * \todo Why both this and Cycle()? Are they expected to be
585 /** \brief Create a new Queue.
587 * \param Name The name of the new queue.
588 * \param Owner The download process that owns the new queue.
590 Queue(std::string
const &Name
,pkgAcquire
* const Owner
);
592 /** Shut down all the worker processes associated with this queue
593 * and empty the queue.
598 /** \brief Iterates over all the URIs being fetched by a pkgAcquire object. {{{*/
599 class pkgAcquire::UriIterator
601 /** \brief dpointer placeholder (for later in case we need it) */
604 /** The next queue to iterate over. */
605 pkgAcquire::Queue
*CurQ
;
606 /** The item that we currently point at. */
607 pkgAcquire::Queue::QItem
*CurItem
;
/** \brief Prefix increment; forwards to the postfix overload operator ++(int).
 *
 * Returns nothing, so the result cannot be chained or dereferenced.
 */
611 inline void operator ++() {operator ++(0);};
613 void operator ++(int)
615 CurItem
= CurItem
->Next
;
616 while (CurItem
== 0 && CurQ
!= 0)
618 CurItem
= CurQ
->Items
;
/** \brief Access the queue item this iterator currently points at.
 *
 * \return CurItem as a pointer to const.
 */
623 inline pkgAcquire::Queue::QItem
const *operator ->() const {return CurItem
;};
/** \brief Inequality comparison.
 *
 * \return \b true if the two iterators differ in their current queue
 * (CurQ) or in their current item (CurItem).
 */
624 inline bool operator !=(UriIterator
const &rhs
) const {return rhs
.CurQ
!= CurQ
|| rhs
.CurItem
!= CurItem
;};
/** \brief Equality comparison.
 *
 * \return \b true only if both the current queue (CurQ) and the
 * current item (CurItem) of the two iterators are the same.
 */
625 inline bool operator ==(UriIterator
const &rhs
) const {return rhs
.CurQ
== CurQ
&& rhs
.CurItem
== CurItem
;};
627 /** \brief Create a new UriIterator.
629 * \param Q The queue over which this UriIterator should iterate.
631 explicit UriIterator(pkgAcquire::Queue
*Q
);
632 virtual ~UriIterator();
635 /** \brief Information about the properties of a single acquire method. {{{*/
636 struct pkgAcquire::MethodConfig
638 /** \brief dpointer placeholder (for later in case we need it) */
641 /** \brief The next link on the acquire method list.
643 * \todo Why not an STL container?
647 /** \brief The name of this acquire method (e.g., http). */
650 /** \brief The implementation version of this acquire method. */
653 /** \brief If \b true, only one download queue should be created for this
658 /** \brief If \b true, this method supports pipelined downloading. */
661 /** \brief If \b true, the worker process should send the entire
662 * APT configuration tree to the fetch subprocess when it starts
667 /** \brief If \b true, this fetch method does not require network access;
668 * all files are to be acquired from the local disk.
672 /** \brief If \b true, the subprocess has to carry out some cleanup
673 * actions before shutting down.
675 * For instance, the cdrom method needs to unmount the CD after it
680 /** \brief If \b true, this fetch method acquires files from removable media. */
683 /** \brief Set up the default method parameters.
685 * All fields are initialized to NULL, "", or \b false as
690 virtual ~MethodConfig();
693 /** \brief A monitor object for downloads controlled by the pkgAcquire class. {{{
695 * \todo Why protected members?
697 class pkgAcquireStatus
699 /** \brief dpointer placeholder (for later in case we need it) */
704 /** \brief The last time at which this monitor object was updated. */
707 /** \brief The time at which the download started. */
708 struct timeval StartTime
;
710 /** \brief The number of bytes fetched as of the previous call to
711 * pkgAcquireStatus::Pulse, including local items.
713 unsigned long long LastBytes
;
715 /** \brief The current rate of download as of the most recent call
716 * to pkgAcquireStatus::Pulse, in bytes per second.
718 unsigned long long CurrentCPS
;
720 /** \brief The number of bytes fetched as of the most recent call
721 * to pkgAcquireStatus::Pulse, including local items.
723 unsigned long long CurrentBytes
;
725 /** \brief The total number of bytes that need to be fetched.
727 * \warning This member is inaccurate, as new items might be
728 * enqueued while the download is in progress!
730 unsigned long long TotalBytes
;
732 /** \brief The total number of bytes accounted for by items that
733 * were successfully fetched.
735 unsigned long long FetchedBytes
;
737 /** \brief The amount of time that has elapsed since the download
740 unsigned long long ElapsedTime
;
742 /** \brief The total number of items that need to be fetched.
744 * \warning This member is inaccurate, as new items might be
745 * enqueued while the download is in progress!
747 unsigned long TotalItems
;
749 /** \brief The number of items that have been successfully downloaded. */
750 unsigned long CurrentItems
;
752 /** \brief The estimated percentage of the download (0-100)
758 /** \brief If \b true, the download scheduler should call Pulse()
759 * at the next available opportunity.
763 /** \brief If \b true, extra Pulse() invocations will be performed.
765 * With this option set, Pulse() will be called every time that a
766 * download item starts downloading, finishes downloading, or
767 * terminates with an error.
771 /** \brief Invoked when a local or remote file has been completely fetched.
773 * \param Size The size of the file fetched.
775 * \param ResumePoint How much of the file was already fetched.
777 virtual void Fetched(unsigned long long Size
,unsigned long long ResumePoint
);
779 /** \brief Invoked when the user should be prompted to change the
780 * inserted removable media.
782 * This method should not return until the user has confirmed to
783 * the user interface that the media change is complete.
785 * \param Media The name of the media type that should be changed.
787 * \param Drive The identifying name of the drive whose media
790 * \return \b true if the user confirms the media change, \b
791 * false if it is cancelled.
793 * \todo This is a horrible blocking monster; it should be CPSed
796 virtual bool MediaChange(std::string Media
,std::string Drive
) = 0;
798 /** \brief Invoked when an item is confirmed to be up-to-date.
800 * For instance, when an HTTP download is informed that the file on
801 * the server was not modified.
803 virtual void IMSHit(pkgAcquire::ItemDesc
&/*Itm*/) {};
805 /** \brief Invoked when some of an item's data is fetched. */
806 virtual void Fetch(pkgAcquire::ItemDesc
&/*Itm*/) {};
808 /** \brief Invoked when an item is successfully and completely fetched. */
809 virtual void Done(pkgAcquire::ItemDesc
&/*Itm*/) {};
811 /** \brief Invoked when the process of fetching an item encounters
814 virtual void Fail(pkgAcquire::ItemDesc
&/*Itm*/) {};
816 /** \brief Periodically invoked while the Acquire process is underway.
818 * Subclasses should first call pkgAcquireStatus::Pulse(), then
819 * update their status output. The download process is blocked
820 * while Pulse() is being called.
822 * \return \b false if the user asked to cancel the whole Acquire process.
824 * \see pkgAcquire::Run
826 virtual bool Pulse(pkgAcquire
*Owner
);
828 /** \brief Invoked when the Acquire process starts running. */
829 virtual void Start();
831 /** \brief Invoked when the Acquire process stops running. */
834 /** \brief Initialize all counters to 0 and the time to the current time. */
836 virtual ~pkgAcquireStatus();