]>
git.saurik.com Git - apt.git/blob - methods/http.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: http.cc,v 1.36 1999/07/18 23:06:56 jgg Exp $
4 /* ######################################################################
6 HTTP Acquire Method - This is the HTTP acquire method for APT.
8 It uses HTTP/1.1 and many of the fancy options there-in, such as
9 pipelining, range, if-range and so on. It accepts on the command line
10 a list of url destination pairs and writes to stdout the status of the
11 operation as defined in the APT method spec.
13 It is based on a doubly buffered select loop. All the requests are
14 fed into a single output buffer that is constantly fed out the
15 socket. This provides ideal pipelining as in many cases all of the
16 requests will fit into a single packet. The input socket is buffered
17 the same way and fed into the fd for the file.
19 This double buffering provides fairly substantial transfer rates;
20 compared to wget the http method is about 4% faster. Most importantly,
21 when HTTP is compared with FTP as a protocol the speed difference is
22 huge. In tests over the internet from two sites to llug (via ATM) this
23 program got 230k/s sustained http transfer rates. FTP on the other
24 hand topped out at 170k/s. That combined with the time to setup the
25 FTP connection makes HTTP a vastly superior protocol.
27 ##################################################################### */
29 // Include Files /*{{{*/
30 #include <apt-pkg/fileutl.h>
31 #include <apt-pkg/acquire-method.h>
32 #include <apt-pkg/error.h>
33 #include <apt-pkg/md5.h>
47 #include "rfc2553emu.h"
// Path of the file being downloaded. DealWithHeaders assigns it from
// Queue->DestFile and SigTerm passes it to utime().
52 string
HttpMethod::FailFile
;
// Initialised to -1; no use of it is visible in this excerpt.
53 int HttpMethod::FailFd
= -1;
// Timestamp that SigTerm applies as both actime and modtime of FailFile.
// DealWithHeaders assigns it from Srv->Date.
54 time_t HttpMethod::FailTime
= 0;
// Upper bound on the request-queueing loop in HttpMethod::Fetch.
// HttpMethod::Configuration reassigns it from "Acquire::http::Pipeline-Depth".
55 unsigned long PipelineDepth
= 10;
// Timeout handed to Connect() in ServerState::Open. HttpMethod::Configuration
// reassigns it from "Acquire::http::Timeout", using the current value as the
// default. Presumably in seconds -- TODO confirm against Connect().
56 unsigned long TimeOut
= 120;
58 // CircleBuf::CircleBuf - Circular input buffer /*{{{*/
59 // ---------------------------------------------------------------------
61 CircleBuf::CircleBuf(unsigned long Size
) : Size(Size
), MD5(0)
63 Buf
= new unsigned char[Size
];
67 // CircleBuf::Reset - Reset to the default state /*{{{*/
68 // ---------------------------------------------------------------------
70 void CircleBuf::Reset()
75 MaxGet
= (unsigned int)-1;
80 MD5
= new MD5Summation
;
84 // CircleBuf::Read - Read from a FD into the circular buffer /*{{{*/
85 // ---------------------------------------------------------------------
86 /* This fills up the buffer with as much data as is in the FD, assuming it
88 bool CircleBuf::Read(int Fd
)
92 // Woops, buffer is full
93 if (InP
- OutP
== Size
)
96 // Read from the FD into the next buffer segment
98 Res
= read(Fd
,Buf
+ (InP%Size
),LeftRead());
110 gettimeofday(&Start
,0);
115 // CircleBuf::Read - Put the string into the buffer /*{{{*/
116 // ---------------------------------------------------------------------
117 /* This will hold the string in and fill the buffer with it as it empties */
118 bool CircleBuf::Read(string Data
)
125 // CircleBuf::FillOut - Fill the buffer from the output queue /*{{{*/
126 // ---------------------------------------------------------------------
128 void CircleBuf::FillOut()
130 if (OutQueue
.empty() == true)
134 // Woops, buffer is full
135 if (InP
- OutP
== Size
)
138 // Write the buffer segment
139 unsigned long Sz
= LeftRead();
140 if (OutQueue
.length() - StrPos
< Sz
)
141 Sz
= OutQueue
.length() - StrPos
;
142 memcpy(Buf
+ (InP%Size
),OutQueue
.begin() + StrPos
,Sz
);
147 if (OutQueue
.length() == StrPos
)
156 // CircleBuf::Write - Write from the buffer into a FD /*{{{*/
157 // ---------------------------------------------------------------------
158 /* This empties the buffer into the FD. */
159 bool CircleBuf::Write(int Fd
)
165 // Woops, buffer is empty
172 // Write the buffer segment
174 Res
= write(Fd
,Buf
+ (OutP%Size
),LeftWrite());
187 MD5
->Add(Buf
+ (OutP%Size
),Res
);
193 // CircleBuf::WriteTillEl - Write from the buffer to a string /*{{{*/
194 // ---------------------------------------------------------------------
195 /* This copies till the first empty line */
196 bool CircleBuf::WriteTillEl(string
&Data
,bool Single
)
198 // We cheat and assume it is unneeded to have more than one buffer load
199 for (unsigned long I
= OutP
; I
< InP
; I
++)
201 if (Buf
[I%Size
] != '\n')
203 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
207 if (Buf
[I%Size
] != '\n')
209 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
218 unsigned long Sz
= LeftWrite();
221 if (I
- OutP
< LeftWrite())
223 Data
+= string((char *)(Buf
+ (OutP%Size
)),Sz
);
231 // CircleBuf::Stats - Print out stats information /*{{{*/
232 // ---------------------------------------------------------------------
234 void CircleBuf::Stats()
240 gettimeofday(&Stop
,0);
241 /* float Diff = Stop.tv_sec - Start.tv_sec +
242 (float)(Stop.tv_usec - Start.tv_usec)/1000000;
243 clog << "Got " << InP << " in " << Diff << " at " << InP/Diff << endl;*/
247 // ServerState::ServerState - Constructor /*{{{*/
248 // ---------------------------------------------------------------------
250 ServerState::ServerState(URI Srv
,HttpMethod
*Owner
) : Owner(Owner
),
251 In(64*1024), Out(4*1024),
257 // ServerState::Open - Open a connection to the server /*{{{*/
258 // ---------------------------------------------------------------------
259 /* This opens a connection to the server. */
262 struct addrinfo
*LastHostAddr
= 0;
263 bool ServerState::Open()
265 // Use the already open connection if possible.
273 // Determine the proxy setting
274 if (getenv("http_proxy") == 0)
276 string DefProxy
= _config
->Find("Acquire::http::Proxy");
277 string SpecificProxy
= _config
->Find("Acquire::http::Proxy::" + ServerName
.Host
);
278 if (SpecificProxy
.empty() == false)
280 if (SpecificProxy
== "DIRECT")
283 Proxy
= SpecificProxy
;
289 Proxy
= getenv("http_proxy");
291 // Determine what host and port to use based on the proxy settings
294 if (Proxy
.empty() == true)
296 if (ServerName
.Port
!= 0)
297 Port
= ServerName
.Port
;
298 Host
= ServerName
.Host
;
307 // Connect to the remote server
308 if (Connect(Host
,Port
,"http",80,ServerFd
,TimeOut
,Owner
) == false)
314 // ServerState::Close - Close a connection to the server /*{{{*/
315 // ---------------------------------------------------------------------
317 bool ServerState::Close()
324 // ServerState::RunHeaders - Get the headers before the data /*{{{*/
325 // ---------------------------------------------------------------------
326 /* Returns 0 if things are OK, 1 if an IO error occurred and 2 if a header
327 parse error occurred */
328 int ServerState::RunHeaders()
332 Owner
->Status("Waiting for file");
346 if (In
.WriteTillEl(Data
) == false)
349 for (string::const_iterator I
= Data
.begin(); I
< Data
.end(); I
++)
351 string::const_iterator J
= I
;
352 for (; J
!= Data
.end() && *J
!= '\n' && *J
!= '\r';J
++);
353 if (HeaderLine(string(I
,J
-I
)) == false)
359 while (Owner
->Go(false,this) == true);
364 // ServerState::RunData - Transfer the data from the socket /*{{{*/
365 // ---------------------------------------------------------------------
367 bool ServerState::RunData()
371 // Chunked transfer encoding is fun..
372 if (Encoding
== Chunked
)
376 // Grab the block size
382 if (In
.WriteTillEl(Data
,true) == true)
385 while ((Last
= Owner
->Go(false,this)) == true);
390 // See if we are done
391 unsigned long Len
= strtol(Data
.c_str(),0,16);
396 // We have to remove the entity trailer
400 if (In
.WriteTillEl(Data
,true) == true && Data
.length() <= 2)
403 while ((Last
= Owner
->Go(false,this)) == true);
406 return !_error
->PendingError();
409 // Transfer the block
411 while (Owner
->Go(true,this) == true)
412 if (In
.IsLimit() == true)
416 if (In
.IsLimit() == false)
419 // The server sends an extra new line before the next block specifier..
424 if (In
.WriteTillEl(Data
,true) == true)
427 while ((Last
= Owner
->Go(false,this)) == true);
434 /* Closes encoding is used when the server did not specify a size; the
435 loss of the connection then means we are done */
436 if (Encoding
== Closes
)
439 In
.Limit(Size
- StartPos
);
441 // Just transfer the whole block.
444 if (In
.IsLimit() == false)
448 return !_error
->PendingError();
450 while (Owner
->Go(true,this) == true);
453 return Owner
->Flush(this) && !_error
->PendingError();
456 // ServerState::HeaderLine - Process a header line /*{{{*/
457 // ---------------------------------------------------------------------
459 bool ServerState::HeaderLine(string Line
)
461 if (Line
.empty() == true)
464 // The http server might be trying to do something evil.
465 if (Line
.length() >= MAXLEN
)
466 return _error
->Error("Got a single header line over %u chars",MAXLEN
);
468 string::size_type Pos
= Line
.find(' ');
469 if (Pos
== string::npos
|| Pos
+1 > Line
.length())
471 // Blah, some servers use "connection:closes", evil.
472 Pos
= Line
.find(':');
473 if (Pos
== string::npos
|| Pos
+ 2 > Line
.length())
474 return _error
->Error("Bad header line");
478 // Parse off any trailing spaces between the : and the next word.
479 string::size_type Pos2
= Pos
;
480 while (Pos2
< Line
.length() && isspace(Line
[Pos2
]) != 0)
483 string Tag
= string(Line
,0,Pos
);
484 string Val
= string(Line
,Pos2
);
486 if (stringcasecmp(Tag
.begin(),Tag
.begin()+4,"HTTP") == 0)
488 // Evil servers return no version
491 if (sscanf(Line
.c_str(),"HTTP/%u.%u %u %[^\n]",&Major
,&Minor
,
493 return _error
->Error("The http server sent an invalid reply header");
499 if (sscanf(Line
.c_str(),"HTTP %u %[^\n]",&Result
,Code
) != 2)
500 return _error
->Error("The http server sent an invalid reply header");
506 if (stringcasecmp(Tag
,"Content-Length:") == 0)
508 if (Encoding
== Closes
)
512 // The length is already set from the Content-Range header
516 if (sscanf(Val
.c_str(),"%lu",&Size
) != 1)
517 return _error
->Error("The http server sent an invalid Content-Length header");
521 if (stringcasecmp(Tag
,"Content-Type:") == 0)
527 if (stringcasecmp(Tag
,"Content-Range:") == 0)
531 if (sscanf(Val
.c_str(),"bytes %lu-%*u/%lu",&StartPos
,&Size
) != 2)
532 return _error
->Error("The http server sent an invalid Content-Range header");
533 if ((unsigned)StartPos
> Size
)
534 return _error
->Error("This http server has broken range support");
538 if (stringcasecmp(Tag
,"Transfer-Encoding:") == 0)
541 if (stringcasecmp(Val
,"chunked") == 0)
547 if (stringcasecmp(Tag
,"Last-Modified:") == 0)
549 if (StrToTime(Val
,Date
) == false)
550 return _error
->Error("Unknown date format");
558 // HttpMethod::SendReq - Send the HTTP request /*{{{*/
559 // ---------------------------------------------------------------------
560 /* This places the http request in the outbound buffer */
561 void HttpMethod::SendReq(FetchItem
*Itm
,CircleBuf
&Out
)
565 // The HTTP server expects a hostname with a trailing :port
567 string ProperHost
= Uri
.Host
;
570 sprintf(Buf
,":%u",Uri
.Port
);
575 if (Itm
->Uri
.length() >= sizeof(Buf
))
578 /* Build the request. We include a keep-alive header only for non-proxy
579 requests. This is to tweak old HTTP/1.0 servers that do support keep-alive
580 but not HTTP/1.1 automatic keep-alive. Doing this with a proxy server
581 will glitch HTTP/1.0 proxies because they do not filter the header out and
582 instead pass it on. HTTP/1.1 says the connection should default to
583 keep-alive, and we expect the proxy to do this. */
584 if (Proxy
.empty() == true)
585 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\nConnection: keep-alive\r\n",
586 QuoteString(Uri
.Path
,"~").c_str(),ProperHost
.c_str());
589 /* Generate a cache control header if necessary. We place a max
590 cache age on index files, optionally set a no-cache directive
591 and a no-store directive for archives. */
592 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\n",
593 Itm
->Uri
.c_str(),ProperHost
.c_str());
594 if (_config
->FindB("Acquire::http::No-Cache",false) == true)
595 strcat(Buf
,"Cache-Control: no-cache\r\nPragma: no-cache\r\n");
598 if (Itm
->IndexFile
== true)
599 sprintf(Buf
+strlen(Buf
),"Cache-Control: max-age=%u\r\n",
600 _config
->FindI("Acquire::http::Max-Age",60*60*24));
603 if (_config
->FindB("Acquire::http::No-Store",false) == true)
604 strcat(Buf
,"Cache-Control: no-store\r\n");
611 // Check for a partial file
613 if (stat(Itm
->DestFile
.c_str(),&SBuf
) >= 0 && SBuf
.st_size
> 0)
615 // In this case we send an if-range query with a range header
616 sprintf(Buf
,"Range: bytes=%li-\r\nIf-Range: %s\r\n",SBuf
.st_size
- 1,
617 TimeRFC1123(SBuf
.st_mtime
).c_str());
622 if (Itm
->LastModified
!= 0)
624 sprintf(Buf
,"If-Modified-Since: %s\r\n",TimeRFC1123(Itm
->LastModified
).c_str());
629 if (Proxy
.User
.empty() == false || Proxy
.Password
.empty() == false)
630 Req
+= string("Proxy-Authorization: Basic ") +
631 Base64Encode(Proxy
.User
+ ":" + Proxy
.Password
) + "\r\n";
633 Req
+= "User-Agent: Debian APT-HTTP/1.2\r\n\r\n";
634 // cerr << Req << endl;
639 // HttpMethod::Go - Run a single loop /*{{{*/
640 // ---------------------------------------------------------------------
641 /* This runs the select loop over the server FDs, Output file FDs and
643 bool HttpMethod::Go(bool ToFile
,ServerState
*Srv
)
645 // Server has closed the connection
646 if (Srv
->ServerFd
== -1 && (Srv
->In
.WriteSpace() == false ||
650 fd_set rfds
,wfds
,efds
;
656 if (Srv
->Out
.WriteSpace() == true && Srv
->ServerFd
!= -1)
657 FD_SET(Srv
->ServerFd
,&wfds
);
658 if (Srv
->In
.ReadSpace() == true && Srv
->ServerFd
!= -1)
659 FD_SET(Srv
->ServerFd
,&rfds
);
666 if (Srv
->In
.WriteSpace() == true && ToFile
== true && FileFD
!= -1)
667 FD_SET(FileFD
,&wfds
);
670 FD_SET(STDIN_FILENO
,&rfds
);
674 FD_SET(FileFD
,&efds
);
675 if (Srv
->ServerFd
!= -1)
676 FD_SET(Srv
->ServerFd
,&efds
);
678 // Figure out the max fd
680 if (MaxFd
< Srv
->ServerFd
)
681 MaxFd
= Srv
->ServerFd
;
688 if ((Res
= select(MaxFd
+1,&rfds
,&wfds
,&efds
,&tv
)) < 0)
689 return _error
->Errno("select","Select failed");
693 _error
->Error("Connection timed out");
694 return ServerDie(Srv
);
697 // Some kind of exception (error) on the sockets, die
698 if ((FileFD
!= -1 && FD_ISSET(FileFD
,&efds
)) ||
699 (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&efds
)))
700 return _error
->Error("Socket Exception");
703 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&rfds
))
706 if (Srv
->In
.Read(Srv
->ServerFd
) == false)
707 return ServerDie(Srv
);
710 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&wfds
))
713 if (Srv
->Out
.Write(Srv
->ServerFd
) == false)
714 return ServerDie(Srv
);
717 // Send data to the file
718 if (FileFD
!= -1 && FD_ISSET(FileFD
,&wfds
))
720 if (Srv
->In
.Write(FileFD
) == false)
721 return _error
->Errno("write","Error writing to output file");
724 // Handle commands from APT
725 if (FD_ISSET(STDIN_FILENO
,&rfds
))
734 // HttpMethod::Flush - Dump the buffer into the file /*{{{*/
735 // ---------------------------------------------------------------------
736 /* This takes the current input buffer from the Server FD and writes it
738 bool HttpMethod::Flush(ServerState
*Srv
)
742 SetNonBlock(File
->Fd(),false);
743 if (Srv
->In
.WriteSpace() == false)
746 while (Srv
->In
.WriteSpace() == true)
748 if (Srv
->In
.Write(File
->Fd()) == false)
749 return _error
->Errno("write","Error writing to file");
750 if (Srv
->In
.IsLimit() == true)
754 if (Srv
->In
.IsLimit() == true || Srv
->Encoding
== ServerState::Closes
)
760 // HttpMethod::ServerDie - The server has closed the connection. /*{{{*/
761 // ---------------------------------------------------------------------
763 bool HttpMethod::ServerDie(ServerState
*Srv
)
765 unsigned int LErrno
= errno
;
767 // Dump the buffer to the file
768 if (Srv
->State
== ServerState::Data
)
770 SetNonBlock(File
->Fd(),false);
771 while (Srv
->In
.WriteSpace() == true)
773 if (Srv
->In
.Write(File
->Fd()) == false)
774 return _error
->Errno("write","Error writing to the file");
777 if (Srv
->In
.IsLimit() == true)
782 // See if this is because the server finished the data stream
783 if (Srv
->In
.IsLimit() == false && Srv
->State
!= ServerState::Header
&&
784 Srv
->Encoding
!= ServerState::Closes
)
788 return _error
->Error("Error reading from server Remote end closed connection");
790 return _error
->Errno("read","Error reading from server");
796 // Nothing left in the buffer
797 if (Srv
->In
.WriteSpace() == false)
800 // We may have got multiple responses back in one packet..
808 // HttpMethod::DealWithHeaders - Handle the retrieved header data /*{{{*/
809 // ---------------------------------------------------------------------
810 /* We look at the header data we got back from the server and decide what
814 3 - Unrecoverable error
815 4 - Error with error content page
816 5 - Unrecoverable non-server error (close the connection) */
817 int HttpMethod::DealWithHeaders(FetchResult
&Res
,ServerState
*Srv
)
820 if (Srv
->Result
== 304)
822 unlink(Queue
->DestFile
.c_str());
824 Res
.LastModified
= Queue
->LastModified
;
828 /* We have a reply we don't handle. This should indicate a perm server
830 if (Srv
->Result
< 200 || Srv
->Result
>= 300)
832 _error
->Error("%u %s",Srv
->Result
,Srv
->Code
);
833 if (Srv
->HaveContent
== true)
838 // This is some sort of 2xx 'data follows' reply
839 Res
.LastModified
= Srv
->Date
;
840 Res
.Size
= Srv
->Size
;
844 File
= new FileFd(Queue
->DestFile
,FileFd::WriteAny
);
845 if (_error
->PendingError() == true)
848 FailFile
= Queue
->DestFile
;
849 FailFile
.c_str(); // Make sure we dont do a malloc in the signal handler
851 FailTime
= Srv
->Date
;
853 // Set the expected size
854 if (Srv
->StartPos
>= 0)
856 Res
.ResumePoint
= Srv
->StartPos
;
857 ftruncate(File
->Fd(),Srv
->StartPos
);
860 // Set the start point
861 lseek(File
->Fd(),0,SEEK_END
);
864 Srv
->In
.MD5
= new MD5Summation
;
866 // Fill the MD5 Hash if the file is non-empty (resume)
867 if (Srv
->StartPos
> 0)
869 lseek(File
->Fd(),0,SEEK_SET
);
870 if (Srv
->In
.MD5
->AddFD(File
->Fd(),Srv
->StartPos
) == false)
872 _error
->Errno("read","Problem hashing file");
875 lseek(File
->Fd(),0,SEEK_END
);
878 SetNonBlock(File
->Fd(),true);
882 // HttpMethod::SigTerm - Handle a fatal signal /*{{{*/
883 // ---------------------------------------------------------------------
884 /* This closes and timestamps the open file. This is necessary to get
885 resume behavior on user abort */
886 void HttpMethod::SigTerm(int)
894 UBuf
.actime
= FailTime
;
895 UBuf
.modtime
= FailTime
;
896 utime(FailFile
.c_str(),&UBuf
);
901 // HttpMethod::Fetch - Fetch an item /*{{{*/
902 // ---------------------------------------------------------------------
903 /* This adds an item to the pipeline. We keep the pipeline at a fixed
905 bool HttpMethod::Fetch(FetchItem
*)
910 // Queue the requests
913 for (FetchItem
*I
= Queue
; I
!= 0 && Depth
< (signed)PipelineDepth
; I
= I
->Next
, Depth
++)
915 // Make sure we stick with the same server
916 if (Server
->Comp(I
->Uri
) == false)
923 SendReq(I
,Server
->Out
);
931 // HttpMethod::Configuration - Handle a configuration message /*{{{*/
932 // ---------------------------------------------------------------------
933 /* We stash the desired pipeline depth */
934 bool HttpMethod::Configuration(string Message
)
936 if (pkgAcqMethod::Configuration(Message
) == false)
939 TimeOut
= _config
->FindI("Acquire::http::Timeout",TimeOut
);
940 PipelineDepth
= _config
->FindI("Acquire::http::Pipeline-Depth",
946 // HttpMethod::Loop - Main loop /*{{{*/
947 // ---------------------------------------------------------------------
949 int HttpMethod::Loop()
951 signal(SIGTERM
,SigTerm
);
952 signal(SIGINT
,SigTerm
);
959 // We have no commands, wait for some to arrive
962 if (WaitFd(STDIN_FILENO
) == false)
973 // Connect to the server
974 if (Server
== 0 || Server
->Comp(Queue
->Uri
) == false)
977 Server
= new ServerState(Queue
->Uri
,this);
980 // Reset the pipeline
981 if (Server
->ServerFd
== -1)
984 // Connect to the host
985 if (Server
->Open() == false)
993 // Fill the pipeline.
996 // Fetch the next URL header data from the server.
997 switch (Server
->RunHeaders())
1002 // The header data is bad
1005 _error
->Error("Bad header Data");
1010 // The server closed a connection during the header get..
1018 if (FailCounter
>= 2)
1020 Fail("Connection failed",true);
1028 // Decide what to do.
1030 Res
.Filename
= Queue
->DestFile
;
1031 switch (DealWithHeaders(Res
,Server
))
1033 // Ok, the file is Open
1039 bool Result
= Server
->RunData();
1041 // Close the file, destroy the FD object and timestamp it
1047 struct utimbuf UBuf
;
1049 UBuf
.actime
= Server
->Date
;
1050 UBuf
.modtime
= Server
->Date
;
1051 utime(Queue
->DestFile
.c_str(),&UBuf
);
1053 // Send status to APT
1056 Res
.MD5Sum
= Server
->In
.MD5
->Result();
1072 // Hard server error, not found or something
1079 // Hard internal error, kill the connection and fail
1087 // We need to flush the data, the header is like a 404 w/ error text
1092 // Send the content to /dev/null
1093 File
= new FileFd("/dev/null",FileFd::WriteExists
);
1101 Fail("Internal error");