]>
git.saurik.com Git - apt.git/blob - methods/http.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: http.cc,v 1.11 1998/12/07 07:32:03 jgg Exp $
4 /* ######################################################################
6 HTTP Acquire Method - This is the HTTP acquire method for APT.
8 It uses HTTP/1.1 and many of the fancy options therein, such as
9 pipelining, range, if-range and so on. It accepts on the command line
10 a list of url destination pairs and writes to stdout the status of the
11 operation as defined in the APT method spec.
13 It is based on a doubly buffered select loop. All the requests are
14 fed into a single output buffer that is constantly fed out the
15 socket. This provides ideal pipelining as in many cases all of the
16 requests will fit into a single packet. The input socket is buffered
17 the same way and fed into the fd for the file.
19 This double buffering provides fairly substantial transfer rates;
20 compared to wget the http method is about 4% faster. Most importantly,
21 when HTTP is compared with FTP as a protocol the speed difference is
22 huge. In tests over the internet from two sites to llug (via ATM) this
23 program got 230k/s sustained http transfer rates. FTP on the other
24 hand topped out at 170k/s. That combined with the time to setup the
25 FTP connection makes HTTP a vastly superior protocol.
27 ##################################################################### */
29 // Include Files /*{{{*/
30 #include <apt-pkg/fileutl.h>
31 #include <apt-pkg/acquire-method.h>
32 #include <apt-pkg/error.h>
33 #include <apt-pkg/md5.h>
43 #include <netinet/in.h>
44 #include <sys/socket.h>
45 #include <arpa/inet.h>
51 string
HttpMethod::FailFile
;
52 int HttpMethod::FailFd
= -1;
53 time_t HttpMethod::FailTime
= 0;
54 unsigned long PipelineDepth
= 5;
56 // CircleBuf::CircleBuf - Circular input buffer /*{{{*/
57 // ---------------------------------------------------------------------
59 CircleBuf::CircleBuf(unsigned long Size
) : Size(Size
), MD5(0)
61 Buf
= new unsigned char[Size
];
65 // CircleBuf::Reset - Reset to the default state /*{{{*/
66 // ---------------------------------------------------------------------
68 void CircleBuf::Reset()
73 MaxGet
= (unsigned int)-1;
78 MD5
= new MD5Summation
;
82 // CircleBuf::Read - Read from a FD into the circular buffer /*{{{*/
83 // ---------------------------------------------------------------------
84 /* This fills up the buffer with as much data as is in the FD, assuming it
86 bool CircleBuf::Read(int Fd
)
90 // Woops, buffer is full
91 if (InP
- OutP
== Size
)
94 // Write the buffer segment
96 Res
= read(Fd
,Buf
+ (InP%Size
),LeftRead());
108 gettimeofday(&Start
,0);
113 // CircleBuf::Read - Put the string into the buffer /*{{{*/
114 // ---------------------------------------------------------------------
115 /* This will hold the string in and fill the buffer with it as it empties */
116 bool CircleBuf::Read(string Data
)
123 // CircleBuf::FillOut - Fill the buffer from the output queue /*{{{*/
124 // ---------------------------------------------------------------------
126 void CircleBuf::FillOut()
128 if (OutQueue
.empty() == true)
132 // Woops, buffer is full
133 if (InP
- OutP
== Size
)
136 // Write the buffer segment
137 unsigned long Sz
= LeftRead();
138 if (OutQueue
.length() - StrPos
< Sz
)
139 Sz
= OutQueue
.length() - StrPos
;
140 memcpy(Buf
+ (InP%Size
),OutQueue
.begin() + StrPos
,Sz
);
145 if (OutQueue
.length() == StrPos
)
154 // CircleBuf::Write - Write from the buffer into a FD /*{{{*/
155 // ---------------------------------------------------------------------
156 /* This empties the buffer into the FD. */
157 bool CircleBuf::Write(int Fd
)
163 // Woops, buffer is empty
170 // Write the buffer segment
172 Res
= write(Fd
,Buf
+ (OutP%Size
),LeftWrite());
185 MD5
->Add(Buf
+ (OutP%Size
),Res
);
191 // CircleBuf::WriteTillEl - Write from the buffer to a string /*{{{*/
192 // ---------------------------------------------------------------------
193 /* This copies till the first empty line */
194 bool CircleBuf::WriteTillEl(string
&Data
,bool Single
)
196 // We cheat and assume it is unneeded to have more than one buffer load
197 for (unsigned long I
= OutP
; I
< InP
; I
++)
199 if (Buf
[I%Size
] != '\n')
201 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
205 if (Buf
[I%Size
] != '\n')
207 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
216 unsigned long Sz
= LeftWrite();
219 if (I
- OutP
< LeftWrite())
221 Data
+= string((char *)(Buf
+ (OutP%Size
)),Sz
);
229 // CircleBuf::Stats - Print out stats information /*{{{*/
230 // ---------------------------------------------------------------------
232 void CircleBuf::Stats()
238 gettimeofday(&Stop
,0);
239 /* float Diff = Stop.tv_sec - Start.tv_sec +
240 (float)(Stop.tv_usec - Start.tv_usec)/1000000;
241 clog << "Got " << InP << " in " << Diff << " at " << InP/Diff << endl;*/
245 // ServerState::ServerState - Constructor /*{{{*/
246 // ---------------------------------------------------------------------
248 ServerState::ServerState(URI Srv
,HttpMethod
*Owner
) : Owner(Owner
),
249 In(64*1024), Out(1*1024),
255 // ServerState::Open - Open a connection to the server /*{{{*/
256 // ---------------------------------------------------------------------
257 /* This opens a connection to the server. */
260 bool ServerState::Open()
262 // Use the already open connection if possible.
270 // Determine the proxy setting
271 if (getenv("http_proxy") == 0)
273 string DefProxy
= _config
->Find("Acquire::http::Proxy");
274 string SpecificProxy
= _config
->Find("Acquire::http::Proxy::" + ServerName
.Host
);
275 if (SpecificProxy
.empty() == false)
277 if (SpecificProxy
== "DIRECT")
280 Proxy
= SpecificProxy
;
286 Proxy
= getenv("http_proxy");
288 // Determine what host and port to use based on the proxy settings
291 if (Proxy
.empty() == true)
293 if (ServerName
.Port
!= 0)
294 Port
= ServerName
.Port
;
295 Host
= ServerName
.Host
;
304 /* We use a cached address record. Yes, this is against the spec, but
305 the way we have setup our rotating dns suggests that this is more
307 if (LastHost
!= Host
)
309 Owner
->Status("Connecting to %s",Host
.c_str());
312 hostent
*Addr
= gethostbyname(Host
.c_str());
314 return _error
->Error("Could not resolve '%s'",Host
.c_str());
316 LastHostA
= *(in_addr
*)(Addr
->h_addr_list
[0]);
319 Owner
->Status("Connecting to %s (%s)",Host
.c_str(),inet_ntoa(LastHostA
));
322 if ((ServerFd
= socket(AF_INET
,SOCK_STREAM
,0)) < 0)
323 return _error
->Errno("socket","Could not create a socket");
325 // Connect to the server
326 struct sockaddr_in server
;
327 server
.sin_family
= AF_INET
;
328 server
.sin_port
= htons(Port
);
329 server
.sin_addr
= LastHostA
;
330 if (connect(ServerFd
,(sockaddr
*)&server
,sizeof(server
)) < 0)
331 return _error
->Errno("socket","Could not create a socket");
333 SetNonBlock(ServerFd
,true);
337 // ServerState::Close - Close a connection to the server /*{{{*/
338 // ---------------------------------------------------------------------
340 bool ServerState::Close()
347 // ServerState::RunHeaders - Get the headers before the data /*{{{*/
348 // ---------------------------------------------------------------------
349 /* Returns 0 if things are OK, 1 if an IO error occurred and 2 if a header
350 parse error occurred */
351 int ServerState::RunHeaders()
355 Owner
->Status("Waiting for file");
369 if (In
.WriteTillEl(Data
) == false)
372 for (string::const_iterator I
= Data
.begin(); I
< Data
.end(); I
++)
374 string::const_iterator J
= I
;
375 for (; J
!= Data
.end() && *J
!= '\n' && *J
!= '\r';J
++);
376 if (HeaderLine(string(I
,J
-I
)) == false)
382 while (Owner
->Go(false,this) == true);
387 // ServerState::RunData - Transfer the data from the socket /*{{{*/
388 // ---------------------------------------------------------------------
390 bool ServerState::RunData()
394 // Chunked transfer encoding is fun..
395 if (Encoding
== Chunked
)
399 // Grab the block size
405 if (In
.WriteTillEl(Data
,true) == true)
408 while ((Last
= Owner
->Go(false,this)) == true);
413 // See if we are done
414 unsigned long Len
= strtol(Data
.c_str(),0,16);
419 // We have to remove the entity trailer
423 if (In
.WriteTillEl(Data
,true) == true && Data
.length() <= 2)
426 while ((Last
= Owner
->Go(false,this)) == true);
432 // Transfer the block
434 while (Owner
->Go(true,this) == true)
435 if (In
.IsLimit() == true)
439 if (In
.IsLimit() == false)
442 // The server sends an extra new line before the next block specifier..
447 if (In
.WriteTillEl(Data
,true) == true)
450 while ((Last
= Owner
->Go(false,this)) == true);
457 /* Closes encoding is used when the server did not specify a size; the
458 loss of the connection means we are done */
459 if (Encoding
== Closes
)
462 In
.Limit(Size
- StartPos
);
464 // Just transfer the whole block.
467 if (In
.IsLimit() == false)
473 while (Owner
->Go(true,this) == true);
476 return Owner
->Flush(this);
479 // ServerState::HeaderLine - Process a header line /*{{{*/
480 // ---------------------------------------------------------------------
482 bool ServerState::HeaderLine(string Line
)
484 if (Line
.empty() == true)
487 // The http server might be trying to do something evil.
488 if (Line
.length() >= MAXLEN
)
489 return _error
->Error("Got a single header line over %u chars",MAXLEN
);
491 string::size_type Pos
= Line
.find(' ');
492 if (Pos
== string::npos
|| Pos
+1 > Line
.length())
493 return _error
->Error("Bad header line");
495 string Tag
= string(Line
,0,Pos
);
496 string Val
= string(Line
,Pos
+1);
498 if (stringcasecmp(Tag
.begin(),Tag
.begin()+4,"HTTP") == 0)
500 // Evil servers return no version
503 if (sscanf(Line
.c_str(),"HTTP/%u.%u %u %[^\n]",&Major
,&Minor
,
505 return _error
->Error("The http server sent an invalid reply header");
511 if (sscanf(Line
.c_str(),"HTTP %u %[^\n]",&Result
,Code
) != 2)
512 return _error
->Error("The http server sent an invalid reply header");
518 if (stringcasecmp(Tag
,"Content-Length:") == 0)
520 if (Encoding
== Closes
)
524 // The length is already set from the Content-Range header
528 if (sscanf(Val
.c_str(),"%lu",&Size
) != 1)
529 return _error
->Error("The http server sent an invalid Content-Length header");
533 if (stringcasecmp(Tag
,"Content-Type:") == 0)
539 if (stringcasecmp(Tag
,"Content-Range:") == 0)
543 if (sscanf(Val
.c_str(),"bytes %lu-%*u/%lu",&StartPos
,&Size
) != 2)
544 return _error
->Error("The http server sent an invalid Content-Range header");
545 if ((unsigned)StartPos
> Size
)
546 return _error
->Error("This http server has broken range support");
550 if (stringcasecmp(Tag
,"Transfer-Encoding:") == 0)
553 if (stringcasecmp(Val
,"chunked") == 0)
559 if (stringcasecmp(Tag
,"Last-Modified:") == 0)
561 if (StrToTime(Val
,Date
) == false)
562 return _error
->Error("Unknown date format");
570 // HttpMethod::SendReq - Send the HTTP request /*{{{*/
571 // ---------------------------------------------------------------------
572 /* This places the http request in the outbound buffer */
573 void HttpMethod::SendReq(FetchItem
*Itm
,CircleBuf
&Out
)
577 // The HTTP server expects a hostname with a trailing :port
579 string ProperHost
= Uri
.Host
;
582 sprintf(Buf
,":%u",Uri
.Port
);
587 if (Itm
->Uri
.length() >= sizeof(Buf
))
590 /* Build the request. We include a keep-alive header only for non-proxy
591 requests. This is to tweak old HTTP/1.0 servers that do support keep-alive
592 but not HTTP/1.1 automatic keep-alive. Doing this with a proxy server
593 will glitch HTTP/1.0 proxies because they do not filter it out and
594 pass it on. HTTP/1.1 says the connection should default to keep-alive,
595 and we expect the proxy to do this */
596 if (Proxy
.empty() == true)
597 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\nConnection: keep-alive\r\n",
598 Uri
.Path
.c_str(),ProperHost
.c_str());
601 /* Generate a cache control header if necessary. We place a max
602 cache age on index files, optionally set a no-cache directive
603 and a no-store directive for archives. */
604 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\n",
605 Itm
->Uri
.c_str(),ProperHost
.c_str());
606 if (_config
->FindB("Acquire::http::No-Cache",false) == true)
607 strcat(Buf
,"Cache-Control: no-cache\r\n");
610 if (Itm
->IndexFile
== true)
611 sprintf(Buf
+strlen(Buf
),"Cache-Control: max-age=%u\r\n",
612 _config
->FindI("Acquire::http::Max-Age",60*60*24));
615 if (_config
->FindB("Acquire::http::No-Store",false) == true)
616 strcat(Buf
,"Cache-Control: no-store\r\n");
623 // Check for a partial file
625 if (stat(Itm
->DestFile
.c_str(),&SBuf
) >= 0 && SBuf
.st_size
> 0)
627 // In this case we send an if-range query with a range header
628 sprintf(Buf
,"Range: bytes=%li-\r\nIf-Range: %s\r\n",SBuf
.st_size
- 1,
629 TimeRFC1123(SBuf
.st_mtime
).c_str());
634 if (Itm
->LastModified
!= 0)
636 sprintf(Buf
,"If-Modified-Since: %s\r\n",TimeRFC1123(Itm
->LastModified
).c_str());
641 /* if (ProxyAuth.empty() == false)
642 Req += string("Proxy-Authorization: Basic ") + Base64Encode(ProxyAuth) + "\r\n";*/
644 Req
+= "User-Agent: Debian APT-HTTP/1.2\r\n\r\n";
645 // cerr << Req << endl;
650 // HttpMethod::Go - Run a single loop /*{{{*/
651 // ---------------------------------------------------------------------
652 /* This runs the select loop over the server FDs, Output file FDs and
654 bool HttpMethod::Go(bool ToFile
,ServerState
*Srv
)
656 // Server has closed the connection
657 if (Srv
->ServerFd
== -1 && Srv
->In
.WriteSpace() == false)
660 fd_set rfds
,wfds
,efds
;
666 if (Srv
->Out
.WriteSpace() == true && Srv
->ServerFd
!= -1)
667 FD_SET(Srv
->ServerFd
,&wfds
);
668 if (Srv
->In
.ReadSpace() == true && Srv
->ServerFd
!= -1)
669 FD_SET(Srv
->ServerFd
,&rfds
);
676 if (Srv
->In
.WriteSpace() == true && ToFile
== true && FileFD
!= -1)
677 FD_SET(FileFD
,&wfds
);
680 FD_SET(STDIN_FILENO
,&rfds
);
684 FD_SET(FileFD
,&efds
);
685 if (Srv
->ServerFd
!= -1)
686 FD_SET(Srv
->ServerFd
,&efds
);
688 // Figure out the max fd
690 if (MaxFd
< Srv
->ServerFd
)
691 MaxFd
= Srv
->ServerFd
;
698 if ((Res
= select(MaxFd
+1,&rfds
,&wfds
,&efds
,&tv
)) < 0)
699 return _error
->Errno("select","Select failed");
703 _error
->Error("Connection timed out");
704 return ServerDie(Srv
);
707 // Some kind of exception (error) on the sockets, die
708 if ((FileFD
!= -1 && FD_ISSET(FileFD
,&efds
)) ||
709 (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&efds
)))
710 return _error
->Error("Socket Exception");
713 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&rfds
))
716 if (Srv
->In
.Read(Srv
->ServerFd
) == false)
717 return ServerDie(Srv
);
720 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&wfds
))
723 if (Srv
->Out
.Write(Srv
->ServerFd
) == false)
724 return ServerDie(Srv
);
727 // Send data to the file
728 if (FileFD
!= -1 && FD_ISSET(FileFD
,&wfds
))
730 if (Srv
->In
.Write(FileFD
) == false)
731 return _error
->Errno("write","Error writing to output file");
734 // Handle commands from APT
735 if (FD_ISSET(STDIN_FILENO
,&rfds
))
744 // HttpMethod::Flush - Dump the buffer into the file /*{{{*/
745 // ---------------------------------------------------------------------
746 /* This takes the current input buffer from the Server FD and writes it
748 bool HttpMethod::Flush(ServerState
*Srv
)
752 SetNonBlock(File
->Fd(),false);
753 if (Srv
->In
.WriteSpace() == false)
756 while (Srv
->In
.WriteSpace() == true)
758 if (Srv
->In
.Write(File
->Fd()) == false)
759 return _error
->Errno("write","Error writing to file");
760 if (Srv
->In
.IsLimit() == true)
764 if (Srv
->In
.IsLimit() == true || Srv
->Encoding
== ServerState::Closes
)
770 // HttpMethod::ServerDie - The server has closed the connection. /*{{{*/
771 // ---------------------------------------------------------------------
773 bool HttpMethod::ServerDie(ServerState
*Srv
)
775 // Dump the buffer to the file
776 if (Srv
->State
== ServerState::Data
)
778 SetNonBlock(File
->Fd(),false);
779 while (Srv
->In
.WriteSpace() == true)
781 if (Srv
->In
.Write(File
->Fd()) == false)
782 return _error
->Errno("write","Error writing to the file");
785 if (Srv
->In
.IsLimit() == true)
790 // See if this is because the server finished the data stream
791 if (Srv
->In
.IsLimit() == false && Srv
->State
!= ServerState::Header
&&
792 Srv
->Encoding
!= ServerState::Closes
)
795 return _error
->Error("Error reading from server Remote end closed connection");
796 return _error
->Errno("read","Error reading from server");
802 // Nothing left in the buffer
803 if (Srv
->In
.WriteSpace() == false)
806 // We may have got multiple responses back in one packet..
814 // HttpMethod::DealWithHeaders - Handle the retrieved header data /*{{{*/
815 // ---------------------------------------------------------------------
816 /* We look at the header data we got back from the server and decide what
820 3 - Unrecoverable error
821 4 - Error with error content page
822 5 - Unrecoverable non-server error (close the connection) */
823 int HttpMethod::DealWithHeaders(FetchResult
&Res
,ServerState
*Srv
)
826 if (Srv
->Result
== 304)
828 unlink(Queue
->DestFile
.c_str());
830 Res
.LastModified
= Queue
->LastModified
;
834 /* We have a reply we don't handle. This should indicate a permanent server
836 if (Srv
->Result
< 200 || Srv
->Result
>= 300)
838 _error
->Error("%u %s",Srv
->Result
,Srv
->Code
);
839 if (Srv
->HaveContent
== true)
844 // This is some sort of 2xx 'data follows' reply
845 Res
.LastModified
= Srv
->Date
;
846 Res
.Size
= Srv
->Size
;
850 File
= new FileFd(Queue
->DestFile
,FileFd::WriteAny
);
851 if (_error
->PendingError() == true)
854 FailFile
= Queue
->DestFile
;
856 FailTime
= Srv
->Date
;
858 // Set the expected size
859 if (Srv
->StartPos
>= 0)
861 Res
.ResumePoint
= Srv
->StartPos
;
862 ftruncate(File
->Fd(),Srv
->StartPos
);
865 // Set the start point
866 lseek(File
->Fd(),0,SEEK_END
);
869 Srv
->In
.MD5
= new MD5Summation
;
871 // Fill the MD5 Hash if the file is non-empty (resume)
872 if (Srv
->StartPos
> 0)
874 lseek(File
->Fd(),0,SEEK_SET
);
875 if (Srv
->In
.MD5
->AddFD(File
->Fd(),Srv
->StartPos
) == false)
877 _error
->Errno("read","Problem hashing file");
880 lseek(File
->Fd(),0,SEEK_END
);
883 SetNonBlock(File
->Fd(),true);
887 // HttpMethod::SigTerm - Handle a fatal signal /*{{{*/
888 // ---------------------------------------------------------------------
889 /* This closes and timestamps the open file. This is necessary to get
890 resume behavior on user abort */
891 void HttpMethod::SigTerm(int)
900 UBuf
.actime
= FailTime
;
901 UBuf
.modtime
= FailTime
;
902 utime(FailFile
.c_str(),&UBuf
);
907 // HttpMethod::Fetch - Fetch an item /*{{{*/
908 // ---------------------------------------------------------------------
909 /* This adds an item to the pipeline. We keep the pipeline at a fixed
911 bool HttpMethod::Fetch(FetchItem
*)
916 // Queue the requests
919 for (FetchItem
*I
= Queue
; I
!= 0 && Depth
< (signed)PipelineDepth
; I
= I
->Next
, Depth
++)
921 // Make sure we stick with the same server
922 if (Server
->Comp(I
->Uri
) == false)
931 SendReq(I
,Server
->Out
);
939 // HttpMethod::Loop - Main loop /*{{{*/
940 // ---------------------------------------------------------------------
942 int HttpMethod::Loop()
944 signal(SIGTERM
,SigTerm
);
945 signal(SIGINT
,SigTerm
);
952 if (FailCounter
>= 2)
954 Fail("Massive Server Brain Damage");
958 // We have no commands, wait for some to arrive
961 if (WaitFd(STDIN_FILENO
) == false)
972 // Connect to the server
973 if (Server
== 0 || Server
->Comp(Queue
->Uri
) == false)
976 Server
= new ServerState(Queue
->Uri
,this);
979 // Connect to the host
980 if (Server
->Open() == false)
986 // Fill the pipeline.
989 // Fetch the next URL header data from the server.
990 switch (Server
->RunHeaders())
995 // The header data is bad
998 _error
->Error("Bad header Data");
1003 // The server closed a connection during the header get..
1008 _error
->DumpErrors();
1014 // Decide what to do.
1016 Res
.Filename
= Queue
->DestFile
;
1017 switch (DealWithHeaders(Res
,Server
))
1019 // Ok, the file is Open
1025 bool Result
= Server
->RunData();
1027 // Close the file, destroy the FD object and timestamp it
1033 struct utimbuf UBuf
;
1035 UBuf
.actime
= Server
->Date
;
1036 UBuf
.modtime
= Server
->Date
;
1037 utime(Queue
->DestFile
.c_str(),&UBuf
);
1039 // Send status to APT
1042 Res
.MD5Sum
= Server
->In
.MD5
->Result();
1058 // Hard server error, not found or something
1065 // Hard internal error, kill the connection and fail
1073 // We need to flush the data, the header is like a 404 w/ error text
1078 // Send the content to /dev/null
1079 File
= new FileFd("/dev/null",FileFd::WriteExists
);
1087 Fail("Internal error");