]>
git.saurik.com Git - apt.git/blob - methods/http.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: http.cc,v 1.44 2000/01/30 08:16:43 jgg Exp $
4 /* ######################################################################
6 HTTP Acquire Method - This is the HTTP acquire method for APT.
8 It uses HTTP/1.1 and many of the fancy options there-in, such as
9 pipelining, range, if-range and so on. It accepts on the command line
10 a list of url destination pairs and writes to stdout the status of the
11 operation as defined in the APT method spec.
13 It is based on a doubly buffered select loop. All the requests are
14 fed into a single output buffer that is constantly fed out the
15 socket. This provides ideal pipelining as in many cases all of the
16 requests will fit into a single packet. The input socket is buffered
17 the same way and fed into the fd for the file.
19 This double buffering provides fairly substantial transfer rates,
20 compared to wget the http method is about 4% faster. Most importantly,
21 when HTTP is compared with FTP as a protocol the speed difference is
22 huge. In tests over the internet from two sites to llug (via ATM) this
23 program got 230k/s sustained http transfer rates. FTP on the other
24 hand topped out at 170k/s. That combined with the time to setup the
25 FTP connection makes HTTP a vastly superior protocol.
27 ##################################################################### */
29 // Include Files /*{{{*/
30 #include <apt-pkg/fileutl.h>
31 #include <apt-pkg/acquire-method.h>
32 #include <apt-pkg/error.h>
33 #include <apt-pkg/md5.h>
47 #include "rfc2553emu.h"
52 string
HttpMethod::FailFile
;
53 int HttpMethod::FailFd
= -1;
54 time_t HttpMethod::FailTime
= 0;
55 unsigned long PipelineDepth
= 10;
56 unsigned long TimeOut
= 120;
59 // CircleBuf::CircleBuf - Circular input buffer /*{{{*/
60 // ---------------------------------------------------------------------
62 CircleBuf::CircleBuf(unsigned long Size
) : Size(Size
), MD5(0)
64 Buf
= new unsigned char[Size
];
68 // CircleBuf::Reset - Reset to the default state /*{{{*/
69 // ---------------------------------------------------------------------
71 void CircleBuf::Reset()
76 MaxGet
= (unsigned int)-1;
81 MD5
= new MD5Summation
;
85 // CircleBuf::Read - Read from a FD into the circular buffer /*{{{*/
86 // ---------------------------------------------------------------------
87 /* This fills up the buffer with as much data as is in the FD, assuming it
89 bool CircleBuf::Read(int Fd
)
93 // Woops, buffer is full
94 if (InP
- OutP
== Size
)
97 // Write the buffer segment
99 Res
= read(Fd
,Buf
+ (InP%Size
),LeftRead());
111 gettimeofday(&Start
,0);
116 // CircleBuf::Read - Put the string into the buffer /*{{{*/
117 // ---------------------------------------------------------------------
118 /* This will hold the string in and fill the buffer with it as it empties */
119 bool CircleBuf::Read(string Data
)
126 // CircleBuf::FillOut - Fill the buffer from the output queue /*{{{*/
127 // ---------------------------------------------------------------------
129 void CircleBuf::FillOut()
131 if (OutQueue
.empty() == true)
135 // Woops, buffer is full
136 if (InP
- OutP
== Size
)
139 // Write the buffer segment
140 unsigned long Sz
= LeftRead();
141 if (OutQueue
.length() - StrPos
< Sz
)
142 Sz
= OutQueue
.length() - StrPos
;
143 memcpy(Buf
+ (InP%Size
),OutQueue
.begin() + StrPos
,Sz
);
148 if (OutQueue
.length() == StrPos
)
157 // CircleBuf::Write - Write from the buffer into a FD /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This empties the buffer into the FD. */
160 bool CircleBuf::Write(int Fd
)
166 // Woops, buffer is empty
173 // Write the buffer segment
175 Res
= write(Fd
,Buf
+ (OutP%Size
),LeftWrite());
188 MD5
->Add(Buf
+ (OutP%Size
),Res
);
194 // CircleBuf::WriteTillEl - Write from the buffer to a string /*{{{*/
195 // ---------------------------------------------------------------------
196 /* This copies till the first empty line */
197 bool CircleBuf::WriteTillEl(string
&Data
,bool Single
)
199 // We cheat and assume it is unneeded to have more than one buffer load
200 for (unsigned long I
= OutP
; I
< InP
; I
++)
202 if (Buf
[I%Size
] != '\n')
204 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
208 if (Buf
[I%Size
] != '\n')
210 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
219 unsigned long Sz
= LeftWrite();
222 if (I
- OutP
< LeftWrite())
224 Data
+= string((char *)(Buf
+ (OutP%Size
)),Sz
);
232 // CircleBuf::Stats - Print out stats information /*{{{*/
233 // ---------------------------------------------------------------------
235 void CircleBuf::Stats()
241 gettimeofday(&Stop
,0);
242 /* float Diff = Stop.tv_sec - Start.tv_sec +
243 (float)(Stop.tv_usec - Start.tv_usec)/1000000;
244 clog << "Got " << InP << " in " << Diff << " at " << InP/Diff << endl;*/
248 // ServerState::ServerState - Constructor /*{{{*/
249 // ---------------------------------------------------------------------
251 ServerState::ServerState(URI Srv
,HttpMethod
*Owner
) : Owner(Owner
),
252 In(64*1024), Out(4*1024),
258 // ServerState::Open - Open a connection to the server /*{{{*/
259 // ---------------------------------------------------------------------
260 /* This opens a connection to the server. */
263 struct addrinfo
*LastHostAddr
= 0;
264 bool ServerState::Open()
266 // Use the already open connection if possible.
274 // Determine the proxy setting
275 if (getenv("http_proxy") == 0)
277 string DefProxy
= _config
->Find("Acquire::http::Proxy");
278 string SpecificProxy
= _config
->Find("Acquire::http::Proxy::" + ServerName
.Host
);
279 if (SpecificProxy
.empty() == false)
281 if (SpecificProxy
== "DIRECT")
284 Proxy
= SpecificProxy
;
290 Proxy
= getenv("http_proxy");
292 // Parse no_proxy, a comma-separated list of hosts
293 if (getenv("no_proxy") != 0)
295 const char *Start
= getenv("no_proxy");
296 for (const char *Cur
= Start
; true ; Cur
++)
298 if (*Cur
!= ',' && *Cur
!= 0)
300 if (stringcasecmp(ServerName
.Host
.begin(),ServerName
.Host
.end(),
313 // Determine what host and port to use based on the proxy settings
316 if (Proxy
.empty() == true)
318 if (ServerName
.Port
!= 0)
319 Port
= ServerName
.Port
;
320 Host
= ServerName
.Host
;
329 // Connect to the remote server
330 if (Connect(Host
,Port
,"http",80,ServerFd
,TimeOut
,Owner
) == false)
336 // ServerState::Close - Close a connection to the server /*{{{*/
337 // ---------------------------------------------------------------------
339 bool ServerState::Close()
346 // ServerState::RunHeaders - Get the headers before the data /*{{{*/
347 // ---------------------------------------------------------------------
348 /* Returns 0 if things are OK, 1 if an IO error occurred and 2 if a header
349 parse error occurred */
350 int ServerState::RunHeaders()
354 Owner
->Status("Waiting for file");
368 if (In
.WriteTillEl(Data
) == false)
374 for (string::const_iterator I
= Data
.begin(); I
< Data
.end(); I
++)
376 string::const_iterator J
= I
;
377 for (; J
!= Data
.end() && *J
!= '\n' && *J
!= '\r';J
++);
378 if (HeaderLine(string(I
,J
-I
)) == false)
384 while (Owner
->Go(false,this) == true);
389 // ServerState::RunData - Transfer the data from the socket /*{{{*/
390 // ---------------------------------------------------------------------
392 bool ServerState::RunData()
396 // Chunked transfer encoding is fun..
397 if (Encoding
== Chunked
)
401 // Grab the block size
407 if (In
.WriteTillEl(Data
,true) == true)
410 while ((Last
= Owner
->Go(false,this)) == true);
415 // See if we are done
416 unsigned long Len
= strtol(Data
.c_str(),0,16);
421 // We have to remove the entity trailer
425 if (In
.WriteTillEl(Data
,true) == true && Data
.length() <= 2)
428 while ((Last
= Owner
->Go(false,this)) == true);
431 return !_error
->PendingError();
434 // Transfer the block
436 while (Owner
->Go(true,this) == true)
437 if (In
.IsLimit() == true)
441 if (In
.IsLimit() == false)
444 // The server sends an extra new line before the next block specifier..
449 if (In
.WriteTillEl(Data
,true) == true)
452 while ((Last
= Owner
->Go(false,this)) == true);
459 /* Closes encoding is used when the server did not specify a size, the
460 loss of the connection means we are done */
461 if (Encoding
== Closes
)
464 In
.Limit(Size
- StartPos
);
466 // Just transfer the whole block.
469 if (In
.IsLimit() == false)
473 return !_error
->PendingError();
475 while (Owner
->Go(true,this) == true);
478 return Owner
->Flush(this) && !_error
->PendingError();
481 // ServerState::HeaderLine - Process a header line /*{{{*/
482 // ---------------------------------------------------------------------
484 bool ServerState::HeaderLine(string Line
)
486 if (Line
.empty() == true)
489 // The http server might be trying to do something evil.
490 if (Line
.length() >= MAXLEN
)
491 return _error
->Error("Got a single header line over %u chars",MAXLEN
);
493 string::size_type Pos
= Line
.find(' ');
494 if (Pos
== string::npos
|| Pos
+1 > Line
.length())
496 // Blah, some servers use "connection:closes", evil.
497 Pos
= Line
.find(':');
498 if (Pos
== string::npos
|| Pos
+ 2 > Line
.length())
499 return _error
->Error("Bad header line");
503 // Parse off any trailing spaces between the : and the next word.
504 string::size_type Pos2
= Pos
;
505 while (Pos2
< Line
.length() && isspace(Line
[Pos2
]) != 0)
508 string Tag
= string(Line
,0,Pos
);
509 string Val
= string(Line
,Pos2
);
511 if (stringcasecmp(Tag
.begin(),Tag
.begin()+4,"HTTP") == 0)
513 // Evil servers return no version
516 if (sscanf(Line
.c_str(),"HTTP/%u.%u %u %[^\n]",&Major
,&Minor
,
518 return _error
->Error("The http server sent an invalid reply header");
524 if (sscanf(Line
.c_str(),"HTTP %u %[^\n]",&Result
,Code
) != 2)
525 return _error
->Error("The http server sent an invalid reply header");
531 if (stringcasecmp(Tag
,"Content-Length:") == 0)
533 if (Encoding
== Closes
)
537 // The length is already set from the Content-Range header
541 if (sscanf(Val
.c_str(),"%lu",&Size
) != 1)
542 return _error
->Error("The http server sent an invalid Content-Length header");
546 if (stringcasecmp(Tag
,"Content-Type:") == 0)
552 if (stringcasecmp(Tag
,"Content-Range:") == 0)
556 if (sscanf(Val
.c_str(),"bytes %lu-%*u/%lu",&StartPos
,&Size
) != 2)
557 return _error
->Error("The http server sent an invalid Content-Range header");
558 if ((unsigned)StartPos
> Size
)
559 return _error
->Error("This http server has broken range support");
563 if (stringcasecmp(Tag
,"Transfer-Encoding:") == 0)
566 if (stringcasecmp(Val
,"chunked") == 0)
572 if (stringcasecmp(Tag
,"Last-Modified:") == 0)
574 if (StrToTime(Val
,Date
) == false)
575 return _error
->Error("Unknown date format");
583 // HttpMethod::SendReq - Send the HTTP request /*{{{*/
584 // ---------------------------------------------------------------------
585 /* This places the http request in the outbound buffer */
586 void HttpMethod::SendReq(FetchItem
*Itm
,CircleBuf
&Out
)
590 // The HTTP server expects a hostname with a trailing :port
592 string ProperHost
= Uri
.Host
;
595 sprintf(Buf
,":%u",Uri
.Port
);
600 if (Itm
->Uri
.length() >= sizeof(Buf
))
603 /* Build the request. We include a keep-alive header only for non-proxy
604 requests. This is to tweak old http/1.0 servers that do support keep-alive
605 but not HTTP/1.1 automatic keep-alive. Doing this with a proxy server
606 will glitch HTTP/1.0 proxies because they do not filter it out and
607 pass it on, HTTP/1.1 says the connection should default to keep alive
608 and we expect the proxy to do this */
609 if (Proxy
.empty() == true)
610 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\nConnection: keep-alive\r\n",
611 QuoteString(Uri
.Path
,"~").c_str(),ProperHost
.c_str());
614 /* Generate a cache control header if necessary. We place a max
615 cache age on index files, optionally set a no-cache directive
616 and a no-store directive for archives. */
617 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\n",
618 Itm
->Uri
.c_str(),ProperHost
.c_str());
619 if (_config
->FindB("Acquire::http::No-Cache",false) == true)
620 strcat(Buf
,"Cache-Control: no-cache\r\nPragma: no-cache\r\n");
623 if (Itm
->IndexFile
== true)
624 sprintf(Buf
+strlen(Buf
),"Cache-Control: max-age=%u\r\n",
625 _config
->FindI("Acquire::http::Max-Age",60*60*24));
628 if (_config
->FindB("Acquire::http::No-Store",false) == true)
629 strcat(Buf
,"Cache-Control: no-store\r\n");
636 // Check for a partial file
638 if (stat(Itm
->DestFile
.c_str(),&SBuf
) >= 0 && SBuf
.st_size
> 0)
640 // In this case we send an if-range query with a range header
641 sprintf(Buf
,"Range: bytes=%li-\r\nIf-Range: %s\r\n",(long)SBuf
.st_size
- 1,
642 TimeRFC1123(SBuf
.st_mtime
).c_str());
647 if (Itm
->LastModified
!= 0)
649 sprintf(Buf
,"If-Modified-Since: %s\r\n",TimeRFC1123(Itm
->LastModified
).c_str());
654 if (Proxy
.User
.empty() == false || Proxy
.Password
.empty() == false)
655 Req
+= string("Proxy-Authorization: Basic ") +
656 Base64Encode(Proxy
.User
+ ":" + Proxy
.Password
) + "\r\n";
658 Req
+= "User-Agent: Debian APT-HTTP/1.2\r\n\r\n";
666 // HttpMethod::Go - Run a single loop /*{{{*/
667 // ---------------------------------------------------------------------
668 /* This runs the select loop over the server FDs, Output file FDs and
670 bool HttpMethod::Go(bool ToFile
,ServerState
*Srv
)
672 // Server has closed the connection
673 if (Srv
->ServerFd
== -1 && (Srv
->In
.WriteSpace() == false ||
682 if (Srv
->Out
.WriteSpace() == true && Srv
->ServerFd
!= -1)
683 FD_SET(Srv
->ServerFd
,&wfds
);
684 if (Srv
->In
.ReadSpace() == true && Srv
->ServerFd
!= -1)
685 FD_SET(Srv
->ServerFd
,&rfds
);
692 if (Srv
->In
.WriteSpace() == true && ToFile
== true && FileFD
!= -1)
693 FD_SET(FileFD
,&wfds
);
696 FD_SET(STDIN_FILENO
,&rfds
);
698 // Figure out the max fd
700 if (MaxFd
< Srv
->ServerFd
)
701 MaxFd
= Srv
->ServerFd
;
708 if ((Res
= select(MaxFd
+1,&rfds
,&wfds
,0,&tv
)) < 0)
709 return _error
->Errno("select","Select failed");
713 _error
->Error("Connection timed out");
714 return ServerDie(Srv
);
718 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&rfds
))
721 if (Srv
->In
.Read(Srv
->ServerFd
) == false)
722 return ServerDie(Srv
);
725 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&wfds
))
728 if (Srv
->Out
.Write(Srv
->ServerFd
) == false)
729 return ServerDie(Srv
);
732 // Send data to the file
733 if (FileFD
!= -1 && FD_ISSET(FileFD
,&wfds
))
735 if (Srv
->In
.Write(FileFD
) == false)
736 return _error
->Errno("write","Error writing to output file");
739 // Handle commands from APT
740 if (FD_ISSET(STDIN_FILENO
,&rfds
))
749 // HttpMethod::Flush - Dump the buffer into the file /*{{{*/
750 // ---------------------------------------------------------------------
751 /* This takes the current input buffer from the Server FD and writes it
753 bool HttpMethod::Flush(ServerState
*Srv
)
757 SetNonBlock(File
->Fd(),false);
758 if (Srv
->In
.WriteSpace() == false)
761 while (Srv
->In
.WriteSpace() == true)
763 if (Srv
->In
.Write(File
->Fd()) == false)
764 return _error
->Errno("write","Error writing to file");
765 if (Srv
->In
.IsLimit() == true)
769 if (Srv
->In
.IsLimit() == true || Srv
->Encoding
== ServerState::Closes
)
775 // HttpMethod::ServerDie - The server has closed the connection. /*{{{*/
776 // ---------------------------------------------------------------------
778 bool HttpMethod::ServerDie(ServerState
*Srv
)
780 unsigned int LErrno
= errno
;
782 // Dump the buffer to the file
783 if (Srv
->State
== ServerState::Data
)
785 SetNonBlock(File
->Fd(),false);
786 while (Srv
->In
.WriteSpace() == true)
788 if (Srv
->In
.Write(File
->Fd()) == false)
789 return _error
->Errno("write","Error writing to the file");
792 if (Srv
->In
.IsLimit() == true)
797 // See if this is because the server finished the data stream
798 if (Srv
->In
.IsLimit() == false && Srv
->State
!= ServerState::Header
&&
799 Srv
->Encoding
!= ServerState::Closes
)
803 return _error
->Error("Error reading from server Remote end closed connection");
805 return _error
->Errno("read","Error reading from server");
811 // Nothing left in the buffer
812 if (Srv
->In
.WriteSpace() == false)
815 // We may have got multiple responses back in one packet..
823 // HttpMethod::DealWithHeaders - Handle the retrieved header data /*{{{*/
824 // ---------------------------------------------------------------------
825 /* We look at the header data we got back from the server and decide what
829 3 - Unrecoverable error
830 4 - Error with error content page
831 5 - Unrecoverable non-server error (close the connection) */
832 int HttpMethod::DealWithHeaders(FetchResult
&Res
,ServerState
*Srv
)
835 if (Srv
->Result
== 304)
837 unlink(Queue
->DestFile
.c_str());
839 Res
.LastModified
= Queue
->LastModified
;
843 /* We have a reply we don't handle. This should indicate a perm server
845 if (Srv
->Result
< 200 || Srv
->Result
>= 300)
847 _error
->Error("%u %s",Srv
->Result
,Srv
->Code
);
848 if (Srv
->HaveContent
== true)
853 // This is some sort of 2xx 'data follows' reply
854 Res
.LastModified
= Srv
->Date
;
855 Res
.Size
= Srv
->Size
;
859 File
= new FileFd(Queue
->DestFile
,FileFd::WriteAny
);
860 if (_error
->PendingError() == true)
863 FailFile
= Queue
->DestFile
;
864 FailFile
.c_str(); // Make sure we dont do a malloc in the signal handler
866 FailTime
= Srv
->Date
;
868 // Set the expected size
869 if (Srv
->StartPos
>= 0)
871 Res
.ResumePoint
= Srv
->StartPos
;
872 ftruncate(File
->Fd(),Srv
->StartPos
);
875 // Set the start point
876 lseek(File
->Fd(),0,SEEK_END
);
879 Srv
->In
.MD5
= new MD5Summation
;
881 // Fill the MD5 Hash if the file is non-empty (resume)
882 if (Srv
->StartPos
> 0)
884 lseek(File
->Fd(),0,SEEK_SET
);
885 if (Srv
->In
.MD5
->AddFD(File
->Fd(),Srv
->StartPos
) == false)
887 _error
->Errno("read","Problem hashing file");
890 lseek(File
->Fd(),0,SEEK_END
);
893 SetNonBlock(File
->Fd(),true);
897 // HttpMethod::SigTerm - Handle a fatal signal /*{{{*/
898 // ---------------------------------------------------------------------
899 /* This closes and timestamps the open file. This is necessary to get
900 resume behavior on user abort */
901 void HttpMethod::SigTerm(int)
909 UBuf
.actime
= FailTime
;
910 UBuf
.modtime
= FailTime
;
911 utime(FailFile
.c_str(),&UBuf
);
916 // HttpMethod::Fetch - Fetch an item /*{{{*/
917 // ---------------------------------------------------------------------
918 /* This adds an item to the pipeline. We keep the pipeline at a fixed
920 bool HttpMethod::Fetch(FetchItem
*)
925 // Queue the requests
928 for (FetchItem
*I
= Queue
; I
!= 0 && Depth
< (signed)PipelineDepth
;
929 I
= I
->Next
, Depth
++)
931 // If pipelining is disabled, we only queue 1 request
932 if (Server
->Pipeline
== false && Depth
>= 0)
935 // Make sure we stick with the same server
936 if (Server
->Comp(I
->Uri
) == false)
943 SendReq(I
,Server
->Out
);
951 // HttpMethod::Configuration - Handle a configuration message /*{{{*/
952 // ---------------------------------------------------------------------
953 /* We stash the desired pipeline depth */
954 bool HttpMethod::Configuration(string Message
)
956 if (pkgAcqMethod::Configuration(Message
) == false)
959 TimeOut
= _config
->FindI("Acquire::http::Timeout",TimeOut
);
960 PipelineDepth
= _config
->FindI("Acquire::http::Pipeline-Depth",
962 Debug
= _config
->FindB("Debug::Acquire::http",false);
967 // HttpMethod::Loop - Main loop /*{{{*/
968 // ---------------------------------------------------------------------
970 int HttpMethod::Loop()
972 signal(SIGTERM
,SigTerm
);
973 signal(SIGINT
,SigTerm
);
980 // We have no commands, wait for some to arrive
983 if (WaitFd(STDIN_FILENO
) == false)
987 /* Run messages, we can accept 0 (no message) if we didn't
988 do a WaitFd above.. Otherwise the FD is closed. */
989 int Result
= Run(true);
990 if (Result
!= -1 && (Result
!= 0 || Queue
== 0))
996 // Connect to the server
997 if (Server
== 0 || Server
->Comp(Queue
->Uri
) == false)
1000 Server
= new ServerState(Queue
->Uri
,this);
1003 // Reset the pipeline
1004 if (Server
->ServerFd
== -1)
1007 // Connect to the host
1008 if (Server
->Open() == false)
1016 // Fill the pipeline.
1019 // Fetch the next URL header data from the server.
1020 switch (Server
->RunHeaders())
1025 // The header data is bad
1028 _error
->Error("Bad header Data");
1033 // The server closed a connection during the header get..
1040 Server
->Pipeline
= false;
1042 if (FailCounter
>= 2)
1044 Fail("Connection failed",true);
1052 // Decide what to do.
1054 Res
.Filename
= Queue
->DestFile
;
1055 switch (DealWithHeaders(Res
,Server
))
1057 // Ok, the file is Open
1063 bool Result
= Server
->RunData();
1065 // Close the file, destroy the FD object and timestamp it
1071 struct utimbuf UBuf
;
1073 UBuf
.actime
= Server
->Date
;
1074 UBuf
.modtime
= Server
->Date
;
1075 utime(Queue
->DestFile
.c_str(),&UBuf
);
1077 // Send status to APT
1080 Res
.MD5Sum
= Server
->In
.MD5
->Result();
1096 // Hard server error, not found or something
1103 // Hard internal error, kill the connection and fail
1111 // We need to flush the data, the header is like a 404 w/ error text
1116 // Send the content to /dev/null
1117 File
= new FileFd("/dev/null",FileFd::WriteExists
);
1125 Fail("Internal error");