]>
git.saurik.com Git - apt.git/blob - methods/http.cc
1 // -*- mode: cpp; mode: fold -*-
3 // $Id: http.cc,v 1.43 1999/12/10 23:40:29 jgg Exp $
4 /* ######################################################################
6 HTTP Acquire Method - This is the HTTP acquire method for APT.
8 It uses HTTP/1.1 and many of the fancy options there-in, such as
9 pipelining, range, if-range and so on. It accepts on the command line
10 a list of url destination pairs and writes to stdout the status of the
11 operation as defined in the APT method spec.
13 It is based on a doubly buffered select loop. All the requests are
14 fed into a single output buffer that is constantly fed out the
15 socket. This provides ideal pipelining as in many cases all of the
16 requests will fit into a single packet. The input socket is buffered
17 the same way and fed into the fd for the file.
19 This double buffering provides fairly substantial transfer rates;
20 compared to wget, the http method is about 4% faster. Most importantly,
21 when HTTP is compared with FTP as a protocol the speed difference is
22 huge. In tests over the internet from two sites to llug (via ATM) this
23 program got 230k/s sustained http transfer rates. FTP on the other
24 hand topped out at 170k/s. That combined with the time to set up the
25 FTP connection makes HTTP a vastly superior protocol.
27 ##################################################################### */
29 // Include Files /*{{{*/
30 #include <apt-pkg/fileutl.h>
31 #include <apt-pkg/acquire-method.h>
32 #include <apt-pkg/error.h>
33 #include <apt-pkg/md5.h>
47 #include "rfc2553emu.h"
52 string
HttpMethod::FailFile
;
53 int HttpMethod::FailFd
= -1;
54 time_t HttpMethod::FailTime
= 0;
55 unsigned long PipelineDepth
= 10;
56 unsigned long TimeOut
= 120;
59 // CircleBuf::CircleBuf - Circular input buffer /*{{{*/
60 // ---------------------------------------------------------------------
62 CircleBuf::CircleBuf(unsigned long Size
) : Size(Size
), MD5(0)
64 Buf
= new unsigned char[Size
];
68 // CircleBuf::Reset - Reset to the default state /*{{{*/
69 // ---------------------------------------------------------------------
71 void CircleBuf::Reset()
76 MaxGet
= (unsigned int)-1;
81 MD5
= new MD5Summation
;
85 // CircleBuf::Read - Read from a FD into the circular buffer /*{{{*/
86 // ---------------------------------------------------------------------
87 /* This fills up the buffer with as much data as is in the FD, assuming it
89 bool CircleBuf::Read(int Fd
)
93 // Woops, buffer is full
94 if (InP
- OutP
== Size
)
97 // Write the buffer segment
99 Res
= read(Fd
,Buf
+ (InP%Size
),LeftRead());
111 gettimeofday(&Start
,0);
116 // CircleBuf::Read - Put the string into the buffer /*{{{*/
117 // ---------------------------------------------------------------------
118 /* This will hold the string in and fill the buffer with it as it empties */
119 bool CircleBuf::Read(string Data
)
126 // CircleBuf::FillOut - Fill the buffer from the output queue /*{{{*/
127 // ---------------------------------------------------------------------
129 void CircleBuf::FillOut()
131 if (OutQueue
.empty() == true)
135 // Woops, buffer is full
136 if (InP
- OutP
== Size
)
139 // Write the buffer segment
140 unsigned long Sz
= LeftRead();
141 if (OutQueue
.length() - StrPos
< Sz
)
142 Sz
= OutQueue
.length() - StrPos
;
143 memcpy(Buf
+ (InP%Size
),OutQueue
.begin() + StrPos
,Sz
);
148 if (OutQueue
.length() == StrPos
)
157 // CircleBuf::Write - Write from the buffer into a FD /*{{{*/
158 // ---------------------------------------------------------------------
159 /* This empties the buffer into the FD. */
160 bool CircleBuf::Write(int Fd
)
166 // Woops, buffer is empty
173 // Write the buffer segment
175 Res
= write(Fd
,Buf
+ (OutP%Size
),LeftWrite());
188 MD5
->Add(Buf
+ (OutP%Size
),Res
);
194 // CircleBuf::WriteTillEl - Write from the buffer to a string /*{{{*/
195 // ---------------------------------------------------------------------
196 /* This copies till the first empty line */
197 bool CircleBuf::WriteTillEl(string
&Data
,bool Single
)
199 // We cheat and assume it is unneeded to have more than one buffer load
200 for (unsigned long I
= OutP
; I
< InP
; I
++)
202 if (Buf
[I%Size
] != '\n')
204 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
208 if (Buf
[I%Size
] != '\n')
210 for (I
++; I
< InP
&& Buf
[I%Size
] == '\r'; I
++);
219 unsigned long Sz
= LeftWrite();
222 if (I
- OutP
< LeftWrite())
224 Data
+= string((char *)(Buf
+ (OutP%Size
)),Sz
);
232 // CircleBuf::Stats - Print out stats information /*{{{*/
233 // ---------------------------------------------------------------------
235 void CircleBuf::Stats()
241 gettimeofday(&Stop
,0);
242 /* float Diff = Stop.tv_sec - Start.tv_sec +
243 (float)(Stop.tv_usec - Start.tv_usec)/1000000;
244 clog << "Got " << InP << " in " << Diff << " at " << InP/Diff << endl;*/
248 // ServerState::ServerState - Constructor /*{{{*/
249 // ---------------------------------------------------------------------
251 ServerState::ServerState(URI Srv
,HttpMethod
*Owner
) : Owner(Owner
),
252 In(64*1024), Out(4*1024),
258 // ServerState::Open - Open a connection to the server /*{{{*/
259 // ---------------------------------------------------------------------
260 /* This opens a connection to the server. */
263 struct addrinfo
*LastHostAddr
= 0;
264 bool ServerState::Open()
266 // Use the already open connection if possible.
274 // Determine the proxy setting
275 if (getenv("http_proxy") == 0)
277 string DefProxy
= _config
->Find("Acquire::http::Proxy");
278 string SpecificProxy
= _config
->Find("Acquire::http::Proxy::" + ServerName
.Host
);
279 if (SpecificProxy
.empty() == false)
281 if (SpecificProxy
== "DIRECT")
284 Proxy
= SpecificProxy
;
290 Proxy
= getenv("http_proxy");
292 // Parse no_proxy, a comma-separated list of hosts
293 if (getenv("no_proxy") != 0)
295 const char *Start
= getenv("no_proxy");
296 for (const char *Cur
= Start
; true ; Cur
++)
298 if (*Cur
!= ',' && *Cur
!= 0)
300 if (stringcasecmp(ServerName
.Host
.begin(),ServerName
.Host
.end(),
313 // Determine what host and port to use based on the proxy settings
316 if (Proxy
.empty() == true)
318 if (ServerName
.Port
!= 0)
319 Port
= ServerName
.Port
;
320 Host
= ServerName
.Host
;
329 // Connect to the remote server
330 if (Connect(Host
,Port
,"http",80,ServerFd
,TimeOut
,Owner
) == false)
336 // ServerState::Close - Close a connection to the server /*{{{*/
337 // ---------------------------------------------------------------------
339 bool ServerState::Close()
346 // ServerState::RunHeaders - Get the headers before the data /*{{{*/
347 // ---------------------------------------------------------------------
348 /* Returns 0 if things are OK, 1 if an IO error occurred and 2 if a header
349 parse error occurred */
350 int ServerState::RunHeaders()
354 Owner
->Status("Waiting for file");
368 if (In
.WriteTillEl(Data
) == false)
371 for (string::const_iterator I
= Data
.begin(); I
< Data
.end(); I
++)
373 string::const_iterator J
= I
;
374 for (; J
!= Data
.end() && *J
!= '\n' && *J
!= '\r';J
++);
375 if (HeaderLine(string(I
,J
-I
)) == false)
381 while (Owner
->Go(false,this) == true);
386 // ServerState::RunData - Transfer the data from the socket /*{{{*/
387 // ---------------------------------------------------------------------
389 bool ServerState::RunData()
393 // Chunked transfer encoding is fun..
394 if (Encoding
== Chunked
)
398 // Grab the block size
404 if (In
.WriteTillEl(Data
,true) == true)
407 while ((Last
= Owner
->Go(false,this)) == true);
412 // See if we are done
413 unsigned long Len
= strtol(Data
.c_str(),0,16);
418 // We have to remove the entity trailer
422 if (In
.WriteTillEl(Data
,true) == true && Data
.length() <= 2)
425 while ((Last
= Owner
->Go(false,this)) == true);
428 return !_error
->PendingError();
431 // Transfer the block
433 while (Owner
->Go(true,this) == true)
434 if (In
.IsLimit() == true)
438 if (In
.IsLimit() == false)
441 // The server sends an extra new line before the next block specifier..
446 if (In
.WriteTillEl(Data
,true) == true)
449 while ((Last
= Owner
->Go(false,this)) == true);
456 /* Closes encoding is used when the server did not specify a size; the
457 loss of the connection means we are done */
458 if (Encoding
== Closes
)
461 In
.Limit(Size
- StartPos
);
463 // Just transfer the whole block.
466 if (In
.IsLimit() == false)
470 return !_error
->PendingError();
472 while (Owner
->Go(true,this) == true);
475 return Owner
->Flush(this) && !_error
->PendingError();
478 // ServerState::HeaderLine - Process a header line /*{{{*/
479 // ---------------------------------------------------------------------
481 bool ServerState::HeaderLine(string Line
)
483 if (Line
.empty() == true)
486 // The http server might be trying to do something evil.
487 if (Line
.length() >= MAXLEN
)
488 return _error
->Error("Got a single header line over %u chars",MAXLEN
);
490 string::size_type Pos
= Line
.find(' ');
491 if (Pos
== string::npos
|| Pos
+1 > Line
.length())
493 // Blah, some servers use "connection:closes", evil.
494 Pos
= Line
.find(':');
495 if (Pos
== string::npos
|| Pos
+ 2 > Line
.length())
496 return _error
->Error("Bad header line");
500 // Parse off any trailing spaces between the : and the next word.
501 string::size_type Pos2
= Pos
;
502 while (Pos2
< Line
.length() && isspace(Line
[Pos2
]) != 0)
505 string Tag
= string(Line
,0,Pos
);
506 string Val
= string(Line
,Pos2
);
508 if (stringcasecmp(Tag
.begin(),Tag
.begin()+4,"HTTP") == 0)
510 // Evil servers return no version
513 if (sscanf(Line
.c_str(),"HTTP/%u.%u %u %[^\n]",&Major
,&Minor
,
515 return _error
->Error("The http server sent an invalid reply header");
521 if (sscanf(Line
.c_str(),"HTTP %u %[^\n]",&Result
,Code
) != 2)
522 return _error
->Error("The http server sent an invalid reply header");
528 if (stringcasecmp(Tag
,"Content-Length:") == 0)
530 if (Encoding
== Closes
)
534 // The length is already set from the Content-Range header
538 if (sscanf(Val
.c_str(),"%lu",&Size
) != 1)
539 return _error
->Error("The http server sent an invalid Content-Length header");
543 if (stringcasecmp(Tag
,"Content-Type:") == 0)
549 if (stringcasecmp(Tag
,"Content-Range:") == 0)
553 if (sscanf(Val
.c_str(),"bytes %lu-%*u/%lu",&StartPos
,&Size
) != 2)
554 return _error
->Error("The http server sent an invalid Content-Range header");
555 if ((unsigned)StartPos
> Size
)
556 return _error
->Error("This http server has broken range support");
560 if (stringcasecmp(Tag
,"Transfer-Encoding:") == 0)
563 if (stringcasecmp(Val
,"chunked") == 0)
569 if (stringcasecmp(Tag
,"Last-Modified:") == 0)
571 if (StrToTime(Val
,Date
) == false)
572 return _error
->Error("Unknown date format");
580 // HttpMethod::SendReq - Send the HTTP request /*{{{*/
581 // ---------------------------------------------------------------------
582 /* This places the http request in the outbound buffer */
583 void HttpMethod::SendReq(FetchItem
*Itm
,CircleBuf
&Out
)
587 // The HTTP server expects a hostname with a trailing :port
589 string ProperHost
= Uri
.Host
;
592 sprintf(Buf
,":%u",Uri
.Port
);
597 if (Itm
->Uri
.length() >= sizeof(Buf
))
600 /* Build the request. We include a keep-alive header only for non-proxy
601 requests. This is to tweak old http/1.0 servers that do support keep-alive
602 but not HTTP/1.1 automatic keep-alive. Doing this with a proxy server
603 will glitch HTTP/1.0 proxies because they do not filter it out and
604 pass it on, HTTP/1.1 says the connection should default to keep alive
605 and we expect the proxy to do this */
606 if (Proxy
.empty() == true)
607 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\nConnection: keep-alive\r\n",
608 QuoteString(Uri
.Path
,"~").c_str(),ProperHost
.c_str());
611 /* Generate a cache control header if necessary. We place a max
612 cache age on index files, optionally set a no-cache directive
613 and a no-store directive for archives. */
614 sprintf(Buf
,"GET %s HTTP/1.1\r\nHost: %s\r\n",
615 Itm
->Uri
.c_str(),ProperHost
.c_str());
616 if (_config
->FindB("Acquire::http::No-Cache",false) == true)
617 strcat(Buf
,"Cache-Control: no-cache\r\nPragma: no-cache\r\n");
620 if (Itm
->IndexFile
== true)
621 sprintf(Buf
+strlen(Buf
),"Cache-Control: max-age=%u\r\n",
622 _config
->FindI("Acquire::http::Max-Age",60*60*24));
625 if (_config
->FindB("Acquire::http::No-Store",false) == true)
626 strcat(Buf
,"Cache-Control: no-store\r\n");
633 // Check for a partial file
635 if (stat(Itm
->DestFile
.c_str(),&SBuf
) >= 0 && SBuf
.st_size
> 0)
637 // In this case we send an if-range query with a range header
638 sprintf(Buf
,"Range: bytes=%li-\r\nIf-Range: %s\r\n",(long)SBuf
.st_size
- 1,
639 TimeRFC1123(SBuf
.st_mtime
).c_str());
644 if (Itm
->LastModified
!= 0)
646 sprintf(Buf
,"If-Modified-Since: %s\r\n",TimeRFC1123(Itm
->LastModified
).c_str());
651 if (Proxy
.User
.empty() == false || Proxy
.Password
.empty() == false)
652 Req
+= string("Proxy-Authorization: Basic ") +
653 Base64Encode(Proxy
.User
+ ":" + Proxy
.Password
) + "\r\n";
655 Req
+= "User-Agent: Debian APT-HTTP/1.2\r\n\r\n";
663 // HttpMethod::Go - Run a single loop /*{{{*/
664 // ---------------------------------------------------------------------
665 /* This runs the select loop over the server FDs, Output file FDs and
667 bool HttpMethod::Go(bool ToFile
,ServerState
*Srv
)
669 // Server has closed the connection
670 if (Srv
->ServerFd
== -1 && (Srv
->In
.WriteSpace() == false ||
679 if (Srv
->Out
.WriteSpace() == true && Srv
->ServerFd
!= -1)
680 FD_SET(Srv
->ServerFd
,&wfds
);
681 if (Srv
->In
.ReadSpace() == true && Srv
->ServerFd
!= -1)
682 FD_SET(Srv
->ServerFd
,&rfds
);
689 if (Srv
->In
.WriteSpace() == true && ToFile
== true && FileFD
!= -1)
690 FD_SET(FileFD
,&wfds
);
693 FD_SET(STDIN_FILENO
,&rfds
);
695 // Figure out the max fd
697 if (MaxFd
< Srv
->ServerFd
)
698 MaxFd
= Srv
->ServerFd
;
705 if ((Res
= select(MaxFd
+1,&rfds
,&wfds
,0,&tv
)) < 0)
706 return _error
->Errno("select","Select failed");
710 _error
->Error("Connection timed out");
711 return ServerDie(Srv
);
715 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&rfds
))
718 if (Srv
->In
.Read(Srv
->ServerFd
) == false)
719 return ServerDie(Srv
);
722 if (Srv
->ServerFd
!= -1 && FD_ISSET(Srv
->ServerFd
,&wfds
))
725 if (Srv
->Out
.Write(Srv
->ServerFd
) == false)
726 return ServerDie(Srv
);
729 // Send data to the file
730 if (FileFD
!= -1 && FD_ISSET(FileFD
,&wfds
))
732 if (Srv
->In
.Write(FileFD
) == false)
733 return _error
->Errno("write","Error writing to output file");
736 // Handle commands from APT
737 if (FD_ISSET(STDIN_FILENO
,&rfds
))
746 // HttpMethod::Flush - Dump the buffer into the file /*{{{*/
747 // ---------------------------------------------------------------------
748 /* This takes the current input buffer from the Server FD and writes it
750 bool HttpMethod::Flush(ServerState
*Srv
)
754 SetNonBlock(File
->Fd(),false);
755 if (Srv
->In
.WriteSpace() == false)
758 while (Srv
->In
.WriteSpace() == true)
760 if (Srv
->In
.Write(File
->Fd()) == false)
761 return _error
->Errno("write","Error writing to file");
762 if (Srv
->In
.IsLimit() == true)
766 if (Srv
->In
.IsLimit() == true || Srv
->Encoding
== ServerState::Closes
)
772 // HttpMethod::ServerDie - The server has closed the connection. /*{{{*/
773 // ---------------------------------------------------------------------
775 bool HttpMethod::ServerDie(ServerState
*Srv
)
777 unsigned int LErrno
= errno
;
779 // Dump the buffer to the file
780 if (Srv
->State
== ServerState::Data
)
782 SetNonBlock(File
->Fd(),false);
783 while (Srv
->In
.WriteSpace() == true)
785 if (Srv
->In
.Write(File
->Fd()) == false)
786 return _error
->Errno("write","Error writing to the file");
789 if (Srv
->In
.IsLimit() == true)
794 // See if this is because the server finished the data stream
795 if (Srv
->In
.IsLimit() == false && Srv
->State
!= ServerState::Header
&&
796 Srv
->Encoding
!= ServerState::Closes
)
800 return _error
->Error("Error reading from server Remote end closed connection");
802 return _error
->Errno("read","Error reading from server");
808 // Nothing left in the buffer
809 if (Srv
->In
.WriteSpace() == false)
812 // We may have got multiple responses back in one packet..
820 // HttpMethod::DealWithHeaders - Handle the retrieved header data /*{{{*/
821 // ---------------------------------------------------------------------
822 /* We look at the header data we got back from the server and decide what
826 3 - Unrecoverable error
827 4 - Error with error content page
828 5 - Unrecoverable non-server error (close the connection) */
829 int HttpMethod::DealWithHeaders(FetchResult
&Res
,ServerState
*Srv
)
832 if (Srv
->Result
== 304)
834 unlink(Queue
->DestFile
.c_str());
836 Res
.LastModified
= Queue
->LastModified
;
840 /* We have a reply we don't handle. This should indicate a perm server
842 if (Srv
->Result
< 200 || Srv
->Result
>= 300)
844 _error
->Error("%u %s",Srv
->Result
,Srv
->Code
);
845 if (Srv
->HaveContent
== true)
850 // This is some sort of 2xx 'data follows' reply
851 Res
.LastModified
= Srv
->Date
;
852 Res
.Size
= Srv
->Size
;
856 File
= new FileFd(Queue
->DestFile
,FileFd::WriteAny
);
857 if (_error
->PendingError() == true)
860 FailFile
= Queue
->DestFile
;
861 FailFile
.c_str(); // Make sure we dont do a malloc in the signal handler
863 FailTime
= Srv
->Date
;
865 // Set the expected size
866 if (Srv
->StartPos
>= 0)
868 Res
.ResumePoint
= Srv
->StartPos
;
869 ftruncate(File
->Fd(),Srv
->StartPos
);
872 // Set the start point
873 lseek(File
->Fd(),0,SEEK_END
);
876 Srv
->In
.MD5
= new MD5Summation
;
878 // Fill the MD5 Hash if the file is non-empty (resume)
879 if (Srv
->StartPos
> 0)
881 lseek(File
->Fd(),0,SEEK_SET
);
882 if (Srv
->In
.MD5
->AddFD(File
->Fd(),Srv
->StartPos
) == false)
884 _error
->Errno("read","Problem hashing file");
887 lseek(File
->Fd(),0,SEEK_END
);
890 SetNonBlock(File
->Fd(),true);
894 // HttpMethod::SigTerm - Handle a fatal signal /*{{{*/
895 // ---------------------------------------------------------------------
896 /* This closes and timestamps the open file. This is necessary to get
897 resume behavior on user abort */
898 void HttpMethod::SigTerm(int)
906 UBuf
.actime
= FailTime
;
907 UBuf
.modtime
= FailTime
;
908 utime(FailFile
.c_str(),&UBuf
);
913 // HttpMethod::Fetch - Fetch an item /*{{{*/
914 // ---------------------------------------------------------------------
915 /* This adds an item to the pipeline. We keep the pipeline at a fixed
917 bool HttpMethod::Fetch(FetchItem
*)
922 // Queue the requests
925 for (FetchItem
*I
= Queue
; I
!= 0 && Depth
< (signed)PipelineDepth
;
926 I
= I
->Next
, Depth
++)
928 // If pipelining is disabled, we only queue 1 request
929 if (Server
->Pipeline
== false && Depth
>= 0)
932 // Make sure we stick with the same server
933 if (Server
->Comp(I
->Uri
) == false)
940 SendReq(I
,Server
->Out
);
948 // HttpMethod::Configuration - Handle a configuration message /*{{{*/
949 // ---------------------------------------------------------------------
950 /* We stash the desired pipeline depth */
951 bool HttpMethod::Configuration(string Message
)
953 if (pkgAcqMethod::Configuration(Message
) == false)
956 TimeOut
= _config
->FindI("Acquire::http::Timeout",TimeOut
);
957 PipelineDepth
= _config
->FindI("Acquire::http::Pipeline-Depth",
959 Debug
= _config
->FindB("Debug::Acquire::http",false);
964 // HttpMethod::Loop - Main loop /*{{{*/
965 // ---------------------------------------------------------------------
967 int HttpMethod::Loop()
969 signal(SIGTERM
,SigTerm
);
970 signal(SIGINT
,SigTerm
);
977 // We have no commands, wait for some to arrive
980 if (WaitFd(STDIN_FILENO
) == false)
984 /* Run messages, we can accept 0 (no message) if we didn't
985 do a WaitFd above.. Otherwise the FD is closed. */
986 int Result
= Run(true);
987 if (Result
!= -1 && (Result
!= 0 || Queue
== 0))
993 // Connect to the server
994 if (Server
== 0 || Server
->Comp(Queue
->Uri
) == false)
997 Server
= new ServerState(Queue
->Uri
,this);
1000 // Reset the pipeline
1001 if (Server
->ServerFd
== -1)
1004 // Connect to the host
1005 if (Server
->Open() == false)
1013 // Fill the pipeline.
1016 // Fetch the next URL header data from the server.
1017 switch (Server
->RunHeaders())
1022 // The header data is bad
1025 _error
->Error("Bad header Data");
1030 // The server closed a connection during the header get..
1037 Server
->Pipeline
= false;
1039 if (FailCounter
>= 2)
1041 Fail("Connection failed",true);
1049 // Decide what to do.
1051 Res
.Filename
= Queue
->DestFile
;
1052 switch (DealWithHeaders(Res
,Server
))
1054 // Ok, the file is Open
1060 bool Result
= Server
->RunData();
1062 // Close the file, destroy the FD object and timestamp it
1068 struct utimbuf UBuf
;
1070 UBuf
.actime
= Server
->Date
;
1071 UBuf
.modtime
= Server
->Date
;
1072 utime(Queue
->DestFile
.c_str(),&UBuf
);
1074 // Send status to APT
1077 Res
.MD5Sum
= Server
->In
.MD5
->Result();
1093 // Hard server error, not found or something
1100 // Hard internal error, kill the connection and fail
1108 // We need to flush the data, the header is like a 404 w/ error text
1113 // Send the content to /dev/null
1114 File
= new FileFd("/dev/null",FileFd::WriteExists
);
1122 Fail("Internal error");