/*
 * services/cache/infra.c - infrastructure cache, server rtt and capabilities
 *
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
 *
 * This software is open source.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the NLNET LABS nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 *
 * This file contains the infrastructure cache.
 */
#include "config.h"
#include "ldns/rrdef.h"
#include "services/cache/infra.h"
#include "util/storage/slabhash.h"
#include "util/storage/lookup3.h"
#include "util/data/dname.h"
#include "util/log.h"
#include "util/net_help.h"
#include "util/config_file.h"
#include "iterator/iterator.h"

/** Timeout when only a single probe query per IP is allowed. */
#define PROBE_MAXRTO 12000 /* in msec */

/** number of timeouts for a type when the domain can be blocked;
 * even if another type has completely rtt maxed it, the different type
 * can do this number of packets (until those all timeout too) */
#define TIMEOUT_COUNT_MAX 3

61 infra_sizefunc(void* k
, void* ATTR_UNUSED(d
))
63 struct infra_key
* key
= (struct infra_key
*)k
;
64 return sizeof(*key
) + sizeof(struct infra_data
) + key
->namelen
65 + lock_get_mem(&key
->entry
.lock
);
69 infra_compfunc(void* key1
, void* key2
)
71 struct infra_key
* k1
= (struct infra_key
*)key1
;
72 struct infra_key
* k2
= (struct infra_key
*)key2
;
73 int r
= sockaddr_cmp(&k1
->addr
, k1
->addrlen
, &k2
->addr
, k2
->addrlen
);
76 if(k1
->namelen
!= k2
->namelen
) {
77 if(k1
->namelen
< k2
->namelen
)
81 return query_dname_compare(k1
->zonename
, k2
->zonename
);
85 infra_delkeyfunc(void* k
, void* ATTR_UNUSED(arg
))
87 struct infra_key
* key
= (struct infra_key
*)k
;
90 lock_rw_destroy(&key
->entry
.lock
);
/**
 * Data delete callback for the infra hash table.
 * NOTE(review): the free() call was missing from the damaged listing and
 * has been restored; confirm against upstream.
 * @param d: the struct infra_data to release.
 * @param arg: unused.
 */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct infra_data* data = (struct infra_data*)d;
	free(data);
}

103 infra_create(struct config_file
* cfg
)
105 struct infra_cache
* infra
= (struct infra_cache
*)calloc(1,
106 sizeof(struct infra_cache
));
107 size_t maxmem
= cfg
->infra_cache_numhosts
* (sizeof(struct infra_key
)+
108 sizeof(struct infra_data
)+INFRA_BYTES_NAME
);
109 infra
->hosts
= slabhash_create(cfg
->infra_cache_slabs
,
110 INFRA_HOST_STARTSIZE
, maxmem
, &infra_sizefunc
, &infra_compfunc
,
111 &infra_delkeyfunc
, &infra_deldatafunc
, NULL
);
116 infra
->host_ttl
= cfg
->host_ttl
;
121 infra_delete(struct infra_cache
* infra
)
125 slabhash_delete(infra
->hosts
);
130 infra_adjust(struct infra_cache
* infra
, struct config_file
* cfg
)
134 return infra_create(cfg
);
135 infra
->host_ttl
= cfg
->host_ttl
;
136 maxmem
= cfg
->infra_cache_numhosts
* (sizeof(struct infra_key
)+
137 sizeof(struct infra_data
)+INFRA_BYTES_NAME
);
138 if(maxmem
!= slabhash_get_size(infra
->hosts
) ||
139 cfg
->infra_cache_slabs
!= infra
->hosts
->size
) {
141 infra
= infra_create(cfg
);
146 /** calculate the hash value for a host key */
148 hash_addr(struct sockaddr_storage
* addr
, socklen_t addrlen
)
150 hashvalue_t h
= 0xab;
151 /* select the pieces to hash, some OS have changing data inside */
152 if(addr_is_ip6(addr
, addrlen
)) {
153 struct sockaddr_in6
* in6
= (struct sockaddr_in6
*)addr
;
154 h
= hashlittle(&in6
->sin6_family
, sizeof(in6
->sin6_family
), h
);
155 h
= hashlittle(&in6
->sin6_port
, sizeof(in6
->sin6_port
), h
);
156 h
= hashlittle(&in6
->sin6_addr
, INET6_SIZE
, h
);
158 struct sockaddr_in
* in
= (struct sockaddr_in
*)addr
;
159 h
= hashlittle(&in
->sin_family
, sizeof(in
->sin_family
), h
);
160 h
= hashlittle(&in
->sin_port
, sizeof(in
->sin_port
), h
);
161 h
= hashlittle(&in
->sin_addr
, INET_SIZE
, h
);
166 /** calculate infra hash for a key */
168 hash_infra(struct sockaddr_storage
* addr
, socklen_t addrlen
, uint8_t* name
)
170 return dname_query_hash(name
, hash_addr(addr
, addrlen
));
173 /** lookup version that does not check host ttl (you check it) */
174 struct lruhash_entry
*
175 infra_lookup_nottl(struct infra_cache
* infra
, struct sockaddr_storage
* addr
,
176 socklen_t addrlen
, uint8_t* name
, size_t namelen
, int wr
)
180 memcpy(&k
.addr
, addr
, addrlen
);
183 k
.entry
.hash
= hash_infra(addr
, addrlen
, name
);
184 k
.entry
.key
= (void*)&k
;
186 return slabhash_lookup(infra
->hosts
, k
.entry
.hash
, &k
, wr
);
189 /** init the data elements */
191 data_entry_init(struct infra_cache
* infra
, struct lruhash_entry
* e
,
194 struct infra_data
* data
= (struct infra_data
*)e
->data
;
195 data
->ttl
= timenow
+ infra
->host_ttl
;
196 rtt_init(&data
->rtt
);
197 data
->edns_version
= 0;
198 data
->edns_lame_known
= 0;
199 data
->probedelay
= 0;
200 data
->isdnsseclame
= 0;
202 data
->lame_type_A
= 0;
203 data
->lame_other
= 0;
205 data
->timeout_AAAA
= 0;
206 data
->timeout_other
= 0;
210 * Create and init a new entry for a host
211 * @param infra: infra structure with config parameters.
212 * @param addr: host address.
213 * @param addrlen: length of addr.
214 * @param name: name of zone
215 * @param namelen: length of name.
216 * @param tm: time now.
217 * @return: the new entry or NULL on malloc failure.
219 static struct lruhash_entry
*
220 new_entry(struct infra_cache
* infra
, struct sockaddr_storage
* addr
,
221 socklen_t addrlen
, uint8_t* name
, size_t namelen
, time_t tm
)
223 struct infra_data
* data
;
224 struct infra_key
* key
= (struct infra_key
*)malloc(sizeof(*key
));
227 data
= (struct infra_data
*)malloc(sizeof(struct infra_data
));
232 key
->zonename
= memdup(name
, namelen
);
238 key
->namelen
= namelen
;
239 lock_rw_init(&key
->entry
.lock
);
240 key
->entry
.hash
= hash_infra(addr
, addrlen
, name
);
241 key
->entry
.key
= (void*)key
;
242 key
->entry
.data
= (void*)data
;
243 key
->addrlen
= addrlen
;
244 memcpy(&key
->addr
, addr
, addrlen
);
245 data_entry_init(infra
, &key
->entry
, tm
);
250 infra_host(struct infra_cache
* infra
, struct sockaddr_storage
* addr
,
251 socklen_t addrlen
, uint8_t* nm
, size_t nmlen
, time_t timenow
,
252 int* edns_vs
, uint8_t* edns_lame_known
, int* to
)
254 struct lruhash_entry
* e
= infra_lookup_nottl(infra
, addr
, addrlen
,
256 struct infra_data
* data
;
258 if(e
&& ((struct infra_data
*)e
->data
)->ttl
< timenow
) {
259 /* it expired, try to reuse existing entry */
260 int old
= ((struct infra_data
*)e
->data
)->rtt
.rto
;
261 uint8_t tA
= ((struct infra_data
*)e
->data
)->timeout_A
;
262 uint8_t tAAAA
= ((struct infra_data
*)e
->data
)->timeout_AAAA
;
263 uint8_t tother
= ((struct infra_data
*)e
->data
)->timeout_other
;
264 lock_rw_unlock(&e
->lock
);
265 e
= infra_lookup_nottl(infra
, addr
, addrlen
, nm
, nmlen
, 1);
267 /* if its still there we have a writelock, init */
269 /* do not touch lameness, it may be valid still */
270 data_entry_init(infra
, e
, timenow
);
272 /* TOP_TIMEOUT remains on reuse */
273 if(old
>= USEFUL_SERVER_TOP_TIMEOUT
) {
274 ((struct infra_data
*)e
->data
)->rtt
.rto
275 = USEFUL_SERVER_TOP_TIMEOUT
;
276 ((struct infra_data
*)e
->data
)->timeout_A
= tA
;
277 ((struct infra_data
*)e
->data
)->timeout_AAAA
= tAAAA
;
278 ((struct infra_data
*)e
->data
)->timeout_other
= tother
;
283 /* insert new entry */
284 if(!(e
= new_entry(infra
, addr
, addrlen
, nm
, nmlen
, timenow
)))
286 data
= (struct infra_data
*)e
->data
;
287 *edns_vs
= data
->edns_version
;
288 *edns_lame_known
= data
->edns_lame_known
;
289 *to
= rtt_timeout(&data
->rtt
);
290 slabhash_insert(infra
->hosts
, e
->hash
, e
, data
, NULL
);
293 /* use existing entry */
294 data
= (struct infra_data
*)e
->data
;
295 *edns_vs
= data
->edns_version
;
296 *edns_lame_known
= data
->edns_lame_known
;
297 *to
= rtt_timeout(&data
->rtt
);
298 if(*to
>= PROBE_MAXRTO
&& rtt_notimeout(&data
->rtt
)*4 <= *to
) {
299 /* delay other queries, this is the probe query */
301 lock_rw_unlock(&e
->lock
);
302 e
= infra_lookup_nottl(infra
, addr
,addrlen
,nm
,nmlen
, 1);
303 if(!e
) { /* flushed from cache real fast, no use to
304 allocate just for the probedelay */
307 data
= (struct infra_data
*)e
->data
;
309 /* add 999 to round up the timeout value from msec to sec,
310 * then add a whole second so it is certain that this probe
311 * has timed out before the next is allowed */
312 data
->probedelay
= timenow
+ ((*to
)+1999)/1000;
314 lock_rw_unlock(&e
->lock
);
319 infra_set_lame(struct infra_cache
* infra
, struct sockaddr_storage
* addr
,
320 socklen_t addrlen
, uint8_t* nm
, size_t nmlen
, time_t timenow
,
321 int dnsseclame
, int reclame
, uint16_t qtype
)
323 struct infra_data
* data
;
324 struct lruhash_entry
* e
;
325 int needtoinsert
= 0;
326 e
= infra_lookup_nottl(infra
, addr
, addrlen
, nm
, nmlen
, 1);
329 if(!(e
= new_entry(infra
, addr
, addrlen
, nm
, nmlen
, timenow
))) {
330 log_err("set_lame: malloc failure");
334 } else if( ((struct infra_data
*)e
->data
)->ttl
< timenow
) {
335 /* expired, reuse existing entry */
336 data_entry_init(infra
, e
, timenow
);
338 /* got an entry, now set the zone lame */
339 data
= (struct infra_data
*)e
->data
;
340 /* merge data (if any) */
342 data
->isdnsseclame
= 1;
345 if(!dnsseclame
&& !reclame
&& qtype
== LDNS_RR_TYPE_A
)
346 data
->lame_type_A
= 1;
347 if(!dnsseclame
&& !reclame
&& qtype
!= LDNS_RR_TYPE_A
)
348 data
->lame_other
= 1;
351 slabhash_insert(infra
->hosts
, e
->hash
, e
, e
->data
, NULL
);
352 else { lock_rw_unlock(&e
->lock
); }
357 infra_update_tcp_works(struct infra_cache
* infra
,
358 struct sockaddr_storage
* addr
, socklen_t addrlen
, uint8_t* nm
,
361 struct lruhash_entry
* e
= infra_lookup_nottl(infra
, addr
, addrlen
,
363 struct infra_data
* data
;
365 return; /* doesn't exist */
366 data
= (struct infra_data
*)e
->data
;
367 if(data
->rtt
.rto
>= RTT_MAX_TIMEOUT
)
368 /* do not disqualify this server altogether, it is better
370 data
->rtt
.rto
= RTT_MAX_TIMEOUT
-1000;
371 lock_rw_unlock(&e
->lock
);
375 infra_rtt_update(struct infra_cache
* infra
, struct sockaddr_storage
* addr
,
376 socklen_t addrlen
, uint8_t* nm
, size_t nmlen
, int qtype
,
377 int roundtrip
, int orig_rtt
, time_t timenow
)
379 struct lruhash_entry
* e
= infra_lookup_nottl(infra
, addr
, addrlen
,
381 struct infra_data
* data
;
382 int needtoinsert
= 0;
385 if(!(e
= new_entry(infra
, addr
, addrlen
, nm
, nmlen
, timenow
)))
388 } else if(((struct infra_data
*)e
->data
)->ttl
< timenow
) {
389 data_entry_init(infra
, e
, timenow
);
391 /* have an entry, update the rtt */
392 data
= (struct infra_data
*)e
->data
;
393 if(roundtrip
== -1) {
394 rtt_lost(&data
->rtt
, orig_rtt
);
395 if(qtype
== LDNS_RR_TYPE_A
) {
396 if(data
->timeout_A
< TIMEOUT_COUNT_MAX
)
398 } else if(qtype
== LDNS_RR_TYPE_AAAA
) {
399 if(data
->timeout_AAAA
< TIMEOUT_COUNT_MAX
)
400 data
->timeout_AAAA
++;
402 if(data
->timeout_other
< TIMEOUT_COUNT_MAX
)
403 data
->timeout_other
++;
406 /* if we got a reply, but the old timeout was above server
407 * selection height, delete the timeout so the server is
408 * fully available again */
409 if(rtt_unclamped(&data
->rtt
) >= USEFUL_SERVER_TOP_TIMEOUT
)
410 rtt_init(&data
->rtt
);
411 rtt_update(&data
->rtt
, roundtrip
);
412 data
->probedelay
= 0;
413 if(qtype
== LDNS_RR_TYPE_A
)
415 else if(qtype
== LDNS_RR_TYPE_AAAA
)
416 data
->timeout_AAAA
= 0;
417 else data
->timeout_other
= 0;
419 if(data
->rtt
.rto
> 0)
423 slabhash_insert(infra
->hosts
, e
->hash
, e
, e
->data
, NULL
);
424 else { lock_rw_unlock(&e
->lock
); }
428 long long infra_get_host_rto(struct infra_cache
* infra
,
429 struct sockaddr_storage
* addr
, socklen_t addrlen
, uint8_t* nm
,
430 size_t nmlen
, struct rtt_info
* rtt
, int* delay
, time_t timenow
,
431 int* tA
, int* tAAAA
, int* tother
)
433 struct lruhash_entry
* e
= infra_lookup_nottl(infra
, addr
, addrlen
,
435 struct infra_data
* data
;
438 data
= (struct infra_data
*)e
->data
;
439 if(data
->ttl
>= timenow
) {
440 ttl
= (long long)(data
->ttl
- timenow
);
441 memmove(rtt
, &data
->rtt
, sizeof(*rtt
));
442 if(timenow
< data
->probedelay
)
443 *delay
= (int)(data
->probedelay
- timenow
);
446 *tA
= (int)data
->timeout_A
;
447 *tAAAA
= (int)data
->timeout_AAAA
;
448 *tother
= (int)data
->timeout_other
;
449 lock_rw_unlock(&e
->lock
);
454 infra_edns_update(struct infra_cache
* infra
, struct sockaddr_storage
* addr
,
455 socklen_t addrlen
, uint8_t* nm
, size_t nmlen
, int edns_version
,
458 struct lruhash_entry
* e
= infra_lookup_nottl(infra
, addr
, addrlen
,
460 struct infra_data
* data
;
461 int needtoinsert
= 0;
463 if(!(e
= new_entry(infra
, addr
, addrlen
, nm
, nmlen
, timenow
)))
466 } else if(((struct infra_data
*)e
->data
)->ttl
< timenow
) {
467 data_entry_init(infra
, e
, timenow
);
469 /* have an entry, update the rtt, and the ttl */
470 data
= (struct infra_data
*)e
->data
;
471 /* do not update if noEDNS and stored is yesEDNS */
472 if(!(edns_version
== -1 && (data
->edns_version
!= -1 &&
473 data
->edns_lame_known
))) {
474 data
->edns_version
= edns_version
;
475 data
->edns_lame_known
= 1;
479 slabhash_insert(infra
->hosts
, e
->hash
, e
, e
->data
, NULL
);
480 else { lock_rw_unlock(&e
->lock
); }
485 infra_get_lame_rtt(struct infra_cache
* infra
,
486 struct sockaddr_storage
* addr
, socklen_t addrlen
,
487 uint8_t* name
, size_t namelen
, uint16_t qtype
,
488 int* lame
, int* dnsseclame
, int* reclame
, int* rtt
, time_t timenow
)
490 struct infra_data
* host
;
491 struct lruhash_entry
* e
= infra_lookup_nottl(infra
, addr
, addrlen
,
495 host
= (struct infra_data
*)e
->data
;
496 *rtt
= rtt_unclamped(&host
->rtt
);
497 if(host
->rtt
.rto
>= PROBE_MAXRTO
&& timenow
< host
->probedelay
498 && rtt_notimeout(&host
->rtt
)*4 <= host
->rtt
.rto
) {
499 /* single probe for this domain, and we are not probing */
500 /* unless the query type allows a probe to happen */
501 if(qtype
== LDNS_RR_TYPE_A
) {
502 if(host
->timeout_A
>= TIMEOUT_COUNT_MAX
)
503 *rtt
= USEFUL_SERVER_TOP_TIMEOUT
;
504 else *rtt
= USEFUL_SERVER_TOP_TIMEOUT
-1000;
505 } else if(qtype
== LDNS_RR_TYPE_AAAA
) {
506 if(host
->timeout_AAAA
>= TIMEOUT_COUNT_MAX
)
507 *rtt
= USEFUL_SERVER_TOP_TIMEOUT
;
508 else *rtt
= USEFUL_SERVER_TOP_TIMEOUT
-1000;
510 if(host
->timeout_other
>= TIMEOUT_COUNT_MAX
)
511 *rtt
= USEFUL_SERVER_TOP_TIMEOUT
;
512 else *rtt
= USEFUL_SERVER_TOP_TIMEOUT
-1000;
515 if(timenow
> host
->ttl
) {
517 /* see if this can be a re-probe of an unresponsive server */
518 /* minus 1000 because that is outside of the RTTBAND, so
519 * blacklisted servers stay blacklisted if this is chosen */
520 if(host
->rtt
.rto
>= USEFUL_SERVER_TOP_TIMEOUT
) {
521 lock_rw_unlock(&e
->lock
);
522 *rtt
= USEFUL_SERVER_TOP_TIMEOUT
-1000;
528 lock_rw_unlock(&e
->lock
);
531 /* check lameness first */
532 if(host
->lame_type_A
&& qtype
== LDNS_RR_TYPE_A
) {
533 lock_rw_unlock(&e
->lock
);
538 } else if(host
->lame_other
&& qtype
!= LDNS_RR_TYPE_A
) {
539 lock_rw_unlock(&e
->lock
);
544 } else if(host
->isdnsseclame
) {
545 lock_rw_unlock(&e
->lock
);
550 } else if(host
->rec_lame
) {
551 lock_rw_unlock(&e
->lock
);
557 /* no lameness for this type of query */
558 lock_rw_unlock(&e
->lock
);
566 infra_get_mem(struct infra_cache
* infra
)
568 return sizeof(*infra
) + slabhash_get_mem(infra
->hosts
);