]> git.saurik.com Git - apple/network_cmds.git/blob - unbound/contrib/unbound_munin_
network_cmds-480.tar.gz
[apple/network_cmds.git] / unbound / contrib / unbound_munin_
#!/bin/sh
#
# plugin for munin to monitor usage of unbound servers.
# To install copy this to /usr/local/share/munin/plugins/unbound_munin_
# and use munin-node-configure (--suggest, --shell).
#
# (C) 2008 W.C.A. Wijngaards. BSD Licensed.
#
# To install: enable statistics and unbound-control in unbound.conf
#	server:		extended-statistics: yes
#			statistics-cumulative: no
#			statistics-interval: 0
#	remote-control:	control-enable: yes
# Run the command unbound-control-setup to generate the key files.
#
# Environment variables for this script
#	statefile	- where to put temporary statefile.
#	unbound_conf	- where the unbound.conf file is located.
#	unbound_control	- where to find unbound-control executable.
#	spoof_warn	- what level to warn about spoofing
#	spoof_crit	- what level to crit about spoofing
#
# You can set them in your munin/plugin-conf.d/plugins.conf file
# with:
# [unbound*]
# user root
# env.statefile /usr/local/var/munin/plugin-state/unbound-state
# env.unbound_conf /usr/local/etc/unbound/unbound.conf
# env.unbound_control /usr/local/sbin/unbound-control
# env.spoof_warn 1000
# env.spoof_crit 100000
#
# This plugin can create different graphs depending on what name
# you link it as (with ln -s) into the plugins directory
# You can link it multiple times.
# If you are only a casual user, the _hits and _by_type are most interesting,
# possibly followed by _by_rcode.
#
#	unbound_munin_hits	- base volume, cache hits, unwanted traffic
#	unbound_munin_queue	- to monitor the internal requestlist
#	unbound_munin_memory	- memory usage
#	unbound_munin_by_type	- incoming queries by type
#	unbound_munin_by_class	- incoming queries by class
#	unbound_munin_by_opcode	- incoming queries by opcode
#	unbound_munin_by_rcode	- answers by rcode, validation status
#	unbound_munin_by_flags	- incoming queries by flags
#	unbound_munin_histogram	- histogram of query resolving times
#
# Magic markers - optional - used by installation scripts and
# munin-config: (originally contrib family but munin-node-configure ignores it)
#
#%# family=auto
#%# capabilities=autoconf suggest
54
# POD documentation
# The no-op ':' swallows the POD text as a here-document so the shell never
# executes it. The delimiter is quoted so nothing inside is expanded.
: <<'=cut'
=head1 NAME

unbound_munin_ - Munin plugin to monitor the Unbound DNS resolver.

=head1 APPLICABLE SYSTEMS

System with unbound daemon.

=head1 CONFIGURATION

  [unbound*]
  user root
  env.statefile /usr/local/var/munin/plugin-state/unbound-state
  env.unbound_conf /usr/local/etc/unbound/unbound.conf
  env.unbound_control /usr/local/sbin/unbound-control
  env.spoof_warn 1000
  env.spoof_crit 100000

Use the .env settings to override the defaults.

=head1 USAGE

Can be used to present different graphs. Use ln -s for that name in
the plugins directory to enable the graph.

  unbound_munin_hits      - base volume, cache hits, unwanted traffic
  unbound_munin_queue     - to monitor the internal requestlist
  unbound_munin_memory    - memory usage
  unbound_munin_by_type   - incoming queries by type
  unbound_munin_by_class  - incoming queries by class
  unbound_munin_by_opcode - incoming queries by opcode
  unbound_munin_by_rcode  - answers by rcode, validation status
  unbound_munin_by_flags  - incoming queries by flags
  unbound_munin_histogram - histogram of query resolving times

=head1 AUTHOR

Copyright 2008 W.C.A. Wijngaards

=head1 LICENSE

BSD

=cut
100
# Settings. Each one falls back to the default shown when the matching
# environment variable (statefile, unbound_conf, unbound_control,
# spoof_warn, spoof_crit) is unset or empty.
state=${statefile:-/usr/local/var/munin/plugin-state/unbound-state}
conf=${unbound_conf:-/usr/local/etc/unbound/unbound.conf}
ctrl=${unbound_control:-/usr/local/sbin/unbound-control}
warn=${spoof_warn:-1000}
crit=${spoof_crit:-100000}
# lock file guarding updates of the statefile
lock=$state.lock

# number of seconds between polling attempts.
# makes the statefile hang around for at least this many seconds,
# so that multiple links of this script can share the results.
lee=55

# to keep things within 19 characters
# (a list of sed arguments; it is expanded unquoted on purpose so that it
# splits into separate -e options)
ABBREV="-e s/total/t/ -e s/thread/t/ -e s/num/n/ -e s/query/q/ -e s/answer/a/ -e s/unwanted/u/ -e s/requestlist/ql/ -e s/type/t/ -e s/class/c/ -e s/opcode/o/ -e s/rcode/r/ -e s/edns/e/ -e s/mem/m/ -e s/cache/c/ -e s/mod/m/"
115
# Look up statistic $1 in the statefile ($state) and leave its value in the
# global variable $value.
#   $1 - statistic name exactly as it appears before the '=' in the statefile
# The name is compared literally, so the dots in it are not treated as
# regular-expression wildcards, and only the first matching line is used.
# $value is set to "0" when the statistic is absent or has an empty value.
get_value ( ) {
	value=$(awk -F= -v key="$1" '$1 == key { print $NF; exit }' "$state")
	if test -z "$value"; then
		value="0"
	fi
}
123
# Download the statistics from the unbound server into the statefile.
# Uses the globals $state, $lock, $lee, $ctrl and $conf, and calls get_value
# (which overwrites $value).
# The lock file holds the pid of the process that is fetching; it is always
# removed again before this function returns or exits.
# Exits the script with status 1 (message on stderr) when the lock cannot
# be obtained or when the statistics cannot be retrieved.
get_state ( ) {
	# obtain lock for fetching the state
	# because there is a race condition in fetching and writing to file

	# see if the lock is stale (its owner no longer runs), if so, take it
	if test -f "$lock"; then
		pid=$(cat "$lock" 2>/dev/null)
		if ! kill -0 "$pid" >/dev/null 2>&1 && test "$pid" != "$$"; then
			echo $$ >"$lock"
		fi
	fi

	i=0
	# loop until the lock file exists and contains our own pid
	while test ! -f "$lock" || test "$(cat "$lock" 2>/dev/null)" != "$$"; do
		while test -f "$lock"; do
			# wait; the first 1000 rounds poll without sleeping,
			# after that one second is slept per round
			i=$(expr "$i" + 1)
			if test "$i" -gt 1000; then
				sleep 1
			fi
			if test "$i" -gt 1500; then
				echo "error locking $lock = $(cat "$lock" 2>/dev/null)" >&2
				rm -f "$lock"
				exit 1
			fi
		done
		# try to get it
		echo $$ >"$lock"
	done

	# do not refetch if the file exists and only LEE seconds old
	if test -f "$state"; then
		now=$(date +%s)
		get_value "time.now"
		# drop the fractional part of the timestamp
		value=${value%%.*}
		if test "$now" -lt "$(expr "$value" + "$lee")"; then
			rm -f "$lock"
			return
		fi
	fi

	# Fetch into a temporary file next to the statefile and rename it into
	# place, so a failed or partial fetch never overwrites the previous
	# statistics. $ctrl is left unquoted as before, so a setting that
	# carries extra arguments still splits into words.
	tmp=$state.$$
	if $ctrl -c "$conf" stats >"$tmp" && mv -f "$tmp" "$state"; then
		rm -f "$lock"
	else
		rm -f "$tmp"
		echo "error retrieving data from unbound server" >&2
		rm -f "$lock"
		exit 1
	fi
}
173
# "autoconf": report whether the plugin can run here. Prints "yes", or
# "no (reason)" when the configuration file or the directory of the
# statefile is missing.
if test "$1" = "autoconf"; then
	if test ! -f "$conf"; then
		echo no "($conf does not exist)"
		exit 1
	fi
	statedir=$(dirname "$state")
	if test ! -d "$statedir"; then
		echo no "($statedir directory does not exist)"
		exit 1
	fi
	echo yes
	exit 0
fi

# "suggest": list the graph names this plugin can be linked as.
if test "$1" = "suggest"; then
	for graph in hits queue memory by_type by_class by_opcode \
		by_rcode by_flags histogram; do
		echo "$graph"
	done
	exit 0
fi

# determine my type, by name: everything after "unbound_munin_" in $0
id=$(echo "$0" | sed -e 's/^.*unbound_munin_//')
if test -z "$id"; then
	# some default to keep people sane.
	id="hits"
fi
206
# If statistic $1 exists in the statefile, print its munin config lines.
#   $1 - statistic name in unbound format (e.g. thread0.num.queries)
#   $2 - label to show for it
# The munin field name is $1 shortened with $ABBREV and with dots turned
# into underscores. The statistic name is matched literally, so its dots
# are not regular-expression wildcards. Prints nothing when the statistic
# is absent or the statefile cannot be read.
exist_config ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	if awk -F= -v key="$1" \
		'$1 == key { found = 1; exit } END { exit !found }' \
		"$state" 2>/dev/null; then
		echo "$mn.label $2"
		echo "$mn.min 0"
	fi
}
215
# Print the munin config lines (label and min 0) for a statistic.
#   $1 - statistic name in unbound format
#   $2 - label to show for it
# The munin field name is $1 shortened with $ABBREV and with dots turned
# into underscores.
p_config ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	echo "$mn.label" "$2"
	echo "$mn.min 0"
}
222
# Print the graph_* lines shared by every graph.
#   $1 - graph title, $2 - base for --base, $3 - vertical label
graph_header ( ) {
	echo "graph_title $1"
	echo "graph_args --base $2 -l 0"
	echo "graph_vlabel $3"
	echo "graph_category DNS"
}

# Print config for every statistic in the statefile whose name starts with
# the literal prefix $1; the label is the rest of the name after the prefix.
config_by_prefix ( ) {
	awk -F= -v p="$1" 'index($1, p) == 1 { print $1 }' "$state" |
	while IFS= read -r nm; do
		tp=${nm#"$1"}
		p_config "$nm" "$tp"
	done
}

# Print the config of one histogram field.
#   $1 - field name, $2 - label, $3 - draw style, $4 - colour
histogram_field ( ) {
	echo "$1.label" "$2"
	echo "$1.min 0"
	echo "$1.draw $3"
	echo "$1.colour $4"
}

# Print the munin "config" output for the graph selected by $id.
# Reads $state, $warn and $crit; an unknown $id prints nothing.
emit_config ( ) {
	case "$id" in
	hits)
		graph_header "Unbound DNS traffic and cache hits" 1000 "queries / second"
		# one field per worker thread found in the statefile
		sed -n 's/^\(thread[0-9][0-9]*\.num\.queries\)=.*/\1/p' "$state" |
		while IFS= read -r x; do
			exist_config "$x" "queries handled by ${x%.num.queries}"
		done
		p_config "total.num.queries" "total queries from clients"
		p_config "total.num.cachehits" "cache hits"
		p_config "total.num.prefetch" "cache prefetch"
		p_config "num.query.tcp" "TCP queries"
		p_config "num.query.tcpout" "TCP out queries"
		p_config "num.query.ipv6" "IPv6 queries"
		p_config "unwanted.queries" "queries that failed acl"
		p_config "unwanted.replies" "unwanted or unsolicited replies"
		# u_replies is the abbreviated field name of unwanted.replies
		echo "u_replies.warning $warn"
		echo "u_replies.critical $crit"
		echo "graph_info DNS queries to the recursive resolver. The unwanted replies could be innocent duplicate packets, late replies, or spoof threats."
		;;
	queue)
		graph_header "Unbound requestlist size" 1000 "number of queries"
		p_config "total.requestlist.avg" "Average size of queue on insert"
		p_config "total.requestlist.max" "Max size of queue (in 5 min)"
		p_config "total.requestlist.overwritten" "Number of queries replaced by new ones"
		p_config "total.requestlist.exceeded" "Number of queries dropped due to lack of space"
		echo "graph_info The queries that did not hit the cache and need recursion service take up space in the requestlist. If there are too many queries, first queries get overwritten, and at last resort dropped."
		;;
	memory)
		graph_header "Unbound memory usage" 1024 "memory used in bytes"
		p_config "mem.total.sbrk" "Total memory"
		p_config "mem.cache.rrset" "RRset cache memory"
		p_config "mem.cache.message" "Message cache memory"
		p_config "mem.mod.iterator" "Iterator module memory"
		p_config "mem.mod.validator" "Validator module and key cache memory"
		p_config "msg.cache.count" "msg cache count"
		p_config "rrset.cache.count" "rrset cache count"
		p_config "infra.cache.count" "infra cache count"
		p_config "key.cache.count" "key cache count"
		echo "graph_info The memory used by unbound."
		;;
	by_type)
		graph_header "Unbound DNS queries by type" 1000 "queries / second"
		config_by_prefix "num.query.type."
		echo "graph_info queries by DNS RR type queried for"
		;;
	by_class)
		graph_header "Unbound DNS queries by class" 1000 "queries / second"
		config_by_prefix "num.query.class."
		echo "graph_info queries by DNS RR class queried for."
		;;
	by_opcode)
		graph_header "Unbound DNS queries by opcode" 1000 "queries / second"
		config_by_prefix "num.query.opcode."
		echo "graph_info queries by opcode in the query packet."
		;;
	by_rcode)
		graph_header "Unbound DNS answers by return code" 1000 "answer packets / second"
		config_by_prefix "num.answer.rcode."
		p_config "num.answer.secure" "answer secure"
		p_config "num.answer.bogus" "answer bogus"
		p_config "num.rrset.bogus" "num rrsets marked bogus"
		echo "graph_info answers sorted by return value. rrsets bogus is the number of rrsets marked bogus per second by the validator"
		;;
	by_flags)
		graph_header "Unbound DNS incoming queries by flags" 1000 "queries / second"
		p_config "num.query.flags.QR" "QR (query reply) flag"
		p_config "num.query.flags.AA" "AA (auth answer) flag"
		p_config "num.query.flags.TC" "TC (truncated) flag"
		p_config "num.query.flags.RD" "RD (recursion desired) flag"
		p_config "num.query.flags.RA" "RA (rec avail) flag"
		p_config "num.query.flags.Z" "Z (zero) flag"
		p_config "num.query.flags.AD" "AD (auth data) flag"
		p_config "num.query.flags.CD" "CD (check disabled) flag"
		p_config "num.query.edns.present" "EDNS OPT present"
		p_config "num.query.edns.DO" "DO (DNSSEC OK) flag"
		echo "graph_info This graphs plots the flags inside incoming queries. For example, if QR, AA, TC, RA, Z flags are set, the query can be rejected. RD, AD, CD and DO are legitimately set by some software."
		;;
	histogram)
		graph_header "Unbound DNS histogram of reply time" 1000 "queries / second"
		# cache hits form the base area, the time buckets are stacked on it
		histogram_field hcache "cache hits" AREA 999999
		histogram_field h64ms "0 msec - 66 msec" STACK 0000FF
		histogram_field h128ms "66 msec - 131 msec" STACK 1F00DF
		histogram_field h256ms "131 msec - 262 msec" STACK 3F00BF
		histogram_field h512ms "262 msec - 524 msec" STACK 5F009F
		histogram_field h1s "524 msec - 1 sec" STACK 7F007F
		histogram_field h2s "1 sec - 2 sec" STACK 9F005F
		histogram_field h4s "2 sec - 4 sec" STACK BF003F
		histogram_field h8s "4 sec - 8 sec" STACK DF001F
		histogram_field h16s "8 sec - ..." STACK FF0000
		echo "graph_info Histogram of the reply times for queries."
		;;
	esac
}

# "config": describe the graph; fetch the statistics first when no
# statefile exists yet, because some graphs list what the statefile holds.
if test "$1" = "config"; then
	if test ! -f "$state"; then
		get_state
	fi
	emit_config
	exit 0
fi
395
# do the stats itself
get_state

# get the time elapsed; every rate below is divided by it, so refuse any
# spelling of zero (0, 0.0, 0.000000, ...) instead of dividing by zero.
get_value "time.elapsed"
case "$value" in
*[!0.]*)
	# contains something other than '0' and '.': usable as divisor
	;;
*)
	echo "error: time elapsed 0 or could not retrieve data" >&2
	exit 1
	;;
esac
elapsed="$value"
406
# Print "<field>.value <rate>" for statistic $1, where the rate is the
# value from the statefile divided by $elapsed, with 6 decimals (bc).
#   $1 - statistic name in unbound format
# The field name is $1 shortened with $ABBREV, dots turned into underscores.
print_qps ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	get_value "$1"
	qps=$(echo "scale=6; $value / $elapsed" | bc)
	echo "$mn.value $qps"
}
413
# Print "<field>.value <rate>" when the statefile line is already at hand.
#   $1 - statistic name in unbound format
#   $2 - the complete "name=value" line for it
# The value is whatever follows the last '=' of $2; it is divided by
# $elapsed with 6 decimals (bc). Overwrites the global $value.
print_qps_line ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	value=${2##*=}
	qps=$(echo "scale=6; $value / $elapsed" | bc)
	echo "$mn.value $qps"
}
420
# Print "<field>.value <value>" for statistic $1, using the value from the
# statefile as it is (no division by the elapsed time).
#   $1 - statistic name in unbound format
# The field name is $1 shortened with $ABBREV, dots turned into underscores.
print_value ( ) {
	mn=$(printf '%s\n' "$1" | sed $ABBREV | tr . _)
	get_value "$1"
	echo "$mn.value" $value
}
427
# True when statistic $1 (matched literally) is present in the statefile.
stat_present ( ) {
	awk -F= -v key="$1" \
		'$1 == key { found = 1; exit } END { exit !found }' \
		"$state" 2>/dev/null
}

# Print the rate for every statefile line whose statistic name starts with
# the literal prefix $1.
qps_by_prefix ( ) {
	awk -F= -v p="$1" 'index($1, p) == 1' "$state" |
	while IFS= read -r line; do
		print_qps_line "${line%%=*}" "$line"
	done
}

# Print $1 divided by $elapsed with 6 decimals.
rate_of ( ) {
	echo "scale=6; $1 / $elapsed" | bc
}

# Add up the values of all statistics named in the arguments; the sum is
# left in the global $r.
sum_values ( ) {
	r=0
	for x in "$@"; do
		get_value "$x"
		r=$(expr "$r" + "$value")
	done
}

# Print the munin values for the graph selected by $id.
# Reads $state, $elapsed, $ctrl and $conf; an unknown $id prints nothing.
emit_values ( ) {
	case "$id" in
	hits)
		# per-thread counters found in the statefile, then the totals;
		# the substitution is unquoted on purpose to get one word per name
		for x in $(sed -n 's/^\(thread[0-9][0-9]*\.num\.queries\)=.*/\1/p' "$state") \
			total.num.queries \
			total.num.cachehits total.num.prefetch num.query.tcp \
			num.query.tcpout num.query.ipv6 unwanted.queries \
			unwanted.replies; do
			if stat_present "$x"; then
				print_qps "$x"
			fi
		done
		;;
	queue)
		for x in total.requestlist.avg total.requestlist.max \
			total.requestlist.overwritten total.requestlist.exceeded; do
			print_value "$x"
		done
		;;
	memory)
		mn=$(echo mem.total.sbrk | sed $ABBREV | tr . _)
		get_value 'mem.total.sbrk'
		if test "$value" -eq 0; then
			# no sbrk figure: fall back to the resident set size that ps
			# reports for the pid in unbound's pidfile. The pidfile path
			# comes from the checkconf tool that sits next to $ctrl.
			chk=$(echo "$ctrl" | sed -e 's/-control$/-checkconf/')
			pidf=$($chk -o pidfile "$conf" 2>/dev/null)
			pid=$(cat "$pidf" 2>/dev/null)
			value=$(ps -p "$pid" -o rss= 2>/dev/null | tr -d ' ')
			case "$value" in
			''|*[!0-9]*)
				# not a plain number (ps failed): report 0
				value=0
				;;
			*)
				# presumably rss is in kilobytes; the graph is in bytes
				value=$(expr "$value" \* 1024)
				;;
			esac
		fi
		echo "$mn.value" $value
		for x in mem.cache.rrset mem.cache.message mem.mod.iterator \
			mem.mod.validator msg.cache.count rrset.cache.count \
			infra.cache.count key.cache.count; do
			print_value "$x"
		done
		;;
	by_type)
		qps_by_prefix "num.query.type."
		;;
	by_class)
		qps_by_prefix "num.query.class."
		;;
	by_opcode)
		qps_by_prefix "num.query.opcode."
		;;
	by_rcode)
		qps_by_prefix "num.answer.rcode."
		print_qps "num.answer.secure"
		print_qps "num.answer.bogus"
		print_qps "num.rrset.bogus"
		;;
	by_flags)
		for x in num.query.flags.QR num.query.flags.AA num.query.flags.TC \
			num.query.flags.RD num.query.flags.RA num.query.flags.Z \
			num.query.flags.AD num.query.flags.CD \
			num.query.edns.present num.query.edns.DO; do
			print_qps "$x"
		done
		;;
	histogram)
		get_value total.num.cachehits
		echo "hcache.value $(rate_of "$value")"
		# all buckets below 65536 usec are folded into one field
		sum_values \
			histogram.000000.000000.to.000000.000001 \
			histogram.000000.000001.to.000000.000002 \
			histogram.000000.000002.to.000000.000004 \
			histogram.000000.000004.to.000000.000008 \
			histogram.000000.000008.to.000000.000016 \
			histogram.000000.000016.to.000000.000032 \
			histogram.000000.000032.to.000000.000064 \
			histogram.000000.000064.to.000000.000128 \
			histogram.000000.000128.to.000000.000256 \
			histogram.000000.000256.to.000000.000512 \
			histogram.000000.000512.to.000000.001024 \
			histogram.000000.001024.to.000000.002048 \
			histogram.000000.002048.to.000000.004096 \
			histogram.000000.004096.to.000000.008192 \
			histogram.000000.008192.to.000000.016384 \
			histogram.000000.016384.to.000000.032768 \
			histogram.000000.032768.to.000000.065536
		echo "h64ms.value $(rate_of "$r")"
		# one field per bucket, written as field:statistic
		for pair in \
			h128ms:histogram.000000.065536.to.000000.131072 \
			h256ms:histogram.000000.131072.to.000000.262144 \
			h512ms:histogram.000000.262144.to.000000.524288 \
			h1s:histogram.000000.524288.to.000001.000000 \
			h2s:histogram.000001.000000.to.000002.000000 \
			h4s:histogram.000002.000000.to.000004.000000 \
			h8s:histogram.000004.000000.to.000008.000000; do
			get_value "${pair#*:}"
			echo "${pair%%:*}.value $(rate_of "$value")"
		done
		# all buckets from 8 seconds up are folded into one field
		sum_values \
			histogram.000008.000000.to.000016.000000 \
			histogram.000016.000000.to.000032.000000 \
			histogram.000032.000000.to.000064.000000 \
			histogram.000064.000000.to.000128.000000 \
			histogram.000128.000000.to.000256.000000 \
			histogram.000256.000000.to.000512.000000 \
			histogram.000512.000000.to.001024.000000 \
			histogram.001024.000000.to.002048.000000 \
			histogram.002048.000000.to.004096.000000 \
			histogram.004096.000000.to.008192.000000 \
			histogram.008192.000000.to.016384.000000 \
			histogram.016384.000000.to.032768.000000 \
			histogram.032768.000000.to.065536.000000 \
			histogram.065536.000000.to.131072.000000 \
			histogram.131072.000000.to.262144.000000 \
			histogram.262144.000000.to.524288.000000
		echo "h16s.value $(rate_of "$r")"
		;;
	esac
}

emit_values