nvidia-smi Munin Plugin
Nach ein bisschen Knobeln hier ein Munin-Plugin, das für eine beliebige Anzahl von Nvidia-Grafikkarten mit CUDA die Temperatur und die Speicherauslastung per nvidia-smi ausliest:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
#!/bin/bash
#
# Munin plugin: reports the temperature and the memory utilisation (used
# memory as a percentage of total memory) of every GPU found in the text
# report printed by `nvidia-smi -a`.
#
# Usage:
#   <plugin> autoconf   print "yes" if nvidia-smi is available, else "no (...)"
#   <plugin> config     print the graph definition and one field pair per GPU
#   <plugin>            print the current values
#
# NOTE(review): the parser relies on the layout of the `nvidia-smi -a` report
# (lines named Total/Used in MB and Gpu in C, value in the third column, in
# that order for each GPU). Confirm against the installed nvidia-smi version.
#
# Magic markers read by munin-node-configure:
#%# family=auto
#%# capabilities=autoconf

# Parsed values, three consecutive entries per GPU:
# total memory, used memory, temperature.
DATA=()
GPU_COUNT=0

#######################################
# Runs nvidia-smi and fills DATA / GPU_COUNT from its report.
# Globals:   DATA (written), GPU_COUNT (written)
# Arguments: none
# Outputs:   diagnostic on stderr when nvidia-smi is not available
# Returns:   0 on success, 1 when nvidia-smi cannot be found
#######################################
read_gpu_data() {
  local raw
  local IFS=$' \t\n'

  DATA=()
  GPU_COUNT=0

  if ! command -v nvidia-smi >/dev/null 2>&1; then
    echo "nvidia-smi not found" >&2
    return 1
  fi

  # Same filter chain as before, with `grep -E` instead of the deprecated
  # `egrep`: keep the Gpu/Used/Total lines that carry a C or MB unit, take
  # the third column, drop "Memory" captions and strip percent signs.
  raw=$(nvidia-smi -a \
    | grep -E "Gpu|Used|Total" \
    | grep -E "C|MB" \
    | awk '{print $3}' \
    | grep -v Memory \
    | sed 's/%//g')

  # Split on whitespace into the array without pathname expansion. `read`
  # reports failure because it reaches end of input before a NUL delimiter;
  # the array is filled nevertheless.
  read -r -d '' -a DATA <<<"$raw" || true

  GPU_COUNT=$((${#DATA[@]} / 3))
  return 0
}

#######################################
# Prints the Munin graph definition plus the field definitions per GPU.
# Globals:   GPU_COUNT (read)
# Arguments: none
# Outputs:   configuration on stdout
#######################################
print_config() {
  local gpu

  cat <<'EOM'
graph_title NVIDIA Temperatures/Memory
graph_vlabel GPU Temperatures/Memory
graph_args --base 1000 -l 0
graph_category sensors
EOM

  for ((gpu = 0; gpu < GPU_COUNT; gpu++)); do
    echo "gpu${gpu}.label GPU${gpu} Temp"
    echo "gpu${gpu}.type GAUGE"
    echo "gpu${gpu}.min 0"
    echo "gpumem${gpu}.label GPU${gpu} Memory"
    echo "gpumem${gpu}.type GAUGE"
    echo "gpumem${gpu}.min 0"
  done
}

#######################################
# Prints the current temperature and memory percentage of every GPU.
# Globals:   DATA (read), GPU_COUNT (read)
# Arguments: none
# Outputs:   "<field>.value <value>" lines on stdout
#######################################
print_values() {
  local gpu mem_total mem_used temp mem_pct

  for ((gpu = 0; gpu < GPU_COUNT; gpu++)); do
    mem_total=${DATA[gpu * 3]}
    mem_used=${DATA[gpu * 3 + 1]}
    temp=${DATA[gpu * 3 + 2]}

    # Start from Munin's "unknown" marker on every iteration so that a GPU
    # with zero or unparsable total memory never inherits the percentage
    # computed for the previous GPU.
    mem_pct=U
    if [[ $mem_total =~ ^[0-9]+$ && $mem_used =~ ^[0-9]+$ ]] \
      && ((10#$mem_total != 0)); then
      # 10# forces base 10 so a value with leading zeros is not read as octal.
      mem_pct=$((10#$mem_used * 100 / 10#$mem_total))
    fi

    echo "gpu${gpu}.value ${temp}"
    echo "gpumem${gpu}.value ${mem_pct}"
  done
}

#######################################
# Entry point: dispatches on the Munin plugin argument.
# Arguments: $1 - optional mode: "autoconf", "config", or nothing for values
# Returns:   1 when values are requested but nvidia-smi is not available
#######################################
main() {
  case "${1:-}" in
    autoconf)
      # Only claim to be usable when the tool this plugin depends on exists.
      if command -v nvidia-smi >/dev/null 2>&1; then
        echo "yes"
      else
        echo "no (nvidia-smi not found)"
      fi
      exit 0
      ;;
    config)
      # The graph header is printed even when no GPU data could be read.
      read_gpu_data
      print_config
      exit 0
      ;;
  esac

  read_gpu_data || return 1
  print_values
}

main "$@"