2
0
mirror of https://github.com/munin-monitoring/contrib.git synced 2018-11-08 00:59:34 +01:00
contrib-munin/plugins/other/nvidia_smi_

161 lines
6.2 KiB
Bash
Executable File

#!/bin/bash
# Written by Matthew Ritchie
# Monitor GPU statistics for single or multiple GPU boards.
#
# Globals set here and consumed by the FUNCT_<version> handlers below:
#   EXEC            path to the nvidia-smi binary
#   DRIVER_VERSION  leading (major) component of the driver version found in
#                   /proc/driver/nvidia/version; selects the handler to run
#   GPU_TOTAL       number of lines starting with "GPU" in `nvidia-smi -a`
#   GPUSTART        index of the first GPU to query
EXEC=/usr/bin/nvidia-smi
if [ ! -f "${EXEC}" ]; then
  # Nothing below can work without nvidia-smi, so stop here instead of
  # carrying on with empty values.
  if [ "$1" = "autoconf" ]; then
    echo "no (${EXEC} does not exist)"
  else
    echo "${EXEC} does not exist! Bailing." >&2
  fi
  exit 1
fi
# Field 8 of the first line holds the full version string; keep only the part
# before the first dot.
DRIVER_VERSION=$(awk 'NR == 1 {split($8, parts, "."); print parts[1]; exit}' /proc/driver/nvidia/version)
# One line starting with "GPU" is counted per board.
GPU_TOTAL=$("${EXEC}" -a | grep -c '^GPU')
GPUSTART=0
#######################################
# Munin handler selected when DRIVER_VERSION is 270.
# Reports temperature, fan speed, GPU utilization and memory utilization
# for every GPU listed by `nvidia-smi -a`.
# Globals:
#   EXEC       (read)    command used to query the GPUs
#   GPUSTART   (updated) index of the GPU being queried; incremented per GPU
#   GPU_TOTAL  (updated) GPUs left to process; decremented per GPU
# Arguments:
#   $1 - "autoconf", "config", or anything else (including empty) to print
#        the current values
# Outputs:
#   Munin autoconf answer, config lines or value lines on stdout.
# Returns:
#   Exits the script with 0/1 in autoconf mode and with 0 after the last GPU
#   in config mode; otherwise returns 0.
#######################################
FUNCT_270() {
  local mode="$1"
  local gpu_count n report
  local gpu_prod gpu_drv gpu_temp gpu_fanspeed gpu_util gpu_mem_util
  # Prints field 3 of the line located "off" lines after the first line
  # matching "pat".
  local -r field_after='start && NR == start + off {print $3; exit}
    !start && $0 ~ pat {start = NR}'

  # One line starting with "GPU" is expected per board.
  gpu_count=$("${EXEC}" -a | grep -c '^GPU')

  # The driver version is always read from GPU 0 and only shown by "config",
  # so query it once and only when needed.
  if [[ "${mode}" == "config" ]]; then
    gpu_drv=$("${EXEC}" -g 0 -q | awk '/Driver Version/ {print $4}')
  fi

  for ((n = 0; n < gpu_count; n++)); do
    # Query each GPU once and pick every metric out of the same report.
    report=$("${EXEC}" -g "${GPUSTART}" -q)
    gpu_prod=$(awk -F: '/Product Name/ {print $2}' <<<"${report}")
    # The temperature is read from the line following the "Temperature" line.
    gpu_temp=$(awk -v pat="Temperature" -v off=1 "${field_after}" <<<"${report}")
    gpu_fanspeed=$(awk '/Fan Speed/ {v = $4; sub(/%.*/, "", v); print v}' <<<"${report}")
    # GPU and memory utilization are read from the first and second line
    # following the "Utilization" line.
    gpu_util=$(awk -v pat="Utilization" -v off=1 "${field_after}" <<<"${report}")
    gpu_mem_util=$(awk -v pat="Utilization" -v off=2 "${field_after}" <<<"${report}")

    case "${mode}" in
      autoconf)
        # Decided on the first GPU only: a readable temperature means "yes".
        if [[ -n "${gpu_temp}" ]]; then
          echo yes
          exit 0
        fi
        echo no
        exit 1
        ;;
      config)
        echo "graph_title ${gpu_prod}"
        echo "graph_args --upper-limit 120 -l 0"
        echo "graph_vlabel Percent or Degrees C"
        echo "graph_category NVIDIA"
        echo "graph_info This graph shows information about your ${gpu_prod} graphics card ${GPUSTART} running driver version ${gpu_drv}"
        echo "GPU_UTIL_${GPUSTART}.label NVidia GPU utilization for GPU${GPUSTART}"
        echo "GPU_FANSPEED_${GPUSTART}.label NVidia fan speed for GPU${GPUSTART}"
        echo "GPU_MEM_UTIL_${GPUSTART}.label NVidia memory utilization for GPU${GPUSTART}"
        echo "GPU_TEMP_${GPUSTART}.label NVidia temperature for GPU${GPUSTART}"
        # Last GPU described: configuration output is complete.
        if ((GPU_TOTAL == 1)); then
          exit 0
        fi
        ;;
      *)
        echo "GPU_TEMP_${GPUSTART}.value ${gpu_temp}"
        echo "GPU_FANSPEED_${GPUSTART}.value ${gpu_fanspeed}"
        echo "GPU_UTIL_${GPUSTART}.value ${gpu_util}"
        echo "GPU_MEM_UTIL_${GPUSTART}.value ${gpu_mem_util}"
        ;;
    esac

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done

  # Reached in autoconf mode only when no GPU was listed at all; answer
  # explicitly instead of printing nothing.
  if [[ "${mode}" == "autoconf" ]]; then
    echo no
    exit 1
  fi
}
#######################################
# Munin handler selected when DRIVER_VERSION is 260.
# Reports temperature, fan speed, GPU utilization and memory utilization
# for every GPU listed by `nvidia-smi -a`.
# Globals:
#   EXEC       (read)    command used to query the GPUs
#   GPUSTART   (updated) index of the GPU being queried; incremented per GPU
#   GPU_TOTAL  (updated) GPUs left to process; decremented per GPU
# Arguments:
#   $1 - "autoconf", "config", or anything else (including empty) to print
#        the current values
# Outputs:
#   Munin autoconf answer, config lines or value lines on stdout.
# Returns:
#   Exits the script with 0/1 in autoconf mode and with 0 after the last GPU
#   in config mode; otherwise returns 0.
#######################################
FUNCT_260() {
  local mode="$1"
  local gpu_count n report
  local gpu_prod gpu_drv gpu_temp gpu_fanspeed gpu_util gpu_mem_util
  # Prints the given field of every matching line, cut at the first "%".
  local -r strip_percent='$0 ~ pat {v = $col; sub(/%.*/, "", v); print v}'

  # One line starting with "GPU" is expected per board.
  gpu_count=$("${EXEC}" -a | grep -c '^GPU')

  # The driver version is always read from GPU 0 and only shown by "config",
  # so query it once and only when needed.
  if [[ "${mode}" == "config" ]]; then
    gpu_drv=$("${EXEC}" -g 0 -q | awk '/Driver Version/ {print $4}')
  fi

  for ((n = 0; n < gpu_count; n++)); do
    # Query each GPU once and pick every metric out of the same report.
    report=$("${EXEC}" -g "${GPUSTART}" -q)
    gpu_prod=$(awk -F: '/Product Name/ {print $2}' <<<"${report}")
    gpu_temp=$(awk '/Temperature/ {print $3}' <<<"${report}")
    gpu_fanspeed=$(awk -v pat="Fan Speed" -v col=4 "${strip_percent}" <<<"${report}")
    gpu_util=$(awk -v pat="Utilization" -v col=3 "${strip_percent}" <<<"${report}")
    # NOTE(review): memory utilization has always been extracted with exactly
    # the same expression as GPU utilization, so both fields report the same
    # value. That looks like a copy/paste slip; confirm the 260-series report
    # layout before changing what is parsed here.
    gpu_mem_util=${gpu_util}

    case "${mode}" in
      autoconf)
        # Decided on the first GPU only: a readable temperature means "yes".
        if [[ -n "${gpu_temp}" ]]; then
          echo yes
          exit 0
        fi
        echo no
        exit 1
        ;;
      config)
        echo "graph_title ${gpu_prod}"
        echo "graph_args --upper-limit 120 -l 0"
        echo "graph_vlabel Percent or Degrees C"
        echo "graph_category NVIDIA"
        echo "graph_info This graph shows information about your ${gpu_prod} graphics card ${GPUSTART} running driver version ${gpu_drv}"
        echo "GPU_UTIL_${GPUSTART}.label NVidia GPU utilization for GPU${GPUSTART}"
        echo "GPU_FANSPEED_${GPUSTART}.label NVidia fan speed for GPU${GPUSTART}"
        echo "GPU_MEM_UTIL_${GPUSTART}.label NVidia memory utilization for GPU${GPUSTART}"
        echo "GPU_TEMP_${GPUSTART}.label NVidia temperature for GPU${GPUSTART}"
        # Last GPU described: configuration output is complete.
        if ((GPU_TOTAL == 1)); then
          exit 0
        fi
        ;;
      *)
        echo "GPU_TEMP_${GPUSTART}.value ${gpu_temp}"
        echo "GPU_FANSPEED_${GPUSTART}.value ${gpu_fanspeed}"
        echo "GPU_UTIL_${GPUSTART}.value ${gpu_util}"
        echo "GPU_MEM_UTIL_${GPUSTART}.value ${gpu_mem_util}"
        ;;
    esac

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done

  # Reached in autoconf mode only when no GPU was listed at all; answer
  # explicitly instead of printing nothing.
  if [[ "${mode}" == "autoconf" ]]; then
    echo no
    exit 1
  fi
}
#######################################
# Munin handler selected when DRIVER_VERSION is 195.
# Reports only the temperature of every GPU listed by `nvidia-smi -a`.
# Globals:
#   EXEC       (read)    command used to query the GPUs
#   GPUSTART   (updated) index of the GPU being queried; incremented per GPU
#   GPU_TOTAL  (updated) GPUs left to process; decremented per GPU
# Arguments:
#   $1 - "autoconf", "config", or anything else (including empty) to print
#        the current values
# Outputs:
#   Munin autoconf answer, config lines or value lines on stdout.
# Returns:
#   Exits the script with 0/1 in autoconf mode and with 0 after the last GPU
#   in config mode; otherwise returns 0.
#######################################
FUNCT_195() {
  local mode="$1"
  local gpu_count n report
  local gpu_prod gpu_drv gpu_temp

  # One line starting with "GPU" is expected per board.
  gpu_count=$("${EXEC}" -a | grep -c '^GPU')

  # This handler takes the driver version from /proc (field 8 of the first
  # line) rather than from nvidia-smi. It is only shown by "config", so read
  # it once and only when needed.
  if [[ "${mode}" == "config" ]]; then
    gpu_drv=$(awk 'NR == 1 {print $8; exit}' /proc/driver/nvidia/version)
  fi

  for ((n = 0; n < gpu_count; n++)); do
    # Query each GPU once and pick every value out of the same report.
    report=$("${EXEC}" -g "${GPUSTART}" -q)
    gpu_prod=$(awk -F: '/Product Name/ {print $2}' <<<"${report}")
    gpu_temp=$(awk '/Temperature/ {print $3}' <<<"${report}")

    case "${mode}" in
      autoconf)
        # Decided on the first GPU only: a readable temperature means "yes".
        if [[ -n "${gpu_temp}" ]]; then
          echo yes
          exit 0
        fi
        echo no
        exit 1
        ;;
      config)
        echo "graph_title ${gpu_prod}"
        echo "graph_args --upper-limit 120 -l 0"
        echo "graph_vlabel Degrees C"
        echo "graph_category NVIDIA"
        echo "graph_info This graph shows information about your ${gpu_prod} graphics card ${GPUSTART} running driver version ${gpu_drv}"
        echo "GPU_TEMP_${GPUSTART}.label NVidia temperature for GPU${GPUSTART}"
        # Last GPU described: configuration output is complete.
        if ((GPU_TOTAL == 1)); then
          exit 0
        fi
        ;;
      *)
        echo "GPU_TEMP_${GPUSTART}.value ${gpu_temp}"
        ;;
    esac

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done

  # Reached in autoconf mode only when no GPU was listed at all; answer
  # explicitly instead of printing nothing.
  if [[ "${mode}" == "autoconf" ]]; then
    echo no
    exit 1
  fi
}
# Run the handler that matches the detected driver version. Only versions
# with a FUNCT_<version> function defined in this file are supported, so
# check for the function first instead of failing with "command not found".
handler="FUNCT_${DRIVER_VERSION}"
if ! declare -F "${handler}" >/dev/null; then
  if [ "$1" = "autoconf" ]; then
    echo "no (unsupported NVIDIA driver version '${DRIVER_VERSION}')"
  else
    echo "Unsupported NVIDIA driver version '${DRIVER_VERSION}': ${handler} is not defined." >&2
  fi
  exit 1
fi
"${handler}" "$1"