mirror of
https://github.com/munin-monitoring/contrib.git
synced 2018-11-08 00:59:34 +01:00
161 lines
6.2 KiB
Bash
Executable File
161 lines
6.2 KiB
Bash
Executable File
#!/bin/bash
#
# Munin plugin: monitor GPU statistics for single or multiple NVIDIA GPU boards.
# Written by Matthew Ritchie.
#
# All values are read from nvidia-smi. The output layout differs between
# driver series, so the major driver version selects which FUNCT_<version>
# parser is run at the bottom of this file.

# Location of the NVIDIA management tool every query goes through.
EXEC=/usr/bin/nvidia-smi

# Nothing below can work without nvidia-smi, so really stop here.
# The diagnostic goes to stderr because Munin parses stdout; in autoconf
# mode Munin expects a "no" answer on stdout instead.
if [ ! -f "${EXEC}" ]; then
  if [ "$1" = "autoconf" ]; then
    echo "no (${EXEC} does not exist)"
  else
    echo "${EXEC} does not exist! Bailing." >&2
  fi
  exit 1
fi

# Major driver version: the part of the 8th word on the first line of the
# kernel module's version file that precedes the first dot.
DRIVER_VERSION=$(awk 'NR == 1 { split($8, v, "."); print v[1]; exit }' /proc/driver/nvidia/version)

# Number of lines in the device listing that start with "GPU".
GPU_TOTAL=$("${EXEC}" -a | grep -c '^GPU')

# Index of the first GPU to query; the FUNCT_* parsers count up from here.
GPUSTART=0

# Report statistics for every GPU, parsing the nvidia-smi layout handled for
# the 270 driver series (temperature and utilization values sit on the lines
# following their "Temperature" / "Utilization" headings).
#
# Arguments:
#   $1 - "autoconf", "config", or anything else (including empty) to print
#        the current values.
# Globals:
#   EXEC      (read)    nvidia-smi command to run
#   GPUSTART  (updated) index of the GPU being queried; incremented per GPU
#   GPU_TOTAL (updated) GPUs still to be reported; decremented per GPU
# Outputs:
#   Munin protocol lines on stdout.
# Exits the script:
#   autoconf - status 0 after "yes", status 1 after "no"
#   config   - status 0 once the last GPU has been described
FUNCT_270() {
  local mode=$1
  local gpu_count report prod drv temp fan util mem_util n

  # One loop pass per line of the device listing that starts with "GPU".
  gpu_count=$("${EXEC}" -a | grep -c '^GPU')

  # Fall back to sane values when the caller did not initialise the counters.
  : "${GPUSTART:=0}"
  : "${GPU_TOTAL:=${gpu_count}}"

  # The driver version is always read from GPU 0 and is only shown in
  # graph_info, so fetch it once and only when it is needed.
  drv=
  if [ "${mode}" = "config" ]; then
    drv=$("${EXEC}" -g 0 -q | grep "Driver Version" | awk '{print $4}')
  fi

  for ((n = 0; n < gpu_count; n++)); do
    # Query the card once and take every value from the same snapshot.
    report=$("${EXEC}" -g "${GPUSTART}" -q)

    prod=$(grep "Product Name" <<<"${report}" | awk -F: '{print $2}')
    # Third word of the line directly below the first "Temperature" match.
    temp=$(grep -A 1 "Temperature" <<<"${report}" | sed -n 2p | awk '{print $3}')
    # Fourth word of the "Fan Speed" line with any trailing "%" cut off.
    fan=$(grep "Fan Speed" <<<"${report}" | awk '{print $4}' | awk -F% '{print $1}')
    # First and second line below the first "Utilization" match.
    util=$(grep -A 1 "Utilization" <<<"${report}" | sed -n 2p | awk '{print $3}')
    mem_util=$(grep -A 2 "Utilization" <<<"${report}" | sed -n 3p | awk '{print $3}')

    # autoconf is answered from the first GPU alone: usable if a
    # temperature could be read.
    if [ "${mode}" = "autoconf" ]; then
      if [ -n "${temp}" ]; then
        echo yes
        exit 0
      fi
      echo no
      exit 1
    fi

    if [ "${mode}" = "config" ]; then
      echo "graph_title ${prod}"
      echo "graph_args --upper-limit 120 -l 0"
      echo "graph_vlabel Percent or Degrees C"
      echo "graph_category NVIDIA"
      echo "graph_info This graph shows information about your ${prod} graphics card ${GPUSTART} running driver version ${drv}"
      echo "GPU_UTIL_${GPUSTART}.label NVidia GPU utilization for GPU${GPUSTART}"
      echo "GPU_FANSPEED_${GPUSTART}.label NVidia fan speed for GPU${GPUSTART}"
      echo "GPU_MEM_UTIL_${GPUSTART}.label NVidia memory utilization for GPU${GPUSTART}"
      echo "GPU_TEMP_${GPUSTART}.label NVidia temperature for GPU${GPUSTART}"
      # Nothing left to describe after the last remaining GPU.
      if [ "${GPU_TOTAL}" = 1 ]; then
        exit 0
      fi
    else
      echo "GPU_TEMP_${GPUSTART}.value ${temp}"
      echo "GPU_FANSPEED_${GPUSTART}.value ${fan}"
      echo "GPU_UTIL_${GPUSTART}.value ${util}"
      echo "GPU_MEM_UTIL_${GPUSTART}.value ${mem_util}"
    fi

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done

  # Only reached in autoconf mode when the listing contained no GPU at all;
  # answer explicitly instead of staying silent.
  if [ "${mode}" = "autoconf" ]; then
    echo "no (nvidia-smi listed no GPU)"
    exit 1
  fi
}

# Report statistics for every GPU, parsing the nvidia-smi layout handled for
# the 260 driver series (each value is read from the same line as its label).
#
# Arguments:
#   $1 - "autoconf", "config", or anything else (including empty) to print
#        the current values.
# Globals:
#   EXEC      (read)    nvidia-smi command to run
#   GPUSTART  (updated) index of the GPU being queried; incremented per GPU
#   GPU_TOTAL (updated) GPUs still to be reported; decremented per GPU
# Outputs:
#   Munin protocol lines on stdout.
# Exits the script:
#   autoconf - status 0 after "yes", status 1 after "no"
#   config   - status 0 once the last GPU has been described
FUNCT_260() {
  local mode=$1
  local gpu_count report prod drv temp fan util mem_util n

  # One loop pass per line of the device listing that starts with "GPU".
  gpu_count=$("${EXEC}" -a | grep -c '^GPU')

  # Fall back to sane values when the caller did not initialise the counters.
  : "${GPUSTART:=0}"
  : "${GPU_TOTAL:=${gpu_count}}"

  # The driver version is always read from GPU 0 and is only shown in
  # graph_info, so fetch it once and only when it is needed.
  drv=
  if [ "${mode}" = "config" ]; then
    drv=$("${EXEC}" -g 0 -q | grep "Driver Version" | awk '{print $4}')
  fi

  for ((n = 0; n < gpu_count; n++)); do
    # Query the card once and take every value from the same snapshot.
    report=$("${EXEC}" -g "${GPUSTART}" -q)

    prod=$(grep "Product Name" <<<"${report}" | awk -F: '{print $2}')
    temp=$(grep "Temperature" <<<"${report}" | awk '{print $3}')
    # Fourth word of the "Fan Speed" line with any trailing "%" cut off.
    fan=$(grep "Fan Speed" <<<"${report}" | awk '{print $4}' | awk -F% '{print $1}')
    util=$(grep "Utilization" <<<"${report}" | awk '{print $3}' | awk -F% '{print $1}')
    # NOTE(review): memory utilization has always been extracted with exactly
    # the same expression as GPU utilization, so both fields carry the same
    # number. Kept as is; confirm the 260-series output layout before
    # pointing this at a dedicated memory line.
    mem_util=${util}

    # autoconf is answered from the first GPU alone: usable if a
    # temperature could be read.
    if [ "${mode}" = "autoconf" ]; then
      if [ -n "${temp}" ]; then
        echo yes
        exit 0
      fi
      echo no
      exit 1
    fi

    if [ "${mode}" = "config" ]; then
      echo "graph_title ${prod}"
      echo "graph_args --upper-limit 120 -l 0"
      echo "graph_vlabel Percent or Degrees C"
      echo "graph_category NVIDIA"
      echo "graph_info This graph shows information about your ${prod} graphics card ${GPUSTART} running driver version ${drv}"
      echo "GPU_UTIL_${GPUSTART}.label NVidia GPU utilization for GPU${GPUSTART}"
      echo "GPU_FANSPEED_${GPUSTART}.label NVidia fan speed for GPU${GPUSTART}"
      echo "GPU_MEM_UTIL_${GPUSTART}.label NVidia memory utilization for GPU${GPUSTART}"
      echo "GPU_TEMP_${GPUSTART}.label NVidia temperature for GPU${GPUSTART}"
      # Nothing left to describe after the last remaining GPU.
      if [ "${GPU_TOTAL}" = 1 ]; then
        exit 0
      fi
    else
      echo "GPU_TEMP_${GPUSTART}.value ${temp}"
      echo "GPU_FANSPEED_${GPUSTART}.value ${fan}"
      echo "GPU_UTIL_${GPUSTART}.value ${util}"
      echo "GPU_MEM_UTIL_${GPUSTART}.value ${mem_util}"
    fi

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done

  # Only reached in autoconf mode when the listing contained no GPU at all;
  # answer explicitly instead of staying silent.
  if [ "${mode}" = "autoconf" ]; then
    echo "no (nvidia-smi listed no GPU)"
    exit 1
  fi
}

# Report the temperature of every GPU, parsing the nvidia-smi layout handled
# for the 195 driver series. Only temperature is reported by this parser.
#
# Arguments:
#   $1 - "autoconf", "config", or anything else (including empty) to print
#        the current values.
# Globals:
#   EXEC      (read)    nvidia-smi command to run
#   GPUSTART  (updated) index of the GPU being queried; incremented per GPU
#   GPU_TOTAL (updated) GPUs still to be reported; decremented per GPU
# Outputs:
#   Munin protocol lines on stdout.
# Exits the script:
#   autoconf - status 0 after "yes", status 1 after "no"
#   config   - status 0 once the last GPU has been described
FUNCT_195() {
  local mode=$1
  local gpu_count report prod drv temp n

  # One loop pass per line of the device listing that starts with "GPU".
  gpu_count=$("${EXEC}" -a | grep -c '^GPU')

  # Fall back to sane values when the caller did not initialise the counters.
  : "${GPUSTART:=0}"
  : "${GPU_TOTAL:=${gpu_count}}"

  # Here the driver version comes from the kernel module's version file
  # (8th word of its first line) rather than from nvidia-smi. It is only
  # shown in graph_info, so read it once and only when it is needed.
  drv=
  if [ "${mode}" = "config" ]; then
    drv=$(awk 'NR == 1 { print $8; exit }' /proc/driver/nvidia/version)
  fi

  for ((n = 0; n < gpu_count; n++)); do
    # Query the card once and take every value from the same snapshot.
    report=$("${EXEC}" -g "${GPUSTART}" -q)

    prod=$(grep "Product Name" <<<"${report}" | awk -F: '{print $2}')
    temp=$(grep "Temperature" <<<"${report}" | awk '{print $3}')

    # autoconf is answered from the first GPU alone: usable if a
    # temperature could be read.
    if [ "${mode}" = "autoconf" ]; then
      if [ -n "${temp}" ]; then
        echo yes
        exit 0
      fi
      echo no
      exit 1
    fi

    if [ "${mode}" = "config" ]; then
      echo "graph_title ${prod}"
      echo "graph_args --upper-limit 120 -l 0"
      echo "graph_vlabel Degrees C"
      echo "graph_category NVIDIA"
      echo "graph_info This graph shows information about your ${prod} graphics card ${GPUSTART} running driver version ${drv}"
      echo "GPU_TEMP_${GPUSTART}.label NVidia temperature for GPU${GPUSTART}"
      # Nothing left to describe after the last remaining GPU.
      if [ "${GPU_TOTAL}" = 1 ]; then
        exit 0
      fi
    else
      echo "GPU_TEMP_${GPUSTART}.value ${temp}"
    fi

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done

  # Only reached in autoconf mode when the listing contained no GPU at all;
  # answer explicitly instead of staying silent.
  if [ "${mode}" = "autoconf" ]; then
    echo "no (nvidia-smi listed no GPU)"
    exit 1
  fi
}

# Run the parser that matches the installed driver's major version.
# A parser only exists for some driver series, so check for it first and
# fail with a readable message instead of a bare "command not found".
gpu_handler="FUNCT_${DRIVER_VERSION}"
if declare -F "${gpu_handler}" >/dev/null; then
  "${gpu_handler}" "$@"
elif [ "$1" = "autoconf" ]; then
  # Munin reads the autoconf answer from stdout.
  echo "no (unsupported NVIDIA driver version '${DRIVER_VERSION}')"
  exit 1
else
  echo "Unsupported NVIDIA driver version '${DRIVER_VERSION}': no ${gpu_handler} parser is defined." >&2
  exit 1
fi