#!/bin/bash
# Written by Matthew Ritchie
# Monitor GPU statistics for single or multiple GPU boards.
#
# Usage: <script> [autoconf|config]
#   autoconf  print "yes" (exit 0) if a GPU temperature can be read,
#             otherwise "no" (exit 1)
#   config    print the graph definition and one label per metric and GPU
#   (other)   print the current "<field>.value <n>" lines for every GPU
#
# NOTE(review): the autoconf/config/value output looks like the Munin plugin
# protocol; the script itself does not name its consumer.
#
# The nvidia-smi text layout differs between driver generations, so parsing is
# dispatched on the major driver version found in /proc/driver/nvidia/version
# to FUNCT_270, FUNCT_260 or FUNCT_195.

EXEC=/usr/bin/nvidia-smi

# Without nvidia-smi nothing below can work, so really stop here (the original
# printed "Bailing." but carried on).
if [ ! -f "${EXEC}" ]; then
  if [ "$1" = "autoconf" ]; then
    echo no
    exit 1
  fi
  # Diagnostic goes to stderr so it is not mistaken for plugin output.
  echo "${EXEC} does not exist! Bailing." >&2
  exit 1
fi

# Major driver version: 8th word of the first line of the proc file, up to the
# first dot. Empty when the file cannot be read.
DRIVER_VERSION=$(sed -n 1p /proc/driver/nvidia/version \
  | awk '{print $8}' \
  | awk -F. '{print $1}')

# Number of "GPU ..." header lines reported by nvidia-smi -a.
GPU_TOTAL=$("${EXEC}" -a | grep -E -c '^GPU')

# Index of the next GPU to report; advanced by the FUNCT_* loops.
GPUSTART=0

#######################################
# Answer an "autoconf" request and terminate the script.
# Arguments: $1 - temperature reading parsed for the current GPU
# Outputs:   "yes" when the reading is non-empty, otherwise "no"
# Exits:     0 for yes, 1 for no
#######################################
autoconf_reply() {
  if [ "$1" != "" ]; then
    echo yes
    exit 0
  fi
  echo no
  exit 1
}

#######################################
# Print the graph definition and the four metric labels for one GPU.
# Arguments: $1 - GPU index, $2 - product name, $3 - driver version
#######################################
print_full_config() {
  local idx=$1 product=$2 driver=$3

  echo "graph_title ${product}"
  echo "graph_args --upper-limit 120 -l 0"
  echo "graph_vlabel Percent or Degrees C"
  echo "graph_category NVIDIA"
  echo "graph_info This graph shows information about your ${product} graphics card ${idx} running driver version ${driver}"
  echo "GPU_UTIL_${idx}.label NVidia GPU utilization for GPU${idx}"
  echo "GPU_FANSPEED_${idx}.label NVidia fan speed for GPU${idx}"
  echo "GPU_MEM_UTIL_${idx}.label NVidia memory utilization for GPU${idx}"
  echo "GPU_TEMP_${idx}.label NVidia temperature for GPU${idx}"
}

#######################################
# Print the four metric values for one GPU.
# Arguments: $1 - GPU index, $2 - temperature, $3 - fan speed,
#            $4 - GPU utilization, $5 - memory utilization
#######################################
print_full_values() {
  local idx=$1

  echo "GPU_TEMP_${idx}.value $2"
  echo "GPU_FANSPEED_${idx}.value $3"
  echo "GPU_UTIL_${idx}.value $4"
  echo "GPU_MEM_UTIL_${idx}.value $5"
}

#######################################
# Report temperature, fan speed, GPU and memory utilization using the
# nvidia-smi -q layout handled for driver major version 270, where the value
# sits on the line(s) following the "Temperature"/"Utilization" heading.
# Globals:   EXEC (read), GPUSTART and GPU_TOTAL (read and advanced)
# Arguments: $1 - "autoconf", "config", or anything else for values
# Exits:     via autoconf_reply in autoconf mode; 0 after the last GPU in
#            config mode
#######################################
FUNCT_270() {
  local mode=$1
  local info product driver temp fan util mem_util

  # The driver version is always read from GPU 0, so fetch it once.
  driver=$("${EXEC}" -g 0 -q | grep "Driver Version" | awk '{print $4}')

  while [ "${GPU_TOTAL}" -gt 0 ]; do
    # One query per GPU; every field below is parsed from this snapshot.
    info=$("${EXEC}" -g "${GPUSTART}" -q)

    product=$(grep "Product Name" <<<"${info}" | awk -F: '{print $2}')
    temp=$(grep -A 1 "Temperature" <<<"${info}" | sed -n 2p | awk '{print $3}')
    fan=$(grep "Fan Speed" <<<"${info}" | awk '{print $4}' | awk -F% '{print $1}')
    util=$(grep -A 1 "Utilization" <<<"${info}" | sed -n 2p | awk '{print $3}')
    mem_util=$(grep -A 2 "Utilization" <<<"${info}" | sed -n 3p | awk '{print $3}')

    case "${mode}" in
      autoconf)
        autoconf_reply "${temp}"
        ;;
      config)
        print_full_config "${GPUSTART}" "${product}" "${driver}"
        if [ "${GPU_TOTAL}" -eq 1 ]; then
          exit 0
        fi
        ;;
      *)
        print_full_values "${GPUSTART}" "${temp}" "${fan}" "${util}" "${mem_util}"
        ;;
    esac

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done
}

#######################################
# Same report as FUNCT_270, for the nvidia-smi -q layout handled for driver
# major version 260, where each value is on the same line as its label.
# Globals:   EXEC (read), GPUSTART and GPU_TOTAL (read and advanced)
# Arguments: $1 - "autoconf", "config", or anything else for values
# Exits:     via autoconf_reply in autoconf mode; 0 after the last GPU in
#            config mode
#######################################
FUNCT_260() {
  local mode=$1
  local info product driver temp fan util mem_util

  # The driver version is always read from GPU 0, so fetch it once.
  driver=$("${EXEC}" -g 0 -q | grep "Driver Version" | awk '{print $4}')

  while [ "${GPU_TOTAL}" -gt 0 ]; do
    # One query per GPU; every field below is parsed from this snapshot.
    info=$("${EXEC}" -g "${GPUSTART}" -q)

    product=$(grep "Product Name" <<<"${info}" | awk -F: '{print $2}')
    temp=$(grep "Temperature" <<<"${info}" | awk '{print $3}')
    fan=$(grep "Fan Speed" <<<"${info}" | awk '{print $4}' | awk -F% '{print $1}')
    util=$(grep "Utilization" <<<"${info}" | awk '{print $3}' | awk -F% '{print $1}')
    # NOTE(review): the original derived memory utilization with exactly the
    # same pipeline as GPU utilization; kept as-is -- confirm against real
    # 260-series nvidia-smi output.
    mem_util=${util}

    case "${mode}" in
      autoconf)
        autoconf_reply "${temp}"
        ;;
      config)
        print_full_config "${GPUSTART}" "${product}" "${driver}"
        if [ "${GPU_TOTAL}" -eq 1 ]; then
          exit 0
        fi
        ;;
      *)
        print_full_values "${GPUSTART}" "${temp}" "${fan}" "${util}" "${mem_util}"
        ;;
    esac

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done
}

#######################################
# Temperature-only report for driver major version 195. The driver version
# string comes from /proc/driver/nvidia/version instead of nvidia-smi.
# Globals:   EXEC (read), GPUSTART and GPU_TOTAL (read and advanced)
# Arguments: $1 - "autoconf", "config", or anything else for values
# Exits:     via autoconf_reply in autoconf mode; 0 after the last GPU in
#            config mode
#######################################
FUNCT_195() {
  local mode=$1
  local info product driver temp

  driver=$(sed -n 1p /proc/driver/nvidia/version | awk '{print $8}')

  while [ "${GPU_TOTAL}" -gt 0 ]; do
    info=$("${EXEC}" -g "${GPUSTART}" -q)

    product=$(grep "Product Name" <<<"${info}" | awk -F: '{print $2}')
    temp=$(grep "Temperature" <<<"${info}" | awk '{print $3}')

    case "${mode}" in
      autoconf)
        autoconf_reply "${temp}"
        ;;
      config)
        echo "graph_title ${product}"
        echo "graph_args --upper-limit 120 -l 0"
        echo "graph_vlabel Degrees C"
        echo "graph_category NVIDIA"
        echo "graph_info This graph shows information about your ${product} graphics card ${GPUSTART} running driver version ${driver}"
        echo "GPU_TEMP_${GPUSTART}.label NVidia temperature for GPU${GPUSTART}"
        if [ "${GPU_TOTAL}" -eq 1 ]; then
          exit 0
        fi
        ;;
      *)
        echo "GPU_TEMP_${GPUSTART}.value ${temp}"
        ;;
    esac

    GPUSTART=$((GPUSTART + 1))
    GPU_TOTAL=$((GPU_TOTAL - 1))
  done
}

# Pick the parser for the detected driver generation. Fail with a clear
# message, instead of "command not found", when the version is unknown or
# could not be read.
HANDLER="FUNCT_${DRIVER_VERSION}"
if ! declare -F "${HANDLER}" >/dev/null; then
  if [ "$1" = "autoconf" ]; then
    echo no
    exit 1
  fi
  echo "Unsupported NVIDIA driver version '${DRIVER_VERSION}' (supported: 270, 260, 195)." >&2
  exit 1
fi

# With no GPU listed the per-GPU loops never run, so answer autoconf here.
if [ "$1" = "autoconf" ] && [ "${GPU_TOTAL}" -eq 0 ]; then
  echo no
  exit 1
fi

"${HANDLER}" "$1"