#!/usr/bin/perl
#
# Copyright and BSD license
#
# Copyright (c) 2011 NVIDIA Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation,
# advertising materials, and other materials related to such
# distribution and use acknowledge that the software was developed
# by NVIDIA Corporation. The name of the NVIDIA Corporation may not be
# used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
# WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
#
# This script collects GPU information for use as a munin plugin
# Inspired by Matthew Ritchie and Vadim Bulakh's nvidia_smi_ plugin
#
#
# This requires the NVML bindings and NVIDIA driver >= R270
# $ sudo cpan install nvidia::ml
# http://search.cpan.org/~nvbinding/nvidia-ml-pl/lib/nvidia/ml.pm
#

use strict;
use warnings;
use nvidia::ml qw(:all);

# Mode the plugin runs in: "normal" prints current values, "config" prints
# the graph description, "autoconf" answers munin's autoconfiguration probe.
my $runType  = "normal";
my @runTypes = qw( normal config autoconf );

# Exactly one command-line argument selects the mode; with any other number
# of arguments the default mode is kept.
if (scalar(@ARGV) == 1) {
    my $requested = $ARGV[0];
    my $isKnown   = grep { $_ eq $requested } @runTypes;

    # Reject anything that is not one of the supported modes.
    unless ($isKnown) {
        print "Invalid argument: $requested.\n";
        print "Valid arguments: @runTypes.\n";
        exit(1);
    }

    $runType = $requested;
}

# Stop the plugin after an NVML call that it cannot work without has failed.
#   $call - name of the NVML function that failed (used in the message only)
#   $code - the NVML return code it produced
#   $mode - the plugin's run mode
# In "autoconf" mode munin expects an answer on STDOUT and a zero exit
# status, so the failure is reported as "no (reason)". In every other mode
# the reason goes to STDERR and the exit status is 1.
sub _abortOnNvmlFailure
{
    my ($call, $code, $mode) = @_;

    if ($mode eq "autoconf")
    {
        print "no ($call failed with NVML error $code)\n";
        exit(0);
    }

    print STDERR "$call failed with NVML error $code\n";
    exit(1);
}

# Initialise NVML; nothing else can be queried without it.
my $ret = nvmlInit();
_abortOnNvmlFailure("nvmlInit", $ret, $runType) if $ret != $NVML_SUCCESS;

# Number of GPUs to iterate over.
($ret, my $gpuCount) = nvmlDeviceGetCount();
_abortOnNvmlFailure("nvmlDeviceGetCount", $ret, $runType) if $ret != $NVML_SUCCESS;

# The driver version is informational only (shown in graph_info), so a
# failure here is tolerated and replaced by a placeholder.
($ret, my $driverVersion) = nvmlSystemGetDriverVersion();
$driverVersion = "Unknown" if $ret != $NVML_SUCCESS;

# Answer munin's autoconf probe once, before touching any device, so that a
# reply is always produced -- including on machines where NVML reports no
# GPUs at all.
if ($runType eq "autoconf")
{
    print $gpuCount > 0 ? "yes\n" : "no (no NVIDIA GPUs found)\n";
    exit(0);
}

# Graph-level attributes are printed once, ahead of the per-GPU fields, so
# they no longer depend on the handle lookup for GPU 0 succeeding.
if ($runType eq "config" && $gpuCount > 0)
{
    print "graph_title GPU\n";
    print "graph_args --upper-limit 120 -l 0\n";
    print "graph_vlabel Percent or Degrees C\n";
    print "graph_category sensors\n";
    print "graph_info Information for NVIDIA GPUs using driver version $driverVersion\n";
}

# Metrics for all GPUs are collected into a single graph; every GPU
# contributes four fields suffixed with its NVML index.
for my $i (0 .. $gpuCount - 1)
{
    # A GPU whose handle cannot be obtained is skipped entirely.
    ($ret, my $handle) = nvmlDeviceGetHandleByIndex($i);
    next if $ret != $NVML_SUCCESS;

    if ($runType eq "config")
    {
        # The PCI bus id is used as the human-readable GPU name in the
        # labels; fall back to a placeholder when it cannot be read.
        ($ret, my $pciInfo) = nvmlDeviceGetPciInfo($handle);
        my $gpuName = $ret == $NVML_SUCCESS ? $pciInfo->{'busId'} : "Unknown";

        print "GPU_UTIL_$i.label GPU$i - $gpuName : GPU utilization\n";
        print "GPU_FANSPEED_$i.label GPU$i - $gpuName : fan speed\n";
        print "GPU_MEM_UTIL_$i.label GPU$i - $gpuName : GPU memory utilization\n";
        print "GPU_TEMP_$i.label GPU$i - $gpuName : GPU temperature\n";
        next;
    }

    # Normal run: a reading that cannot be obtained is reported as "U",
    # munin's marker for an unknown value.
    ($ret, my $gpuTemp) = nvmlDeviceGetTemperature($handle,
                                                   $NVML_TEMPERATURE_GPU);
    $gpuTemp = "U" if $ret != $NVML_SUCCESS;

    ($ret, my $gpuFanSpeed) = nvmlDeviceGetFanSpeed($handle);
    $gpuFanSpeed = "U" if $ret != $NVML_SUCCESS;

    ($ret, my $utilRates) = nvmlDeviceGetUtilizationRates($handle);
    my $gpuUtil;
    my $memUtil;
    if ($ret == $NVML_SUCCESS)
    {
        $gpuUtil = $utilRates->{'gpu'};
        $memUtil = $utilRates->{'memory'};
    }
    else
    {
        $gpuUtil = "U";

        # Utilization rates are unavailable: derive the memory figure from
        # the used/total memory counters instead. A zero total is treated
        # as unknown rather than dividing by it.
        ($ret, my $memory) = nvmlDeviceGetMemoryInfo($handle);
        if ($ret == $NVML_SUCCESS && $memory->{"total"} > 0)
        {
            $memUtil = $memory->{"used"} / $memory->{"total"} * 100;
        }
        else
        {
            $memUtil = "U";
        }
    }

    print "GPU_TEMP_$i.value $gpuTemp\n";
    print "GPU_FANSPEED_$i.value $gpuFanSpeed\n";
    print "GPU_UTIL_$i.value $gpuUtil\n";
    print "GPU_MEM_UTIL_$i.value $memUtil\n";
}