2
0
mirror of https://github.com/munin-monitoring/contrib.git synced 2018-11-08 00:59:34 +01:00
contrib-munin/plugins/other/fc-switch-ports

354 lines
14 KiB
Plaintext
Raw Normal View History

2011-03-01 14:29:11 +01:00
#!/usr/bin/env python
"""
Munin plugin which reports selected counters regarding ports on a SAN
FC-switch. Only enabled ports are considered.
The counters shown:
enc_out: Encoding errors outside FC frame.
Not as interesting as enc_out_per_mframe,
but it reflects the absolute values, instead
of being put in relation to the port's traffic.
enc_out_per_mframe: As above, but per million frames of traffic.
If there is a high number for this counter,
it could reflect:
- If there is also a high value for
rx_crcs for the port, then there is likely
a GBIC/SFP problem.
- If the value of rx_crcs for the port
is low, there is likely a cable/connector
problem.
rx_crcs: CRC errors detected in received frames.
Together with enc_out errors, CRC errors
indicate a GBIC/SFP problem.
words: FC transmission words (each word comprises
four 10-bit units). Reflects how busy the
port is.
When symlinking to the plugin, indicate hostname like this:
fc_switch_ports_HOSTNAME
# Special requirements:
# - the pysnmp module; on RHEL 6 with EPEL 6, you may simply yum-
# install it
"""
# TODOs:
# - implement snmpconf
# Munin magic markers
#%# family=manual
#%# capabilities=
# http://community.brocade.com/servlet/JiveServlet/download/5581-1453/portErrShow.pdf
# is useful when trying to understand counters on a Brocade switch.
# Author: Troels Arvin <tra@sst.dk>
# See http://troels.arvin.dk/code/munin/ for latest version.
# Only tested with Red Hat Enterprise Linux 5, currently.
# Released according to the "New BSD License" AKA the 3-clause
# BSD License:
# ====================================================================
# Copyright (c) 2011, Danish National Board of Health.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the the Danish National Board of Health nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY the Danish National Board of Health ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL the Danish National Board of Health BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ====================================================================
# $Id: fc_switch_ports_ 15423 2011-03-01 13:21:14Z tra $
import os, sys, re
from pysnmp.entity.rfc3413.oneliner import cmdgen
# Prefix shared by every name this plugin is symlinked as. Whatever
# follows the prefix in the name the plugin is called by is interpreted
# as the host_name; e.g. fc_switch_ports_sansw1 means host_name "sansw1".
my_canonical_name = 'fc_switch_ports_'

# SNMP community string used for all queries
community = 'public'

# For reference, the full OIDs of the per-port columns used below:
#   SW-MIB::swFCPortLinkState   = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.6
#   SW-MIB::swFCPortTxWords     = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.11
#   SW-MIB::swFCPortRxWords     = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.12
#   SW-MIB::swFCPortTxFrames    = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.13
#   SW-MIB::swFCPortRxFrames    = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.14
#   SW-MIB::swFCPortRxCrcs      = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.22
#   SW-MIB::swFCPortRxEncOutFrs = .1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26
# OID strings are written without a leading dot in this script.
_port_column_oid_prefix = '1.3.6.1.4.1.1588.2.1.1.1.6.2.1'

port_link_state_oidstr = _port_column_oid_prefix + '.6'

# Counter name => OID string. Each OID is the shared prefix followed by
# the column number shown in the reference list above.
oidstrs = dict(
    (counter_name, '%s.%d' % (_port_column_oid_prefix, column))
    for counter_name, column in (
        ('rx_crcs', 22),
        ('enc_out', 26),
        ('tx_words', 11),
        ('rx_words', 12),
        ('tx_frames', 13),
        ('rx_frames', 14),
    )
)

# Graphed counter type => text inserted into the graph_info lines.
# The keys of this dict are the counter types that get graphs.
descriptions = {}
descriptions['rx_crcs'] = 'the number of CRC errors detected for frames received'
descriptions['enc_out'] = 'encoding errors outside FC frame'
descriptions['enc_out_per_mframe'] = 'enc errors outside FC frame, per million frames of rx+tx trafic'
descriptions['words'] = 'transmitted/received words'

# These counter types don't distinguish between tx and rx: they are
# reported as a single "count" field instead of a tx/rx pair.
combined_tx_rx_countertypes = ['rx_crcs', 'enc_out', 'enc_out_per_mframe']
# Some helper functions:
def bailout(msg):
    """Report a fatal error and terminate the plugin.

    Writes msg, followed by a newline, to standard error and then exits
    the process with status 1. This function does not return.
    """
    line = msg + "\n"
    sys.stderr.write(line)
    raise SystemExit(1)
def debug(msg):
    """Print msg on standard output, prefixed with 'Debug: '.

    The printed text ends with an extra newline, so each debug message
    is followed by an empty line.

    msg may be any object; it is rendered with str(). It is wrapped in a
    one-element tuple before formatting so that a tuple argument is
    printed as a whole instead of being unpacked as format arguments.
    """
    print('Debug: %s\n' % (msg,))
def oidstr2tuple(oidstr):
    """Break a dotted OID string into a tuple of integer elements.

    Example: '1.3.6.1' becomes (1, 3, 6, 1).

    A single leading dot (as in '.1.3.6.1', the notation used by the
    reference comments in this file) is tolerated and ignored.

    Raises ValueError if any element is not a decimal integer; this
    includes the empty string.
    """
    if oidstr.startswith('.'):
        oidstr = oidstr[1:]
    return tuple([int(element) for element in oidstr.split('.')])
def get_ObjectName_subtree(obj):
    """Return obj without its last element.

    For an object name such as 1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26.1 the
    result is 1.3.6.1.4.1.1588.2.1.1.1.6.2.1.26, i.e. the subtree that
    the object lives in.
    """
    return obj[:-1]
def varBindTable2plainDict(varBindTable):
    """Convert a table of SNMP variable bindings to a plain dict.

    Only the first binding of each row is looked at. The subtree of the
    very first object name in the table is taken as the wanted one; rows
    whose object name lies in another subtree (excess data which bulkCmd
    may generate) are skipped.

    Returns a dict mapping the last element of each object name (the
    port number) to the bound value converted with int().

    varBindTable must contain at least one row.
    """
    expected_subtree = get_ObjectName_subtree(varBindTable[0][0][0])

    plain = {}
    for row in varBindTable:
        first_binding = row[0]
        if get_ObjectName_subtree(first_binding[0]) == expected_subtree:
            # The last OID element identifies the port
            plain[first_binding[0][-1]] = int(first_binding[1])
    return plain
# The more interesting functions:
def _print_field_config(counter_type, per_port):
    """Print the vlabel and field lines of one graph's configuration.

    counter_type is one of the keys of descriptions. per_port selects
    the variant used for the per-port graphs, which differs from the
    totals variant only for tx/rx graphs (an extra graph_order line and
    a longer rx.info text).
    """
    if counter_type not in combined_tx_rx_countertypes:
        # This graph has an in/out aspect: a tx/rx field pair.
        # NOTE(review): rx is labelled 'tx', and tx is the field that
        # gets hidden and drawn as the negative of rx, while the vlabel
        # says "in (-) / out (+)" - confirm that this is intended.
        print('graph_vlabel units in (-) / out (+) per ${graph_period}')
        if per_port:
            print('graph_order tx rx')
        for line in (
            'tx.label tx',
            'tx.graph no',
            'tx.min 0',
            'rx.label tx',
            'rx.negative tx',
            'rx.min 0',
        ):
            print(line)
        if per_port:
            print('rx.info units transmitted/received by this interface')
        else:
            print('rx.info units transmitted/received')
        return

    # Single-valued graph
    for line in ('graph_vlabel count', 'count.label count', 'count.min 0'):
        print(line)
    # enc_out_per_mframe is special; others will just get the implied
    # default (GAUGE)
    if counter_type == 'enc_out_per_mframe':
        print('count.type COUNTER')


def print_config(host_name, enabled_ports):
    """Honor the munin API's "config" command.

    Prints, on standard output, the host_name line followed by the
    multigraph configuration of:
      - one graph per counter type and enabled port
        (multigraph <counter_type>.port_<portnum>), and then
      - one graph per counter type for the totals
        (multigraph <counter_type>).

    host_name:     name of the switch, used in host_name and graph titles
    enabled_ports: iterable of port numbers to emit per-port graphs for
    """
    print('host_name %s' % host_name)

    # Per-port graphs
    for counter_type in descriptions:
        info = descriptions[counter_type]
        for portnum in enabled_ports:
            print('multigraph %s.port_%d' % (counter_type, portnum))
            print('graph_title Port %d %s' % (portnum, counter_type))
            print('graph_args --base 1000 -l 0')
            print('graph_category SAN')
            print('graph_info This graph shows the count of %s' % info)
            _print_field_config(counter_type, True)

    # Graphs of totals
    for counter_type in descriptions:
        print('multigraph %s' % counter_type)
        print('graph_title %s total %s' % (host_name, counter_type))
        print('graph_args --base 1000 -l 0')
        print('graph_category SAN')
        print('graph_info This graph shows the total count of %s across all ports' % descriptions[counter_type])
        _print_field_config(counter_type, False)
def get_enabled_ports(host_name):
    """Return a list of the port numbers of the enabled ports.

    Walks the port link state column on host_name and keeps the ports
    whose state is 1 (status 1 means enabled); disabled ports are of no
    interest to this plugin.
    """
    link_states = get_port_values(host_name, port_link_state_oidstr)

    enabled = []
    for portnum, state in link_states.items():
        if state == 1:
            enabled.append(portnum)
    return enabled
def pull_values(host_name, oid_start_tpl):
    """Walk an SNMP subtree on host_name and return the raw result.

    Talks to the SNMP agent on UDP port 161 of host_name, using the
    module-level community string, performing the equivalent of an
    snmpwalk from the starting point indicated by the oid_start_tpl
    tuple.

    Returns the varBindTable delivered by pysnmp's bulkCmd.

    Any failure (an exception from pysnmp, an error indication, an error
    status, or an empty result) is reported through bailout(), which
    terminates the plugin with exit status 1.
    """
    # Dotted form of the starting point; used in error messages only.
    oid_start_str = '.'.join([str(element) for element in oid_start_tpl])

    try:
        # NOTE(review): 300 and 0 are passed in the positions that
        # pysnmp's one-liner bulkCmd names nonRepeaters and
        # maxRepetitions - confirm that this order is intended.
        errorIndication, errorStatus, errorIndex, varBindTable = \
            cmdgen.CommandGenerator().bulkCmd(
                cmdgen.CommunityData('whatever', community),
                cmdgen.UdpTransportTarget((host_name, 161)),
                300, 0,
                oid_start_tpl
            )
    except Exception:
        # sys.exc_info() is used instead of "except Exception, e" or
        # "except Exception as e" because it is valid syntax on every
        # Python version.
        bailout("Walking %s threw exception: %s" % (oid_start_str, sys.exc_info()[1]))

    # errorIndication reports engine/transport level problems (such as
    # a timeout), so it is checked before the agent-reported errorStatus.
    if errorIndication:
        bailout("Walking %s failed with errorIndication=%s" % (oid_start_str, errorIndication))
    if errorStatus:
        bailout("Walking %s failed: %s" % (oid_start_str, errorStatus.prettyPrint()))
    if not varBindTable:
        bailout("Empty result from walk of %s" % oid_start_str)

    return varBindTable
def get_port_values(host_name, oid_start_str):
    """Return a dict of port-number => count for one SNMP column.

    Combines oidstr2tuple, pull_values and varBindTable2plainDict: the
    dotted OID string oid_start_str is turned into a tuple, the subtree
    below it is walked on host_name, and the result is flattened into a
    plain dict.
    """
    start_oid = oidstr2tuple(oid_start_str)
    walked_table = pull_values(host_name, start_oid)
    return varBindTable2plainDict(walked_table)
# Initial sanity check
n_args = len(sys.argv)
if n_args > 2:
    # At most one arg expected. sys.argv[0] is the name the plugin was
    # called by, so the number of arguments actually given is n_args - 1.
    bailout('%d arguments given - expecting only one' % (n_args - 1))

# Make sure that multigraphs are supported
if 'MUNIN_CAP_MULTIGRAPH' not in os.environ:
    bailout('MUNIN_CAP_MULTIGRAPH not found in environment')

# Parse host_name from arg0: it is whatever follows my_canonical_name
# in the name that the plugin was called by.
called_as = os.path.basename(sys.argv[0])
regex_str = '^' + my_canonical_name + '(.+)'
match = re.match(regex_str, called_as)
if match:
    host_name = match.group(1)
else:
    bailout('Missing host_name and/or counter type')

enabled_ports = get_enabled_ports(host_name)

# See how we were called
if n_args == 2:
    # An argument was given, so let's not simply print
    # values.
    arg = sys.argv[1]
    if arg == 'config':
        print_config(host_name, enabled_ports)
        sys.exit(0)
    if arg != 'fetch':
        # bailout() terminates the plugin with exit status 1
        bailout("Unknown argument '%s'" % arg)

# Handle the default case (fetch)

# Prepare some structures: one SNMP walk per raw counter, each yielding
# a dict of port-number => count.
counters = {}
for counter_name in ('rx_crcs', 'enc_out',
                     'tx_frames', 'rx_frames',
                     'tx_words', 'rx_words'):
    counters[counter_name] = get_port_values(host_name, oidstrs[counter_name])

totals = {}
for total_name in ('rx_crcs', 'enc_out', 'enc_out_per_mframe',
                   'tx_frames', 'rx_frames',
                   'tx_words', 'rx_words'):
    totals[total_name] = 0

# Special handling of enc_out per million frames: it is not read from
# the switch, but derived from enc_out and the tx+rx frame counts.
counters['enc_out_per_mframe'] = {}
for portnum in counters['tx_frames']:
    frames = counters['tx_frames'][portnum] + counters['rx_frames'][portnum]
    if frames > 0:
        # Floor division keeps the value an integer on every Python
        # version; it is printed with %d below.
        counters['enc_out_per_mframe'][portnum] = 1000000 * counters['enc_out'][portnum] // frames
    else:
        # No traffic seen on the port: avoid dividing by zero
        counters['enc_out_per_mframe'][portnum] = 0

# Per-port values; the totals are accumulated along the way
for portnum in enabled_ports:
    for counter_type in descriptions:
        print('multigraph %s.port_%d' % (counter_type, portnum))

        # for some of the graphs, there is an in/out aspect
        if counter_type in combined_tx_rx_countertypes:
            # NOTE(review): for enc_out_per_mframe this makes the total
            # a sum of per-port ratios - confirm that this is intended.
            print('count.value %d' % counters[counter_type][portnum])
            totals[counter_type] += counters[counter_type][portnum]
        else:
            tx_key = 'tx_%s' % counter_type
            rx_key = 'rx_%s' % counter_type
            tx_value = counters[tx_key][portnum]
            rx_value = counters[rx_key][portnum]
            print('tx.value %d' % tx_value)
            print('rx.value %d' % rx_value)
            totals[tx_key] += tx_value
            totals[rx_key] += rx_value

# Totals
for counter_type in descriptions:
    print('multigraph %s' % (counter_type))

    # for some of the graphs, there is an in/out aspect
    if counter_type in combined_tx_rx_countertypes:
        print('count.value %d' % totals[counter_type])
    else:
        tx_key = 'tx_%s' % counter_type
        rx_key = 'rx_%s' % counter_type
        print('tx.value %d' % totals[tx_key])
        print('rx.value %d' % totals[rx_key])