#!/bin/bash

######################################################################################################################
#                    Plugin to monitor basic statistics of EMC VNX 5300 Unified Datamovers                           #
######################################################################################################################

# Author: Evgeny Beysembaev <megabotva@gmail.com>

#####################################
#            Description            #
#####################################

# The plugin monitors basic statistics of EMC Unified Storage system Datamovers. It is probably also compatible with
# other Isilon or Celerra systems. It uses SSH to connect to the Control Stations, then remotely executes
# /nas/sbin/server_stats and fetches and parses data from it. It supports gathering data both from active/active
# and active/passive Datamover configurations, ignoring offline or standby Datamovers. If all Datamovers are
# offline or absent, the plugin returns an error.
# This plugin also automatically chooses the Primary Control Station from the list by calling
# /nasmcd/sbin/getreason and /nasmcd/sbin/t2slot.
#
# Data is gathered from the basic-std Statistics Group.
#
# It's quite easy to comment out unneeded data to make graphs less overloaded or to add new statistics sources.

#####################################
#           Configuration           #
#####################################

# The plugin uses SSH to connect to the Control Stations. It's possible to use the 'nasadmin' user, but it would be
# better to create a read-only global user with the Unisphere Client. The user should have only the Operator role.
# I created an "operator" user, but because the Control Stations already had an internal "operator" user,
# the new one was called "operator1". So be careful.
#
# On the munin-node side, choose a user which will be used to connect through SSH. Generally user "munin" is ok.
# Then execute "sudo su munin -s /bin/bash", "ssh-keygen" and "ssh-copy-id" to both Control Stations with the newly
# created user.
#
# Make a link from /usr/share/munin/plugins/emc_vnx_dm_basic_stats to /etc/munin/plugins/emc_vnx_dm_basic_stats_<NAME>,
# where <NAME> is any arbitrary name of your storage system. The plugin will return <NAME> in its answer
# as the "host_name" field.
# Assume your storage system is called "VNX5300".
#
# Make a configuration file at /etc/munin/plugin-conf.d/emc_vnx_dm_basic_stats_VNX5300
#
# [emc_vnx_dm_basic_stats_VNX5300]
# user munin                                # SSH Client local user
# env.username operator1                    # Remote user with Operator role
# env.cs_addr 192.168.1.1 192.168.1.2       # Control Stations addresses
# env.nas_servers server_2 server_3         # This is the default value and can be omitted

#####################################
#              Errata               #
#####################################

# Since LUN names can be long, the plugin can be affected by this bug: http://munin-monitoring.org/ticket/1352

#####################################
#              History              #
#####################################

# 08.11.2016 - First Release

######################################################################################################################

# Force the C locale for every tool run from here on.
export LANG=C

#######################################
# Derive the storage system name from the name the plugin was invoked as.
# The link is expected to be called emc_vnx_dm_basic_stats_<NAME>; everything
# from the 6th underscore-separated field onwards is <NAME>, so a name that
# itself contains underscores is kept whole instead of being cut short.
# Arguments: $1 - path or file name of the plugin link
# Outputs:   <NAME> on stdout
#######################################
_target_from_plugin_name() {
  printf '%s\n' "${1##*/}" | cut -d _ -f 6-
}

# Reported back to Munin in the "host_name" line.
TARGET=$(_target_from_plugin_name "$0")

# Datamovers to query unless nas_servers is already set in the environment.
: "${nas_servers:=server_2 server_3}"

# Command template, run later through eval once $CS holds a candidate address:
# it prints the part of the getreason line for this station's own slot that
# precedes the first "-".
SSH_CHECK='ssh -q $username@$CS "/nasmcd/sbin/getreason | grep -w slot_\`/nasmcd/sbin/t2slot\` | cut -d- -f1"'

# Munin "autoconf" probe: always answer "yes" and stop before touching the
# storage system.
case "$1" in
  autoconf)
    echo "yes"
    exit 0
    ;;
esac

# Both connection settings are read from the environment; stop right away when
# either of them is empty.
[ -n "$username" ] || {
  echo "No username!"
  exit 1
}

[ -n "$cs_addr" ] || {
  echo "No control station addresses!"
  exit 1
}

# Choosing the Control Station. The code reported for the station has to be "10";
# the first address in the list that reports it becomes PRIMARY_CS.
for CS in $cs_addr; do
  # SSH_CHECK is a command template that refers to $CS and $username, which is
  # why it goes through eval. Surrounding whitespace is stripped and the result
  # is compared as a string, so a station that gives no answer (empty output)
  # is simply skipped instead of making `[` complain about a non-integer.
  cs_reason=$(eval "$SSH_CHECK" | tr -d '[:space:]')
  if [ "$cs_reason" = "10" ]; then
    PRIMARY_CS=$CS
    break
  fi
done

# Nothing can be queried unless one of the listed stations was accepted as primary.
[ -n "$PRIMARY_CS" ] || {
  echo "No alive primary Control Station from list \"$cs_addr\""
  exit 1
}

# Command prefix for everything executed on the primary Control Station. It is
# expanded unquoted by its users, so the remote shell first sources the login
# profile and then runs whatever command is appended after the ";".
# The profile path follows the configured remote user instead of being fixed to
# one account name.
# NOTE(review): assumes the remote home directory is /home/<username> — confirm
# for accounts that are laid out differently.
SSH="ssh -q $username@$PRIMARY_CS . /home/$username/.bash_profile; "

# Munin attributes all following output to this host name.
echo "host_name ${TARGET}"

# "config" mode: describe all graphs for every Datamover that is currently
# reported as "type = nas", then exit. Exits 1 when no listed Datamover
# qualifies.
if [ "$1" = "config" ]; then
  for server in $nas_servers; do
    # $SSH is a command prefix and has to be word-split, so it stays unquoted.
    # Datamovers whose nas_server info lacks "type = nas" are skipped.
    # shellcheck disable=SC2086
    if ! $SSH nas_server -i "$server" | grep -q 'type *= nas'; then
      continue
    fi
    nas_server_ok=TRUE

    # Every label carries the Datamover name so that the legends stay
    # distinguishable when more than one Datamover is graphed.
    cat <<EOF
multigraph emc_vnx_cpu_percent
graph_title EMC VNX 5300 Datamover CPU Util %
graph_vlabel %
graph_category cpu
graph_scale no
graph_args --upper-limit 100 -l 0
${server}_cpuutil.min 0
${server}_cpuutil.label $server CPU util. in %.

multigraph emc_vnx_network_kib
graph_title EMC VNX 5300 Datamover Network bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_net_in.graph no
${server}_net_in.label none
${server}_net_in.cdef ${server}_net_in,1000,*
${server}_net_out.label $server B/s
${server}_net_out.cdef ${server}_net_out,1000,*
${server}_net_out.negative ${server}_net_in
${server}_net_out.draw AREA

multigraph emc_vnx_storage_kib
graph_title EMC VNX 5300 Datamover Storage bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_stor_read.graph no
${server}_stor_read.label none
${server}_stor_read.cdef ${server}_stor_read,1000,*
${server}_stor_write.label $server B/s
${server}_stor_write.cdef ${server}_stor_write,1000,*
${server}_stor_write.negative ${server}_stor_read
${server}_stor_write.draw AREA

multigraph emc_vnx_memory
graph_title EMC VNX 5300 Datamover Memory
graph_vlabel MiB
graph_category memory
graph_args --base 1024
graph_order ${server}_used ${server}_free ${server}_total ${server}_freebuffer ${server}_encumbered
${server}_used.label ${server} Used
${server}_used.cdef ${server}_used,1024,/
${server}_free.label ${server} Free
${server}_free.draw STACK
${server}_free.cdef ${server}_free,1024,/
${server}_total.label ${server} Total
${server}_total.cdef ${server}_total,1024,/
${server}_freebuffer.label ${server} Free Buffer
${server}_freebuffer.cdef ${server}_freebuffer,1024,/
${server}_encumbered.label ${server} Encumbered
${server}_encumbered.cdef ${server}_encumbered,1024,/

multigraph emc_vnx_filecache
graph_title EMC VNX 5300 File Buffer Cache
graph_vlabel per second
graph_category memory
graph_args --base 1000
graph_order ${server}_highw_hits ${server}_loww_hits ${server}_w_hits ${server}_hits ${server}_lookups
${server}_highw_hits.label ${server} High Watermark Hits
${server}_loww_hits.label ${server} Low Watermark Hits
${server}_loww_hits.draw STACK
${server}_w_hits.label ${server} Watermark Hits
${server}_hits.label ${server} Hits
${server}_lookups.label ${server} Lookups

multigraph emc_vnx_fileresolve
graph_title EMC VNX 5300 FileResolve
graph_vlabel Entries
graph_category memory
graph_args --base 1000
${server}_dropped.label ${server} Dropped Entries
${server}_max.label ${server} Max Limit
${server}_used.label ${server} Used Entries
EOF
  done

  if [ -z "$nas_server_ok" ]; then
    echo "No active data movers!"
    exit 1
  fi
  exit 0
fi

#######################################
# Print one Munin value line. "U" (unknown) is reported when the sample is
# missing, e.g. because the remote command returned nothing.
# Arguments: $1 - field name
#            $2 - sampled value, may be empty
# Outputs:   "<field>.value <value>" on stdout
#######################################
_emit_value() {
  printf '%s.value %s\n' "$1" "${2:-U}"
}

# Fetch mode: sample every Datamover that is reported as "type = nas".
for server in $nas_servers; do
  # $SSH is a command prefix and has to be word-split, so it stays unquoted.
  # shellcheck disable=SC2086
  if ! $SSH nas_server -i "$server" | grep -q 'type *= nas'; then
    continue
  fi
  nas_server_ok=TRUE

  # One sample of the default statistics, without titles or summary.
  # shellcheck disable=SC2086
  member_elements=$($SSH server_stats "$server" -count 1 -terminationsummary no -titles never)
  # Split on any whitespace (spaces, tabs, newlines) so the column positions do
  # not depend on how a particular Bash version treats here-strings or on the
  # output being a single line.
  IFS=$' \t\n' read -r -d '' -a graphs <<< "$member_elements"

  echo "multigraph emc_vnx_cpu_percent"
  _emit_value "${server}_cpuutil" "${graphs[1]}"

  printf '\nmultigraph emc_vnx_network_kib\n'
  _emit_value "${server}_net_in" "${graphs[2]}"
  _emit_value "${server}_net_out" "${graphs[3]}"

  printf '\nmultigraph emc_vnx_storage_kib\n'
  _emit_value "${server}_stor_read" "${graphs[4]}"
  _emit_value "${server}_stor_write" "${graphs[5]}"

  # Second sample: the kernel.memory monitor. The column numbers below are the
  # positions within that sample's whitespace-separated output.
  # shellcheck disable=SC2086
  member_elements=$($SSH server_stats "$server" -monitor kernel.memory -count 1 -terminationsummary no -titles never)
  IFS=$' \t\n' read -r -d '' -a graphs <<< "$member_elements"

  printf '\nmultigraph emc_vnx_memory\n'
  _emit_value "${server}_total" "${graphs[14]}"
  _emit_value "${server}_used" "${graphs[15]}"
  _emit_value "${server}_free" "${graphs[12]}"
  _emit_value "${server}_freebuffer" "${graphs[1]}"
  _emit_value "${server}_encumbered" "${graphs[8]}"

  printf '\nmultigraph emc_vnx_filecache\n'
  _emit_value "${server}_highw_hits" "${graphs[2]}"
  _emit_value "${server}_loww_hits" "${graphs[6]}"
  _emit_value "${server}_w_hits" "${graphs[7]}"
  _emit_value "${server}_hits" "${graphs[4]}"
  _emit_value "${server}_lookups" "${graphs[5]}"

  printf '\nmultigraph emc_vnx_fileresolve\n'
  _emit_value "${server}_dropped" "${graphs[9]}"
  _emit_value "${server}_max" "${graphs[10]}"
  _emit_value "${server}_used" "${graphs[11]}"
done

# nas_server_ok is only set once at least one Datamover has been sampled; if it
# is still empty there was nothing to report. The expansion is quoted so the
# test does not depend on the one-argument form of `[`.
if [ -z "$nas_server_ok" ]; then
  echo "No active data movers!"
  exit 1
fi

exit 0