2
0
mirror of https://github.com/munin-monitoring/contrib.git synced 2018-11-08 00:59:34 +01:00
contrib-munin/plugins/emc/emc_vnx_file_stats

468 lines
18 KiB
Plaintext
Raw Normal View History

2016-11-14 18:16:25 +01:00
#!/bin/bash
######################################################################################################################
# Plugin to monitor NFS statistics of EMC VNX 5300 Unified Storage system #
######################################################################################################################
# Author: Evgeny Beysembaev <megabotva@gmail.com>
#####################################
# Description #
#####################################
2016-12-16 02:00:33 +01:00
# The plugin monitors basic statistics of EMC Unified Storage system Datamovers and NFS statistics of EMC VNX5300 Unified
# Storage system. Probably it can also be compatible with other Isilon or Celerra systems. It uses SSH to connect to Control
# Stations, then remotely executes '/nas/sbin/server_stats' and fetches and parses data from it. It supports gathering
# data both from active/active and active/passive Datamover configurations, ignoring offline or standby Datamovers.
# If all Datamovers are offline or absent, the plugin returns error.
# This plugin also automatically chooses Primary Control Station from the list by calling '/nasmcd/sbin/getreason' and
# '/nasmcd/sbin/t2slot'.
2016-11-14 18:16:25 +01:00
#
# At the moment data is gathered from the following statistics sources:
2016-12-16 02:00:33 +01:00
# * nfs.v3.op - Tons of timings about NFSv3 RPC calls
# * nfs.v4.op - Tons of timings about NFSv4 RPC calls
# * nfs.client - Here new Client addresses are rescanned and added automatically.
# * basic-std Statistics Group - Basic Statistics of Datamovers (eg. CPU, Memory etc.)
2016-11-14 18:16:25 +01:00
#
# It's quite easy to comment out unneeded data to make graphs less overloaded or to add new statistics sources.
2016-12-16 02:00:33 +01:00
#####################################
# Compatibility #
#####################################
# The plugin has been written to be compatible with the EMC VNX5300 Storage system, as this is the only EMC storage
# I have.
# By the way, I am pretty sure it can also work with other VNX1 storages, like VNX5100 and VNX5500.
# As for the VNX2 series, I don't know whether the plugin will be able to work with them. It may need some corrections
# in the command-line backend. The same applies to other EMC systems, so I encourage you to try and fix the plugin.
2016-11-14 18:16:25 +01:00
#####################################
# Configuration #
#####################################
# The plugin uses SSH to connect to Control Stations. It's possible to use 'nasadmin' user, but it would be better
# if you create read-only global user by Unisphere Client. The user should have only Operator role.
# I created "operator" user but due to the fact that Control Stations already had one internal "operator" user,
# the new one was called "operator1". So be careful.
#
# On munin-node side choose a user which will be used to connect through SSH. Generally user "munin" is ok. Then,
# execute "sudo su munin -s /bin/bash", "ssh-keygen" and "ssh-copy-id" to both Control Stations with newly created
# user.
#
2016-12-16 02:00:33 +01:00
# Make a link from /usr/share/munin/plugins/emc_vnx_file_stats to /etc/munin/plugins/.
# If you want to get NFS statistics, name the link as "emc_vnx_file_nfs_stats_<NAME>", otherwise to get Basic Datamover
# statistics you have to name it "emc_vnx_file_basicdm_stats_<NAME>", where <NAME> is any arbitrary name of your
# storage system. The plugin will return <NAME> in its answer as "host_name" field.
#
# For example, assume your storage system is called "VNX5300".
# Make a configuration file at /etc/munin/plugin-conf.d/emc_vnx_file_stats_VNX5300
2016-11-14 18:16:25 +01:00
#
2016-12-16 02:00:33 +01:00
# [emc_vnx_file_*]
2016-11-14 18:16:25 +01:00
# user munin # SSH Client local user
# env.username operator1 # Remote user with Operator role
# env.cs_addr 192.168.1.1 192.168.1.2 # Control Stations addresses
2016-12-16 02:00:33 +01:00
# env.nas_servers server_2 server_3 # This is the default value and can be omitted
#####################################
# Errata #
#####################################
# Since LUN names can be long, the plugin may be affected by this bug: http://munin-monitoring.org/ticket/1352
#TODO
2016-11-14 18:16:25 +01:00
#####################################
# History #
#####################################
# 08.11.2016 - First Release
2016-12-16 02:00:33 +01:00
# 17.11.2016 - NFSv4 support, Memory section
# 16.12.2016 - Merged "NFS" and "Datamover Stats" plugins
2016-11-14 18:16:25 +01:00
######################################################################################################################
# Run everything under the C locale (presumably so the tool output parsed
# further down is not localized).
export LANG=C

# The statistics type and the reported host name are both encoded in the name
# this plugin is invoked under:
#   emc_vnx_file_nfs_stats_<NAME>      -> STATSTYPE=NFS
#   emc_vnx_file_basicdm_stats_<NAME>  -> STATSTYPE=BASICDM
# Any other name is rejected with exit status 1.
plugin_name=${0##*/}
STATSTYPE=$(echo "$plugin_name" | cut -d _ -f 1-5)
case "$STATSTYPE" in
  emc_vnx_file_nfs_stats)
    STATSTYPE=NFS
    ;;
  emc_vnx_file_basicdm_stats)
    STATSTYPE=BASICDM
    ;;
  *)
    echo "Do not know what to do. Name the plugin as 'emc_vnx_file_nfs_stats_<HOSTNAME>' or 'emc_vnx_file_basicdm_stats_<HOSTNAME>'"
    exit 1
    ;;
esac

# Only the sixth '_'-separated field of the name is used; it is later printed
# as the Munin "host_name".
TARGET=$(echo "$plugin_name" | cut -d _ -f 6)
2016-11-14 18:16:25 +01:00
# Datamovers to poll; env.nas_servers overrides this default.
if [ -z "${nas_servers}" ]; then
  nas_servers="server_2 server_3"
fi

# Command template that is eval'ed once per Control Station candidate.
# It is single-quoted on purpose: $username and $CS are expanded only at eval
# time, and the escaped backticks make t2slot run on the remote side. The
# pipeline keeps the part of this station's getreason line that precedes the
# first '-'.
SSH_CHECK='ssh -q $username@$CS'
SSH_CHECK+=' "/nasmcd/sbin/getreason | grep -w slot_\`/nasmcd/sbin/t2slot\` | cut -d- -f1"'
# Munin "autoconf" probe: always answer "yes" without touching the storage.
case "$1" in
  autoconf)
    echo "yes"
    exit 0
    ;;
esac

# Both settings are mandatory (env.username / env.cs_addr in the plugin
# configuration); bail out early when either of them is missing.
[ -n "$username" ] || {
  echo "No username!"
  exit 1
}
[ -n "$cs_addr" ] || {
  echo "No control station addresses!"
  exit 1
}
# Choose the Control Station to talk to: the first address in $cs_addr whose
# SSH_CHECK result equals 10 is taken as the Primary one.
# The loop variable must be called CS because SSH_CHECK refers to $CS.
# The comparison stays numeric (-eq) so that surrounding whitespace in the
# remote output does not matter.
for CS in $cs_addr; do
  reason_code=$(eval $SSH_CHECK)
  [[ "10" -eq "$reason_code" ]] || continue
  PRIMARY_CS=$CS
  break
done

# No candidate answered with code 10: nothing can be queried.
[ -n "$PRIMARY_CS" ] || {
  echo "No alive primary Control Station from list \"$cs_addr\""
  exit 1
}
# Command prefix used for every remote call: "$SSH <command ...>" (expanded
# unquoted) logs in to the Primary Control Station, sources the remote user's
# profile and then runs <command ...>.
# The profile path is given as ~/.bash_profile instead of a fixed
# /home/operator1/... so that it follows whatever account env.username names.
# The tilde sits inside double quotes here and is not expanded when $SSH is
# word-split, so it reaches the remote shell literally and is expanded there.
SSH="ssh -q $username@$PRIMARY_CS . ~/.bash_profile; "

# Tell Munin which virtual host the following data belongs to.
echo "host_name ${TARGET}"
if [ "$1" = "config" ] ; then
for server in $nas_servers; do
$SSH nas_server -i $server | grep -q 'type *= nas'
if [ "$?" != 0 ] ; then continue; fi
nas_server_ok=TRUE
2016-12-16 02:00:33 +01:00
if [ "$STATSTYPE" = "BASICDM" ] ; then
  # Graph definitions for one datamover ($server): CPU, network, storage,
  # memory, file buffer cache and FileResolve. Everything is emitted as one
  # here-document; multigraph sections are separated by an empty line.
  cat <<EOF
multigraph emc_vnx_cpu_percent
graph_title EMC VNX 5300 Datamover CPU Util %
graph_vlabel %
graph_category cpu
graph_scale no
graph_args --upper-limit 100 -l 0
${server}_cpuutil.min 0
${server}_cpuutil.label $server CPU util. in %.

multigraph emc_vnx_network_kib
graph_title EMC VNX 5300 Datamover Network bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_net_in.graph no
${server}_net_in.label none
${server}_net_in.cdef ${server}_net_in,1000,*
${server}_net_out.label $server B/s
${server}_net_out.cdef ${server}_net_out,1000,*
${server}_net_out.negative ${server}_net_in
${server}_net_out.draw AREA

multigraph emc_vnx_storage_kib
graph_title EMC VNX 5300 Datamover Storage bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_stor_read.graph no
${server}_stor_read.label none
${server}_stor_read.cdef ${server}_stor_read,1000,*
${server}_stor_write.label $server B/s
${server}_stor_write.cdef ${server}_stor_write,1000,*
${server}_stor_write.negative ${server}_stor_read
${server}_stor_write.draw AREA

multigraph emc_vnx_memory
graph_title EMC VNX 5300 Datamover Memory
graph_vlabel MiB
graph_category memory
graph_args --base 1024
graph_order ${server}_used ${server}_free ${server}_total ${server}_freebuffer ${server}_encumbered
${server}_used.label ${server} Used
${server}_used.cdef ${server}_used,1024,/
${server}_free.label ${server} Free
${server}_free.draw STACK
${server}_free.cdef ${server}_free,1024,/
${server}_total.label ${server} Total
${server}_total.cdef ${server}_total,1024,/
${server}_freebuffer.label ${server} Free Buffer
${server}_freebuffer.cdef ${server}_freebuffer,1024,/
${server}_encumbered.label ${server} Encumbered
${server}_encumbered.cdef ${server}_encumbered,1024,/

multigraph emc_vnx_filecache
graph_title EMC VNX 5300 File Buffer Cache
graph_vlabel per second
graph_category memory
graph_args --base 1000
graph_order ${server}_highw_hits ${server}_loww_hits ${server}_w_hits ${server}_hits ${server}_lookups
${server}_highw_hits.label High Watermark Hits
${server}_loww_hits.label Low Watermark Hits
${server}_loww_hits.draw STACK
${server}_w_hits.label Watermark Hits
${server}_hits.label Hits
${server}_lookups.label Lookups

multigraph emc_vnx_fileresolve
graph_title EMC VNX 5300 FileResolve
graph_vlabel Entries
graph_category memory
graph_args --base 1000
${server}_dropped.label Dropped Entries
${server}_max.label Max Limit
${server}_used.label Used Entries
EOF
fi
if [ "$STATSTYPE" = "NFS" ] ; then
  # nfs.v3.op and nfs.v4.op data.
  # The list of operations is taken from the "member_elements" line of
  # "server_stats <server> -info nfs.<ver>.op"; the 4th dot-separated part of
  # every element becomes the field name. The same fields are declared in
  # three graphs per NFS version: calls/s, uSec per call and op percentage.
  for nfs_ver in v3 v4; do
    member_elements=$($SSH server_stats $server -info "nfs.${nfs_ver}.op" | grep member_elements | sed -ne 's/^.*= //p')
    # Quoted here-string: the value is handed to read exactly as received.
    IFS=',' read -ra graphs <<< "$member_elements"
    fields=()
    for graph in "${graphs[@]}"; do
      fields+=( "$(echo "$graph" | cut -d '.' -f4)" )
    done

    echo "multigraph vnx_emc_${nfs_ver}_calls_s
graph_title EMC VNX 5300 NFS${nfs_ver} Calls per second
graph_vlabel Calls
graph_category nfs
graph_args --base 1000"
    for field in "${fields[@]}"; do
      echo "${server}_$field.label $server $field"
    done

    echo
    echo "multigraph vnx_emc_${nfs_ver}_usec_call
graph_title EMC VNX 5300 NFS${nfs_ver} uSeconds per call
graph_vlabel uSec / call
graph_category nfs
graph_args --base 1000"
    for field in "${fields[@]}"; do
      echo "${server}_$field.label $server $field"
    done

    echo
    echo "multigraph vnx_emc_${nfs_ver}_op_percent
graph_title EMC VNX 5300 NFS${nfs_ver} Op %
graph_vlabel %
graph_scale no
graph_category nfs"
    for field in "${fields[@]}"; do
      echo "${server}_$field.label $server $field"
      echo "${server}_$field.min 0"
    done
  done

  # nfs.client data
  # Total Read Write Suspicious Total Read Write Avg
  # Ops/s Ops/s Ops/s Ops diff KiB/s KiB/s KiB/s uSec/call
  #
  # The client list is queried from the datamover currently being processed
  # ($server). It used to be hard-coded to server_2, which made every other
  # datamover advertise server_2's clients.
  member_elements=$($SSH server_stats $server -monitor nfs.client -count 1 -terminationsummary no -titles never | sed -ne 's/^.*id=//p' | cut -d' ' -f1)
  clients=()
  while IFS= read -r graph; do
    # No clients reported: do not declare nameless fields.
    [ -n "$graph" ] || continue
    # Dots are not usable in field names, so they become underscores.
    # NOTE(review): because of "sed -n ... p" an id without any dot yields an
    # empty name; kept as is so the names keep matching the value section.
    clients+=( "$(echo "$graph" | sed -ne 's/\./_/pg')" )
  done <<< "$member_elements"

  echo
  echo "multigraph vnx_emc_nfs_client_ops_s
graph_title EMC VNX 5300 NFS Client Ops/s
graph_vlabel Ops/s
graph_category nfs"
  printf 'graph_order '
  for field in "${clients[@]}"; do
    printf '%s ' "${server}_${field}_r" "${server}_${field}_w" "${server}_${field}_t" "${server}_${field}_s"
  done
  printf ' \n'
  for field in "${clients[@]}"; do
    echo "${server}_${field}_r.label $server $field Read Ops/s"
    echo "${server}_${field}_w.label $server $field Write Ops/s"
    echo "${server}_${field}_w.draw STACK"
    echo "${server}_${field}_t.label $server $field Total Ops/s"
    echo "${server}_${field}_s.label $server $field Suspicious Ops diff"
  done

  echo
  echo "multigraph vnx_emc_nfs_client_kib_s
graph_title EMC VNX 5300 NFS Client B/s
graph_vlabel B/s
graph_category nfs"
  printf 'graph_order '
  for field in "${clients[@]}"; do
    printf '%s ' "${server}_${field}_r" "${server}_${field}_w" "${server}_${field}_t"
  done
  printf ' \n'
  for field in "${clients[@]}"; do
    echo "${server}_${field}_r.label $server $field Read B/s"
    echo "${server}_${field}_r.cdef ${server}_${field}_r,1024,*"
    echo "${server}_${field}_w.label $server $field Write B/s"
    echo "${server}_${field}_w.cdef ${server}_${field}_w,1024,*"
    echo "${server}_${field}_w.draw STACK"
    echo "${server}_${field}_t.label $server $field Total B/s"
    echo "${server}_${field}_t.cdef ${server}_${field}_t,1024,*"
  done

  echo
  echo "multigraph vnx_emc_nfs_client_avg_usec
graph_title EMC VNX 5300 NFS Client Avg uSec/call
graph_vlabel uSec/call
graph_category nfs"
  for field in "${clients[@]}"; do
    echo "${server}_${field}.label $server $field Avg uSec/call"
  done
fi
2016-11-14 18:16:25 +01:00
done
if [ -z $nas_server_ok ]; then
echo "No active data movers!"
exit 1
fi
exit 0
fi
for server in $nas_servers; do
$SSH nas_server -i $server | grep -q 'type *= nas'
if [ "$?" != 0 ] ; then continue; fi
nas_server_ok=TRUE
2016-12-16 02:00:33 +01:00
if [ "$STATSTYPE" = "BASICDM" ] ; then
  # Default statistics sample of the datamover. Only output lines that do not
  # start with whitespace are kept; the line is then split on spaces and the
  # columns are addressed by position (column 0 is not used).
  basic_sample=$($SSH server_stats $server -count 1 -terminationsummary no -titles never | grep '^[^[:space:]]')
  # NUMCOL=5
  IFS=$' ' read -ra basic <<< $basic_sample
  cat <<EOF
multigraph emc_vnx_cpu_percent
${server}_cpuutil.value ${basic[1]}

multigraph emc_vnx_network_kib
${server}_net_in.value ${basic[2]}
${server}_net_out.value ${basic[3]}

multigraph emc_vnx_storage_kib
${server}_stor_read.value ${basic[4]}
${server}_stor_write.value ${basic[5]}
EOF

  # kernel.memory sample: one set of columns feeds the memory, file buffer
  # cache and FileResolve graphs.
  memory_sample=$($SSH server_stats $server -monitor kernel.memory -count 1 -terminationsummary no -titles never | grep '^[^[:space:]]')
  IFS=$' ' read -ra mem <<< $memory_sample
  cat <<EOF

multigraph emc_vnx_memory
${server}_total.value ${mem[14]}
${server}_used.value ${mem[15]}
${server}_free.value ${mem[12]}
${server}_freebuffer.value ${mem[1]}
${server}_encumbered.value ${mem[8]}

multigraph emc_vnx_filecache
${server}_highw_hits.value ${mem[2]}
${server}_loww_hits.value ${mem[6]}
${server}_w_hits.value ${mem[7]}
${server}_hits.value ${mem[4]}
${server}_lookups.value ${mem[5]}

multigraph emc_vnx_fileresolve
${server}_dropped.value ${mem[9]}
${server}_max.value ${mem[10]}
${server}_used.value ${mem[11]}
EOF
fi
if [ "$STATSTYPE" = "NFS" ] ; then
  # nfs.v3.op and nfs.v4.op data.
  # Every kept output line is "<op> c1 c2 c3 c4" once the text in front of the
  # op name has been cut off. Column 1 feeds the calls/s graph, column 3 the
  # uSec/call graph and column 4 the op-percentage graph.
  #
  # Rows are parsed one at a time. The previous implementation flattened all
  # rows into one shared word array that was not cleared after the nfs.client
  # section, so the second datamover read its values at shifted positions.
  nfs_sep=""
  for nfs_ver in v3 v4; do
    member_elements=$($SSH server_stats $server -monitor "nfs.${nfs_ver}.op" -count 1 -terminationsummary no -titles never | sed -ne "s/^.*${nfs_ver}/${nfs_ver}/p")
    rows=()
    # Quoted here-string so the line structure of the output is preserved.
    while IFS= read -r row; do
      if [ -n "$row" ]; then rows+=( "$row" ); fi
    done <<< "$member_elements"
    # Each spec is "<graph suffix>:<column index>".
    for spec in calls_s:1 usec_call:3 op_percent:4; do
      # Every multigraph section except the very first starts with an empty line.
      printf '%smultigraph vnx_emc_%s_%s\n' "$nfs_sep" "$nfs_ver" "${spec%%:*}"
      nfs_sep=$'\n'
      for row in "${rows[@]}"; do
        read -ra col <<< "$row"
        echo "${server}_${col[0]}.value ${col[${spec##*:}]}"
      done
    done
  done

  # nfs.client data
  # Columns after the client id:
  # 1 total ops/s, 2 read ops/s, 3 write ops/s, 4 suspicious ops diff,
  # 5 total KiB/s, 6 read KiB/s, 7 write KiB/s, 8 avg uSec/call
  echo
  echo "multigraph vnx_emc_nfs_client_ops_s"
  # Query the datamover being processed ($server); this used to be hard-coded
  # to server_2, so every datamover reported server_2's clients.
  member_elements=$($SSH server_stats $server -monitor nfs.client -count 1 -terminationsummary no -titles never | sed -ne 's/^.*id=//p')
  rows=()
  clients=()
  while IFS= read -r row; do
    # No clients reported: emit no values instead of nameless fields.
    [ -n "$row" ] || continue
    read -ra col <<< "$row"
    rows+=( "$row" )
    # Field names use '_' instead of '.'.
    # NOTE(review): an id without any dot yields an empty name here
    # ("sed -n ... p"); kept so the names keep matching the config section.
    clients+=( "$(echo "${col[0]}" | sed -ne 's/\./_/pg')" )
  done <<< "$member_elements"
  for i in "${!rows[@]}"; do
    read -ra col <<< "${rows[i]}"
    client=${clients[i]}
    echo "${server}_${client}_r.value ${col[2]}"
    echo "${server}_${client}_w.value ${col[3]}"
    echo "${server}_${client}_t.value ${col[1]}"
    echo "${server}_${client}_s.value ${col[4]}"
  done

  echo
  echo "multigraph vnx_emc_nfs_client_kib_s"
  for i in "${!rows[@]}"; do
    read -ra col <<< "${rows[i]}"
    client=${clients[i]}
    echo "${server}_${client}_r.value ${col[6]}"
    echo "${server}_${client}_w.value ${col[7]}"
    echo "${server}_${client}_t.value ${col[5]}"
  done

  echo
  echo "multigraph vnx_emc_nfs_client_avg_usec"
  for i in "${!rows[@]}"; do
    read -ra col <<< "${rows[i]}"
    client=${clients[i]}
    echo "${server}_${client}.value ${col[8]}"
  done
fi
2016-11-14 18:16:25 +01:00
done
if [ -z $nas_server_ok ]; then
echo "No active data movers!"
exit 1
fi
exit 0