2
0
mirror of https://github.com/munin-monitoring/contrib.git synced 2018-11-08 00:59:34 +01:00
contrib-munin/plugins/emc/emc_vnx_file_
2016-12-26 23:31:37 +03:00

517 lines
18 KiB
Bash
Executable File

#!/bin/bash
: <<=cut
=head1 NAME
emc_vnx_file_stats - Plugin to monitor Basic, NFSv3 and NFSv4 statistics of EMC VNX 5300 Unified Storage system's Datamovers
=head1 AUTHOR
Evgeny Beysembaev <megabotva@gmail.com>
=head1 LICENSE
GPLv2
=head1 MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf suggest
=head1 DESCRIPTION
The plugin monitors basic statistics of EMC Unified Storage system Datamovers and NFS statistics of EMC VNX5300 Unified
Storage system. Probably it can also be compatible with other Isilon or Celerra systems. It uses SSH to connect to Control
Stations, then remotely executes '/nas/sbin/server_stats' and fetches and parses data from it. It supports gathering
data both from active/active and active/passive Datamover configurations, ignoring offline or standby Datamovers.
If all Datamovers are offline or absent, the plugin returns an error.
This plugin also automatically chooses Primary Control Station from the list by calling '/nasmcd/sbin/getreason' and
'/nasmcd/sbin/t2slot'.
At the moment data is gathered from the following statistics sources:
* nfs.v3.op - Tons of timings about NFSv3 RPC calls
* nfs.v4.op - Tons of timings about NFSv4 RPC calls
* nfs.client - Here new Client addresses are rescanned and added automatically.
* basic-std Statistics Group - Basic Statistics of Datamovers (eg. CPU, Memory etc.)
It's quite easy to comment out unneeded data to make graphs less overloaded or to add new statistics sources.
=head1 COMPATIBILITY
The plugin has been written to be compatible with the EMC VNX5300 Storage system, as this is the only EMC storage system
I have.
I am pretty sure it can also work with other VNX1 systems, like the VNX5100 and VNX5500.
As for the VNX2 series, I don't know whether the plugin will work with them. It may need some corrections
in the command-line backend. The same applies to other EMC systems, so I encourage you to try it and fix the plugin.
=head1 CONFIGURATION
The plugin uses SSH to connect to Control Stations. It's possible to use the 'nasadmin' user, but it would be better
to create a read-only global user with the Unisphere Client. The user should have only the Operator role.
I created an "operator" user, but because the Control Stations already had an internal "operator" user,
the new one was named "operator1". So be careful.
On munin-node side choose a user which will be used to connect through SSH. Generally user "munin" is ok. Then,
execute "sudo su munin -s /bin/bash", "ssh-keygen" and "ssh-copy-id" to both Control Stations with newly created
user.
Make a link from /usr/share/munin/plugins/emc_vnx_file_stats to /etc/munin/plugins/.
If you want to get NFS statistics, name the link as "emc_vnx_file_nfs_stats_<NAME>", otherwise to get Basic Datamover
statistics you have to name it "emc_vnx_file_basicdm_stats_<NAME>", where <NAME> is any arbitrary name of your
storage system. The plugin will return <NAME> in its answer as "host_name" field.
For example, assume your storage system is called "VNX5300".
Make a configuration file at /etc/munin/plugin-conf.d/emc_vnx_file_stats_VNX5300
[emc_vnx_file_*]
user munin # SSH Client local user
env.username operator1 # Remote user with Operator role
env.cs_addr 192.168.1.1 192.168.1.2 # Control Stations addresses
env.nas_servers server_2 server_3 # This is the default value and can be omitted
=head1 HISTORY
08.11.2016 - First Release
17.11.2016 - NFSv4 support, Memory section
16.12.2016 - Merged "NFS" and "Datamover Stats" plugins
26.12.2016 - Compatibility with Munin coding style
=cut
# Set LANG to "C" and export it to every command started by this script.
# NOTE(review): presumably this keeps the output of the local text tools used
# below (grep, sed, cut, wc) independent of the caller's locale - confirm.
export LANG=C
#######################################
# Ask one Control Station for its own status code.
# Runs '/nasmcd/sbin/getreason' on the remote host over SSH, keeps only the
# line that mentions the host's own slot (as printed by '/nasmcd/sbin/t2slot')
# and prints the part of that line before the first '-'.
# Globals:   username (read) - remote SSH user
# Arguments: $1 - address of the Control Station to query
# Outputs:   whatever the remote pipeline prints, on stdout
# Returns:   exit status of the ssh command
#######################################
ssh_check() {
  # The destination is quoted so it always reaches ssh as exactly one
  # argument. The remote command is single-quoted on purpose: the backticks
  # must be evaluated by the remote shell, not by the local one.
  ssh -q "${username}@${1}" \
    '/nasmcd/sbin/getreason | grep -w slot_`/nasmcd/sbin/t2slot` | cut -d- -f1'
}
#######################################
# Validate the plugin configuration and pick the primary Control Station.
# Globals:   username   (read)    - remote SSH user, must be non-empty
#            cs_addr    (read)    - whitespace-separated Control Station list
#            CS         (written) - loop variable, last address tried
#            PRIMARY_CS (written) - first address whose status code is "10"
# Arguments: none
# Outputs:   a one-line reason on stdout when the check fails
# Returns:   0 when a primary Control Station was found, 1 otherwise
#######################################
check_conf () {
  local reason

  if [ -z "$username" ]; then
    echo "No username ('username' environment variable)!"
    return 1
  fi

  if [ -z "$cs_addr" ]; then
    echo "No control station addresses ('cs_addr' environment variable)!"
    return 1
  fi

  # Choosing the Control Station: the code reported by ssh_check has to be
  # "10". The reply is compared as a string (after dropping whitespace)
  # instead of with an arithmetic operator, so that unexpected remote output
  # is simply "not primary" rather than being evaluated as an expression.
  # cs_addr is deliberately unquoted: it is a whitespace-separated list.
  # shellcheck disable=SC2086
  for CS in $cs_addr; do
    reason=$(ssh_check "$CS")
    reason=${reason//[[:space:]]/}
    if [ "$reason" = "10" ]; then
      PRIMARY_CS=$CS
      break
    fi
  done

  if [ -z "$PRIMARY_CS" ]; then
    echo "No alive primary Control Station from list \"$cs_addr\""
    return 1
  fi

  return 0
}
# Munin housekeeping modes selected by the first argument. Both modes end the
# script with exit status 0; any other argument falls through.
case "$1" in
  autoconf)
    # "yes" when check_conf succeeds, otherwise "no" followed by the message
    # check_conf printed.
    if autoconf_reason=$(check_conf); then
      echo "yes"
    else
      echo "no ($autoconf_reason)"
    fi
    exit 0
    ;;
  suggest)
    # The two statistics sets this plugin can be linked as.
    echo "nfs_stats"
    echo "basicdm_stats"
    exit 0
    ;;
esac
# ---------------------------------------------------------------------------
# Derive the requested statistics set and the reported host name from the
# name this plugin was invoked under, check the configuration and prepare the
# SSH command prefix used by the rest of the script.
#   emc_vnx_file_nfs_stats_<NAME>     -> STATSTYPE=NFS
#   emc_vnx_file_basicdm_stats_<NAME> -> STATSTYPE=BASICDM
# ---------------------------------------------------------------------------
plugin_name=${0##*/}

# The first five '_'-separated tokens of the link name select the mode.
STATSTYPE=$(echo "$plugin_name" | cut -d _ -f 1-5)
case "$STATSTYPE" in
  emc_vnx_file_nfs_stats)
    STATSTYPE=NFS
    ;;
  emc_vnx_file_basicdm_stats)
    STATSTYPE=BASICDM
    ;;
  *)
    echo "Do not know what to do. Name the plugin as 'emc_vnx_file_nfs_stats_<HOSTNAME>' or 'emc_vnx_file_basicdm_stats_<HOSTNAME>'"
    exit 1
    ;;
esac

# NOTE: only the sixth '_'-separated token is used, so a <NAME> that itself
# contains '_' is cut at its first underscore.
TARGET=$(echo "$plugin_name" | cut -d _ -f 6)

# Datamovers to inspect unless the environment says otherwise.
: "${nas_servers:=server_2 server_3}"

# check_conf prints the reason itself and sets PRIMARY_CS on success.
check_conf || exit 1

# Command prefix for everything executed on the primary Control Station. It
# is expanded unquoted later on ("$SSH cmd args"), so ssh receives
# ". ~/.bash_profile; cmd args" as the remote command line.
# The profile of the *connecting* user is sourced: '~' sits inside double
# quotes here, so it is not expanded locally and is resolved by the remote
# shell. (The path used to be hard-coded to /home/operator1, which only
# worked when 'username' was operator1.)
SSH="ssh -q $username@$PRIMARY_CS . ~/.bash_profile; "

echo "host_name ${TARGET}"
# ---------------------------------------------------------------------------
# Munin "config" mode: print the graph definitions for every Datamover in
# $nas_servers that reports 'type = nas'. The helpers below each print one
# group of multigraph sections for a single Datamover; they run remote
# commands through the $SSH prefix, which is expanded unquoted on purpose
# because it holds a command together with its leading arguments.
# ---------------------------------------------------------------------------

#######################################
# Print the graph definitions of the basic Datamover statistics
# (CPU, network, storage, memory, file cache, FileResolve).
# Arguments: $1 - Datamover name, used as field-name prefix
# Outputs:   Munin config lines on stdout
#######################################
emc_vnx_config_basicdm() {
  local server=$1
  cat <<EOF
multigraph emc_vnx_cpu_percent
graph_title EMC VNX 5300 Datamover CPU Util %
graph_vlabel %
graph_category cpu
graph_scale no
graph_args --upper-limit 100 -l 0
${server}_cpuutil.min 0
${server}_cpuutil.label $server CPU util. in %.
multigraph emc_vnx_network_kib
graph_title EMC VNX 5300 Datamover Network bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_net_in.graph no
${server}_net_in.label none
${server}_net_in.cdef ${server}_net_in,1000,*
${server}_net_out.label $server B/s
${server}_net_out.cdef ${server}_net_out,1000,*
${server}_net_out.negative ${server}_net_in
${server}_net_out.draw AREA
multigraph emc_vnx_storage_kib
graph_title EMC VNX 5300 Datamover Storage bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_stor_read.graph no
${server}_stor_read.label none
${server}_stor_read.cdef ${server}_stor_read,1000,*
${server}_stor_write.label $server B/s
${server}_stor_write.cdef ${server}_stor_write,1000,*
${server}_stor_write.negative ${server}_stor_read
${server}_stor_write.draw AREA
multigraph emc_vnx_memory
graph_title EMC VNX 5300 Datamover Memory
graph_vlabel MiB
graph_category memory
graph_args --base 1024
graph_order ${server}_used ${server}_free ${server}_total ${server}_freebuffer ${server}_encumbered
${server}_used.label ${server} Used
${server}_used.cdef ${server}_used,1024,/
${server}_free.label ${server} Free
${server}_free.draw STACK
${server}_free.cdef ${server}_free,1024,/
${server}_total.label ${server} Total
${server}_total.cdef ${server}_total,1024,/
${server}_freebuffer.label ${server} Free Buffer
${server}_freebuffer.cdef ${server}_freebuffer,1024,/
${server}_encumbered.label ${server} Encumbered
${server}_encumbered.cdef ${server}_encumbered,1024,/
multigraph emc_vnx_filecache
graph_title EMC VNX 5300 File Buffer Cache
graph_vlabel per second
graph_category memory
graph_args --base 1000
graph_order ${server}_highw_hits ${server}_loww_hits ${server}_w_hits ${server}_hits ${server}_lookups
${server}_highw_hits.label High Watermark Hits
${server}_loww_hits.label Low Watermark Hits
${server}_loww_hits.draw STACK
${server}_w_hits.label Watermark Hits
${server}_hits.label Hits
${server}_lookups.label Lookups
multigraph emc_vnx_fileresolve
graph_title EMC VNX 5300 FileResolve
graph_vlabel Entries
graph_category memory
graph_args --base 1000
${server}_dropped.label Dropped Entries
${server}_max.label Max Limit
${server}_used.label Used Entries
EOF
}

#######################################
# Print the three per-operation graphs (calls/s, uSec per call, op %) of one
# NFS protocol version. The operation names come from the 'member_elements'
# line of 'server_stats <server> -info nfs.v<N>.op'; the fourth '.'-separated
# token of every listed element becomes the field name.
# Globals:   SSH (read)
# Arguments: $1 - Datamover name
#            $2 - NFS version digit (3 or 4)
# Outputs:   Munin config lines on stdout
#######################################
emc_vnx_config_nfs_ops() {
  local server=$1 ver=$2
  local member_elements element field
  local -a elements=() fields=()

  # shellcheck disable=SC2086
  member_elements=$($SSH server_stats "$server" -info "nfs.v${ver}.op" \
    | grep member_elements | sed -ne 's/^.*= //p')
  # Quoted here-string: the list must reach 'read' unmodified.
  IFS=',' read -ra elements <<< "$member_elements"

  # Resolve each field name once instead of once per graph.
  for element in "${elements[@]}"; do
    field=$(echo "$element" | cut -d '.' -f4)
    if [ -n "$field" ]; then
      fields+=("$field")
    fi
  done

  cat <<EOF
multigraph vnx_emc_v${ver}_calls_s
graph_title EMC VNX 5300 NFSv${ver} Calls per second
graph_vlabel Calls
graph_category nfs
graph_args --base 1000
EOF
  for field in "${fields[@]}"; do
    echo "${server}_${field}.label $server $field"
  done

  cat <<EOF
multigraph vnx_emc_v${ver}_usec_call
graph_title EMC VNX 5300 NFSv${ver} uSeconds per call
graph_vlabel uSec / call
graph_category nfs
graph_args --base 1000
EOF
  for field in "${fields[@]}"; do
    echo "${server}_${field}.label $server $field"
  done

  cat <<EOF
multigraph vnx_emc_v${ver}_op_percent
graph_title EMC VNX 5300 NFSv${ver} Op %
graph_vlabel %
graph_scale no
graph_category nfs
EOF
  for field in "${fields[@]}"; do
    echo "${server}_${field}.label $server $field"
    echo "${server}_${field}.min 0"
  done
}

#######################################
# Print the per-client NFS graphs (Ops/s, B/s, average uSec per call).
# Client ids are the text following 'id=' (up to the first space) in the
# output of 'server_stats <server> -monitor nfs.client'; dots in an id are
# replaced by underscores to build the field name.
# The Datamover being configured is the one that is queried (this used to be
# hard-coded to server_2), ids without any dot are kept, and no client is
# invented when the remote command prints nothing.
# Globals:   SSH (read)
# Arguments: $1 - Datamover name
# Outputs:   Munin config lines on stdout
#######################################
emc_vnx_config_nfs_clients() {
  local server=$1
  local id field order
  local -a ids=() fields=()

  # Columns of the remote output, after the id:
  #   Total  Read   Write  Suspicious  Total  Read   Write  Avg
  #   Ops/s  Ops/s  Ops/s  Ops diff    KiB/s  KiB/s  KiB/s  uSec/call
  # shellcheck disable=SC2086
  mapfile -t ids < <($SSH server_stats "$server" -monitor nfs.client \
    -count 1 -terminationsummary no -titles never \
    | sed -ne 's/^.*id=//p' | cut -d' ' -f1)
  for id in "${ids[@]}"; do
    if [ -n "$id" ]; then
      fields+=("${id//./_}")
    fi
  done

  cat <<EOF
multigraph vnx_emc_nfs_client_ops_s
graph_title EMC VNX 5300 NFS Client Ops/s
graph_vlabel Ops/s
graph_category nfs
EOF
  order=
  for field in "${fields[@]}"; do
    order+=" ${server}_${field}_r ${server}_${field}_w ${server}_${field}_t ${server}_${field}_s"
  done
  if [ -n "$order" ]; then
    echo "graph_order$order"
  fi
  for field in "${fields[@]}"; do
    echo "${server}_${field}_r.label $server $field Read Ops/s"
    echo "${server}_${field}_w.label $server $field Write Ops/s"
    echo "${server}_${field}_w.draw STACK"
    echo "${server}_${field}_t.label $server $field Total Ops/s"
    echo "${server}_${field}_s.label $server $field Suspicious Ops diff"
  done

  cat <<EOF
multigraph vnx_emc_nfs_client_kib_s
graph_title EMC VNX 5300 NFS Client B/s
graph_vlabel B/s
graph_category nfs
EOF
  order=
  for field in "${fields[@]}"; do
    order+=" ${server}_${field}_r ${server}_${field}_w ${server}_${field}_t"
  done
  if [ -n "$order" ]; then
    echo "graph_order$order"
  fi
  for field in "${fields[@]}"; do
    echo "${server}_${field}_r.label $server $field Read B/s"
    echo "${server}_${field}_r.cdef ${server}_${field}_r,1024,*"
    echo "${server}_${field}_w.label $server $field Write B/s"
    echo "${server}_${field}_w.cdef ${server}_${field}_w,1024,*"
    echo "${server}_${field}_w.draw STACK"
    echo "${server}_${field}_t.label $server $field Total B/s"
    echo "${server}_${field}_t.cdef ${server}_${field}_t,1024,*"
  done

  cat <<EOF
multigraph vnx_emc_nfs_client_avg_usec
graph_title EMC VNX 5300 NFS Client Avg uSec/call
graph_vlabel uSec/call
graph_category nfs
EOF
  for field in "${fields[@]}"; do
    echo "${server}_${field}.label $server $field Avg uSec/call"
  done
}

if [ "$1" = "config" ]; then
  nas_server_ok=
  # nas_servers is a whitespace-separated list, hence unquoted.
  # shellcheck disable=SC2086
  for server in $nas_servers; do
    # Only Datamovers whose 'nas_server -i' output contains 'type = nas'
    # are graphed; all others are skipped.
    $SSH nas_server -i "$server" | grep -q 'type *= nas' || continue
    nas_server_ok=TRUE

    case "$STATSTYPE" in
      BASICDM)
        emc_vnx_config_basicdm "$server"
        ;;
      NFS)
        emc_vnx_config_nfs_ops "$server" 3
        emc_vnx_config_nfs_ops "$server" 4
        emc_vnx_config_nfs_clients "$server"
        ;;
    esac
  done

  if [ -z "$nas_server_ok" ]; then
    echo "No active data movers!"
    exit 1
  fi
  exit 0
fi
# ---------------------------------------------------------------------------
# Default (fetch) mode: print the current values for every Datamover in
# $nas_servers that reports 'type = nas'. Remote commands are run through the
# $SSH prefix, which is expanded unquoted on purpose because it holds a
# command together with its leading arguments. A value that is missing from
# the remote output is reported as "U" (Munin's "unknown") instead of as an
# empty string.
# ---------------------------------------------------------------------------

#######################################
# Print the basic Datamover values (CPU, network, storage, memory, file
# cache, FileResolve). Each value is one space-separated column of the first
# non-indented line that server_stats prints; the column-to-field mapping is
# positional.
# NOTE(review): the column positions depend on the server_stats output
# layout of the storage system - verify when porting to another model.
# Globals:   SSH (read)
# Arguments: $1 - Datamover name, used as field-name prefix
# Outputs:   Munin value lines on stdout
#######################################
emc_vnx_fetch_basicdm() {
  local server=$1
  local sample
  local -a col=()

  # shellcheck disable=SC2086
  sample=$($SSH server_stats "$server" -count 1 -terminationsummary no -titles never \
    | grep '^[^[:space:]]')
  IFS=' ' read -ra col <<< "$sample"

  echo "multigraph emc_vnx_cpu_percent"
  echo "${server}_cpuutil.value ${col[1]:-U}"

  printf '\nmultigraph emc_vnx_network_kib\n'
  echo "${server}_net_in.value ${col[2]:-U}"
  echo "${server}_net_out.value ${col[3]:-U}"

  printf '\nmultigraph emc_vnx_storage_kib\n'
  echo "${server}_stor_read.value ${col[4]:-U}"
  echo "${server}_stor_write.value ${col[5]:-U}"

  # shellcheck disable=SC2086
  sample=$($SSH server_stats "$server" -monitor kernel.memory -count 1 -terminationsummary no -titles never \
    | grep '^[^[:space:]]')
  col=()
  IFS=' ' read -ra col <<< "$sample"

  printf '\nmultigraph emc_vnx_memory\n'
  echo "${server}_total.value ${col[14]:-U}"
  echo "${server}_used.value ${col[15]:-U}"
  echo "${server}_free.value ${col[12]:-U}"
  echo "${server}_freebuffer.value ${col[1]:-U}"
  echo "${server}_encumbered.value ${col[8]:-U}"

  printf '\nmultigraph emc_vnx_filecache\n'
  echo "${server}_highw_hits.value ${col[2]:-U}"
  echo "${server}_loww_hits.value ${col[6]:-U}"
  echo "${server}_w_hits.value ${col[7]:-U}"
  echo "${server}_hits.value ${col[4]:-U}"
  echo "${server}_lookups.value ${col[5]:-U}"

  printf '\nmultigraph emc_vnx_fileresolve\n'
  echo "${server}_dropped.value ${col[9]:-U}"
  echo "${server}_max.value ${col[10]:-U}"
  echo "${server}_used.value ${col[11]:-U}"
}

#######################################
# Print the per-operation values of one NFS protocol version.
# Every remote output line is cut down to start at its last "v<N>" and is
# then read as five whitespace-separated columns: operation name, calls/s,
# an unused column, uSec per call and op %. Lines are parsed one at a time,
# so a short or empty line cannot shift the columns of the following ones.
# Globals:   SSH (read)
# Arguments: $1 - Datamover name
#            $2 - NFS version digit (3 or 4)
#            $3 - text printed in front of the first section header
#                 (empty, or a newline to separate it from earlier output)
# Outputs:   Munin value lines on stdout
#######################################
emc_vnx_fetch_nfs_ops() {
  local server=$1 ver=$2 lead=$3
  local row name calls unused usec pct rest
  local -a rows=()

  # shellcheck disable=SC2086
  mapfile -t rows < <($SSH server_stats "$server" -monitor "nfs.v${ver}.op" \
    -count 1 -terminationsummary no -titles never \
    | sed -ne "s/^.*v${ver}/v${ver}/p")

  printf '%smultigraph vnx_emc_v%s_calls_s\n' "$lead" "$ver"
  for row in "${rows[@]}"; do
    read -r name calls unused usec pct rest <<< "$row"
    [ -n "$name" ] || continue
    echo "${server}_${name}.value ${calls:-U}"
  done

  printf '\nmultigraph vnx_emc_v%s_usec_call\n' "$ver"
  for row in "${rows[@]}"; do
    read -r name calls unused usec pct rest <<< "$row"
    [ -n "$name" ] || continue
    echo "${server}_${name}.value ${usec:-U}"
  done

  printf '\nmultigraph vnx_emc_v%s_op_percent\n' "$ver"
  for row in "${rows[@]}"; do
    read -r name calls unused usec pct rest <<< "$row"
    [ -n "$name" ] || continue
    echo "${server}_${name}.value ${pct:-U}"
  done
}

#######################################
# Print the per-client NFS values. Every remote output line is cut down to
# what follows 'id=' and read as nine whitespace-separated columns:
#   id  Total  Read   Write  Suspicious  Total  Read   Write  Avg
#       Ops/s  Ops/s  Ops/s  Ops diff    KiB/s  KiB/s  KiB/s  uSec/call
# Dots in the id are replaced by underscores to build the field name.
# The Datamover being fetched is the one that is queried (this used to be
# hard-coded to server_2) and ids without any dot are kept.
# Globals:   SSH (read)
# Arguments: $1 - Datamover name
# Outputs:   Munin value lines on stdout
#######################################
emc_vnx_fetch_nfs_clients() {
  local server=$1
  local row id client rest
  local ops_t ops_r ops_w ops_s kib_t kib_r kib_w avg_usec
  local -a rows=()

  printf '\nmultigraph vnx_emc_nfs_client_ops_s\n'
  # shellcheck disable=SC2086
  mapfile -t rows < <($SSH server_stats "$server" -monitor nfs.client \
    -count 1 -terminationsummary no -titles never \
    | sed -ne 's/^.*id=//p')
  for row in "${rows[@]}"; do
    read -r id ops_t ops_r ops_w ops_s kib_t kib_r kib_w avg_usec rest <<< "$row"
    [ -n "$id" ] || continue
    client=${id//./_}
    echo "${server}_${client}_r.value ${ops_r:-U}"
    echo "${server}_${client}_w.value ${ops_w:-U}"
    echo "${server}_${client}_t.value ${ops_t:-U}"
    echo "${server}_${client}_s.value ${ops_s:-U}"
  done

  printf '\nmultigraph vnx_emc_nfs_client_kib_s\n'
  for row in "${rows[@]}"; do
    read -r id ops_t ops_r ops_w ops_s kib_t kib_r kib_w avg_usec rest <<< "$row"
    [ -n "$id" ] || continue
    client=${id//./_}
    echo "${server}_${client}_r.value ${kib_r:-U}"
    echo "${server}_${client}_w.value ${kib_w:-U}"
    echo "${server}_${client}_t.value ${kib_t:-U}"
  done

  printf '\nmultigraph vnx_emc_nfs_client_avg_usec\n'
  for row in "${rows[@]}"; do
    read -r id ops_t ops_r ops_w ops_s kib_t kib_r kib_w avg_usec rest <<< "$row"
    [ -n "$id" ] || continue
    client=${id//./_}
    echo "${server}_${client}.value ${avg_usec:-U}"
  done
}

nas_server_ok=
# nas_servers is a whitespace-separated list, hence unquoted.
# shellcheck disable=SC2086
for server in $nas_servers; do
  # Only Datamovers whose 'nas_server -i' output contains 'type = nas' are
  # fetched; all others are skipped.
  $SSH nas_server -i "$server" | grep -q 'type *= nas' || continue
  nas_server_ok=TRUE

  case "$STATSTYPE" in
    BASICDM)
      emc_vnx_fetch_basicdm "$server"
      ;;
    NFS)
      emc_vnx_fetch_nfs_ops "$server" 3 ''
      emc_vnx_fetch_nfs_ops "$server" 4 $'\n'
      emc_vnx_fetch_nfs_clients "$server"
      ;;
  esac
done

if [ -z "$nas_server_ok" ]; then
  echo "No active data movers!"
  exit 1
fi
exit 0