#!/bin/bash

: <<=cut

=head1 NAME

emc_vnx_file_stats - Plugin to monitor Basic, NFSv3 and NFSv4 statistics of
EMC VNX 5300 Unified Storage system's Datamovers

=head1 AUTHOR

Evgeny Beysembaev

=head1 LICENSE

GPLv2

=head1 MAGIC MARKERS

  #%# family=auto
  #%# capabilities=autoconf suggest

=head1 DESCRIPTION

The plugin monitors basic statistics of EMC Unified Storage system Datamovers
and NFS statistics of EMC VNX5300 Unified Storage system. Probably it can also
be compatible with other Isilon or Celerra systems. It uses SSH to connect to
Control Stations, then remotely executes '/nas/sbin/server_stats' and fetches
and parses data from it. It supports gathering data both from active/active
and active/passive Datamover configurations, ignoring offline or standby
Datamovers. If all Datamovers are offline or absent, the plugin returns error.

This plugin also automatically chooses Primary Control Station from the list
by calling '/nasmcd/sbin/getreason' and '/nasmcd/sbin/t2slot'.

At the moment data is gathered from the following statistics sources:

 * nfs.v3.op - Tons of timings about NFSv3 RPC calls
 * nfs.v4.op - Tons of timings about NFSv4 RPC calls
 * nfs.client - Here new Client addresses are rescanned and added
   automatically.
 * basic-std Statistics Group - Basic Statistics of Datamovers (eg. CPU,
   Memory etc.)

It's quite easy to comment out unneeded data to make graphs less overloaded or
to add new statistics sources.

=head1 COMPATIBILITY

The plugin has been written for being compatible with EMC VNX5300 Storage
system, as this is the only EMC storage which I have. By the way, I am pretty
sure it can also work with other VNX1 storages, like VNX5100 and VNX5500.

About VNX2 series, I don't know whether the plugin will be able to work with
them. Maybe it would need some corrections in command-line backend. The same
situation is with other EMC systems, so I encourage you to try and fix the
plugin.

=head1 CONFIGURATION

The plugin uses SSH to connect to Control Stations. It's possible to use
'nasadmin' user, but it would be better if you create read-only global user by
Unisphere Client. The user should have only Operator role.

I created "operator" user but due to the fact that Control Stations already
had one internal "operator" user, the new one was called "operator1". So be
careful. After that, copy .bash_profile from /home/nasadmin to a newly created
/home/operator1

On munin-node side choose a user which will be used to connect through SSH.
Generally user "munin" is ok. Then, execute "sudo su munin -s /bin/bash",
"ssh-keygen" and "ssh-copy-id" to both Control Stations with newly created
user.

Make a link from /usr/share/munin/plugins/emc_vnx_file_stats to
/etc/munin/plugins/. If you want to get NFS statistics, name the link as
"emc_vnx_file_nfs_stats_<NAME>", otherwise to get Basic Datamover statistics
you have to name it "emc_vnx_file_basicdm_stats_<NAME>", where <NAME> is any
arbitrary name of your storage system. The plugin will return <NAME> in its
answer as "host_name" field.

For example, assume your storage system is called "VNX5300".
Make a configuration file at
/etc/munin/plugin-conf.d/emc_vnx_file_stats_VNX5300

 [emc_vnx_file_*]
 user munin                            # SSH Client local user
 env.username operator1                # Remote user with Operator role
 env.cs_addr 192.168.1.1 192.168.1.2   # Control Stations addresses
 env.nas_servers server_2 server_3     # This is the default value and can be omitted

=head1 HISTORY

 08.11.2016 - First Release
 17.11.2016 - NFSv4 support, Memory section
 16.12.2016 - Merged "NFS" and "Datamover Stats" plugins
 26.12.2016 - Compatibility with Munin coding style

=cut

export LANG=C

# Provides clean_fieldname, used below for the NFS client field names.
. "$MUNIN_LIBDIR/plugins/plugin.sh"

# Print the getreason status code of a Control Station.
# Arguments: $1 - address of the Control Station
# Globals:   username (read)
# Outputs:   the part before the first '-' of the getreason line that
#            belongs to the slot reported by t2slot on that station
ssh_check_cmd() {
    ssh -q "$username@$1" "/nasmcd/sbin/getreason | grep -w slot_\`/nasmcd/sbin/t2slot\` | cut -d- -f1"
}

# Validate the configuration and pick the Primary Control Station.
# Globals:   username, cs_addr (read); PRIMARY_CS (written)
# Outputs:   a human readable reason on stdout when the check fails
# Returns:   0 when a Primary Control Station was found, 1 otherwise
check_conf() {
    local cs code

    if [ -z "$username" ]; then
        echo "No username ('username' environment variable)!"
        return 1
    fi

    if [ -z "$cs_addr" ]; then
        echo "No control station addresses ('cs_addr' environment variable)!"
        return 1
    fi

    # Choose the Control Station whose status code is "10".
    # cs_addr is a whitespace separated list, so it is split on purpose.
    for cs in $cs_addr; do
        # The address must be passed without extra quote characters: inside
        # $(...) an escaped quote would become part of the host name.
        code=$(ssh_check_cmd "$cs")
        # Compare as a string (after dropping the padding left by 'cut')
        # so that remote output is never evaluated as an arithmetic expression.
        code=${code//[[:space:]]/}
        if [ "$code" = "10" ]; then
            PRIMARY_CS=$cs
            break
        fi
    done

    if [ -z "$PRIMARY_CS" ]; then
        echo "No alive primary Control Station from list \"$cs_addr\""
        return 1
    fi
    return 0
}

# Run a command on the Primary Control Station with the remote user's
# .bash_profile loaded first.
# Arguments: the remote command line; the words are joined with spaces and
#            interpreted by the remote shell
# Globals:   username, PRIMARY_CS (read)
run_remote() {
    ssh -q "$username@$PRIMARY_CS" ". /home/""$username""/.bash_profile; $*"
}

# Succeed when 'nas_server -i' reports the given Datamover with "type = nas".
# Arguments: $1 - Datamover name
datamover_is_active() {
    run_remote nas_server -i "$1" | grep -q 'type *= nas'
}

# Print the graph definitions of the Basic Datamover statistics.
# Arguments: $1 - Datamover name
config_basicdm() {
    local server=$1

    # Plain here-document (no '<<-'), so it does not depend on tab indentation.
    cat <<EOF
multigraph emc_vnx_cpu_percent
graph_title EMC VNX 5300 Datamover CPU Util %
graph_vlabel %
graph_category cpu
graph_scale no
graph_args --upper-limit 100 -l 0
${server}_cpuutil.min 0
${server}_cpuutil.label $server CPU util. in %.

multigraph emc_vnx_network_kib
graph_title EMC VNX 5300 Datamover Network bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_net_in.graph no
${server}_net_in.label none
${server}_net_out.label $server kB/s
${server}_net_out.negative ${server}_net_in
${server}_net_out.draw AREA

multigraph emc_vnx_storage_kib
graph_title EMC VNX 5300 Datamover Storage bytes over all interfaces
graph_vlabel B/s recv. (-) / sent (+)
graph_category network
graph_args --base 1000
${server}_stor_read.graph no
${server}_stor_read.label none
${server}_stor_write.label $server kB/s
${server}_stor_write.negative ${server}_stor_read
${server}_stor_write.draw AREA

multigraph emc_vnx_memory
graph_title EMC VNX 5300 Datamover Memory
graph_vlabel MiB
graph_category memory
graph_args --base 1024
graph_order ${server}_used ${server}_free ${server}_total ${server}_freebuffer ${server}_encumbered
${server}_used.label ${server} Used
${server}_used.cdef ${server}_used,1024,/
${server}_free.label ${server} Free
${server}_free.draw STACK
${server}_free.cdef ${server}_free,1024,/
${server}_total.label ${server} Total
${server}_total.cdef ${server}_total,1024,/
${server}_freebuffer.label ${server} Free Buffer
${server}_freebuffer.cdef ${server}_freebuffer,1024,/
${server}_encumbered.label ${server} Encumbered
${server}_encumbered.cdef ${server}_encumbered,1024,/

multigraph emc_vnx_filecache
graph_title EMC VNX 5300 File Buffer Cache
graph_vlabel per second
graph_category memory
graph_args --base 1000
graph_order ${server}_highw_hits ${server}_loww_hits ${server}_w_hits ${server}_hits ${server}_lookups
${server}_highw_hits.label High Watermark Hits
${server}_loww_hits.label Low Watermark Hits
${server}_loww_hits.draw STACK
${server}_w_hits.label Watermark Hits
${server}_hits.label Hits
${server}_lookups.label Lookups

multigraph emc_vnx_fileresolve
graph_title EMC VNX 5300 FileResolve
graph_vlabel Entries
graph_category memory
graph_args --base 1000
${server}_dropped.label Dropped Entries
${server}_max.label Max Limit
${server}_used.label Used Entries
EOF
}

# Print the graph definitions of the NFS operation statistics for one
# protocol version.
# Arguments: $1 - Datamover name
#            $2 - protocol version tag, "v3" or "v4"
config_nfs_ops() {
    local server=$1 ver=$2
    local member_elements graph field
    local -a graphs=() fields=()

    # 'server_stats -info' lists the members as one comma separated line.
    member_elements=$(run_remote server_stats "$server" -info "nfs.${ver}.op" | grep member_elements | sed -ne 's/^.*= //p')
    IFS=',' read -ra graphs <<< "$member_elements"

    # The field name is the fourth dot separated component of a member name.
    for graph in "${graphs[@]}"; do
        fields+=("$(echo "$graph" | cut -d '.' -f4)")
    done

    cat <<EOF
multigraph vnx_emc_${ver}_calls_s
graph_title EMC VNX 5300 NFS${ver} Calls per second
graph_vlabel Calls
graph_category nfs
graph_args --base 1000
EOF
    for field in "${fields[@]}"; do
        echo "${server}_$field.label $server $field"
    done

    cat <<EOF
multigraph vnx_emc_${ver}_usec_call
graph_title EMC VNX 5300 NFS${ver} uSeconds per call
graph_vlabel uSec / call
graph_category nfs
graph_args --base 1000
EOF
    for field in "${fields[@]}"; do
        echo "${server}_$field.label $server $field"
    done

    cat <<EOF
multigraph vnx_emc_${ver}_op_percent
graph_title EMC VNX 5300 NFS${ver} Op %
graph_vlabel %
graph_scale no
graph_category nfs
EOF
    for field in "${fields[@]}"; do
        echo "${server}_$field.label $server $field"
        echo "${server}_$field.min 0"
    done
}

# Print the graph definitions of the per-client NFS statistics.
# Arguments: $1 - Datamover name
#
# Columns reported by 'server_stats -monitor nfs.client':
#      Total    Read     Write   Suspicious   Total    Read     Write      Avg
#      Ops/s    Ops/s    Ops/s    Ops diff    KiB/s    KiB/s    KiB/s    uSec/call
config_nfs_clients() {
    local server=$1
    local member_elements client field
    local -a clients=() fields=()

    # Query the Datamover being configured, not a fixed one.
    member_elements=$(run_remote server_stats "$server" -monitor nfs.client -count 1 -terminationsummary no -titles never | sed -ne 's/^.*id=//p' | cut -d' ' -f1)
    readarray -t clients <<< "$member_elements"
    for client in "${clients[@]}"; do
        # An empty reply still yields one empty element; do not turn it
        # into a nameless field.
        [ -n "$client" ] || continue
        fields+=("$(clean_fieldname "$client")")
    done

    cat <<EOF
multigraph vnx_emc_nfs_client_ops_s
graph_title EMC VNX 5300 NFS Client Ops/s
graph_vlabel Ops/s
graph_category nfs
EOF
    echo -n "graph_order "
    for field in "${fields[@]}"; do
        echo -n "${server}_${field}_r ${server}_${field}_w ${server}_${field}_t ${server}_${field}_s "
    done
    echo " "
    for field in "${fields[@]}"; do
        echo "${server}_${field}_r.label $server $field Read Ops/s"
        echo "${server}_${field}_w.label $server $field Write Ops/s"
        echo "${server}_${field}_w.draw STACK"
        echo "${server}_${field}_t.label $server $field Total Ops/s"
        echo "${server}_${field}_s.label $server $field Suspicious Ops diff"
    done

    cat <<EOF
multigraph vnx_emc_nfs_client_kib_s
graph_title EMC VNX 5300 NFS Client B/s
graph_vlabel B/s
graph_category nfs
EOF
    echo -n "graph_order "
    for field in "${fields[@]}"; do
        echo -n "${server}_${field}_r ${server}_${field}_w ${server}_${field}_t "
    done
    echo " "
    for field in "${fields[@]}"; do
        # Values are fetched in KiB/s; the cdef turns them into B/s.
        echo "${server}_${field}_r.label $server $field Read B/s"
        echo "${server}_${field}_r.cdef ${server}_${field}_r,1024,*"
        echo "${server}_${field}_w.label $server $field Write B/s"
        echo "${server}_${field}_w.cdef ${server}_${field}_w,1024,*"
        echo "${server}_${field}_w.draw STACK"
        echo "${server}_${field}_t.label $server $field Total B/s"
        echo "${server}_${field}_t.cdef ${server}_${field}_t,1024,*"
    done

    cat <<EOF
multigraph vnx_emc_nfs_client_avg_usec
graph_title EMC VNX 5300 NFS Client Avg uSec/call
graph_vlabel uSec/call
graph_category nfs
EOF
    for field in "${fields[@]}"; do
        echo "${server}_${field}.label $server $field Avg uSec/call"
    done
}

# Print the current values of the Basic Datamover statistics.
# Arguments: $1 - Datamover name
fetch_basicdm() {
    local server=$1
    local member_elements
    local -a graphs=()

    # Keep only the line that does not start with whitespace and split it
    # into columns; the column numbers below are positions in that line.
    member_elements=$(run_remote server_stats "$server" -count 1 -terminationsummary no -titles never | grep '^[^[:space:]]')
    IFS=' ' read -ra graphs <<< "$member_elements"

    echo "multigraph emc_vnx_cpu_percent"
    echo "${server}_cpuutil.value ${graphs[1]}"

    printf '\nmultigraph emc_vnx_network_kib\n'
    echo "${server}_net_in.value ${graphs[2]}"
    echo "${server}_net_out.value ${graphs[3]}"

    printf '\nmultigraph emc_vnx_storage_kib\n'
    echo "${server}_stor_read.value ${graphs[4]}"
    echo "${server}_stor_write.value ${graphs[5]}"

    member_elements=$(run_remote server_stats "$server" -monitor kernel.memory -count 1 -terminationsummary no -titles never | grep '^[^[:space:]]')
    IFS=' ' read -ra graphs <<< "$member_elements"

    printf '\nmultigraph emc_vnx_memory\n'
    echo "${server}_total.value ${graphs[14]}"
    echo "${server}_used.value ${graphs[15]}"
    echo "${server}_free.value ${graphs[12]}"
    echo "${server}_freebuffer.value ${graphs[1]}"
    echo "${server}_encumbered.value ${graphs[8]}"

    printf '\nmultigraph emc_vnx_filecache\n'
    echo "${server}_highw_hits.value ${graphs[2]}"
    echo "${server}_loww_hits.value ${graphs[6]}"
    echo "${server}_w_hits.value ${graphs[7]}"
    echo "${server}_hits.value ${graphs[4]}"
    echo "${server}_lookups.value ${graphs[5]}"

    printf '\nmultigraph emc_vnx_fileresolve\n'
    echo "${server}_dropped.value ${graphs[9]}"
    echo "${server}_max.value ${graphs[10]}"
    echo "${server}_used.value ${graphs[11]}"
}

# Print the current values of the NFS operation statistics for one protocol
# version.
# Arguments: $1 - Datamover name
#            $2 - protocol version tag, "v3" or "v4"
fetch_nfs_ops() {
    local server=$1 ver=$2
    local member_elements row
    local -a rows=() cols=()

    # Cut every matching line down so that it starts with the operation name.
    member_elements=$(run_remote server_stats "$server" -monitor "nfs.${ver}.op" -count 1 -terminationsummary no -titles never | sed -ne "s/^.*${ver}/${ver}/p")

    # Collect the non-blank rows; an empty reply therefore produces no
    # values instead of one nameless field.
    while IFS= read -r row; do
        [[ "$row" =~ [^[:space:]] ]] || continue
        rows+=("$row")
    done <<< "$member_elements"

    # Each row is split on whitespace: column 0 is the operation name,
    # columns 1, 3 and 4 feed the three graphs below.
    printf '\nmultigraph vnx_emc_%s_calls_s\n' "$ver"
    for row in "${rows[@]}"; do
        read -ra cols <<< "$row"
        echo "${server}_${cols[0]}.value ${cols[1]}"
    done

    printf '\nmultigraph vnx_emc_%s_usec_call\n' "$ver"
    for row in "${rows[@]}"; do
        read -ra cols <<< "$row"
        echo "${server}_${cols[0]}.value ${cols[3]}"
    done

    printf '\nmultigraph vnx_emc_%s_op_percent\n' "$ver"
    for row in "${rows[@]}"; do
        read -ra cols <<< "$row"
        echo "${server}_${cols[0]}.value ${cols[4]}"
    done
}

# Print the current values of the per-client NFS statistics.
# Arguments: $1 - Datamover name
fetch_nfs_clients() {
    local server=$1
    local member_elements row client
    local -a rows=() cols=()

    printf '\nmultigraph vnx_emc_nfs_client_ops_s\n'

    # Query the Datamover being fetched, not a fixed one.
    member_elements=$(run_remote server_stats "$server" -monitor nfs.client -count 1 -terminationsummary no -titles never | sed -ne 's/^.*id=//p')
    while IFS= read -r row; do
        [[ "$row" =~ [^[:space:]] ]] || continue
        rows+=("$row")
    done <<< "$member_elements"

    # Column 0 is the client id; columns 1-4 are Total/Read/Write/Suspicious
    # ops, 5-7 Total/Read/Write KiB/s and 8 the average uSec/call.
    for row in "${rows[@]}"; do
        read -ra cols <<< "$row"
        client="$(clean_fieldname "${cols[0]}")"
        echo "${server}_${client}_r.value ${cols[2]}"
        echo "${server}_${client}_w.value ${cols[3]}"
        echo "${server}_${client}_t.value ${cols[1]}"
        echo "${server}_${client}_s.value ${cols[4]}"
    done

    printf '\nmultigraph vnx_emc_nfs_client_kib_s\n'
    for row in "${rows[@]}"; do
        read -ra cols <<< "$row"
        client="$(clean_fieldname "${cols[0]}")"
        echo "${server}_${client}_r.value ${cols[6]}"
        echo "${server}_${client}_w.value ${cols[7]}"
        echo "${server}_${client}_t.value ${cols[5]}"
    done

    printf '\nmultigraph vnx_emc_nfs_client_avg_usec\n'
    for row in "${rows[@]}"; do
        read -ra cols <<< "$row"
        client="$(clean_fieldname "${cols[0]}")"
        echo "${server}_${client}.value ${cols[8]}"
    done
}

if [ "$1" = "autoconf" ]; then
    if check_conf_ans=$(check_conf); then
        echo "yes"
    else
        echo "no ($check_conf_ans)"
    fi
    exit 0
fi

if [ "$1" = "suggest" ]; then
    echo "nfs_stats"
    echo "basicdm_stats"
    exit 0
fi

# The first five '_' separated words of the link name select the statistics
# type, the sixth one is reported as host_name.
STATSTYPE=$(echo "${0##*/}" | cut -d _ -f 1-5)
if [ "$STATSTYPE" = "emc_vnx_file_nfs_stats" ]; then
    STATSTYPE=NFS
elif [ "$STATSTYPE" = "emc_vnx_file_basicdm_stats" ]; then
    STATSTYPE=BASICDM
else
    echo "Do not know what to do. Name the plugin as 'emc_vnx_file_nfs_stats_' or 'emc_vnx_file_basicdm_stats_'"
    exit 1
fi

TARGET=$(echo "${0##*/}" | cut -d _ -f 6)

: "${nas_servers:=server_2 server_3}"

# Not in a subshell this time: PRIMARY_CS has to survive for run_remote.
check_conf 1>&2 || exit 1

echo "host_name ${TARGET}"

if [ "$1" = "config" ]; then
    # nas_servers is a whitespace separated list, so it is split on purpose.
    for server in $nas_servers; do
        # Offline and standby Datamovers are skipped.
        datamover_is_active "$server" || continue
        nas_server_ok=TRUE

        if [ "$STATSTYPE" = "BASICDM" ]; then
            config_basicdm "$server"
        fi

        if [ "$STATSTYPE" = "NFS" ]; then
            config_nfs_ops "$server" v3
            config_nfs_ops "$server" v4
            config_nfs_clients "$server"
        fi
    done

    if [ -z "$nas_server_ok" ]; then
        echo "No active data movers!" 1>&2
    fi
    exit 0
fi

for server in $nas_servers; do
    datamover_is_active "$server" || continue
    nas_server_ok=TRUE

    if [ "$STATSTYPE" = "BASICDM" ]; then
        fetch_basicdm "$server"
    fi

    if [ "$STATSTYPE" = "NFS" ]; then
        fetch_nfs_ops "$server" v3
        fetch_nfs_ops "$server" v4
        fetch_nfs_clients "$server"
    fi
done

if [ -z "$nas_server_ok" ]; then
    echo "No active data movers!" 1>&2
fi
exit 0