#!/bin/bash
#
# Plugin to monitor the difference between st_last_event_ts and st_last_received_ts
# in the sl_status table (based on slony_ and slony_lag_)
#
# http://blog.endpoint.com/2009/07/slony-slstatus-and-diagnosing.html
#
# st_origin: the local slony system
# st_received: the slony instance that sent an event
# st_last_event: the sequence number of the last event received from that origin/received pair
# st_last_event_ts: the timestamp on the last event received
# st_last_received: the sequence number of the last sl_event + sl_confirm pair received
# st_last_received_ts: the timestamp on the sl_confirm in that pair
# st_last_received_event_ts: the timestamp on the sl_event in that pair
# st_lag_num_events: difference between st_last_event and st_last_received
# st_lag_time: difference between st_last_event_ts and st_last_received_ts
#
#
# Configuration variables:
#
# PGHOST - Database server to use.
# PGUSER - User to connect as.
# PGPASSWORD - Password to use.
# PGSCHEMA - Replication schema.
#
# Configuration example:
#
# munin-node:
#
# [slony*]
# user slony
# env.PGHOST localhost
# env.PGUSER slony
# env.PGPASSWORD password
# env.PGSCHEMA _slony
#
# postgresql.conf:
#
# standard_conforming_strings = on
#
# ln -s /usr/share/munin/plugins/slony_lag_time_ /etc/munin/plugins/slony_lag_time_PGDATABASE
#
#
# Magic markers (optional - only used by munin-config and some installation scripts):
#%# family=contrib
# The database to monitor is encoded in the name this plugin is invoked as:
# slony_lag_time_<PGDATABASE> (normally a symlink, see the header above).
plugin_name=${0##*/}
PGDATABASE=${plugin_name#slony_lag_time_}

#######################################
# Run one SQL statement and print its rows on stdout.
# psql is called with -t, so only tuples are printed (no header or footer).
# Globals:   PGHOST, PGDATABASE, PGUSER (read)
# Arguments: $1 - SQL statement to execute
# Outputs:   one "col | col" line per row, as formatted by psql
# Returns:   exit status of psql
#######################################
run_query() {
  # Quoted on purpose: an unset PGHOST must not let -h swallow the next option.
  psql -h "${PGHOST}" -d "${PGDATABASE}" -U "${PGUSER}" -tc "$1"
}

#######################################
# Print the munin graph configuration: the graph-wide attributes followed by
# one data series per row returned by the sl_node/sl_path query.
# The series label is the host extracted from pa_conninfo.
# Globals:   PGDATABASE, PGSCHEMA (read)
# Outputs:   munin "config" lines on stdout
#######################################
print_config() {
  local node_id sep host

  printf '%s\n' \
    "graph_args --base 1000 -l 0" \
    "graph_category db" \
    "graph_info Slony st_lag_time for ${PGDATABASE}" \
    "graph_title Slony lag time for ${PGDATABASE}" \
    'graph_vlabel ${graph_period}'

  # psql prints "<no_id> | <host>"; "sep" soaks up the "|" column separator.
  # The "\\1" back-reference relies on standard_conforming_strings = on.
  run_query "SELECT no_id,regexp_replace(pa_conninfo, '.*host=(.*?) .*$', '\\1') FROM ${PGSCHEMA}.sl_node JOIN ${PGSCHEMA}.sl_path ON (pa_server=no_id) WHERE pa_client=${PGSCHEMA}.getlocalnodeid('${PGSCHEMA}'::name);" |
    while read -r node_id sep host; do
      # psql -t terminates its output with an empty line; skip it.
      [[ -n "${node_id}" ]] || continue
      printf '%s\n' \
        "${node_id}.label ${host}" \
        "${node_id}.type GAUGE" \
        "${node_id}.draw LINE2" \
        "${node_id}.info difference between st_last_event_ts and st_last_received_ts" \
        "${node_id}.warning 300" \
        "${node_id}.critical 600"
    done
}

#######################################
# Print the current lag, in whole seconds, for every row of sl_status.
# Globals:   PGSCHEMA (read)
# Outputs:   one "<st_received>.value <seconds>" line per row on stdout;
#            "U" (munin's unknown marker) when the lag column is empty/NULL
#######################################
print_values() {
  local node_id sep lag_seconds

  run_query "SELECT st_received, extract(epoch FROM st_lag_time)::integer FROM ${PGSCHEMA}.sl_status ORDER BY 1;" |
    while read -r node_id sep lag_seconds; do
      [[ -n "${node_id}" ]] || continue
      printf '%s\n' "${node_id}.value ${lag_seconds:-U}"
    done
}

# Entry point: "config" prints the graph definition, anything else the values.
if [[ "${1:-}" == "config" ]]; then
  print_config
  exit 0
fi
print_values