From 52191f68011bdb0f87a12cd3992f1cbd56cf52cd Mon Sep 17 00:00:00 2001 From: antonio Date: Fri, 5 Jul 2013 05:17:34 +0200 Subject: [PATCH] Multigraph plugin to monitor a Solr4 single/multicore using mbean http interface --- plugins/solr/solr4_ | 431 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 431 insertions(+) create mode 100755 plugins/solr/solr4_ diff --git a/plugins/solr/solr4_ b/plugins/solr/solr4_ new file mode 100755 index 00000000..80d38317 --- /dev/null +++ b/plugins/solr/solr4_ @@ -0,0 +1,431 @@ +#!/usr/bin/env python +# +# Copyright (c) 2013, Antonio Verni, me.verni@gmail.com +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. +# +# Munin plugin for monitoring a multicore solr 4.* installation via mbean. +# It calls: +# > http://localhost:8080/solr/admin/cores?action=STATUS&wt=json +# and +# > http://localhost:8080/solr/corename/admin/mbeans?stats=true&wt=json +# for each core to retrieve cores and data. Verify those urls on your instance. +# +# Configuration parameters: +# [solr_*] +# host_port +# qpshandler_ +# availableram +# +# Example: +# host_port solrhost:8080 +# qpshandler_select /select +# availableram 3221225472 +# +# Defined checks: +# numdocs +# qps +# indexsize +# requesttimes +# documentcache +# fieldvaluecache +# filtercache +# queryresultcache +# +# Installation example: +# ln -s /usr/share/munin/plugins/solr_.py /etc/munin/plugins/solr_numdocs_core_1 +# ln -s /usr/share/munin/plugins/solr_.py /etc/munin/plugins/solr_requesttimes_select +# ln -s /usr/share/munin/plugins/solr_.py /etc/munin/plugins/solr_qps_core_1_select +# +# Source repo: https://github.com/averni/munin-solr + +import sys +import os +import httplib +import json + +def parse_params(): + plugname = os.path.basename(sys.argv[0]).split('_', 2)[1:] + params = { + 'type': plugname[0], + 'op': 'config' if sys.argv[-1] == 'config' else 'fetch', + 'core': plugname[1] if len(plugname) > 1 else '', + 'params': {} + } + if plugname[0] in[ 'qps', 'requesttimes']: + data = params['core'].rsplit('_', 1) + handler = data.pop() + params['params'] = { + 'handler': os.environ.get('qpshandler_%s' % handler, '/select') + } + if not data: + params['core'] = '' + else: + params['core'] = data[0] + elif plugname[0] == 'indexsize': + params['params']['core'] = params['core'] + return params + +############################################################################# +# Datasources + +class CheckException(Exception): + pass + +class JSONReader: + @classmethod + def readValue(cls, struct, path): + if not path[0] in struct: + return -1 + obj = struct[path[0]] + if not obj: + return -1 + for k in path[1:]: + obj = obj[k] + return obj + +class SolrCoresAdmin: + def __init__(self, host): + self.host = host + self.data = None + + def fetchcores(self): + uri = "/solr/admin/cores?action=STATUS&wt=json" + conn = httplib.HTTPConnection(self.host) + conn.request("GET", uri) + res = conn.getresponse() + data = res.read() + if res.status != 200: + raise CheckException("Cores status fetch failed: %s\n%s" %( str(res.status), res.read())) + self.data = json.loads(data) + + def getCores(self): + if not self.data: + self.fetchcores() + cores = JSONReader.readValue(self.data, ['status']) + return cores.keys() + + def indexsize(self, core = None): + if not self.data: + self.fetchcores() + if core: + return { + core: JSONReader.readValue(self.data, ['status', core, 'index', 'sizeInBytes']) + } + else: + ret = {} + for core in self.getCores(): + ret[core] = JSONReader.readValue(self.data, ['status', core, 'index', 'sizeInBytes']) + return ret + +class SolrCoreMBean: + def __init__(self, host, core): + self.host = host + self.data = None + self.core = core + + def _fetch(self): + uri = "/solr/%s/admin/mbeans?stats=true&wt=json" % self.core + conn = httplib.HTTPConnection(self.host) + conn.request("GET", uri) + res = conn.getresponse() + data = res.read() + if res.status != 200: + raise CheckException("MBean fetch failed: %s\n%s" %( str(res.status), res.read())) + raw_data = json.loads(data) + data = {} + self.data = { + 'solr-mbeans': data + } + key = None + for pos, el in enumerate(raw_data['solr-mbeans']): + if pos % 2 == 1: + data[key] = el + else: + key = el + + def _read(self, path): + if self.data is None: + self._fetch() + return JSONReader.readValue(self.data, path) + + def _readCache(self, cache): + result = {} + for key in ['lookups', 'hits', 'inserts', 'evictions', 'hitratio']: + path = ['solr-mbeans', 'CACHE', cache, 'stats', 'cumulative_%s' % key] + result[key] = self._read(path) + result['size'] = self._read(['solr-mbeans', 'CACHE', cache, 'stats', 'size']) + return result + + def getCore(self): + return self.core + + def qps(self, handler): + path = ['solr-mbeans', 'QUERYHANDLER', handler, 'stats', 'avgRequestsPerSecond'] + return self._read(path) + + def requesttimes(self, handler): + times = {} + path = ['solr-mbeans', 'QUERYHANDLER', handler, 'stats'] + for perc in ['avgTimePerRequest', '75thPcRequestTime', '99thPcRequestTime']: + times[perc] = self._read(path + [perc]) + return times + + def numdocs(self): + path = ['solr-mbeans', 'CORE', 'searcher', 'stats', 'numDocs'] + return self._read(path) + + def documentcache(self): + return self._readCache('documentCache') + + def filtercache(self): + return self._readCache('filterCache') + + def fieldvaluecache(self): + return self._readCache('fieldValueCache') + + def queryresultcache(self): + return self._readCache('queryResultCache') + +############################################################################# +# Graph Templates + +CACHE_GRAPH_TPL = """multigraph solr_{core}_{cacheType}_hit_rates +graph_category solr +graph_title Solr {core} {cacheName} Hit rates +graph_order lookups hits inserts +graph_scale no +graph_vlabel Hit Rate +graph_args -u 100 --rigid +lookups.label Cache lookups +lookups.graph no +lookups.min 0 +lookups.type DERIVE +inserts.label Cache misses +inserts.min 0 +inserts.draw STACK +inserts.cdef inserts,lookups,/,100,* +inserts.type DERIVE +hits.label Cache hits +hits.min 0 +hits.draw AREA +hits.cdef hits,lookups,/,100,* +hits.type DERIVE + +multigraph solr_{core}_{cacheType}_size +graph_title Solr {core} {cacheName} Size +graph_args -l 0 +graph_category solr +graph_vlabel Size +size.label Size +size.draw LINE2 +evictions.label Evictions +evictions.draw LINE2 + +""" + +QPSMAIN_GRAPH_TPL = """graph_title Solr {core} {handler} Request per second" +graph_args -l 0 +graph_vlabel request / second +graph_category solr +{cores_qps_graphs}""" + +QPSCORE_GRAPH_TPL = """qps_{core}.label {core} Request per second +qps_{core}.type LINESTACK1 +qps_{core}.graph yes""" + +REQUESTTIMES_GRAPH_TPL = """multigraph {core}_requesttimes +graph_title Solr {core} {handler} Time per request +graph_args -l 0 +graph_vlabel millis +graph_category solr +savgtimeperrequest_{core}.label {core} Avg time per request +savgtimeperrequest_{core}.type gauge +savgtimeperrequest_{core}.graph yes +s75thpcrequesttime_{core}.label {core} 75th perc +s75thpcrequesttime_{core}.type gauge +s75thpcrequesttime_{core}.graph yes +s99thpcrequesttime_{core}.label {core} 99th perc +s99thpcrequesttime_{core}.type gauge +s99thpcrequesttime_{core}.graph yes + +""" + +NUMDOCS_GRAPH_TPL = """graph_title Solr Docs %s +graph_vlabel docs +docs.label Docs +graph_category solr""" + +INDEXSIZE_GRAPH_TPL = """graph_args --base 1024 -l 0 --upper-limit {availableram} +graph_vlabel Bytes +graph_title Index Size +graph_category solr +graph_info Solr Index Memory Usage. +graph_order {cores} +{cores_config} +""" + +INDEXSIZECORE_GRAPH_TPL = """{core}.label {core} +{core}.draw STACK""" + +############################################################################# +# Graph managment +CHECKS_DEFINED = [ + 'numdocs', + 'qps', + 'indexsize', + 'requesttimes', + 'documentcache', + 'fieldvaluecache', + 'filtercache', + 'queryresultcache' +] + +class SolrMuninGraph: + def __init__(self, hostport, solrmbean): + self.solrcoresadmin = SolrCoresAdmin(hostport) + self.hostport = hostport + self.params = params + + def _getMBean(self, core): + return SolrCoreMBean(self.hostport, core) + + def _cacheConfig(self, cacheType, cacheName): + return CACHE_GRAPH_TPL.format(core=self.params['core'], cacheType=cacheType, cacheName=cacheName) + + def _cacheFetch(self, cacheType, fields = None): + fields = fields or ['size', 'lookups', 'hits', 'inserts', 'evictions'] + hits_fields = ['lookups', 'hits', 'inserts'] + size_fields = ['size', 'evictions'] + results = [] + solrmbean = self._getMBean(self.params['core']) + data = getattr(solrmbean, cacheType)() + results.append('multigraph solr_{core}_{cacheType}_hit_rates'.format(core=self.params['core'], cacheType=cacheType)) + for label in hits_fields: + results.append("%s.value %s" % (label, data[label])) + results.append('multigraph solr_{core}_{cacheType}_size'.format(core=self.params['core'], cacheType=cacheType)) + for label in size_fields: + results.append("%s.value %s" % (label, data[label])) + return "\n".join(results) + + def config(self, mtype): + if not mtype: + raise CheckException("""Check missing. Available checks: \n\t%s""" % '\n\t'.join(CHECKS_DEFINED)) + if not hasattr(self, '%sConfig' % mtype): + raise CheckException("Unknown check %s" % mtype) + return getattr(self, '%sConfig' % mtype)() + + def fetch(self, mtype): + if not hasattr(self, params['type']): + return None + return getattr(self, params['type'])() + + def _getCores(self): + if self.params['core']: + cores = [self.params['core']] + else: + cores = sorted(self.solrcoresadmin.getCores()) + return cores + + def qpsConfig(self): + cores = self._getCores() + graph = [QPSCORE_GRAPH_TPL.format(core=c) for c in cores ] + return QPSMAIN_GRAPH_TPL.format( + cores_qps_graphs='\n'.join(graph), + handler=self.params['params']['handler'], + core=self.params['core'], + cores_qps_cdefs='%s,%s' % (','.join(map(lambda x: 'qps_%s' % x, cores)),','.join(['+']*(len(cores)-1))) + ) + + def qps(self): + results = [] + cores = self._getCores() + for c in cores: + mbean = self._getMBean(c) + results.append('qps_%s.value %s' % (c, mbean.qps(self.params['params']['handler']))) + return '\n'.join(results) + + def requesttimesConfig(self): + cores = self._getCores() + graphs = [REQUESTTIMES_GRAPH_TPL.format(core=c, handler=self.params['params']['handler']) for c in cores ] + return '\n'.join(graphs) + + def requesttimes(self): + cores = self._getCores() + results = [] + for c in cores: + mbean = self._getMBean(c) + results.append('multigraph {core}_requesttimes'.format(core=c)) + for k, time in mbean.requesttimes(self.params['params']['handler']).items(): + results.append('s%s_%s.value %s' % (k.lower(), c, time)) + return '\n'.join(results) + + def numdocsConfig(self): + return NUMDOCS_GRAPH_TPL % self.params['core'] + + def numdocs(self): + mbean = self._getMBean(self.params['core']) + return 'docs.value %s' % mbean.numdocs(**self.params['params']) + + def indexsizeConfig(self): + cores = self._getCores() + availableram = os.environ.get('availableram', 16868532224) + graph = [ INDEXSIZECORE_GRAPH_TPL.format(core=c) for c in cores] + return INDEXSIZE_GRAPH_TPL.format(cores=" ".join(cores), cores_config="\n".join(graph), availableram=availableram) + + def indexsize(self): + results = [] + for c, size in self.solrcoresadmin.indexsize(**self.params['params']).items(): + results.append("%s.value %s" % (c, size)) + return "\n".join(results) + + def documentcacheConfig(self): + return self._cacheConfig('documentcache', 'Document Cache') + + def documentcache(self): + return self._cacheFetch('documentcache') + + def filtercacheConfig(self): + return self._cacheConfig('filtercache', 'Filter Cache') + + def filtercache(self): + return self._cacheFetch('filtercache') + + def fieldvaluecacheConfig(self): + return self._cacheConfig('fieldvaluecache', 'Field Value Cache') + + def fieldvaluecache(self): + return self._cacheFetch('fieldvaluecache') + + def queryresultcacheConfig(self): + return self._cacheConfig('queryresultcache', 'Query Cache') + + def queryresultcache(self): + return self._cacheFetch('queryresultcache') + +if __name__ == '__main__': + params = parse_params() + SOLR_HOST_PORT = os.environ.get('host_port', 'localhost:8080').replace('http://', '') + mb = SolrMuninGraph(SOLR_HOST_PORT, params) + try: + if hasattr(mb, params['op']): + print getattr(mb, params['op'])(params['type']) + except Exception, ex: + print "ERROR: %s" % ex + exit(1)