zabbix调用PagerDuty实现语音电话报警

zabbix实现一个高大上的语音报警,这样可以省下 聘请大量noc客服去通知打电话的费用。

wget https://raw.github.com/PagerDuty/pagerduty-zabbix-py/master/pagerduty.py
cp pagerduty.py /etc/zabbix/alert.d/

Make sure the script is executable by Zabbix:
chmod 755 /etc/zabbix/alert.d/pagerduty.py

在zabbix里面配置报警选用的脚本为pagerduty.py  脚本的权限要777


总的来说,zabbix去调用pagerduty的python脚本。


具体的操作方法,pagerduty已经提供了图文的教程, 请跑到

http://www.pagerduty.com/docs/guides/zabbix-integration-guide/


import sys
import syslog
import urllib2
import os
import re
import fcntl
import time

try:
    import json
except ImportError:
    import simplejson as json

class SimpleLogger(object):
    """
    A Simple logger
    """

    def __init__(self):
        # Open syslog for logging
        syslog.openlog("pagerduty_python")

    # Some utility functions for logging
    def info(self, message):
        self.log(syslog.LOG_INFO, message)

    def warn(self, message):
        self.log(syslog.LOG_WARNING, message)

    def error(self, message):
        self.log(syslog.LOG_ERR, message)

    def log(self, level, message):
        # print(message)
        syslog.syslog(level, message)

logger = SimpleLogger()

class PagerDutyClient(object):
    """
    A simple client that can submit events (a file based event) to PagerDuty.
    """

    EVENTS_API_BASE = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"

    def __init__(self, api_base=EVENTS_API_BASE):
        self.api_base = api_base

    def submit_event(self, file_path):
        json_event = None
        with open(file_path, "r") as event_file:
            json_event = event_file.read()

        incident_key = None
        retry = False

        try:
            request = urllib2.Request(self.api_base)
            request.add_header("Content-type", "application/json")
            request.add_data(json_event)
            response = urllib2.urlopen(request)
            result = json.loads(response.read())

            if result["status"] == "success":
                incident_key = result["incident_key"]
            else:
                logger.warn("PagerDuty server REJECTED the event in file: %s, Reason: %s" % (file_path, str(response)))

        except urllib2.URLError as e:
            # client error
            if e.code >= 400 and e.code < 500:
                logger.warn("PagerDuty server REJECTED the event in file: %s, Reason: %s" % (file_path, e.read()))
            else:
                logger.warn("DEFERRED PagerDuty event in file: %s, Reason: [%s, %s]" % (file_path, e.code, e.reason))
                retry = True # We'll need to retry

        return (retry, incident_key)


class PagerDutyQueue(object):
    """
    This class implements a simple directory based queue for PagerDuty events
    """

    QUEUE_DIR = "/tmp/pagerduty"

    def __init__(self, queue_dir=QUEUE_DIR, pagerduy_client=PagerDutyClient()):
        self.queue_dir = queue_dir
        self.pagerduy_client = pagerduy_client
        self._create_queue_dir()
        self._verify_permissions()

    def _create_queue_dir(self):
        if not os.access(self.queue_dir, os.F_OK):
            os.mkdir(self.queue_dir, 0700)

    def _verify_permissions(self):
        if not (os.access(self.queue_dir, os.R_OK)
            and os.access(self.queue_dir, os.W_OK)):
            logger.error("Can't read/write to directory %s, please check permissions." % self.queue_dir)
            raise Exception("Can't read/write to directory %s, please check permissions." % self.queue_dir)

    # Get the list of files from the queue directory
    def _queued_files(self):
        files = os.listdir(self.queue_dir)
        pd_names = re.compile("pd_")
        pd_file_names = filter(pd_names.match, files)

        # We need to sort the files by the timestamp.
        # This function extracts the timestamp out of the file name
        def file_timestamp(file_name):
            return int(re.search('pd_(\d+)_', file_name).group(1))

        sorted_file_names = sorted(pd_file_names, key=file_timestamp)
        return pd_file_names

    def _flush_queue(self):
        file_names = self._queued_files()
        for file_name in file_names:
            file_path = ("%s/%s" % (self.queue_dir, file_name))
            retry, incident_key = self.pagerduy_client.submit_event(file_path)

            if not retry:
                os.remove(file_path)

            if incident_key:
                logger.info("PagerDuty event submitted with incident key: %s" % incident_key)

    def lock_and_flush_queue(self):
        with open("%s/lockfile" % self.queue_dir, "w") as lock_file:
            try:
                logger.info("Acquiring lock on queue")
                fcntl.lockf(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                # We have acquired the lock here
                # Let's flush the queue
                self._flush_queue()
            except IOError as e:
                logger.warn("Error while trying to acquire lock on queue: %s" % str(e))
            finally:
                logger.info("Releasing lock on queue")
                fcntl.lockf(lock_file.fileno(), fcntl.LOCK_UN)

    def enqueue(self, event):
        encoded_event = json.dumps(event)
        process_id = os.getpid()
        time_seconds = int(time.time())
        file_name = "%s/pd_%d_%d" % (self.queue_dir, time_seconds, process_id)
        logger.info("Queuing event %s" % str(event))
        with open(file_name, "w", 0600) as f:
            f.write(encoded_event)

class Zabbix(object):
    """
    Zabbix integration
    """

    def __init__(self, arguments):
        self.arguments = arguments

    # Parse the Zabbix message body. The body MUST be in this format:
    #
    # name:{TRIGGER.NAME}
    # id:{TRIGGER.ID}
    # status:{TRIGGER.STATUS}
    # hostname:{HOSTNAME}
    # ip:{IPADDRESS}
    # value:{TRIGGER.VALUE}
    # event_id:{EVENT.ID}
    # severity:{TRIGGER.SEVERITY}
    #
    def _parse_zabbix_body(self, body_str):
        return dict(line.strip().split(':', 1) for line in body_str.strip().split('\n'))

    # Parse the Zabbix message subject.
    # The subject MUST be one of the following:
    #
    # trigger
    # resolve
    #
    def _parse_zabbix_subject(self, subject_str):
        return subject_str

    def event(self):
        # The first argument is the service key
        service_key = self.arguments[1]
        # The second argument is the message type
        message_type = self._parse_zabbix_subject(self.arguments[2])
        event = self._parse_zabbix_body(self.arguments[3])
        logger.info("event %s" % event)

        # Incident key is created by concatenating trigger id and host name.
        # Remember, incident key is used for de-duping and also to match
        # trigger with resolve messages
        incident_key = "%s-%s" % (event["id"], event["hostname"])

        # The description that is rendered in PagerDuty and also sent as SMS
        # and phone alert
        description = "%s : %s for %s" % (event["name"],
                        event["status"], event["hostname"])

        pagerduty_event = {
            "service_key": service_key, "event_type":message_type,
            "description": description, "incident_key": incident_key,
            "details": event
        }
        return pagerduty_event


# If the length of the arguments is 4 then assume it was invoked from
# Zabbix, otherwise, just try to flush the queue
if __name__ == "__main__":
    pagerduty_queue = PagerDutyQueue()
    if len(sys.argv) == 4:
        pagerduty_queue.enqueue(Zabbix(sys.argv).event())
    pagerduty_queue.lock_and_flush_queue()


大家觉得文章对你有些作用! 如果想赏钱,可以用微信扫描下面的二维码,感谢!
另外再次标注博客原地址  xiaorui.cc

发表评论

邮箱地址不会被公开。 必填项已用*标注