用 Zabbix 实现一个高大上的语音报警,这样可以省下聘请大量 NOC 客服打电话通知的费用。
wget https://raw.github.com/PagerDuty/pagerduty-zabbix-py/master/pagerduty.py
cp pagerduty.py /etc/zabbix/alert.d/
Make sure the script is executable by Zabbix:
chmod 755 /etc/zabbix/alert.d/pagerduty.py
在 Zabbix 里面配置报警所用的脚本为 pagerduty.py;脚本必须对 Zabbix 可执行(上一步设置的 755 即可,不必设成 777)。
总的来说,zabbix去调用pagerduty的python脚本。
具体的操作方法,PagerDuty 已经提供了图文教程,请参考:
http://www.pagerduty.com/docs/guides/zabbix-integration-guide/
import sys
import syslog
import urllib2
import os
import re
import fcntl
import time
try:
import json
except ImportError:
import simplejson as json
class SimpleLogger(object):
    """
    Minimal logging facade that forwards every message to syslog.
    """

    def __init__(self):
        # Messages written through this logger carry this syslog ident.
        syslog.openlog("pagerduty_python")

    def log(self, level, message):
        """Write message to syslog at the given syslog priority level."""
        syslog.syslog(level, message)

    # Convenience wrappers, one per severity used by this script.

    def error(self, message):
        """Log message at LOG_ERR priority."""
        self.log(syslog.LOG_ERR, message)

    def warn(self, message):
        """Log message at LOG_WARNING priority."""
        self.log(syslog.LOG_WARNING, message)

    def info(self, message):
        """Log message at LOG_INFO priority."""
        self.log(syslog.LOG_INFO, message)
# Module-level logger shared by the PagerDuty client, the queue and the
# Zabbix integration defined below.
logger = SimpleLogger()
class PagerDutyClient(object):
    """
    A simple client that can submit events (a file based event) to PagerDuty.
    """

    EVENTS_API_BASE = "https://events.pagerduty.com/generic/2010-04-15/create_event.json"

    def __init__(self, api_base=EVENTS_API_BASE):
        # URL every event is POSTed to.
        self.api_base = api_base

    def submit_event(self, file_path):
        """
        POST the JSON event stored in file_path to the events API.

        Returns a (retry, incident_key) tuple:
          * retry is True when the submission failed in a way that is worth
            trying again later (HTTP status outside 4xx, or the server could
            not be reached at all).
          * incident_key is the key reported by the server when it answers
            with status "success", otherwise None.
        """
        with open(file_path, "r") as event_file:
            json_event = event_file.read()

        incident_key = None
        retry = False
        try:
            request = urllib2.Request(self.api_base)
            request.add_header("Content-type", "application/json")
            request.add_data(json_event)
            response = urllib2.urlopen(request)
            try:
                result = json.loads(response.read())
            finally:
                # Do not leave the connection open until garbage collection.
                response.close()
            if result["status"] == "success":
                incident_key = result["incident_key"]
            else:
                # Log the decoded answer; the response object itself only
                # stringifies to an uninformative repr.
                logger.warn("PagerDuty server REJECTED the event in file: %s, Reason: %s" % (file_path, str(result)))
        except urllib2.HTTPError as e:
            # HTTPError must be handled before URLError (it is a subclass)
            # and is the only one of the two that carries a status code.
            if 400 <= e.code < 500:
                # client error: retrying the same payload will not help
                logger.warn("PagerDuty server REJECTED the event in file: %s, Reason: %s" % (file_path, e.read()))
            else:
                logger.warn("DEFERRED PagerDuty event in file: %s, Reason: [%s, %s]" % (file_path, e.code, e.reason))
                retry = True  # We'll need to retry
        except urllib2.URLError as e:
            # No HTTP response at all (DNS failure, connection refused, ...).
            # A plain URLError has no .code attribute, only .reason.
            logger.warn("DEFERRED PagerDuty event in file: %s, Reason: [%s]" % (file_path, e.reason))
            retry = True  # We'll need to retry
        return (retry, incident_key)
class PagerDutyQueue(object):
    """
    This class implements a simple directory based queue for PagerDuty events.

    Every event is stored as one file named pd_<unix time>_<pid> inside the
    queue directory; flushing submits the files oldest first.
    """

    QUEUE_DIR = "/tmp/pagerduty"

    # Queue files are named pd_<timestamp>_<pid>; group 1 is the timestamp.
    _QUEUED_FILE_RE = re.compile(r"pd_(\d+)_")

    def __init__(self, queue_dir=QUEUE_DIR, pagerduy_client=None):
        """
        queue_dir is the directory holding the queued event files.
        pagerduy_client is the object whose submit_event(file_path) is used
        to deliver events; a PagerDutyClient is created when it is omitted.

        Raises an Exception when the queue directory is not readable and
        writable.
        """
        self.queue_dir = queue_dir
        # Build the default client lazily instead of once at class
        # definition time.
        if pagerduy_client is None:
            pagerduy_client = PagerDutyClient()
        self.pagerduy_client = pagerduy_client
        self._create_queue_dir()
        self._verify_permissions()

    def _create_queue_dir(self):
        """Create the queue directory (mode 0700) unless it already exists."""
        try:
            os.mkdir(self.queue_dir, 0o700)
        except OSError:
            # Another process may have created it between two alerts;
            # only a failure that leaves us without a directory is an error.
            if not os.path.isdir(self.queue_dir):
                raise

    def _verify_permissions(self):
        """Raise an Exception unless the queue directory is readable and writable."""
        if not (os.access(self.queue_dir, os.R_OK)
                and os.access(self.queue_dir, os.W_OK)):
            logger.error("Can't read/write to directory %s, please check permissions." % self.queue_dir)
            raise Exception("Can't read/write to directory %s, please check permissions." % self.queue_dir)

    # Get the list of files from the queue directory
    def _queued_files(self):
        """
        Return the names of the queued event files, oldest timestamp first.

        Entries that do not follow the pd_<timestamp>_ naming (the lock file,
        stray files) are ignored.
        """
        timestamped = []
        for file_name in os.listdir(self.queue_dir):
            match = self._QUEUED_FILE_RE.match(file_name)
            if match:
                timestamped.append((int(match.group(1)), file_name))
        # Sort by timestamp; equal timestamps fall back to the file name so
        # the order is deterministic.
        timestamped.sort()
        return [file_name for _, file_name in timestamped]

    def _flush_queue(self):
        """
        Submit every queued file and delete those that need no retry.

        A file is kept only when the client asks for a retry; rejected
        events are removed as well.
        """
        for file_name in self._queued_files():
            file_path = ("%s/%s" % (self.queue_dir, file_name))
            retry, incident_key = self.pagerduy_client.submit_event(file_path)
            if not retry:
                os.remove(file_path)
                if incident_key:
                    logger.info("PagerDuty event submitted with incident key: %s" % incident_key)

    def lock_and_flush_queue(self):
        """
        Flush the queue while holding an exclusive lock on <queue_dir>/lockfile.

        The lock is requested without blocking: when it cannot be acquired
        the failure is logged and the method returns without flushing.
        """
        with open("%s/lockfile" % self.queue_dir, "w") as lock_file:
            logger.info("Acquiring lock on queue")
            try:
                fcntl.lockf(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            except IOError as e:
                logger.warn("Error while trying to acquire lock on queue: %s" % str(e))
                return
            # We have acquired the lock here
            # Let's flush the queue
            try:
                self._flush_queue()
            except IOError as e:
                # Keep the flush best-effort, but do not report it as a
                # locking problem.
                logger.warn("Error while flushing queue: %s" % str(e))
            finally:
                logger.info("Releasing lock on queue")
                fcntl.lockf(lock_file.fileno(), fcntl.LOCK_UN)

    def enqueue(self, event):
        """
        Serialize event as JSON into a new queue file pd_<unix time>_<pid>.

        The file is created with mode 0600 (subject to the umask) because
        the event contains the PagerDuty service key.
        """
        encoded_event = json.dumps(event)
        process_id = os.getpid()
        time_seconds = int(time.time())
        file_name = "%s/pd_%d_%d" % (self.queue_dir, time_seconds, process_id)
        logger.info("Queuing event %s" % str(event))
        # The third argument of the built-in open() is the buffer size, not
        # a permission mode, so the file has to be created through os.open().
        fd = os.open(file_name, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as f:
            f.write(encoded_event)
class Zabbix(object):
    """
    Zabbix integration: turns the arguments Zabbix passes to the alert
    script into a PagerDuty event dictionary.
    """

    def __init__(self, arguments):
        # Argument list as received by the script: arguments[1] is the
        # service key, arguments[2] the subject, arguments[3] the body.
        self.arguments = arguments

    # Parse the Zabbix message body. The body MUST be in this format:
    #
    # name:{TRIGGER.NAME}
    # id:{TRIGGER.ID}
    # status:{TRIGGER.STATUS}
    # hostname:{HOSTNAME}
    # ip:{IPADDRESS}
    # value:{TRIGGER.VALUE}
    # event_id:{EVENT.ID}
    # severity:{TRIGGER.SEVERITY}
    #
    def _parse_zabbix_body(self, body_str):
        """
        Return the "key:value" lines of body_str as a dictionary.

        Each line is split on its first colon only, so values may contain
        colons themselves. Blank lines and lines without a colon are
        skipped instead of aborting the whole alert.
        """
        fields = {}
        for line in body_str.split('\n'):
            key, separator, value = line.strip().partition(':')
            if not separator:
                # Not a key:value line (blank line, stray text).
                continue
            fields[key.strip()] = value
        return fields

    # Parse the Zabbix message subject.
    # The subject MUST be one of the following:
    #
    # trigger
    # resolve
    #
    def _parse_zabbix_subject(self, subject_str):
        """Return the subject unchanged; it is used as the event type."""
        return subject_str

    def event(self):
        """
        Build the PagerDuty event dictionary from the stored arguments.

        Raises KeyError when the body lacks one of the id, hostname, name
        or status fields.
        """
        # The first argument is the service key
        service_key = self.arguments[1]
        # The second argument is the message type
        message_type = self._parse_zabbix_subject(self.arguments[2])
        event = self._parse_zabbix_body(self.arguments[3])
        logger.info("event %s" % event)
        # Incident key is created by concatenating trigger id and host name.
        # Remember, incident key is used for de-duping and also to match
        # trigger with resolve messages
        incident_key = "%s-%s" % (event["id"], event["hostname"])
        # The description that is rendered in PagerDuty and also sent as SMS
        # and phone alert
        description = "%s : %s for %s" % (event["name"],
                                          event["status"], event["hostname"])
        pagerduty_event = {
            "service_key": service_key, "event_type": message_type,
            "description": description, "incident_key": incident_key,
            "details": event
        }
        return pagerduty_event
# Script entry point. Exactly four arguments (script name, service key,
# subject, body) mean Zabbix invoked us with a new alert, which is queued
# first. In every case the queue is then flushed under its lock.
if __name__ == "__main__":
    pagerduty_queue = PagerDutyQueue()
    invoked_from_zabbix = (len(sys.argv) == 4)
    if invoked_from_zabbix:
        zabbix_event = Zabbix(sys.argv).event()
        pagerduty_queue.enqueue(zabbix_event)
    pagerduty_queue.lock_and_flush_queue()
