CDH运维 oozie失败调度钉钉报警
前言
采用 CDH 集成的大数据架构时,常用 oozie 作为调度工具,并且需要对调度任务进行监控。常见的通知方式有邮件和钉钉:由于每天收到的邮件太多,报警邮件很容易被忽略,因此本文采用时刻都在使用的办公软件钉钉来发送通知。
目录
- 钉钉群自定义接口申请
- 脚本开发
自定义接口申请
请参考之前blog:https://blog.csdn.net/dbc_zt/article/details/109781554
脚本开发
oozie架构
oozie 调度执行过的 job 都记录在 WF_JOBS 表中。
/*
 * Default CREATE TABLE statement for WF_JOBS, as created by the Oozie
 * installation. The alerting script only reads start_time, app_name,
 * status and user_name.
 */
CREATE TABLE `WF_JOBS` (
  `id` varchar(255) NOT NULL,
  `app_name` varchar(255) DEFAULT NULL,  -- application name
  `app_path` varchar(255) DEFAULT NULL,  -- application path
  `conf` mediumblob,
  `created_time` datetime DEFAULT NULL,
  `end_time` datetime DEFAULT NULL,
  `external_id` varchar(255) DEFAULT NULL,
  `group_name` varchar(255) DEFAULT NULL,
  `last_modified_time` datetime DEFAULT NULL,
  `log_token` varchar(255) DEFAULT NULL,
  `parent_id` varchar(255) DEFAULT NULL,
  `proto_action_conf` mediumblob,
  `run` int(11) DEFAULT NULL,
  `sla_xml` mediumblob,
  `start_time` datetime DEFAULT NULL,    -- start time
  `status` varchar(255) DEFAULT NULL,    -- execution status
  `user_name` varchar(255) DEFAULT NULL, -- user name
  `wf_instance` mediumblob,
  PRIMARY KEY (`id`),
  KEY `I_WF_JOBS_CREATED_TIME` (`created_time`),
  KEY `I_WF_JOBS_END_TIME` (`end_time`),
  KEY `I_WF_JOBS_EXTERNAL_ID` (`external_id`),
  KEY `I_WF_JOBS_LAST_MODIFIED_TIME` (`last_modified_time`),
  KEY `I_WF_JOBS_PARENT_ID` (`parent_id`),
  KEY `I_WF_JOBS_STATUS` (`status`),
  KEY `I_WF_JOBS_USER_NAME` (`user_name`),
  KEY `I_WF_JOBS_STATUS_CREATED_TIME` (`status`,`created_time`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
Python脚本
#!/home/user/nlpconda/bin/python
# -*- coding: UTF-8 -*-
"""Report today's non-succeeded Oozie workflows to a DingTalk group robot.

The script reads rows from the Oozie ``WF_JOBS`` table whose status is not
``SUCCEEDED`` and whose start time is today or later, renders them as a
markdown table and posts the table to a DingTalk custom-robot webhook that
uses the "sign" security setting. Nothing is sent when there are no rows.
"""
import base64
import datetime
import hashlib
import hmac
import json
import sys
import time
import urllib.parse
import urllib.request

# DingTalk robot webhook (placeholder token) and its signing secret.
WEBHOOK_URL = "https://oapi.dingtalk.com/robot/send?" \
              "access_token=【我的access_token AAAA】"
SECRET = '【我的签名BBBB】'

# Account whose workflows are monitored (placeholder).
WORKFLOW_USER = 'hue上执行workflow的用户名'


def send_request(url: str, datas: dict, timeout: float = 10) -> bytes:
    """POST ``datas`` as JSON to ``url`` and print the raw response body.

    Args:
        url: Full webhook URL, including any query parameters.
        datas: Message payload; serialized with ``json.dumps``.
        timeout: Socket timeout in seconds, so a hung endpoint cannot block
            the cron job forever.

    Returns:
        The raw response body as bytes (also printed to stdout).

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    header = {
        "Content-Type": "application/json",
        "Charset": "UTF-8",
    }
    # urllib.request.Request requires the body as bytes on Python 3.
    payload = json.dumps(datas).encode("utf-8")
    request = urllib.request.Request(url=url, data=payload, headers=header)
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        body = response.read()
    print(body)
    return body


def get_ddmodel_datas(type: int) -> dict:  # noqa: A002 - name kept for callers
    """Return a fresh DingTalk message template.

    Args:
        type: Template selector. ``1`` is a plain-text message, ``3`` is a
            markdown message with empty title/text to be filled in by the
            caller. Other values are not implemented.

    Returns:
        A new dict on every call, so callers may mutate it freely.

    Raises:
        ValueError: If ``type`` is not one of the implemented templates.
    """
    if type == 1:
        return {
            "msgtype": "text",
            "text": {
                "content": "test】我就是我, 是不一样的烟火",
            },
            "at": {
                "atMobiles": [
                    "13000000000",
                ],
                "isAtAll": True,
            },
        }
    if type == 3:
        return {
            "msgtype": "markdown",
            "markdown": {
                "title": " ",
                "text": " ",
            },
            "at": {
                "atMobiles": [
                    "13000000000",
                ],
                "isAtAll": True,
            },
        }
    raise ValueError("unsupported DingTalk message type: {!r}".format(type))


def get_sign(timestamp: str, secret: str) -> str:
    """Compute the URL-encoded signature for a DingTalk robot request.

    The signature is the base64-encoded HMAC-SHA256 of
    ``"<timestamp>\\n<secret>"`` keyed with ``secret``, passed through
    ``urllib.parse.quote_plus`` so it can be appended to a query string.

    Args:
        timestamp: Timestamp string that is also sent as the ``timestamp``
            query parameter.
        secret: The robot's signing secret.

    Returns:
        The URL-encoded signature.
    """
    secret_enc = secret.encode('utf-8')
    string_to_sign_enc = '{}\n{}'.format(timestamp, secret).encode('utf-8')
    hmac_code = hmac.new(secret_enc, string_to_sign_enc,
                         digestmod=hashlib.sha256).digest()
    return urllib.parse.quote_plus(base64.b64encode(hmac_code))


def build_signed_url(base_url: str, timestamp: str, sign: str) -> str:
    """Append the ``timestamp`` and ``sign`` query parameters to ``base_url``.

    ``base_url`` must already contain a query string (the access token), and
    ``sign`` must already be URL-encoded, as returned by ``get_sign``.
    """
    return base_url + '&timestamp=' + timestamp + '&sign=' + sign


def sg_md_deal(head, data, now=None) -> str:
    """Render rows as the markdown table text of the alert message.

    Args:
        head: Column titles (strings); each is rendered in bold.
        data: Iterable of rows; every cell is converted with ``str``.
        now: Optional ``datetime.datetime`` used for the heading; defaults
            to the current local time.

    Returns:
        A heading line followed by the header row, the alignment row and
        one row per entry in ``data``.
    """
    sep = '|'
    row_end = ' \n '
    if now is None:
        now = datetime.datetime.now()
    parts = [
        '### {} 调度异常哈 \n'.format(now.strftime("%Y-%m-%d %H:%M")),
        sep + sep.join('**' + col + '**' for col in head) + sep + row_end,
        sep + sep.join(':---' for _ in head) + sep + row_end,
    ]
    parts.extend(
        sep + sep.join(' ' + str(cell) for cell in row) + sep + row_end
        for row in data
    )
    return ''.join(parts)


def dms_sendnum_mysql(ptt_day: str):
    """Fetch workflows started on or after ``ptt_day`` that did not succeed.

    Args:
        ptt_day: Lower bound for ``start_time``, e.g. ``'2024-01-31'``.

    Returns:
        The ``fetchall()`` result: ``(start_time, app_name)`` rows ordered
        by ``start_time``; empty when every workflow succeeded.
    """
    # Imported here so the formatting/signing helpers above stay importable
    # on machines that do not have the MySQL driver installed.
    import pymysql

    # The table name matches the case used in the CREATE TABLE statement
    # (WF_JOBS): MySQL table names are case-sensitive on Linux unless
    # lower_case_table_names is changed.
    # NOTE(review): "status != 'SUCCEEDED'" presumably also matches
    # workflows that are still running or suspended at report time, not
    # only failed/killed ones - confirm this is intended.
    sql = """
        select start_time, app_name
        from WF_JOBS
        where user_name = %s
          and status != 'SUCCEEDED'
          and start_time >= %s
        order by start_time;
    """
    conn = pymysql.connect(host="oozie数据库ip", port=3306,
                           user="oozie_dingding_only_read",
                           password="012345", database="oozie",
                           charset="utf8")
    try:
        with conn.cursor() as cursor:
            # Values are passed separately so the driver escapes them.
            cursor.execute(sql, (WORKFLOW_USER, ptt_day))
            return cursor.fetchall()
    finally:
        # Always release the connection, even if the query raised.
        conn.close()


def main() -> int:
    """Query today's non-succeeded workflows and post them to DingTalk."""
    head = [' 开始时间', ' 任务名']

    ptt_day = str(datetime.date.today())
    mysql_data = dms_sendnum_mysql(ptt_day)
    if not mysql_data:
        # Nothing to report: stay silent.
        return 0

    my_data = get_ddmodel_datas(3)
    my_data["markdown"]["title"] = "oozie失败调度"
    my_data["markdown"]["text"] = sg_md_deal(head, mysql_data)

    timestamp = str(round(time.time() * 1000))
    sign = get_sign(timestamp=timestamp, secret=SECRET)
    # The signed URL contains the access token, so it is deliberately not
    # printed to stdout (cron would store it in logs or mail).
    send_request(build_signed_url(WEBHOOK_URL, timestamp, sign), my_data)
    return 0


if __name__ == "__main__":
    sys.exit(main())
crontab调度
crontab -e
# Add the following entry:
# oozie failure schedule: run the report every day at 08:30.
# NOTE(review): PYTHON_HOME and DINGDING_REPORT_DIR are not defined in this
# snippet; cron presumably will not have them in its environment, so define
# them in the crontab or use absolute paths - confirm on the target host.
30 8 * * * ${PYTHON_HOME}/python ${DINGDING_REPORT_DIR}/dingding_oozie_fail_schedule.py