Prometheus Alertmanager 邮件报警:
下载网址:
https://prometheus.io/download
tar zxfv alertmanager-0.15.2.linux-amd64.tar.gz -C /space/
mv /space/alertmanager-0.15.2.linux-amd64 /space/altermanager
vi /space/altermanager/altermanager.yml
# Alertmanager configuration: send every alert by e-mail to a single receiver.
# NOTE(review): the e-mail addresses below look like placeholders left by the
# source page's address obfuscation - replace them with real addresses.
global:
  # How long to wait before declaring an alert resolved when it stops being
  # reported.
  resolve_timeout: 5m
  # SMTP relay (host:port) used to send notification mail.
  smtp_smarthost: 'smtp.ming.com:25'
  # Sender address placed in the From header.
  smtp_from: '[email protected]'
route:
  # NOTE(review): group_by takes label names; confirm that 'down' is a label
  # present on your alerts ('alertname' is the usual choice).
  group_by: ['down']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  # Must match a name listed under `receivers`.
  receiver: 'shprom'
receivers:
  - name: 'shprom'
    email_configs:
      - to: '[email protected]'
:wq
/space/altermanager/alertmanager --config.file=/space/altermanager/altermanager.yml
vi /space/altermanager/down.yml
# Alerting rule: fire when a scrape target has been unreachable for 30 seconds.
groups:
  - name: down
    rules:
      - alert: InstanceDown
        # `up` is 0 when the most recent scrape of the target failed.
        expr: up == 0
        # The condition must hold this long before the alert fires.
        for: 30s
        labels:
          user: shprom
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          # Wording kept in sync with the `for` duration above.
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 30 seconds."
:wq
vi /space/altermanager/mem.yml
# Alerting rule: fire when a node's memory usage stays above 80% for 1 minute.
groups:
  - name: mem
    rules:
      - alert: NodeMemoryUsage
        # Used % = 100 - (cached + buffers + free) / total * 100.
        # Aggregating `by (instance)` keeps the instance label so that the
        # annotations below can reference it; a bare sum() would drop every
        # label and merge all nodes into a single value.
        expr: >-
          100 - ((sum by (instance) (node_memory_Cached_bytes)
          + sum by (instance) (node_memory_Buffers_bytes)
          + sum by (instance) (node_memory_MemFree_bytes))
          / sum by (instance) (node_memory_MemTotal_bytes) * 100) > 80
        for: 1m
        labels:
          user: shprom
        annotations:
          summary: "{{ $labels.instance }} High Memory usage detected"
          description: "{{ $labels.instance }}: Memory usage is above 80% (current value is:{{ $value }})"
:wq
vi /space/prometheus/prometheus.yml
# Fragment of prometheus.yml: where to send alerts and which rule files to load.
alerting:
  alertmanagers:
    - static_configs:
        # Alertmanager started in the earlier step, listening on port 9093.
        - targets: ['localhost:9093']
# The rule files were created under /space/altermanager/ above, so they are
# referenced by absolute path. Relative names are expected to resolve against
# the directory holding prometheus.yml (/space/prometheus/) - confirm for your
# Prometheus version.
rule_files:
  - "/space/altermanager/down.yml"
  - "/space/altermanager/mem.yml"
:wq
/space/prometheus/prometheus --config.file=/space/prometheus/prometheus.yml --storage.tsdb.path=/space/prometheus/data
原文地址:http://blog.51cto.com/yangzhiming/2309012
时间: 2024-11-02 13:30:19