Prometheus+Alertmanager+Grafana監控組件容器化部署 ...
直接上部署配置文件
docker-compose.yml
# Monitoring stack: Prometheus, Grafana, node-exporter and Alertmanager.
# Every service joins the user-defined bridge network "monitor", so the
# containers can reach one another by service name (e.g. alertmanager:9093).
version: '3'

networks:
  monitor:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      # Main configuration, TSDB data directory and alert-rule directory.
      # NOTE(review): the host data directory must be writable by the user
      # the container runs as - confirm ownership of /data/monitor/prom_db.
      - /data/monitor/prometheus.yml:/etc/prometheus/prometheus.yml
      - /data/monitor/prom_db:/prometheus
      - /data/monitor/prom_rules:/etc/prometheus/rules
    ports:
      - "9090:9090"
    networks:
      - monitor

  grafana:
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    volumes:
      - /data/monitor/ga_data:/var/lib/grafana
    environment:
      # The admin password can be supplied from the shell or an .env file;
      # the previous hard-coded value is kept only as a fallback default.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin@123}
    ports:
      - "3000:3000"
    networks:
      - monitor

  node-exporter:
    image: quay.io/prometheus/node-exporter
    container_name: node-exporter
    hostname: node-exporter
    restart: always
    ports:
      - "9100:9100"
    networks:
      - monitor

  alertmanager:
    image: prom/alertmanager
    container_name: alertmanager
    restart: always
    volumes:
      - /data/monitor/alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"
    networks:
      - monitor
prometheus.yml
# Prometheus server configuration for the containerised monitoring stack.
global:
  scrape_interval: 60s
  evaluation_interval: 60s

scrape_configs:
  # Prometheus scraping its own metrics endpoint; localhost is correct here
  # because the target lives in the same container.
  - job_name: prom-server
    static_configs:
      - targets: ['localhost:9090']
        labels:
          instance: prometheus

  # node-exporter runs in a separate container, so it is addressed by its
  # Compose service name on the shared network rather than by localhost.
  - job_name: "node_exporter"
    static_configs:
      - targets:
          - node-exporter:9100

alerting:
  alertmanagers:
    - static_configs:
        # Alertmanager is likewise a separate container; use its service name.
        - targets: ["alertmanager:9093"]

rule_files:
  # Load every rule file found in the mounted rules directory.
  - /etc/prometheus/rules/*.yml
alertmanager.yml
# Alertmanager configuration: all alerts are routed to a single e-mail
# receiver. Every XXXXX below is a placeholder to fill in before deploying.
global:
  smtp_smarthost: 'smtp.126.com:25'
  # Sender address: must be a real, syntactically valid mailbox.
  smtp_from: 'XXXXX@126.com'
  smtp_auth_username: 'XXXXX'
  smtp_auth_password: 'XXXXX'
  smtp_require_tls: false

receivers:
  - name: default-receiver
    email_configs:
      # Recipient address: replace with the mailbox that should get alerts.
      - to: 'XXXXX@example.com'
        require_tls: false
        # Also send a notification when the alert clears.
        send_resolved: true

route:
  # Alerts sharing these label values are batched into one notification.
  group_by: ['alertname', 'cluster', 'service']
  group_wait: 10s
  group_interval: 5m
  receiver: default-receiver
  repeat_interval: 3h
rules.yml
# Alerting rules evaluated by Prometheus.
groups:
  - name: response-rule
    rules:
      # Fires when the used-space percentage of any selected mount point
      # (/, /logs, /backup, /home) is above 75.
      - alert: NodeDiskUsageException
        expr: '(1 - (node_filesystem_free_bytes{mountpoint=~"/|/logs|/backup|/home"} / node_filesystem_size_bytes{mountpoint=~"/|/logs|/backup|/home"})) * 100 > 75'
        labels:
          ai_mon: node
        annotations:
          summary: '磁碟占用超標'
          description: '伺服器{{$labels.instance}}磁碟{{$labels.device}}空間占用比例為{{$value}}%, 大於閾值75%'