---
# RocketMQ alert rules for Prometheus.
#
# A single rule group covering NameServer/Broker availability, consumer
# backlog growth, producer send failures, and memory/disk utilisation.
# The annotation texts are user-facing alert messages and are kept as-is.
groups:
  - name: rocketmq
    # Evaluate every rule in this group once every 30 seconds.
    interval: 30s
    rules:
      # NameServer availability: a scrape target of job "rocketmq-namesrv"
      # has reported up == 0 continuously for 1 minute.
      - alert: RocketMQNameServerDown
        expr: up{job="rocketmq-namesrv"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "RocketMQ NameServer 实例 {{ $labels.instance }} 宕机"
          description: "RocketMQ NameServer 已经宕机超过1分钟"

      # Broker availability: a scrape target of job "rocketmq-broker"
      # has reported up == 0 continuously for 1 minute.
      - alert: RocketMQBrokerDown
        expr: up{job="rocketmq-broker"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "RocketMQ Broker {{ $labels.instance }} 宕机"
          description: "RocketMQ Broker 已经宕机超过1分钟"

      # Consumer backlog: for a consumer group that is actively consuming
      # (consumer TPS > 0), the topic's produce rate has exceeded the
      # group's consume rate by more than 1000 for 5 minutes.
      #
      # The subtraction is producer - consumer: a backlog grows when
      # messages are produced faster than they are consumed.
      # Both sides are aggregated and matched on `topic`, and `group_right`
      # keeps the consumer-side `group` label that the annotations use.
      # NOTE(review): assumes rocketmq_producer_tps carries a `topic`
      # label - confirm against the exporter in use.
      - alert: RocketMQConsumerLag
        expr: |
          (
              sum by (topic) (rocketmq_producer_tps)
            - on (topic) group_right
              sum by (group, topic) (rocketmq_consumer_tps)
          ) > 1000
          and
          sum by (group, topic) (rocketmq_consumer_tps) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "RocketMQ 消费堆积 - {{ $labels.group }}"
          description: "消费者组 {{ $labels.group }} 在主题 {{ $labels.topic }} 上消费堆积"

      # Producer send failures: the per-second rate of
      # rocketmq_producer_send_failed_total, averaged over a 5-minute
      # window, has stayed above 10 for 5 minutes.
      - alert: RocketMQProducerSendFailed
        expr: rate(rocketmq_producer_send_failed_total[5m]) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "RocketMQ 生产者发送失败率升高"
          description: "消息发送失败率超过阈值"

      # Memory utilisation: used/total ratio above 0.85 for 10 minutes.
      - alert: RocketMQMemoryUsageHigh
        expr: (rocketmq_memory_used / rocketmq_memory_total) > 0.85
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "RocketMQ 内存使用率过高 - {{ $labels.instance }}"
          description: "实例 {{ $labels.instance }} 内存使用率超过85%"

      # Disk utilisation: used/total ratio above 0.80 for 10 minutes.
      - alert: RocketMQDiskUsageHigh
        expr: (rocketmq_disk_used / rocketmq_disk_total) > 0.80
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "RocketMQ 磁盘使用率过高 - {{ $labels.instance }}"
          description: "实例 {{ $labels.instance }} 磁盘使用率超过80%"