Files
arista-evpn-vxlan-clab/monitoring/prometheus/prometheus.yml

83 lines
2.1 KiB
YAML

# Prometheus configuration for EVPN-VXLAN fabric monitoring
# Enhanced for Flow Plugin visualization
# Defaults that apply to every scrape job unless a job overrides them.
global:
  # How often targets are scraped.
  scrape_interval: 15s
  # How often rules are evaluated.
  evaluation_interval: 15s
  # Labels attached to series and alerts when this server talks to
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    monitor: 'evpn-fabric-monitor'
    cluster: 'evpn-vxlan-lab'
# Alertmanager configuration (optional)
# alerting:
#   alertmanagers:
#   - static_configs:
#     - targets:
#       - alertmanager:9093
# Load rules once and periodically evaluate them
# rule_files:
# - "alerts/*.yml"
# - "recording_rules/*.yml"
# Scrape jobs; every list item under this key defines one job.
scrape_configs:
# Self-monitoring: Prometheus scrapes its own metrics endpoint.
- job_name: 'prometheus'
  static_configs:
  - targets:
    - 'localhost:9090'
    # Static label added to every series scraped from this target group.
    labels:
      component: 'prometheus'
# Scrape gnmic for network telemetry
- job_name: 'gnmic'
  # Overrides the global 15s interval for this job only.
  scrape_interval: 10s
  # Equal to the interval; Prometheus rejects a timeout larger than the
  # scrape interval.
  scrape_timeout: 10s
  static_configs:
  - targets: ['gnmic:9804']
    labels:
      component: 'gnmic-collector'
      fabric: 'evpn-vxlan'
  # Metric relabeling for the Flow Plugin. Rules run top to bottom on every
  # scraped sample before it is stored.
  metric_relabel_configs:
  # Allow-list of metric families. This MUST be a single `keep` rule:
  # `keep` discards every series whose name does not match, so one `keep`
  # per family would require a metric to match all families at once and
  # nothing would be stored. No trailing `drop` rule is needed (and a
  # `drop` on 'gnmic_.*' would discard everything kept here).
  # Families kept:
  #   gnmic_interfaces_*   interface metrics, critical for flow visualization
  #   *bgp*                BGP metrics for overlay health
  #   *lacp*               LACP metrics for redundancy visibility
  #   gnmic_system*        system metrics
  #   *vxlan* / *vlan*     VXLAN and VLAN metrics
  # Any other series exposed by this target is dropped to reduce storage.
  - source_labels: [__name__]
    regex: 'gnmic_(interfaces_.*|.*bgp.*|.*lacp.*|system.*|.*vxlan.*|.*vlan.*)'
    action: keep
  # Add fabric topology labels from device names. The default action is
  # `replace` and the regex is fully anchored, so the labels are only set
  # when `source` is exactly e.g. 'spine1' or 'leaf12'.
  # NOTE(review): presumably `source` is the gnmic target name; confirm it
  # carries no prefix or ':port' suffix, otherwise these rules never match.
  - source_labels: [source]
    regex: '(spine|leaf)(\d+)'
    target_label: device_type
    replacement: '$1'
  - source_labels: [source]
    regex: '(spine|leaf)(\d+)'
    target_label: device_number
    replacement: '$2'