Add Grafana monitoring stack with gNMI telemetry and Network Weathermap #17

Closed
Damien wants to merge 28 commits from feature/grafana-monitoring into main
Showing only changes of commit b34b0eed7d - Show all commits

View File

@@ -1,5 +1,5 @@
# gNMIc configuration for Arista EVPN-VXLAN fabric # gNMIc configuration for Arista EVPN-VXLAN fabric
# Collects gNMI telemetry and exposes Prometheus metrics # Enhanced for Flow Plugin visualization with comprehensive telemetry
# #
# Usage: # Usage:
# gnmic subscribe --config /path/to/gnmic.yaml # gnmic subscribe --config /path/to/gnmic.yaml
@@ -11,9 +11,9 @@
# gnmic -a 172.16.0.1:6030 -u admin -p admin --insecure \ # gnmic -a 172.16.0.1:6030 -u admin -p admin --insecure \
# subscribe --path /interfaces/interface/state --stream-mode sample --sample-interval 10s # subscribe --path /interfaces/interface/state --stream-mode sample --sample-interval 10s
# ============================================================================== # ============================================================================
# Global settings # Global settings
# ============================================================================== # ============================================================================
username: admin username: admin
password: admin password: admin
insecure: true insecure: true
@@ -23,171 +23,278 @@ debug: true
timeout: 30s timeout: 30s
retry: 10s retry: 10s
# ============================================================================== # ============================================================================
# Target devices - All switches in the fabric # Target devices - All switches in the fabric
# ============================================================================== # ============================================================================
targets: targets:
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Spine switches (AS 65000) # Spine switches (AS 65000)
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
spine1: spine1:
address: 172.16.0.1:6030 address: 172.16.0.1:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
labels: labels:
role: spine role: spine
fabric_tier: spine fabric_tier: spine
device: spine1 device: spine1
asn: "65000"
spine2: spine2:
address: 172.16.0.2:6030 address: 172.16.0.2:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
labels: labels:
role: spine role: spine
fabric_tier: spine fabric_tier: spine
device: spine2 device: spine2
asn: "65000"
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Leaf switches - VTEP1 (AS 65001) # Leaf switches - VTEP1 (AS 65001)
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
leaf1: leaf1:
address: 172.16.0.25:6030 address: 172.16.0.25:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep1 vtep: vtep1
mlag_pair: "1" mlag_pair: "1"
device: leaf1 device: leaf1
asn: "65001"
leaf2: leaf2:
address: 172.16.0.50:6030 address: 172.16.0.50:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep1 vtep: vtep1
mlag_pair: "1" mlag_pair: "1"
device: leaf2 device: leaf2
asn: "65001"
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Leaf switches - VTEP2 (AS 65002) # Leaf switches - VTEP2 (AS 65002)
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
leaf3: leaf3:
address: 172.16.0.27:6030 address: 172.16.0.27:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep2 vtep: vtep2
mlag_pair: "2" mlag_pair: "2"
device: leaf3 device: leaf3
asn: "65002"
leaf4: leaf4:
address: 172.16.0.28:6030 address: 172.16.0.28:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep2 vtep: vtep2
mlag_pair: "2" mlag_pair: "2"
device: leaf4 device: leaf4
asn: "65002"
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Leaf switches - VTEP3 (AS 65003) # Leaf switches - VTEP3 (AS 65003)
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
leaf5: leaf5:
address: 172.16.0.29:6030 address: 172.16.0.29:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep3 vtep: vtep3
mlag_pair: "3" mlag_pair: "3"
device: leaf5 device: leaf5
asn: "65003"
leaf6: leaf6:
address: 172.16.0.30:6030 address: 172.16.0.30:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep3 vtep: vtep3
mlag_pair: "3" mlag_pair: "3"
device: leaf6 device: leaf6
asn: "65003"
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Leaf switches - VTEP4 (AS 65004) # Leaf switches - VTEP4 (AS 65004)
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
leaf7: leaf7:
address: 172.16.0.31:6030 address: 172.16.0.31:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep4 vtep: vtep4
mlag_pair: "4" mlag_pair: "4"
device: leaf7 device: leaf7
asn: "65004"
leaf8: leaf8:
address: 172.16.0.32:6030 address: 172.16.0.32:6030
subscriptions: subscriptions:
- interfaces - interfaces
- system - system
- bgp
- routing
- vxlan
- mlag
labels: labels:
role: leaf role: leaf
fabric_tier: leaf fabric_tier: leaf
vtep: vtep4 vtep: vtep4
mlag_pair: "4" mlag_pair: "4"
device: leaf8 device: leaf8
asn: "65004"
# ============================================================================== # ============================================================================
# Subscriptions - define what telemetry to collect # Subscriptions - define what telemetry to collect
# ============================================================================== # ============================================================================
subscriptions: subscriptions:
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
# Interface statistics - for weathermap bandwidth visualization # Interface statistics - for Flow Plugin bandwidth visualization
# Simplified path that works with Arista cEOS OpenConfig implementation # High-frequency sampling for real-time traffic visualization
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
interfaces: interfaces:
paths: paths:
- /interfaces/interface/state # Interface state and statistics
- /interfaces/interface/state/counters
- /interfaces/interface/state/oper-status
- /interfaces/interface/state/admin-status
# Interface configuration for metadata
- /interfaces/interface/config/name
- /interfaces/interface/config/description
# Ethernet-specific counters
- /interfaces/interface/ethernet/state/counters
mode: stream mode: stream
stream-mode: sample stream-mode: sample
sample-interval: 15s sample-interval: 10s
encoding: json_ietf encoding: json_ietf
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
# System information - hostname, uptime, memory # System information - hostname, uptime, memory, CPU
# --------------------------------------------------------------------------- # --------------------------------------------------------------------------
system: system:
paths: paths:
- /system/state - /system/state
- /system/memory/state
- /system/cpus/cpu/state
mode: stream mode: stream
stream-mode: sample stream-mode: sample
sample-interval: 30s sample-interval: 30s
encoding: json_ietf encoding: json_ietf
# ============================================================================== # --------------------------------------------------------------------------
# BGP telemetry - for fabric health and EVPN overlay monitoring
# --------------------------------------------------------------------------
bgp:
paths:
# BGP neighbor state
- /network-instances/network-instance/protocols/protocol/bgp/neighbors/neighbor/state
# BGP AFI/SAFI state (including EVPN)
- /network-instances/network-instance/protocols/protocol/bgp/neighbors/neighbor/afi-safis/afi-safi/state
# BGP global state
- /network-instances/network-instance/protocols/protocol/bgp/global/state
mode: stream
stream-mode: sample
sample-interval: 30s
encoding: json_ietf
# --------------------------------------------------------------------------
# Routing information - for underlay health
# --------------------------------------------------------------------------
routing:
paths:
- /network-instances/network-instance/protocols/protocol/static-routes
- /network-instances/network-instance/afts/ipv4-unicast/ipv4-entry
mode: stream
stream-mode: sample
sample-interval: 60s
encoding: json_ietf
# --------------------------------------------------------------------------
# VXLAN telemetry - for overlay visibility
# --------------------------------------------------------------------------
vxlan:
paths:
- /network-instances/network-instance/vlans/vlan/members/member/state
- /network-instances/network-instance/connection-points/connection-point/endpoints
mode: stream
stream-mode: on_change
encoding: json_ietf
# --------------------------------------------------------------------------
# MLAG telemetry - LACP interface and member state, for redundancy monitoring
# --------------------------------------------------------------------------
mlag:
paths:
- /lacp/interfaces/interface/state
- /lacp/interfaces/interface/members/member/state
mode: stream
stream-mode: sample
sample-interval: 15s
encoding: json_ietf
# ============================================================================
# Prometheus output configuration # Prometheus output configuration
# ============================================================================== # ============================================================================
outputs: outputs:
prometheus: prometheus:
type: prometheus type: prometheus
@@ -197,17 +304,20 @@ outputs:
append-subscription-name: true append-subscription-name: true
export-timestamps: true export-timestamps: true
strings-as-labels: true strings-as-labels: true
debug: true debug: false
# Expiration time for metrics (prevents stale data) # Expiration time for metrics (prevents stale data)
expiration: 120s expiration: 120s
# Event processors to clean up metric names # Event processors to clean up metric names
event-processors: event-processors:
- trim-prefixes - trim-prefixes
- add-source-label
- interface-name-processor
# ============================================================================== # ============================================================================
# Event processors - clean up and transform metrics # Event processors - clean up and transform metrics
# ============================================================================== # ============================================================================
processors: processors:
# Remove long path prefixes from metric names
trim-prefixes: trim-prefixes:
event-strings: event-strings:
value-names: value-names:
@@ -215,3 +325,25 @@ processors:
transforms: transforms:
- path-base: - path-base:
apply-on: "name" apply-on: "name"
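# Add source label from device name
# NOTE(review): the replace transform below maps "" to "" on the metric name,
# which appears to leave names unchanged and add no label - confirm intent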
add-source-label:
event-strings:
value-names:
- ".*"
transforms:
- replace:
apply-on: "name"
old: ""
new: ""
# Shorten "Ethernet" to "eth" in values of interface-related metrics for better readability
interface-name-processor:
event-strings:
value-names:
- ".*interface.*"
transforms:
- replace:
apply-on: "value"
old: "Ethernet"
new: "eth"