diff --git a/src/infrahub/README.md b/src/infrahub/README.md new file mode 100644 index 0000000..4e20a2e --- /dev/null +++ b/src/infrahub/README.md @@ -0,0 +1,282 @@ +# Infrahub Client Module + +This module provides an async client for querying fabric intent data from a remote +[Infrahub](https://docs.infrahub.app/) instance. It wraps `infrahub-sdk` with structured +Pydantic model responses, TTL-based caching, and a typed exception hierarchy. + +It is used by the Reconciler and Prefect flows as the single entry point to the +Source of Truth. + +## Features + +- Async context manager support (`async with FabricInfrahubClient(...) as client`) +- Typed, immutable Pydantic v2 response models (frozen, validated) +- TTL-based in-memory cache (60 s) to avoid redundant SDK queries per client instance +- Structured exception hierarchy for fine-grained error handling +- Branch selection — query any Infrahub branch (default: `main`) +- Full coverage of the fabric schema: devices, VLANs, BGP, VRFs, VTEP, EVPN, MLAG + +## Installation + +Ensure the required dependencies are installed: + +```bash +uv add "infrahub-sdk>=0.16.0" "pydantic>=2.0" +``` + +Or via `pyproject.toml`: + +```toml +dependencies = [ + "infrahub-sdk>=0.16.0", + "pydantic>=2.0", +] +``` + +## Usage + +### Basic connection + +```python +import asyncio +from src.infrahub import FabricInfrahubClient + +async def main(): + async with FabricInfrahubClient( + url="http://infrahub:8080", + api_token="your-api-token", + branch="main", # optional, default: "main" + ) as client: + device = await client.get_device("leaf1") + print(device.name, device.role, device.asn) + +asyncio.run(main()) +``` + +### `get_device()` — fetch a device intent + +```python +from src.infrahub import FabricInfrahubClient, InfrahubNotFoundError + +async with FabricInfrahubClient(url="http://infrahub:8080", api_token="tok") as client: + device = await client.get_device("leaf1") + # DeviceIntent(name='leaf1', role='leaf', status='active', + # 
platform='EOS', site='dc1', asn=65001) + print(f"{device.name} — role={device.role}, ASN={device.asn}") +``` + +### `get_device_vlans()` — fetch VLANs for a device + +VLANs are resolved via the device's VTEP `vlan_vni_mappings`. If the device has +no VTEP, the fallback resolves VLANs through its SVI interfaces. + +```python +vlans = await client.get_device_vlans("leaf1") +for vlan in vlans: + # VlanIntent(vlan_id=10, name='PROD', status='active', + # vlan_type='standard', vni=10010, stp_enabled=True) + print(f"VLAN {vlan.vlan_id} — VNI {vlan.vni}, type={vlan.vlan_type}") +``` + +### `get_device_bgp_config()` — fetch BGP router configuration + +```python +bgp = await client.get_device_bgp_config("leaf1") +# BgpRouterConfigIntent(router_id='10.0.0.1', local_asn=65001, +# default_ipv4_unicast=True, ecmp_max_paths=4) +print(f"Router-ID: {bgp.router_id}, ASN: {bgp.local_asn}, ECMP: {bgp.ecmp_max_paths}") +``` + +### `get_device_bgp_peer_groups()` — fetch BGP peer groups + +```python +peer_groups = await client.get_device_bgp_peer_groups("leaf1") +for pg in peer_groups: + # BgpPeerGroupIntent(name='EVPN-PEERS', peer_group_type='evpn', + # remote_asn=65000, update_source='Loopback0', + # send_community='extended', ebgp_multihop=3, + # next_hop_unchanged=True) + print(f"{pg.name} — type={pg.peer_group_type}, remote_asn={pg.remote_asn}") +``` + +### `get_device_bgp_sessions()` — fetch BGP sessions + +```python +sessions = await client.get_device_bgp_sessions("leaf1") +for sess in sessions: + # BgpSessionIntent(peer_address='10.0.0.2', description='to-spine1', + # enabled=True, peer_group='UNDERLAY', remote_asn=65000) + print(f"{sess.peer_address} — group={sess.peer_group}, enabled={sess.enabled}") +``` + +### `get_device_vrfs()` — fetch VRFs assigned to a device + +VRFs are resolved via `InfraVRFDeviceAssignment` (device-specific RD + route targets). 
+ +```python +vrfs = await client.get_device_vrfs("leaf1") +for vrf in vrfs: + # VrfIntent(name='PROD', route_distinguisher='10.0.0.1:100', + # vrf_id=100, l3vni=10000, + # import_targets=['65000:100'], export_targets=['65000:100']) + print(f"VRF {vrf.name} — RD={vrf.route_distinguisher}, L3VNI={vrf.l3vni}") + print(f" import: {vrf.import_targets}") + print(f" export: {vrf.export_targets}") +``` + +### `get_device_vtep()` — fetch VTEP configuration + +Returns `None` if the device has no VTEP. + +```python +vtep = await client.get_device_vtep("leaf1") +if vtep: + # VtepIntent(source_address='10.0.0.1', udp_port=4789, + # learn_restrict=False, vlan_vni_mappings=[(10, 10010), (20, 10020)]) + print(f"VTEP source: {vtep.source_address}, port: {vtep.udp_port}") + for vlan_id, vni in vtep.vlan_vni_mappings: + print(f" VLAN {vlan_id} → VNI {vni}") +``` + +### `get_device_evpn_instances()` — fetch EVPN instances + +```python +evpn_instances = await client.get_device_evpn_instances("leaf1") +for ev in evpn_instances: + # EvpnInstanceIntent(route_distinguisher='10.0.0.1:10', + # route_target_import='65000:10', + # route_target_export='65000:10', + # redistribute_learned=True, vlan_id=10) + print(f"EVPN VLAN {ev.vlan_id} — RD={ev.route_distinguisher}") +``` + +### `get_mlag_domain()` — fetch MLAG domain + +Returns `None` if the device is not part of an MLAG domain. + +```python +domain = await client.get_mlag_domain("leaf1") +if domain: + # MlagDomainIntent(domain_id='1', virtual_mac='00:1c:73:00:00:01', + # heartbeat_vrf='MGMT', dual_primary_detection=True, + # dual_primary_delay=10, dual_primary_action='errdisable', + # peer_devices=['leaf1', 'leaf2']) + peer = [d for d in domain.peer_devices if d != "leaf1"][0] + print(f"MLAG domain {domain.domain_id} — peer: {peer}") +``` + +### `get_mlag_peer_config()` — fetch MLAG peer configuration + +Returns `None` if no MLAG peer config exists for this device. 
+ +```python +peer_cfg = await client.get_mlag_peer_config("leaf1") +if peer_cfg: + # MlagPeerConfigIntent(local_interface_ip='10.255.255.0/31', + # peer_address='10.255.255.1', + # heartbeat_peer_ip='192.168.0.2', + # peer_link='Port-Channel1') + print(f"Local IP: {peer_cfg.local_interface_ip}") + print(f"Peer: {peer_cfg.peer_address} via {peer_cfg.peer_link}") +``` + +### Querying a specific branch + +```python +async with FabricInfrahubClient( + url="http://infrahub:8080", + api_token="tok", + branch="proposed-change", +) as client: + device = await client.get_device("leaf1") +``` + +## Pydantic Models + +All models are **frozen** (`model_config = ConfigDict(frozen=True)`) — fields cannot be +reassigned after construction. A frozen model is hashable only when every field value is +hashable, so models that carry list fields (e.g. `VrfIntent.import_targets`, +`VtepIntent.vlan_vni_mappings`, `MlagDomainIntent.peer_devices`) cannot be used as +dict keys or set members. + +| Model | Description | +|---|---| +| `DeviceIntent` | Network device — name, role, status, platform, site, ASN | +| `VlanIntent` | VLAN — vlan_id, name, status, vlan_type, VNI (if any), STP | +| `VniIntent` | VXLAN Network Identifier — vni, vni_type, description | +| `BgpRouterConfigIntent` | BGP router config — router_id, local ASN, ECMP, default IPv4 unicast | +| `BgpPeerGroupIntent` | BGP peer group — name, type, remote ASN, update-source, community, multihop | +| `BgpSessionIntent` | BGP session — peer address, description, enabled, peer group, remote ASN | +| `VrfIntent` | VRF — name, RD, VRF ID, L3VNI, import/export route targets | +| `VtepIntent` | VTEP — source address, UDP port, learn-restrict, VLAN→VNI mappings | +| `MlagDomainIntent` | MLAG domain — domain ID, virtual MAC, heartbeat VRF, dual-primary settings | +| `MlagPeerConfigIntent` | MLAG peer config — local IP, peer address, heartbeat IP, peer-link interface | +| `EvpnInstanceIntent` | EVPN instance — RD, RT import/export, redistribute-learned, VLAN ID | + +## Error Handling + +```python +from src.infrahub import ( + FabricInfrahubClient, + InfrahubClientError, + InfrahubConnectionError, + InfrahubNotFoundError, + InfrahubQueryError, +) + +async with 
FabricInfrahubClient(url="http://infrahub:8080", api_token="tok") as client: + try: + device = await client.get_device("unknown-device") + except InfrahubNotFoundError as e: + # Device does not exist in Infrahub + print(f"Not found: {e}") + except InfrahubQueryError as e: + # SDK query failed (network error, schema mismatch, etc.) + print(f"Query error: {e}") + except InfrahubConnectionError as e: + # Could not reach the Infrahub instance + print(f"Connection error: {e}") + except InfrahubClientError as e: + # Catch-all for any other client error + print(f"Infrahub error: {e}") +``` + +### Exception hierarchy + +``` +InfrahubClientError ← base for all client errors +├── InfrahubConnectionError ← raised when the instance is unreachable +├── InfrahubQueryError ← raised when an SDK query fails or returns unexpected data +└── InfrahubNotFoundError ← raised when a requested node does not exist +``` + +## Environment Variables + +The `infrahub-sdk` library reads the following environment variables as defaults. +You can set them instead of passing values explicitly to `FabricInfrahubClient`. + +| Variable | Description | Default | +|---|---|---| +| `INFRAHUB_ADDRESS` | Base URL of the Infrahub instance | — | +| `INFRAHUB_API_TOKEN` | API token for authentication | — | +| `INFRAHUB_DEFAULT_BRANCH` | Branch to query when none is specified | `main` | + +> **Note:** `FabricInfrahubClient.__init__` always passes `url`, `api_token`, and `branch` +> explicitly to `infrahub_sdk.Config`, so environment variables only act as fallbacks +> at the SDK level when the client is initialised without arguments. + +## Cache + +Each `FabricInfrahubClient` instance maintains an in-memory TTL cache. + +- **Scope:** per-instance (not shared across instances or processes) +- **TTL:** 60 seconds (constant `_CACHE_TTL` in `client.py`) +- **Key format:** `<query_type>:<device_name>` (e.g. 
`device:leaf1`, `bgp_config:leaf1`) +- **Behaviour:** on the first call the result is fetched from Infrahub and stored; + subsequent calls within the TTL window return the cached value without hitting the SDK + +```python +async with FabricInfrahubClient(url="http://infrahub:8080", api_token="tok") as client: + d1 = await client.get_device("leaf1") # → SDK query + d2 = await client.get_device("leaf1") # → cache hit, no SDK call + assert d1 == d2 +``` + +To bypass the cache, create a new `FabricInfrahubClient` instance.