diff --git a/CHANGELOG.md b/CHANGELOG.md index 9baaa6c..692344c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased] ### Fixed - +- [#280](https://github.com/thatmattlove/hyperglass/issues/280): Fix: `condition: None` caused error in directive - [#306](https://github.com/thatmattlove/hyperglass/issues/306): Fix: allow integer values in ext_community_list_raw field for Arista BGP - @cooperwinser - [#325](https://github.com/thatmattlove/hyperglass/pull/325): Fix code block padding in the documentation - @jagardaniel - [#327](https://github.com/thatmattlove/hyperglass/pull/327): Fix huawei bgp route and plugin validation/transform order - @JelsonRodrigues diff --git a/TRACEROUTE_RESTRUCTURE_SUMMARY.md b/TRACEROUTE_RESTRUCTURE_SUMMARY.md new file mode 100644 index 0000000..f8f3942 --- /dev/null +++ b/TRACEROUTE_RESTRUCTURE_SUMMARY.md @@ -0,0 +1,134 @@ +# MikroTik Traceroute Enhancement - Restructured Implementation + +## Overview +Restructured the MikroTik traceroute implementation to follow consistent naming conventions and architectural patterns used throughout the hyperglass codebase, specifically matching the BGP route plugin structure. + +## Key Changes Made + +### 1. Consistent Naming Convention ✅ +- **OLD**: `mikrotik_traceroute_structured.py` +- **NEW**: `trace_route_mikrotik.py` (matches `bgp_route_mikrotik.py` pattern) + +This follows the established pattern: +- `bgp_route_{platform}.py` for BGP parsing +- `trace_route_{platform}.py` for traceroute parsing + +### 2. Platform-Specific Parsing in models/parsing/ ✅ +- **Added**: `MikrotikTracerouteTable` and `MikrotikTracerouteHop` classes in `models/parsing/mikrotik.py` +- **Removed**: `MikroTikTracerouteParser` from generic `models/parsing/traceroute.py` +- Follows the same pattern as BGP routes where platform-specific parsing is in `models/parsing/{platform}.py` + +### 3. 
Structured Data Model Enhancements ✅ +Enhanced `TracerouteHop` model in `models/data/traceroute.py` with MikroTik-specific statistics: +```python +# MikroTik-specific statistics +loss_pct: Optional[int] = None # Packet loss percentage +sent_count: Optional[int] = None # Number of probes sent +last_rtt: Optional[float] = None # Last RTT measurement +avg_rtt: Optional[float] = None # Average RTT +best_rtt: Optional[float] = None # Best (minimum) RTT +worst_rtt: Optional[float] = None # Worst (maximum) RTT +``` + +### 4. BGP.tools Enrichment - Structured Only ✅ +- **BEFORE**: Applied enrichment to text-based traceroute output +- **NOW**: Applies enrichment only to structured `TracerouteResult` objects +- Added reverse DNS lookup using Python's standard-library `socket` module +- Cleaner separation of concerns + +### 5. UI Table Component Structure ✅ +Created a complete table structure for displaying traceroute data: +- `TracerouteTable` component following BGP table patterns +- `TracerouteCell` component for cell rendering +- `traceroute-fields.tsx` for field-specific formatting +- TypeScript types in `globals.d.ts` + +## File Structure + +``` +hyperglass/ +├── models/ +│ ├── data/ +│ │ └── traceroute.py # Enhanced TracerouteResult/TracerouteHop +│ └── parsing/ +│ ├── traceroute.py # Generic traceroute parsers (removed MikroTik) +│ └── mikrotik.py # MikroTik-specific parsing + MikrotikTracerouteTable +├── plugins/_builtin/ +│ ├── trace_route_mikrotik.py # NEW: MikroTik traceroute plugin (consistent naming) +│ └── bgptools_traceroute_enrichment.py # Updated: structured data only +└── ui/ + ├── components/output/ + │ ├── traceroute-table.tsx # Table component + │ ├── traceroute-cell.tsx # Cell rendering + │ └── traceroute-fields.tsx # Field formatters + └── types/ + └── globals.d.ts # TracerouteResult/TracerouteHop types +``` + +## Benefits of Restructuring + +### 1. 
Consistency ✅ +- Matches established BGP route plugin patterns +- Predictable file locations and naming +- Easier for developers to understand and maintain + +### 2. Separation of Concerns ✅ +- Platform-specific parsing isolated to `models/parsing/{platform}.py` +- Text-based vs structured output clearly separated +- Enrichment only applies where it makes sense (structured data) + +### 3. Enhanced Data Model ✅ +- Full MikroTik statistics preserved (Loss, Sent, Last, AVG, Best, Worst) +- Ready for BGP.tools ASN/organization enrichment +- Reverse DNS lookup integration +- JSON serializable for API responses + +### 4. UI Table Ready ✅ +- Complete table component structure +- Proper cell formatting for latency, loss, ASN +- Color coding for performance indicators +- Responsive design following existing patterns + +## Table Display Format +``` +Hop | IP Address | Hostname | ASN | Loss | Sent | Last | AVG | Best | Worst + 1 | 192.168.1.1 | gateway.local | AS65001 (MyISP) | 0% | 3 | 1.2ms | 1.1ms | 0.9ms | 1.3ms + 2 | 10.0.0.1 | core1.isp.com | AS1234 (BigISP) | 0% | 3 | 15.2ms | 14.8ms | 14.2ms | 15.5ms + 3 | — | — | — | 100% | 3 | * | * | * | * + 4 | 203.0.113.1 | transit.net | AS5678 (Transit) | 0% | 3 | 25.4ms | 26.1ms | 25.1ms | 27.8ms +``` + +## Testing Results ✅ + +A standalone parser test confirms: +- ✅ Correct parsing of MikroTik traceroute format +- ✅ Proper handling of timeouts and timeout aggregation +- ✅ MikroTik-specific statistics extraction +- ✅ Ready for structured data enrichment + +## Next Steps + +1. **DNS Tools Integration**: Integrate a dedicated DNS library for more robust reverse DNS lookups +2. **Additional Platforms**: Apply the same pattern to other platforms (Cisco, Juniper, etc.) +3. **Performance Optimization**: Use bulk BGP.tools queries when enriching multiple IPs +4. 
**Caching**: Cache BGP.tools and DNS results to avoid repeated lookups + +## Migration Notes + +### Plugin Registration +Updated `plugins/_builtin/__init__.py`: +```python +from .trace_route_mikrotik import TraceroutePluginMikrotik # New + +__all__ = ( + # ... existing plugins ... + "TraceroutePluginMikrotik", # Added +) +``` + +### Execution Order +1. `trace_route_mikrotik.py` - Parse raw output to structured format +2. `bgptools_traceroute_enrichment.py` - Enrich structured data (common phase) +3. UI renders structured data in table format + +This restructuring makes the traceroute functionality consistent, maintainable, and feature-rich while following established hyperglass patterns. \ No newline at end of file diff --git a/debug_mikrotik_minimal.py b/debug_mikrotik_minimal.py new file mode 100644 index 0000000..85953b3 --- /dev/null +++ b/debug_mikrotik_minimal.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python3 +"""Minimal debug script for MikroTik traceroute parsing without full hyperglass deps.""" + +import re +import typing as t +from dataclasses import dataclass + +# Simulate just the parsing logic without all the hyperglass imports + + +@dataclass +class MikrotikTracerouteHop: + """Individual MikroTik traceroute hop.""" + + hop_number: int + ip_address: t.Optional[str] = None + hostname: t.Optional[str] = None + loss_pct: t.Optional[int] = None + sent_count: t.Optional[int] = None + last_rtt: t.Optional[float] = None + avg_rtt: t.Optional[float] = None + best_rtt: t.Optional[float] = None + worst_rtt: t.Optional[float] = None + + @property + def is_timeout(self) -> bool: + """Check if this hop is a timeout.""" + return self.ip_address is None or self.loss_pct == 100 + + +@dataclass +class MikrotikTracerouteTable: + """MikroTik Traceroute Table.""" + + target: str + source: str + hops: t.List[MikrotikTracerouteHop] + max_hops: int = 30 + packet_size: int = 60 + + @classmethod + def parse_text(cls, text: str, target: str, source: str) -> 
"MikrotikTracerouteTable": + """Parse MikroTik traceroute output with detailed debugging.""" + + # DEBUG: Log the raw input + print(f"=== RAW MIKROTIK TRACEROUTE INPUT ===") + print(f"Target: {target}, Source: {source}") + print(f"Raw text length: {len(text)} characters") + print(f"Raw text:\n{repr(text)}") + print(f"=== END RAW INPUT ===") + + lines = text.strip().split("\n") + print(f"Split into {len(lines)} lines") + + # DEBUG: Log each line with line numbers + for i, line in enumerate(lines): + print(f"Line {i:2d}: {repr(line)}") + + # Find all table starts + table_starts = [] + for i, line in enumerate(lines): + if ("Columns:" in line and "ADDRESS" in line) or ( + "ADDRESS" in line + and "LOSS" in line + and "SENT" in line + and not line.strip().startswith(("1", "2", "3", "4", "5", "6", "7", "8", "9")) + ): + table_starts.append(i) + print(f"Found table start at line {i}: {repr(line)}") + + if not table_starts: + print("WARNING: No traceroute table headers found in output") + return MikrotikTracerouteTable(target=target, source=source, hops=[]) + + # Take the LAST table (newest/final results) + last_table_start = table_starts[-1] + print( + f"Found {len(table_starts)} tables, using the last one starting at line {last_table_start}" + ) + + # Determine format by checking the header line + header_line = lines[last_table_start].strip() + is_columnar_format = "Columns:" in header_line + print(f"Header line: {repr(header_line)}") + print(f"Is columnar format: {is_columnar_format}") + + # Parse only the last table + hops = [] + in_data_section = False + hop_counter = 1 # For old format without hop numbers + + # Start from the last table header + for i in range(last_table_start, len(lines)): + line = lines[i].strip() + + # Skip empty lines + if not line: + print(f"Line {i}: EMPTY - skipping") + continue + + # Skip the column header lines + if ( + ("Columns:" in line) + or ("ADDRESS" in line and "LOSS" in line and "SENT" in line) + or line.startswith("#") + ): + 
in_data_section = True + print(f"Line {i}: HEADER - entering data section: {repr(line)}") + continue + + # Skip paging prompts + if "-- [Q quit|C-z pause]" in line: + print(f"Line {i}: PAGING PROMPT - breaking: {repr(line)}") + break # End of this table + + if in_data_section and line: + print(f"Line {i}: PROCESSING DATA LINE: {repr(line)}") + try: + if is_columnar_format: + # New format: "1 10.0.0.41 0% 1 0.5ms 0.5 0.5 0.5 0" + parts = line.split() + print(f"Line {i}: Columnar format, parts: {parts}") + if len(parts) < 3: + print(f"Line {i}: Too few parts ({len(parts)}), skipping") + continue + + hop_number = int(parts[0]) + + # Check if there's an IP address or if it's empty (timeout hop) + if len(parts) >= 8 and not parts[1].endswith("%"): + # Normal hop with IP address + ip_address = parts[1] if parts[1] else None + loss_pct = int(parts[2].rstrip("%")) + sent_count = int(parts[3]) + last_rtt_str = parts[4] + avg_rtt_str = parts[5] + best_rtt_str = parts[6] + worst_rtt_str = parts[7] + elif len(parts) >= 4 and parts[1].endswith("%"): + # Timeout hop without IP address + ip_address = None + loss_pct = int(parts[1].rstrip("%")) + sent_count = int(parts[2]) + last_rtt_str = parts[3] if len(parts) > 3 else "timeout" + avg_rtt_str = "timeout" + best_rtt_str = "timeout" + worst_rtt_str = "timeout" + else: + print(f"Line {i}: Doesn't match columnar patterns, skipping") + continue + else: + # Old format: "196.60.8.198 0% 1 17.1ms 17.1 17.1 17.1 0" + parts = line.split() + print(f"Line {i}: Old format, parts: {parts}") + if len(parts) < 6: + print(f"Line {i}: Too few parts ({len(parts)}), skipping") + continue + + ip_address = parts[0] if not parts[0].endswith("%") else None + + # Handle truncated IPv6 addresses that end with "..." 
+ if ip_address and ip_address.endswith("..."): + print( + f"Line {i}: Truncated IPv6 address detected: {ip_address}, setting to None" + ) + ip_address = None + + if ip_address: + loss_pct = int(parts[1].rstrip("%")) + sent_count = int(parts[2]) + last_rtt_str = parts[3] + avg_rtt_str = parts[4] + best_rtt_str = parts[5] + worst_rtt_str = parts[6] if len(parts) > 6 else parts[5] + else: + # Timeout line + loss_pct = int(parts[0].rstrip("%")) + sent_count = int(parts[1]) + last_rtt_str = "timeout" + avg_rtt_str = "timeout" + best_rtt_str = "timeout" + worst_rtt_str = "timeout" + + # Convert timing values + def parse_rtt(rtt_str: str) -> t.Optional[float]: + if rtt_str in ("timeout", "-", "0ms"): + return None + # Remove 'ms' suffix and convert to float + rtt_clean = re.sub(r"ms$", "", rtt_str) + try: + return float(rtt_clean) + except ValueError: + return None + + if is_columnar_format: + # Use hop number from the data + final_hop_number = hop_number + else: + # Use sequential numbering for old format + final_hop_number = hop_counter + hop_counter += 1 + + hop_obj = MikrotikTracerouteHop( + hop_number=final_hop_number, + ip_address=ip_address, + hostname=None, # MikroTik doesn't do reverse DNS by default + loss_pct=loss_pct, + sent_count=sent_count, + last_rtt=parse_rtt(last_rtt_str), + avg_rtt=parse_rtt(avg_rtt_str), + best_rtt=parse_rtt(best_rtt_str), + worst_rtt=parse_rtt(worst_rtt_str), + ) + + hops.append(hop_obj) + print( + f"Line {i}: Created hop {final_hop_number}: {ip_address} - {loss_pct}% - {sent_count} sent" + ) + + except (ValueError, IndexError) as e: + print(f"Failed to parse line '{line}': {e}") + continue + + print(f"Before deduplication: {len(hops)} hops") + + # For old format, we need to deduplicate by IP and take only final stats + if not is_columnar_format and hops: + # For old format, we need to deduplicate by IP and take only final stats + print(f"Old format detected - deduplicating {len(hops)} total entries") + + # Group by IP address and 
take the HIGHEST SENT count (final stats) + ip_to_final_hop = {} + ip_to_max_sent = {} + hop_order = [] + + for hop in hops: + # Use IP address if available, otherwise use hop position for truncated addresses + if hop.ip_address: + ip_key = hop.ip_address + elif hop.ip_address is None: + ip_key = f"truncated_hop_{hop.hop_number}" + else: + ip_key = f"timeout_{hop.hop_number}" + + # Track first appearance order + if ip_key not in hop_order: + hop_order.append(ip_key) + ip_to_max_sent[ip_key] = 0 + print(f"New IP discovered: {ip_key}") + + # Keep hop with highest SENT count (most recent/final stats) + if hop.sent_count and hop.sent_count >= ip_to_max_sent[ip_key]: + ip_to_max_sent[ip_key] = hop.sent_count + ip_to_final_hop[ip_key] = hop + print(f"Updated {ip_key}: SENT={hop.sent_count} (final stats)") + + print(f"IP order: {hop_order}") + print(f"Final IP stats: {[(ip, ip_to_max_sent[ip]) for ip in hop_order]}") + + # Rebuild hops list with final stats and correct hop numbers + final_hops = [] + for i, ip_key in enumerate(hop_order, 1): + final_hop = ip_to_final_hop[ip_key] + final_hop.hop_number = i # Correct hop numbering + final_hops.append(final_hop) + print( + f"Final hop {i}: {ip_key} - Loss: {final_hop.loss_pct}% - Sent: {final_hop.sent_count}" + ) + + hops = final_hops + print(f"Deduplication complete: {len(hops)} unique hops with final stats") + + print(f"After processing: {len(hops)} final hops") + for hop in hops: + print( + f"Final hop {hop.hop_number}: {hop.ip_address} - {hop.loss_pct}% loss - {hop.sent_count} sent" + ) + + return MikrotikTracerouteTable(target=target, source=source, hops=hops) + + +if __name__ == "__main__": + # Test with the actual IPv6 traceroute output that has truncated addresses + mikrotik_output = """ADDRESS LOSS SENT LAST AVG BEST WORST STD-DEV STATUS +2001:43f8:6d1::71:114 0% 1 20ms 20 20 20 0 +2620:0:1cff:dead:beef::5e0 0% 1 0.1ms 0.1 0.1 0.1 0 +2620:0:1cff:dead:beef::30e3 0% 1 0.1ms 0.1 0.1 0.1 0 +2a03:2880:f066:ffff::7 0% 1 
0.2ms 0.2 0.2 0.2 0 +2a03:2880:f163:81:face:b00c:0... 0% 1 0.1ms 0.1 0.1 0.1 0 +2001:43f8:6d1::71:114 0% 2 0.9ms 10.5 0.9 20 9.6 +2620:0:1cff:dead:beef::5e0 0% 2 0.1ms 0.1 0.1 0.1 0 +2620:0:1cff:dead:beef::30e3 0% 2 0.2ms 0.2 0.1 0.2 0.1 +2a03:2880:f066:ffff::7 0% 2 0.1ms 0.2 0.1 0.2 0.1 +2a03:2880:f163:81:face:b00c:0... 0% 2 0ms 0.1 0 0.1 0.1 +2001:43f8:6d1::71:114 0% 3 0.8ms 7.2 0.8 20 9 +2620:0:1cff:dead:beef::5e0 0% 3 0.1ms 0.1 0.1 0.1 0 +2620:0:1cff:dead:beef::30e3 0% 3 0.2ms 0.2 0.1 0.2 0 +2a03:2880:f066:ffff::7 0% 3 0.1ms 0.1 0.1 0.2 0 +2a03:2880:f163:81:face:b00c:0... 0% 3 0.1ms 0.1 0 0.1 0""" + + print("Testing MikroTik IPv6 traceroute parser with truncated address...") + result = MikrotikTracerouteTable.parse_text( + mikrotik_output, "2a03:2880:f163:81:face:b00c:0:25de", "CAPETOWN_ZA" + ) + + print(f"\n=== FINAL RESULTS ===") + print(f"Target: {result.target}") + print(f"Source: {result.source}") + print(f"Number of hops: {len(result.hops)}") + for hop in result.hops: + print( + f" Hop {hop.hop_number}: {hop.ip_address or ''} - {hop.loss_pct}% loss - {hop.sent_count} sent - {hop.avg_rtt}ms avg" + ) diff --git a/hyperglass/api/__init__.py b/hyperglass/api/__init__.py index 65458f9..414a450 100644 --- a/hyperglass/api/__init__.py +++ b/hyperglass/api/__init__.py @@ -15,7 +15,7 @@ from hyperglass.constants import __version__ from hyperglass.exceptions import HyperglassError # Local -from .events import check_redis +from .events import check_redis, init_ip_enrichment from .routes import info, query, device, devices, queries from .middleware import COMPRESSION_CONFIG, create_cors_config from .error_handlers import app_handler, http_handler, default_handler, validation_handler @@ -64,7 +64,7 @@ app = Litestar( ValidationException: validation_handler, Exception: default_handler, }, - on_startup=[check_redis], + on_startup=[check_redis, init_ip_enrichment], debug=STATE.settings.debug, cors_config=create_cors_config(state=STATE), 
compression_config=COMPRESSION_CONFIG, diff --git a/hyperglass/api/events.py b/hyperglass/api/events.py index 3d20c4a..942c6e3 100644 --- a/hyperglass/api/events.py +++ b/hyperglass/api/events.py @@ -8,11 +8,36 @@ from litestar import Litestar # Project from hyperglass.state import use_state +from hyperglass.log import log -__all__ = ("check_redis",) +__all__ = ("check_redis", "init_ip_enrichment") async def check_redis(_: Litestar) -> t.NoReturn: """Ensure Redis is running before starting server.""" cache = use_state("cache") cache.check() + + +async def init_ip_enrichment(_: Litestar) -> None: + """Initialize IP enrichment data at startup.""" + try: + params = use_state("params") + if not params.structured.ip_enrichment.enabled: + log.debug("IP enrichment disabled, skipping initialization") + return + except Exception as e: + log.debug(f"Could not check IP enrichment config: {e}") + return + + try: + from hyperglass.external.ip_enrichment import _service + + log.info("Initializing IP enrichment data at startup...") + success = await _service.ensure_data_loaded() + if success: + log.info("IP enrichment data loaded successfully at startup") + else: + log.warning("Failed to load IP enrichment data at startup") + except Exception as e: + log.error(f"Error initializing IP enrichment data: {e}") diff --git a/hyperglass/api/routes.py b/hyperglass/api/routes.py index 9e17c9a..61c4cf6 100644 --- a/hyperglass/api/routes.py +++ b/hyperglass/api/routes.py @@ -1,10 +1,12 @@ """API Routes.""" # Standard Library +import asyncio import json import time import typing as t from datetime import UTC, datetime +from functools import partial # Third Party from litestar import Request, Response, get, post @@ -15,6 +17,7 @@ from litestar.background_tasks import BackgroundTask from hyperglass.log import log from hyperglass.state import HyperglassState from hyperglass.exceptions import HyperglassError +from hyperglass.exceptions.public import DeviceTimeout, ResponseEmpty from 
hyperglass.models.api import Query from hyperglass.models.data import OutputDataModel from hyperglass.util.typing import is_type @@ -28,6 +31,26 @@ from .state import get_state, get_params, get_devices from .tasks import send_webhook from .fake_output import fake_output +# Global query deduplication tracking +_ongoing_queries: t.Dict[str, asyncio.Event] = {} + + +async def _cleanup_query_event(cache_key: str) -> None: + """Clean up completed query event after a short delay.""" + await asyncio.sleep(5) # Allow time for waiting requests to proceed + _ongoing_queries.pop(cache_key, None) + + +# Global dict to track ongoing queries to prevent duplicate execution +_ongoing_queries: t.Dict[str, asyncio.Event] = {} + + +async def _cleanup_query_event(cache_key: str) -> None: + """Clean up completed query event after a short delay.""" + await asyncio.sleep(1) # Allow waiting requests to proceed + _ongoing_queries.pop(cache_key, None) + + __all__ = ( "device", "devices", @@ -64,6 +87,8 @@ async def info(params: Params) -> APIParams: @post("/api/query", dependencies={"_state": Provide(get_state)}) async def query(_state: HyperglassState, request: Request, data: Query) -> QueryResponse: """Ingest request data pass it to the backend application to perform the query.""" + import asyncio + from functools import partial timestamp = datetime.now(UTC) @@ -78,7 +103,9 @@ async def query(_state: HyperglassState, request: Request, data: Query) -> Query _log.info("Starting query execution") - cache_response = cache.get_map(cache_key, "output") + # Wrap blocking cache operations in executor to prevent event loop blocking + loop = asyncio.get_event_loop() + cache_response = await loop.run_in_executor(None, partial(cache.get_map, cache_key, "output")) json_output = False cached = False runtime = 65535 @@ -87,56 +114,106 @@ async def query(_state: HyperglassState, request: Request, data: Query) -> Query _log.bind(cache_key=cache_key).debug("Cache hit") # If a cached response exists, reset 
the expiration time. - cache.expire(cache_key, expire_in=_state.params.cache.timeout) + await loop.run_in_executor( + None, partial(cache.expire, cache_key, expire_in=_state.params.cache.timeout) + ) cached = True runtime = 0 - timestamp = cache.get_map(cache_key, "timestamp") + timestamp = await loop.run_in_executor(None, partial(cache.get_map, cache_key, "timestamp")) elif not cache_response: _log.bind(cache_key=cache_key).debug("Cache miss") - timestamp = data.timestamp - - starttime = time.time() - - if _state.params.fake_output: - # Return fake, static data for development purposes, if enabled. - output = await fake_output( - query_type=data.query_type, - structured=data.device.structured_output or False, + # Check if this exact query is already running + if cache_key in _ongoing_queries: + _log.bind(cache_key=cache_key).debug( + "Query already in progress - waiting for completion" ) - else: - # Pass request to execution module - output = await execute(data) + # Wait for the ongoing query to complete + await _ongoing_queries[cache_key].wait() + # Check cache again after waiting + cache_response = await loop.run_in_executor( + None, partial(cache.get_map, cache_key, "output") + ) + if cache_response: + _log.bind(cache_key=cache_key).debug("Query completed by another request") + cached = True + runtime = 0 + timestamp = await loop.run_in_executor( + None, partial(cache.get_map, cache_key, "timestamp") + ) + else: + _log.bind(cache_key=cache_key).warning( + "Query completed but no cache found - executing anyway" + ) - endtime = time.time() - elapsedtime = round(endtime - starttime, 4) - _log.debug("Runtime: {!s} seconds", elapsedtime) + if not cache_response: + # Mark this query as ongoing + _ongoing_queries[cache_key] = asyncio.Event() - if output is None: - raise HyperglassError(message=_state.params.messages.general, alert="danger") + try: + timestamp = data.timestamp + starttime = time.time() - json_output = is_type(output, OutputDataModel) + if 
_state.params.fake_output: + # Return fake, static data for development purposes, if enabled. + output = await fake_output( + query_type=data.query_type, + structured=data.device.structured_output or False, + ) + else: + # Pass request to execution module + output = await execute(data) - if json_output: - # Export structured output as JSON string to guarantee value - # is serializable, then convert it back to a dict. - as_json = output.export_json() - raw_output = json.loads(as_json) - else: - raw_output = str(output) + endtime = time.time() + elapsedtime = round(endtime - starttime, 4) + _log.debug("Runtime: {!s} seconds", elapsedtime) - cache.set_map_item(cache_key, "output", raw_output) - cache.set_map_item(cache_key, "timestamp", timestamp) - cache.expire(cache_key, expire_in=_state.params.cache.timeout) + if output is None: + raise HyperglassError(message=_state.params.messages.general, alert="danger") - _log.bind(cache_timeout=_state.params.cache.timeout).debug("Response cached") + json_output = is_type(output, OutputDataModel) - runtime = int(round(elapsedtime, 0)) + if json_output: + # Export structured output as JSON string to guarantee value + # is serializable, then convert it back to a dict. 
+ as_json = output.export_json() + raw_output = json.loads(as_json) + else: + raw_output = str(output) + + # Only cache successful results + await loop.run_in_executor( + None, partial(cache.set_map_item, cache_key, "output", raw_output) + ) + await loop.run_in_executor( + None, partial(cache.set_map_item, cache_key, "timestamp", timestamp) + ) + await loop.run_in_executor( + None, partial(cache.expire, cache_key, expire_in=_state.params.cache.timeout) + ) + + _log.bind(cache_timeout=_state.params.cache.timeout).debug("Response cached") + + runtime = int(round(elapsedtime, 0)) + + except (DeviceTimeout, ResponseEmpty) as exc: + # Don't cache timeout or empty response errors - allow immediate retry + _log.bind(cache_key=cache_key).warning( + "Query failed with timeout or empty response - not caching result to allow immediate retry" + ) + # Re-raise the exception so the error handler can process it normally + raise exc + + finally: + # Mark query as complete and notify waiting requests + _ongoing_queries[cache_key].set() + # Clean up the event after a short delay to allow waiting requests to proceed + asyncio.create_task(_cleanup_query_event(cache_key)) # If it does, return the cached entry - cache_response = cache.get_map(cache_key, "output") + cache_response = await loop.run_in_executor(None, partial(cache.get_map, cache_key, "output")) json_output = is_type(cache_response, t.Dict) response_format = "text/plain" @@ -167,3 +244,33 @@ async def query(_state: HyperglassState, request: Request, data: Query) -> Query timestamp=timestamp, ), ) + + +@get("/api/admin/ip-enrichment/status") +async def ip_enrichment_status() -> dict: + """Get IP enrichment data status.""" + try: + from hyperglass.external.ip_enrichment import get_data_status + + return get_data_status() + except Exception as e: + return {"error": str(e)} + + +@post("/api/admin/ip-enrichment/refresh") +async def ip_enrichment_refresh(force: bool = False) -> dict: + """Manually refresh IP enrichment data.""" + 
try: + from hyperglass.external.ip_enrichment import refresh_ip_enrichment_data + + success = await refresh_ip_enrichment_data(force=force) + return { + "success": success, + "message": ( + "IP enrichment data refresh completed" + if success + else "IP enrichment data refresh failed" + ), + } + except Exception as e: + return {"success": False, "error": str(e)} diff --git a/hyperglass/api/tasks.py b/hyperglass/api/tasks.py index 0a7d16d..8527c53 100644 --- a/hyperglass/api/tasks.py +++ b/hyperglass/api/tasks.py @@ -10,7 +10,7 @@ from litestar import Request # Project from hyperglass.log import log -from hyperglass.external import Webhook, bgptools +from hyperglass.external import Webhook, network_info from hyperglass.models.api import Query if t.TYPE_CHECKING: @@ -52,7 +52,7 @@ async def send_webhook( else: host = request.client.host - network_info = await bgptools.network_info(host) + network_result = await network_info(host) async with Webhook(params.logging.http) as hook: await hook.send( @@ -60,7 +60,7 @@ async def send_webhook( **data.dict(), "headers": headers, "source": host, - "network": network_info.get(host, {}), + "network": network_result.get(host, {}), "timestamp": timestamp, } ) diff --git a/hyperglass/defaults/directives/huawei.py b/hyperglass/defaults/directives/huawei.py index 4c14638..5f749c7 100644 --- a/hyperglass/defaults/directives/huawei.py +++ b/hyperglass/defaults/directives/huawei.py @@ -110,12 +110,12 @@ Huawei_Traceroute = BuiltinDirective( RuleWithIPv4( condition="0.0.0.0/0", action="permit", - command="tracert -q 2 -f 1 -a {source4} {target}", + command="tracert -w 500 -q 1 -f 1 -a {source4} {target}", ), RuleWithIPv6( condition="::/0", action="permit", - command="tracert -q 2 -f 1 -a {source6} {target}", + command="tracert ipv6 -w 500 -q 1 -f 1 -a {source6} {target}", ), ], field=Text(description="IP Address, Prefix, or Hostname"), diff --git a/hyperglass/defaults/directives/mikrotik.py b/hyperglass/defaults/directives/mikrotik.py 
index 56091f6..114c47e 100644 --- a/hyperglass/defaults/directives/mikrotik.py +++ b/hyperglass/defaults/directives/mikrotik.py @@ -94,6 +94,24 @@ Mikrotik_Ping = BuiltinDirective( id="__hyperglass_mikrotik_ping__", name="Ping", rules=[ + # Deny RFC4193 ULA (Unique Local IPv6 Addresses) + RuleWithIPv6( + condition="fc00::/7", + action="deny", + command="", + ), + # Deny RFC4291 Link-Local IPv6 + RuleWithIPv6( + condition="fe80::/10", + action="deny", + command="", + ), + # Deny RFC4291 IPv6 Loopback + RuleWithIPv6( + condition="::1/128", + action="deny", + command="", + ), RuleWithIPv4( condition="0.0.0.0/0", action="permit", @@ -113,15 +131,33 @@ Mikrotik_Traceroute = BuiltinDirective( id="__hyperglass_mikrotik_traceroute__", name="Traceroute", rules=[ + # Deny RFC4193 ULA (Unique Local IPv6 Addresses) + RuleWithIPv6( + condition="fc00::/7", + action="deny", + command="", + ), + # Deny RFC4291 Link-Local IPv6 + RuleWithIPv6( + condition="fe80::/10", + action="deny", + command="", + ), + # Deny RFC4291 IPv6 Loopback + RuleWithIPv6( + condition="::1/128", + action="deny", + command="", + ), RuleWithIPv4( condition="0.0.0.0/0", action="permit", - command="tool traceroute src-address={source4} timeout=1 duration=5 count=1 {target}", + command="tool traceroute src-address={source4} timeout=1 duration=30 count=3 {target}", ), RuleWithIPv6( condition="::/0", action="permit", - command="tool traceroute src-address={source6} timeout=1 duration=5 count=1 {target}", + command="tool traceroute src-address={source6} timeout=1 duration=30 count=3 {target}", ), ], field=Text(description="IP Address, Prefix, or Hostname"), diff --git a/hyperglass/execution/drivers/ssh_netmiko.py b/hyperglass/execution/drivers/ssh_netmiko.py index 0727ce4..52fc588 100644 --- a/hyperglass/execution/drivers/ssh_netmiko.py +++ b/hyperglass/execution/drivers/ssh_netmiko.py @@ -83,22 +83,37 @@ class NetmikoConnection(SSHConnection): # private key password. 
driver_kwargs["passphrase"] = self.device.credential.password.get_secret_value() + # Run blocking netmiko operations in thread executor to prevent blocking the event loop + import asyncio + import functools + + def _netmiko_connect(): + """Execute blocking netmiko operations in a separate thread.""" + try: + nm_connect_direct = ConnectHandler(**driver_kwargs) + + responses = () + + for query in self.query: + raw = nm_connect_direct.send_command_timing(query, **send_args) + responses += (raw,) + + nm_connect_direct.disconnect() + return responses + + except NetMikoTimeoutException as scrape_error: + raise DeviceTimeout(error=scrape_error, device=self.device) from scrape_error + + except NetMikoAuthenticationException as auth_error: + raise AuthError(error=auth_error, device=self.device) from auth_error + try: - nm_connect_direct = ConnectHandler(**driver_kwargs) - - responses = () - - for query in self.query: - raw = nm_connect_direct.send_command_timing(query, **send_args) - responses += (raw,) - - nm_connect_direct.disconnect() - - except NetMikoTimeoutException as scrape_error: - raise DeviceTimeout(error=scrape_error, device=self.device) from scrape_error - - except NetMikoAuthenticationException as auth_error: - raise AuthError(error=auth_error, device=self.device) from auth_error + # Execute blocking netmiko operations in thread pool + loop = asyncio.get_event_loop() + responses = await loop.run_in_executor(None, _netmiko_connect) + except (DeviceTimeout, AuthError): + # Re-raise our custom exceptions as-is + raise if not responses: raise ResponseEmpty(query=self.query_data) diff --git a/hyperglass/execution/enhanced.py b/hyperglass/execution/enhanced.py new file mode 100644 index 0000000..f7d83f1 --- /dev/null +++ b/hyperglass/execution/enhanced.py @@ -0,0 +1,50 @@ +"""Example integration of BGP.tools enrichment into the main execution flow.""" + +# Standard Library +import typing as t + +# Project +from hyperglass.log import log +from 
async def execute_enhanced(query: "Query") -> t.Union[OutputDataModel, str]:
    """Execute `query` and pass the result through the enrichment wrapper.

    Delegates to `execute_with_enrichment`, handing it the original
    `execute` function (imported here as `original_execute`) as the executor.

    Usage:
        # In hyperglass/api/routes.py, replace:
        # from hyperglass.execution.main import execute
        # with:
        # from hyperglass.execution.enhanced import execute_enhanced as execute
    """
    enriched_result = await execute_with_enrichment(query, original_execute)
    return enriched_result
+ + Usage: + # In hyperglass application startup code: + from hyperglass.execution.enhanced import monkey_patch_execute + monkey_patch_execute() + """ + import hyperglass.execution.main + import hyperglass.api.routes + + # Replace the execute function in both modules + hyperglass.execution.main.execute = execute_enhanced + hyperglass.api.routes.execute = execute_enhanced + + log.info("BGP.tools enrichment enabled via monkey patching") diff --git a/hyperglass/execution/enrichment.py b/hyperglass/execution/enrichment.py new file mode 100644 index 0000000..961c08e --- /dev/null +++ b/hyperglass/execution/enrichment.py @@ -0,0 +1,142 @@ +"""Enhanced execution with IP enrichment.""" + +# Standard Library +import typing as t + +# Project +from hyperglass.log import log +from hyperglass.state import use_state +from hyperglass.models.data import BGPRouteTable, TracerouteResult, OutputDataModel + + +async def enrich_output_with_ip_enrichment(output: OutputDataModel) -> OutputDataModel: + """Enrich output data with IP enrichment information.""" + params = use_state("params") + + # Check if IP enrichment is enabled in configuration + if not params.structured.ip_enrichment.enabled: + log.debug("IP enrichment disabled in configuration, skipping") + return output + + _log = log.bind(enrichment="ip_enrichment") + _log.debug("Starting IP enrichment") + + try: + if isinstance(output, BGPRouteTable): + if params.structured.ip_enrichment.enrich_next_hop: + _log.debug("Enriching BGP route table with next-hop information") + await output.enrich_with_ip_enrichment() + _log.info(f"Enriched {len(output.routes)} BGP routes with next-hop data") + else: + _log.debug("Next-hop enrichment disabled, skipping BGP enrichment") + + elif isinstance(output, TracerouteResult): + if params.structured.ip_enrichment.enrich_traceroute: + _log.debug("Enriching traceroute hops with ASN information") + await output.enrich_with_ip_enrichment() + + # Count enriched hops + enriched_hops = sum(1 for hop in 
def format_enriched_bgp_output(route_table: BGPRouteTable) -> str:
    """Render a BGP route table, with next-hop enrichment when present, as text.

    Returns the literal "No routes found." when the table holds no routes.
    Otherwise emits a three-line header followed by one section per route,
    each terminated by an empty line.
    """

    def _has_value(field) -> bool:
        # Fields that are falsy or equal to the string "None" count as absent.
        return bool(field) and field != "None"

    if not route_table.routes:
        return "No routes found."

    rendered = [
        f"BGP Route Table (VRF: {route_table.vrf})",
        f"Total Routes: {route_table.count}",
        "-" * 80,
    ]

    for entry in route_table.routes:
        active_label = "Yes" if entry.active else "No"
        rendered.extend(
            (
                f"Prefix: {entry.prefix}",
                f" Active: {active_label}",
                f" Next Hop: {entry.next_hop}",
            )
        )

        # Org and country are only shown when the next hop has an ASN.
        if _has_value(entry.next_hop_asn):
            details = [f"AS{entry.next_hop_asn}"]
            if _has_value(entry.next_hop_org):
                details.append(f" ({entry.next_hop_org})")
            if _has_value(entry.next_hop_country):
                details.append(f" [{entry.next_hop_country}]")
            rendered.append(f" Next Hop Info: {''.join(details)}")

        path_text = " ".join(str(asn) for asn in entry.as_path)
        rendered.extend(
            (
                f" AS Path: {path_text}",
                f" Source AS: AS{entry.source_as}",
                "",
            )
        )

    return "\n".join(rendered)
async def execute_with_enrichment(query, original_execute_func) -> t.Union[OutputDataModel, str]:
    """Run `original_execute_func(query)` and enrich structured results.

    Results that are neither a `BGPRouteTable` nor a `TracerouteResult` are
    returned untouched. Structured results are passed through
    `enrich_output_with_ip_enrichment` and then rendered to text with the
    matching formatter.
    """
    raw_result = await original_execute_func(query)

    # Anything that is not structured data is passed straight through.
    if not isinstance(raw_result, (BGPRouteTable, TracerouteResult)):
        return raw_result

    enriched = await enrich_output_with_ip_enrichment(raw_result)

    if isinstance(enriched, BGPRouteTable):
        return format_enriched_bgp_output(enriched)
    if isinstance(enriched, TracerouteResult):
        return format_enriched_traceroute_output(enriched)

    # Enrichment returned some other type: fall back to the raw result.
    return raw_result
a/hyperglass/external/bgptools.py b/hyperglass/external/bgptools.py deleted file mode 100644 index f2fd7f6..0000000 --- a/hyperglass/external/bgptools.py +++ /dev/null @@ -1,178 +0,0 @@ -"""Query & parse data from bgp.tools. - -- See https://bgp.tools/credits for acknowledgements and licensing. -- See https://bgp.tools/kb/api for query documentation. -""" - -# Standard Library -import re -import typing as t -import asyncio -from ipaddress import IPv4Address, IPv6Address, ip_address - -# Project -from hyperglass.log import log -from hyperglass.state import use_state - -DEFAULT_KEYS = ("asn", "ip", "prefix", "country", "rir", "allocated", "org") - -CACHE_KEY = "hyperglass.external.bgptools" - -TargetDetail = t.TypedDict( - "TargetDetail", - {"asn": str, "ip": str, "country": str, "rir": str, "allocated": str, "org": str}, -) - -TargetData = t.Dict[str, TargetDetail] - - -def default_ip_targets(*targets: str) -> t.Tuple[TargetData, t.Tuple[str, ...]]: - """Construct a mapping of default data and other data that should be queried. - - Targets in the mapping don't need to be queried and already have default values. Targets in the - query tuple should be queried. 
- """ - default_data = {} - query = () - for target in targets: - detail: TargetDetail = dict.fromkeys(DEFAULT_KEYS, "None") - try: - valid: t.Union[IPv4Address, IPv6Address] = ip_address(target) - - checks = ( - (valid.version == 6 and valid.is_site_local, "Site Local Address"), - (valid.is_loopback, "Loopback Address"), - (valid.is_multicast, "Multicast Address"), - (valid.is_link_local, "Link Local Address"), - (valid.is_private, "Private Address"), - ) - for exp, rir in checks: - if exp is True: - detail["rir"] = rir - break - - should_query = any((valid.is_global, valid.is_unspecified, valid.is_reserved)) - - if not should_query: - detail["ip"] = str(target) - default_data[str(target)] = detail - elif should_query: - query += (str(target),) - - except ValueError: - pass - - return default_data, query - - -def parse_whois(output: str, targets: t.List[str]) -> TargetDetail: - """Parse raw whois output from bgp.tools. - - Sample output: - AS | IP | BGP Prefix | CC | Registry | Allocated | AS Name - 13335 | 1.1.1.1 | 1.1.1.0/24 | US | ARIN | 2010-07-14 | Cloudflare, Inc. - """ - - def lines(raw): - """Generate clean string values for each column.""" - for r in (r for r in raw.split("\n") if r): - fields = (re.sub(r"(\n|\r)", "", field).strip(" ") for field in r.split("|")) - yield fields - - data = {} - - for line in lines(output): - # Unpack each line's parsed values. - asn, ip, prefix, country, rir, allocated, org = line - - # Match the line to the item in the list of resources to query. 
- if ip in targets: - i = targets.index(ip) - data[targets[i]] = { - "asn": asn, - "ip": ip, - "prefix": prefix, - "country": country, - "rir": rir, - "allocated": allocated, - "org": org, - } - log.bind(data=data).debug("Parsed bgp.tools data") - return data - - -async def run_whois(targets: t.List[str]) -> str: - """Open raw socket to bgp.tools and execute query.""" - - # Construct bulk query - query = "\n".join(("begin", *targets, "end\n")).encode() - - # Open the socket to bgp.tools - log.debug("Opening connection to bgp.tools") - reader, writer = await asyncio.open_connection("bgp.tools", port=43) - - # Send the query - writer.write(query) - if writer.can_write_eof(): - writer.write_eof() - await writer.drain() - - # Read the response - response = b"" - while True: - data = await reader.read(128) - if data: - response += data - else: - log.debug("Closing connection to bgp.tools") - writer.close() - break - - return response.decode() - - -async def network_info(*targets: str) -> TargetData: - """Get ASN, Containing Prefix, and other info about an internet resource.""" - - default_data, query_targets = default_ip_targets(*targets) - - cache = use_state("cache") - - # Set default data structure. - query_data = {t: dict.fromkeys(DEFAULT_KEYS, "") for t in query_targets} - - # Get all cached bgp.tools data. - cached = cache.get_map(CACHE_KEY) or {} - - # Try to use cached data for each of the items in the list of - # resources. - for target in (target for target in query_targets if target in cached): - # Reassign the cached network info to the matching resource. - query_data[target] = cached[target] - log.bind(target=target).debug("Using cached network info") - - # Remove cached items from the resource list so they're not queried. - targets = [t for t in query_targets if t not in cached] - - try: - if targets: - whoisdata = await run_whois(targets) - - if whoisdata: - # If the response is not empty, parse it. 
- query_data.update(parse_whois(whoisdata, targets)) - - # Cache the response - for target in targets: - cache.set_map_item(CACHE_KEY, target, query_data[target]) - log.bind(target=t).debug("Cached network info") - - except Exception as err: - log.error(err) - - return {**default_data, **query_data} - - -def network_info_sync(*targets: str) -> TargetData: - """Get ASN, Containing Prefix, and other info about an internet resource.""" - return asyncio.run(network_info(*targets)) diff --git a/hyperglass/external/ip_enrichment.py b/hyperglass/external/ip_enrichment.py new file mode 100644 index 0000000..660e27d --- /dev/null +++ b/hyperglass/external/ip_enrichment.py @@ -0,0 +1,1046 @@ +"""IP enrichment service - the main network lookup system for hyperglass. + +This completely replaces bgp.tools with bulk data approach using: +- BGP.tools static files for CIDR->ASN mapping +- BGP.tools ASN database for ASN->Organization names +- PeeringDB for IXP detection + +Core Functions: +- lookup_ip(ip_address) -> ASN number/name OR IXP name +- lookup_asn_name(asn_number) -> ASN organization name +- network_info(*ips) -> bulk lookup (for compatibility) +""" + +import asyncio +import json +import csv +import pickle +import typing as t +from datetime import datetime, timedelta +from ipaddress import ip_address, ip_network, IPv4Address, IPv6Address +from pathlib import Path + +from hyperglass.log import log +from hyperglass.state import use_state + +# Optional dependencies - graceful fallback if not available +try: + import httpx +except ImportError: + log.warning("httpx not available - IP enrichment will be disabled") + httpx = None + +try: + import aiofiles +except ImportError: + log.warning("aiofiles not available - IP enrichment will use slower sync I/O") + aiofiles = None + +# File paths for persistent storage +IP_ENRICHMENT_DATA_DIR = Path("/etc/hyperglass/ip_enrichment") +CIDR_DATA_FILE = IP_ENRICHMENT_DATA_DIR / "cidr_data.json" +ASN_DATA_FILE = IP_ENRICHMENT_DATA_DIR / 
def get_cache_duration() -> int:
    """Return the cache lifetime in seconds, never below DEFAULT_CACHE_DURATION.

    Reads `structured.ip_enrichment.cache_timeout` from the "params" state.
    Any failure while reading it yields DEFAULT_CACHE_DURATION.
    """
    try:
        from hyperglass.state import use_state

        configured_timeout = use_state("params").structured.ip_enrichment.cache_timeout
        duration = max(configured_timeout, DEFAULT_CACHE_DURATION)
    except Exception:
        # Fallback if config not available
        duration = DEFAULT_CACHE_DURATION
    return duration
class IPInfo:
    """Result of IP lookup.

    A plain value holder; every field except `ip` is optional and defaults
    to "not known".
    """

    def __init__(
        self,
        ip: str,
        asn: t.Optional[int] = None,
        asn_name: t.Optional[str] = None,
        prefix: t.Optional[str] = None,
        country: t.Optional[str] = None,
        is_ixp: bool = False,
        ixp_name: t.Optional[str] = None,
    ):
        self.ip = ip  # The address that was looked up, as given by the caller
        self.asn = asn
        self.asn_name = asn_name
        self.prefix = prefix  # The CIDR prefix from table.jsonl
        self.country = country  # Country code from asns.csv
        self.is_ixp = is_ixp
        self.ixp_name = ixp_name

    def __repr__(self) -> str:
        """Show every field so logged or debugged instances are readable."""
        fields = ", ".join(f"{name}={value!r}" for name, value in vars(self).items())
        return f"{type(self).__name__}({fields})"
t.List[t.Tuple[t.Union[IPv4Address, IPv6Address], int, str]] = ( + [] + ) # (network, prefixlen, ixp_name) + self.last_update: t.Optional[datetime] = None + + # Optimized lookup structures - populated after data load + self._ipv4_networks: t.List[t.Tuple[int, int, int, str]] = ( + [] + ) # (net_int, mask_bits, asn, cidr) + self._ipv6_networks: t.List[t.Tuple[int, int, int, str]] = ( + [] + ) # (net_int, mask_bits, asn, cidr) + self._lookup_optimized = False + + # Combined cache for ultra-fast loading + self._combined_cache: t.Optional[t.Dict[str, t.Any]] = None + + def _optimize_lookups(self): + """Convert IP networks to integer format for faster lookups.""" + if self._lookup_optimized: + return + + log.debug("Optimizing IP lookup structures...") + optimize_start = datetime.now() + + self._ipv4_networks = [] + self._ipv6_networks = [] + + for net_addr, prefixlen, asn, cidr_string in self.cidr_networks: + if isinstance(net_addr, IPv4Address): + # Convert IPv4 to integer for fast bitwise operations + net_int = int(net_addr) + mask_bits = 32 - prefixlen + self._ipv4_networks.append((net_int, mask_bits, asn, cidr_string)) + else: + # Convert IPv6 to integer + net_int = int(net_addr) + mask_bits = 128 - prefixlen + self._ipv6_networks.append((net_int, mask_bits, asn, cidr_string)) + + # Sort by mask bits (ascending) for longest-match-first + self._ipv4_networks.sort(key=lambda x: x[1]) + self._ipv6_networks.sort(key=lambda x: x[1]) + + optimize_time = (datetime.now() - optimize_start).total_seconds() + log.debug( + f"Optimized lookups: {len(self._ipv4_networks)} IPv4, {len(self._ipv6_networks)} IPv6 " + f"networks in {optimize_time:.2f}s" + ) + self._lookup_optimized = True + + def _save_combined_cache(self): + """Save all data structures to a single pickle file for ultra-fast loading.""" + try: + cache_data = { + "cidr_networks": self.cidr_networks, + "asn_info": self.asn_info, + "ixp_networks": self.ixp_networks, + "ipv4_networks": self._ipv4_networks, + 
async def ensure_data_loaded(self, force_refresh: bool = False) -> bool:
    """Ensure enrichment data is loaded in memory and still fresh.

    Sources are tried in this order: data already held in memory, the pickle
    cache, the JSON data files, and finally a fresh download (which also
    rewrites the JSON files and the pickle cache).

    Args:
        force_refresh: Skip every cached source and download fresh data.

    Returns:
        True when data is available; False when httpx is missing or the
        download/save step failed.
    """
    # Fast path: the lookup methods call this on every request, so do not
    # re-validate the JSON files and re-read the pickle while the in-memory
    # copy is still within the cache duration.
    if not force_refresh and self.cidr_networks and self.last_update is not None:
        memory_age = (datetime.now() - self.last_update).total_seconds()
        if memory_age < get_cache_duration():
            return True

    # Create data directory if it doesn't exist
    IP_ENRICHMENT_DATA_DIR.mkdir(parents=True, exist_ok=True)

    # Check if refresh is needed
    should_refresh, reason = should_refresh_data(force_refresh)

    if not should_refresh:
        # Validate existing data files
        is_valid, validation_msg = validate_data_files()
        if not is_valid:
            should_refresh = True
            reason = f"Data validation failed: {validation_msg}"

    if not should_refresh:
        # Try to load from ultra-fast pickle cache first
        if self._load_combined_cache():
            age_hours = (
                (datetime.now() - self.last_update).total_seconds() / 3600
                if self.last_update
                else 0
            )
            log.info(f"Loading IP enrichment data from pickle cache (age: {age_hours:.1f}h)")
            log.debug(
                f"Cache contains: {len(self.cidr_networks)} CIDR entries, "
                f"{len(self.asn_info)} ASN entries, {len(self.ixp_networks)} IXP networks"
            )
            return True

        # Fallback to JSON files if pickle cache failed
        try:
            with open(CIDR_DATA_FILE, "r") as f:
                cidr_data = json.load(f)
            with open(ASN_DATA_FILE, "r") as f:
                asn_data = json.load(f)
            with open(IXP_DATA_FILE, "r") as f:
                ixp_data = json.load(f)
            with open(LAST_UPDATE_FILE, "r") as f:
                cached_time = datetime.fromisoformat(f.read().strip())

            age_hours = (datetime.now() - cached_time).total_seconds() / 3600
            log.info(f"Loading IP enrichment data from JSON files (age: {age_hours:.1f}h)")
            log.debug(
                f"Files contain: {len(cidr_data)} CIDR entries, "
                f"{len(asn_data)} ASN entries, {len(ixp_data)} IXP networks"
            )

            # Convert string IP addresses back to IP objects
            self.cidr_networks = [
                (ip_address(net), prefixlen, asn, cidr)
                for net, prefixlen, asn, cidr in cidr_data
            ]
            # ASN data has integer keys that become strings in JSON
            self.asn_info = {int(k): v for k, v in asn_data.items()}
            self.ixp_networks = [
                (ip_address(net), prefixlen, name) for net, prefixlen, name in ixp_data
            ]
            self.last_update = cached_time

            # Reset optimization flag so it gets rebuilt with new data
            self._lookup_optimized = False

            # Save to pickle cache for next time
            self._optimize_lookups()
            self._save_combined_cache()

            return True

        except Exception as e:
            log.warning(f"Failed to load existing data files: {e} - will refresh")
            reason = f"Failed to load files: {e}"

    # Download fresh data
    log.info(f"Refreshing IP enrichment data: {reason}")

    if not httpx:
        log.error("httpx not available - cannot download IP enrichment data")
        return False

    # The download helpers add to these containers, so start from empty ones
    # to avoid duplicating entries on a refresh. Keep the previous contents so
    # a failed refresh does not wipe data that was still usable.
    previous_state = (self.cidr_networks, self.asn_info, self.ixp_networks)
    self.cidr_networks, self.asn_info, self.ixp_networks = [], {}, []

    try:
        log.info("🌐 Starting fresh IP enrichment data download...")
        download_start = datetime.now()

        async with httpx.AsyncClient(timeout=300) as client:
            # BGP data is required - without it there is nothing to look up
            try:
                await self._download_bgp_data(client)
                log.debug("✅ BGP data download successful")
            except Exception as e:
                log.error(f"❌ BGP data download failed: {e}")
                raise Exception(f"Critical BGP data download failed: {e}") from e

            # IXP data is optional but preferred
            try:
                await self._download_ixp_data(client)
            except Exception as e:
                log.error(f"❌ IXP data download failed: {e}")
                # Clear any partial data and continue
                self.ixp_networks = []

            # Judge success by the result, not by the absence of an exception,
            # so a download that quietly produced nothing is reported as such.
            ixp_success = bool(self.ixp_networks)
            if ixp_success:
                log.debug("✅ IXP data download successful")
            else:
                log.warning("Continuing without IXP data - IXP detection will be unavailable")

        download_duration = (datetime.now() - download_start).total_seconds()

        log.info(
            f"📊 Download summary: BGP data: ✅, IXP data: {'✅' if ixp_success else '❌'}"
        )

        # Save the data to persistent files
        log.debug("💾 Saving IP enrichment data to persistent files...")
        cache_start = datetime.now()

        # Convert IP addresses to strings for JSON serialization
        cidr_file_data = [
            (str(net), prefixlen, asn, cidr) for net, prefixlen, asn, cidr in self.cidr_networks
        ]
        ixp_file_data = [
            (str(net), prefixlen, name) for net, prefixlen, name in self.ixp_networks
        ]

        with open(CIDR_DATA_FILE, "w") as f:
            json.dump(cidr_file_data, f, separators=(",", ":"))  # Compact JSON
        with open(ASN_DATA_FILE, "w") as f:
            json.dump(self.asn_info, f, separators=(",", ":"))
        with open(IXP_DATA_FILE, "w") as f:
            json.dump(ixp_file_data, f, separators=(",", ":"))
        with open(LAST_UPDATE_FILE, "w") as f:
            f.write(datetime.now().isoformat())

        cache_duration_actual = (datetime.now() - cache_start).total_seconds()

        self.last_update = datetime.now()

        # Optimize lookups and create pickle cache for ultra-fast loading
        self._lookup_optimized = False
        self._optimize_lookups()
        self._save_combined_cache()

        log.info("✅ IP enrichment data loaded successfully!")
        log.info(
            f"📊 Data summary: {len(self.cidr_networks)} CIDR entries, "
            f"{len(self.asn_info)} ASN entries, {len(self.ixp_networks)} IXP networks"
        )
        log.debug(
            f"⏱️ Download time: {download_duration:.1f}s, Save time: {cache_duration_actual:.1f}s"
        )
        return True

    except Exception as e:
        log.error(f"Failed to download IP enrichment data: {e}")
        # Put back whatever was loaded before the refresh attempt.
        self.cidr_networks, self.asn_info, self.ixp_networks = previous_state
        return False
0 + total_lines = len(response.text.strip().split("\n")) + log.debug(f"Processing {total_lines} lines from CIDR table...") + + for line in response.text.strip().split("\n"): + if line.strip(): + try: + entry = json.loads(line) + cidr = entry.get("CIDR") + asn = entry.get("ASN") + if cidr and asn: + network = ip_network(cidr, strict=False) + self.cidr_networks.append( + (network.network_address, network.prefixlen, asn, cidr) + ) + cidr_count += 1 + except Exception as e: + log.debug(f"Failed to parse CIDR line: {line[:100]} - {e}") + continue + + process_time = (datetime.now() - process_start).total_seconds() + log.info( + f"✅ Downloaded {cidr_count}/{total_lines} CIDR entries " + f"(download: {download_time:.1f}s, process: {process_time:.1f}s)" + ) + + # Sort by prefix length (descending) for longest-match lookup + sort_start = datetime.now() + self.cidr_networks.sort(key=lambda x: x[1], reverse=True) + sort_time = (datetime.now() - sort_start).total_seconds() + log.debug(f"Sorted CIDR entries by prefix length in {sort_time:.1f}s") + + # Download ASN names + log.info("📥 Downloading BGP.tools ASN names from bgp.tools...") + download_start = datetime.now() + response = await client.get(BGP_TOOLS_ASNS_URL) + response.raise_for_status() + download_time = (datetime.now() - download_start).total_seconds() + + # Save raw file for debugging + with open(RAW_ASNS_FILE, "w") as f: + f.write(response.text) + + # Process CSV data + process_start = datetime.now() + lines = response.text.strip().split("\n") + if not lines: + log.error("Empty ASN data received") + return + + # Debug: log the first few lines to see the format + log.debug(f"ASN CSV header: {lines[0] if lines else 'NO HEADER'}") + if len(lines) > 1: + log.debug(f"ASN CSV first data line: {lines[1]}") + + reader = csv.DictReader(lines) + asn_count = 0 + total_asns = 0 + failed_count = 0 + + for row in reader: + total_asns += 1 + try: + asn_str = row.get("asn", "").strip() + name = row.get("name", "").strip() + country 
= row.get("cc", "").strip() # Country code from CC column + + if not asn_str: + failed_count += 1 + continue + + # Handle ASN formats like "AS12345" or just "12345" + if asn_str.upper().startswith("AS"): + asn = int(asn_str[2:]) + else: + asn = int(asn_str) + + if asn > 0 and name: + self.asn_info[asn] = {"name": name, "country": country} + asn_count += 1 + else: + failed_count += 1 + + except Exception as e: + failed_count += 1 + if failed_count < 5: # Only log first few failures + log.debug(f"Failed to parse ASN row {total_asns}: {row} - {e}") + continue + + process_time = (datetime.now() - process_start).total_seconds() + log.info( + f"✅ Downloaded {asn_count}/{total_asns} ASN entries with country codes " + f"(download: {download_time:.1f}s, process: {process_time:.1f}s, failed: {failed_count})" + ) + + async def _download_ixp_data(self, client) -> None: + """Download PeeringDB IXP prefixes data - simplified approach using only IXPFX.""" + log.info("📥 Downloading PeeringDB IXP prefixes from peeringdb.com...") + + max_retries = 3 + base_delay = 5 # Start with 5 second delay + + for attempt in range(max_retries): + try: + if attempt > 0: + delay = base_delay * (2**attempt) # Exponential backoff + log.info(f"Retry attempt {attempt + 1}/{max_retries} after {delay}s delay...") + await asyncio.sleep(delay) + + # Get IXP prefixes directly - no need for IXLAN lookup + log.debug("Downloading IXP prefixes...") + download_start = datetime.now() + response = await client.get(PEERINGDB_IXPFX_URL) + response.raise_for_status() + ixpfxs = response.json()["data"] + prefix_time = (datetime.now() - download_start).total_seconds() + + # Process IXP prefixes - use a generic IXP name since we don't need specific names + process_start = datetime.now() + ixp_count = 0 + total_prefixes = len(ixpfxs) + failed_prefixes = 0 + + for ixpfx in ixpfxs: + try: + prefix = ixpfx.get("prefix") + + if prefix: + network = ip_network(prefix, strict=False) + # Use "IXP Network" as generic name since 
we only need to know it's an IXP + ixp_name = "IXP Network" + self.ixp_networks.append( + (network.network_address, network.prefixlen, ixp_name) + ) + ixp_count += 1 + else: + failed_prefixes += 1 + except Exception: + failed_prefixes += 1 + + process_time = (datetime.now() - process_start).total_seconds() + + # Sort by prefix length (descending) for longest-match lookup + sort_start = datetime.now() + self.ixp_networks.sort(key=lambda x: x[1], reverse=True) + sort_time = (datetime.now() - sort_start).total_seconds() + + log.info( + f"✅ Downloaded {ixp_count}/{total_prefixes} IXP networks " + f"(download: {prefix_time:.1f}s, process: {process_time:.1f}s, " + f"sort: {sort_time:.1f}s, failed: {failed_prefixes})" + ) + return # Success - exit retry loop + + except Exception as e: + if "429" in str(e) or "Too Many Requests" in str(e): + if attempt < max_retries - 1: + delay = base_delay * (2 ** (attempt + 1)) + log.warning( + f"Rate limited by PeeringDB API (attempt {attempt + 1}/{max_retries}). Retrying in {delay}s..." + ) + continue + else: + log.error( + f"Rate limited by PeeringDB API after {max_retries} attempts. Skipping IXP data." 
+ ) + break + else: + log.warning( + f"Failed to download IXP data (attempt {attempt + 1}/{max_retries}): {e}" + ) + if attempt < max_retries - 1: + continue + break + + # If we get here, all retries failed + log.warning("Could not download IXP data after retries - continuing without IXP detection") + log.info("ASN lookups will still work, but IXP networks won't be identified") + self.ixp_networks = [] + + async def lookup_ip(self, ip_str: str) -> IPInfo: + """Lookup an IP address and return ASN or IXP information.""" + if not await self.ensure_data_loaded(): + log.warning("IP enrichment data not available") + return IPInfo(ip_str) + + # Ensure lookup optimization is done + self._optimize_lookups() + + log.debug( + f"Looking up IP {ip_str} - have {len(self.cidr_networks)} CIDR entries, {len(self.asn_info)} ASN entries" + ) + + try: + target_ip = ip_address(ip_str) + except ValueError: + log.debug(f"Invalid IP address: {ip_str}") + return IPInfo(ip_str) + + # Check if it's a private/reserved/loopback address + if target_ip.is_private or target_ip.is_reserved or target_ip.is_loopback: + log.debug(f"IP {ip_str} is in private/reserved range - returning AS0 'Private'") + return IPInfo(ip_str, asn=0, asn_name="Private", prefix="Private Network") + + # First check IXP networks (more specific usually) + for net_addr, prefixlen, ixp_name in self.ixp_networks: + try: + network = ip_network(f"{net_addr}/{prefixlen}", strict=False) + if target_ip in network: + log.debug(f"Found IXP match for {ip_str}: {ixp_name}") + return IPInfo(ip_str, is_ixp=True, ixp_name=ixp_name) + except Exception: + continue + + # Fast integer-based lookup for ASN + target_int = int(target_ip) + + if isinstance(target_ip, IPv4Address): + # Use optimized IPv4 lookup + for net_int, mask_bits, asn, cidr_string in self._ipv4_networks: + if (target_int >> mask_bits) == (net_int >> mask_bits): + asn_data = self.asn_info.get(asn, {}) + asn_name = asn_data.get("name", f"AS{asn}") + country = 
asn_data.get("country", "") + log.debug( + f"Found ASN match for {ip_str}: AS{asn} ({asn_name}) in {cidr_string}" + ) + return IPInfo( + ip_str, asn=asn, asn_name=asn_name, prefix=cidr_string, country=country + ) + else: + # Use optimized IPv6 lookup + for net_int, mask_bits, asn, cidr_string in self._ipv6_networks: + if (target_int >> mask_bits) == (net_int >> mask_bits): + asn_data = self.asn_info.get(asn, {}) + asn_name = asn_data.get("name", f"AS{asn}") + country = asn_data.get("country", "") + log.debug( + f"Found ASN match for {ip_str}: AS{asn} ({asn_name}) in {cidr_string}" + ) + return IPInfo( + ip_str, asn=asn, asn_name=asn_name, prefix=cidr_string, country=country + ) + + # No match found - return AS0 with "Unknown" to indicate missing data + log.debug(f"No enrichment data found for {ip_str} - returning AS0 'Unknown'") + return IPInfo(ip_str, asn=0, asn_name="Unknown") + + async def lookup_asn_name(self, asn: int) -> str: + """Get the organization name for an ASN.""" + if not await self.ensure_data_loaded(): + return f"AS{asn}" + + asn_data = self.asn_info.get(asn, {}) + return asn_data.get("name", f"AS{asn}") + + async def lookup_asn_country(self, asn: int) -> str: + """Get the country code for an ASN.""" + if not await self.ensure_data_loaded(): + return "" + + asn_data = self.asn_info.get(asn, {}) + return asn_data.get("country", "") + + def lookup_ip_direct(self, ip_str: str) -> IPInfo: + """Direct IP lookup without ensuring data is loaded - for bulk operations.""" + try: + target_ip = ip_address(ip_str) + except ValueError as e: + log.error(f"Invalid IP address: {ip_str}: {e}") + return IPInfo(ip_str) + + # Check if IP is in private/reserved ranges first + if target_ip.is_private or target_ip.is_reserved or target_ip.is_loopback: + log.debug(f"IP {ip_str} is in private/reserved range - returning AS0 'Private'") + return IPInfo(ip_str, asn=0, asn_name="Private", prefix="Private Network") + + # Check IXP networks first + for ixp_net, ixp_prefix, 
ixp_name in self.ixp_networks: + try: + ixp_network = ip_network(f"{ixp_net}/{ixp_prefix}") + if target_ip in ixp_network: + log.debug(f"Found IXP match for {ip_str}: {ixp_name}") + return IPInfo(ip_str, is_ixp=True, ixp_name=ixp_name) + except Exception: + continue + + # Ensure optimized lookup is ready + if not self._lookup_optimized: + self._optimize_lookups() + + # Fast integer-based lookup for ASN + target_int = int(target_ip) + + if isinstance(target_ip, IPv4Address): + # Use optimized IPv4 lookup + for net_int, mask_bits, asn, cidr_string in self._ipv4_networks: + if (target_int >> mask_bits) == (net_int >> mask_bits): + asn_data = self.asn_info.get(asn, {}) + asn_name = asn_data.get("name", f"AS{asn}") + country = asn_data.get("country", "") + log.debug( + f"Found ASN match for {ip_str}: AS{asn} ({asn_name}) in {cidr_string}" + ) + return IPInfo( + ip_str, asn=asn, asn_name=asn_name, prefix=cidr_string, country=country + ) + else: + # Use optimized IPv6 lookup + for net_int, mask_bits, asn, cidr_string in self._ipv6_networks: + if (target_int >> mask_bits) == (net_int >> mask_bits): + asn_data = self.asn_info.get(asn, {}) + asn_name = asn_data.get("name", f"AS{asn}") + country = asn_data.get("country", "") + log.debug( + f"Found ASN match for {ip_str}: AS{asn} ({asn_name}) in {cidr_string}" + ) + return IPInfo( + ip_str, asn=asn, asn_name=asn_name, prefix=cidr_string, country=country + ) + + # No match found - return AS0 with "Unknown" to indicate missing data + log.debug(f"No enrichment data found for {ip_str} - returning AS0 'Unknown'") + return IPInfo(ip_str, asn=0, asn_name="Unknown") + + +# Global service instance +_service = IPEnrichmentService() + + +# Public API functions +async def lookup_ip(ip_address: str) -> IPInfo: + """Lookup an IP address and return ASN or IXP information.""" + return await _service.lookup_ip(ip_address) + + +async def lookup_asn_name(asn: int) -> str: + """Get the organization name for an ASN number.""" + return await 
_service.lookup_asn_name(asn) + + +async def lookup_asn_country(asn: int) -> str: + """Get the country code for an ASN number.""" + return await _service.lookup_asn_country(asn) + + +async def lookup_asns_bulk(asns: t.List[t.Union[str, int]]) -> t.Dict[str, t.Dict[str, str]]: + """Bulk lookup ASN organization names and countries. + + Args: + asns: List of ASN numbers (as strings like "12345" or integers) + + Returns: + Dict mapping ASN string to {"name": org_name, "country": country_code} + Example: {"12345": {"name": "Example ISP", "country": "US"}} + """ + await _service.ensure_data_loaded() + + results = {} + for asn in asns: + # Skip non-numeric ASNs like "IXP" + if asn == "IXP" or asn is None: + continue + + try: + asn_int = int(asn) + asn_data = _service.asn_info.get(asn_int, {}) + results[str(asn)] = { + "name": asn_data.get("name", f"AS{asn}"), + "country": asn_data.get("country", ""), + } + except (ValueError, TypeError): + # Skip invalid ASN values + continue + + return results + + +async def refresh_ip_enrichment_data(force: bool = False) -> bool: + """Manually refresh IP enrichment data.""" + log.info(f"Manual refresh requested (force={force})") + return await _service.ensure_data_loaded(force_refresh=force) + + +def get_data_status() -> dict: + """Get status information about IP enrichment data.""" + status = { + "data_directory": str(IP_ENRICHMENT_DATA_DIR), + "files_exist": { + "cidr_data": CIDR_DATA_FILE.exists(), + "asn_data": ASN_DATA_FILE.exists(), + "ixp_data": IXP_DATA_FILE.exists(), + "last_update": LAST_UPDATE_FILE.exists(), + "combined_cache": COMBINED_CACHE_FILE.exists(), + "raw_table": RAW_TABLE_FILE.exists(), + "raw_asns": RAW_ASNS_FILE.exists(), + }, + "last_update": None, + "age_hours": None, + "data_counts": { + "cidr_entries": len(_service.cidr_networks), + "asn_entries": len(_service.asn_info), + "ixp_networks": len(_service.ixp_networks), + }, + } + + if LAST_UPDATE_FILE.exists(): + try: + with open(LAST_UPDATE_FILE, "r") as f: + 
last_update = datetime.fromisoformat(f.read().strip()) + status["last_update"] = last_update.isoformat() + status["age_hours"] = (datetime.now() - last_update).total_seconds() / 3600 + except Exception: + pass + + return status + + +# Compatibility functions for existing code +TargetDetail = t.TypedDict( + "TargetDetail", + { + "asn": str, + "ip": str, + "prefix": str, + "country": str, + "rir": str, + "allocated": str, + "org": str, + }, +) + +TargetData = t.Dict[str, TargetDetail] + + +def default_ip_targets(*targets: str) -> t.Tuple[TargetData, t.List[str]]: + """Filter targets and create default data for private/special addresses.""" + _log = log.bind(source="ip_enrichment") + + default_data: TargetData = {} + query_targets: t.List[str] = [] + + for target in targets: + try: + target_ip = ip_address(target) + + # Check for special address types + special_types = [ + (target_ip.is_loopback, "Loopback Address"), + (target_ip.is_multicast, "Multicast Address"), + (target_ip.is_link_local, "Link Local Address"), + (target_ip.is_private, "Private Address"), + (target_ip.version == 6 and target_ip.is_site_local, "Site Local Address"), + ] + + is_special = False + for check, rir_type in special_types: + if check: + default_data[target] = { + "asn": "None", + "ip": target, + "prefix": "None", + "country": "None", + "rir": rir_type, + "allocated": "None", + "org": "None", + } + is_special = True + break + + if not is_special and (target_ip.is_global or target_ip.is_unspecified): + query_targets.append(target) + elif not is_special: + # Other non-global addresses + default_data[target] = { + "asn": "None", + "ip": target, + "prefix": "None", + "country": "None", + "rir": "Reserved Address", + "allocated": "None", + "org": "None", + } + + except ValueError: + # Invalid IP address + default_data[target] = { + "asn": "None", + "ip": target, + "prefix": "None", + "country": "None", + "rir": "Invalid Address", + "allocated": "None", + "org": "None", + } + + return 
default_data, query_targets + + +async def network_info(*targets: str) -> TargetData: + """Get network information using IP enrichment - compatibility function.""" + _log = log.bind(source="ip_enrichment") + + default_data, query_targets = default_ip_targets(*targets) + + if not query_targets: + _log.debug("No valid global IPs to query") + return default_data + + try: + _log.info(f"Enriching {len(query_targets)} IP addresses") + + # Load data ONCE for all lookups + await _service.ensure_data_loaded() + + query_data = {} + + # Process each target without reloading data + for target in query_targets: + ip_info = _service.lookup_ip_direct( + target + ) # Use direct lookup that doesn't reload data + + # Convert to TargetDetail format + if ip_info.is_ixp and ip_info.ixp_name: + # IXP case - put "IXP" in ASN field and IXP name in org field + detail: TargetDetail = { + "asn": "IXP", # Show "IXP" as the ASN for IXPs + "ip": target, + "prefix": "None", + "country": "None", + "rir": "IXP", # Mark as IXP in RIR field + "allocated": "None", + "org": ip_info.ixp_name, + } + elif ip_info.asn is not None: + # ASN case - normal network - return just the NUMBER, no AS prefix + detail = { + "asn": str(ip_info.asn), # Just the number as string, e.g. 
"12345" + "ip": target, + "prefix": ip_info.prefix or "None", # Use the CIDR from table.jsonl + "country": ip_info.country or "None", # Use country code from asns.csv + "rir": "UNKNOWN", # Not available from our enrichment + "allocated": "None", # Not available from our enrichment + "org": ip_info.asn_name or "None", + } + else: + # No match found + detail = { + "asn": "None", + "ip": target, + "prefix": "None", + "country": "None", + "rir": "Unknown", + "allocated": "None", + "org": "None", + } + + query_data[target] = detail + + if ip_info.is_ixp: + _log.debug(f"Enriched {target}: IXP={ip_info.ixp_name}") + elif ip_info.asn: + _log.debug(f"Enriched {target}: AS{ip_info.asn} ({ip_info.asn_name})") + else: + _log.debug(f"No enrichment data found for {target}") + + except Exception as e: + _log.error(f"Error in network_info lookup: {e}") + # Return default data for all targets on error + query_data = {} + for target in query_targets: + query_data[target] = { + "asn": "None", + "ip": target, + "prefix": "None", + "country": "None", + "rir": "Error", + "allocated": "None", + "org": "None", + } + + return {**default_data, **query_data} + + +def network_info_sync(*targets: str) -> TargetData: + """Synchronous wrapper for network_info.""" + return asyncio.run(network_info(*targets)) + + +async def network_info_single(target: str) -> TargetDetail: + """Get network information for a single IP address.""" + result = await network_info(target) + return result[target] diff --git a/hyperglass/external/tests/test_bgptools.py b/hyperglass/external/tests/test_bgptools.py deleted file mode 100644 index 542c1dc..0000000 --- a/hyperglass/external/tests/test_bgptools.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Test bgp.tools interactions.""" - -# Standard Library -import asyncio - -# Third Party -import pytest - -# Local -from ..bgptools import run_whois, parse_whois, network_info - -WHOIS_OUTPUT = """AS | IP | BGP Prefix | CC | Registry | Allocated | AS Name -13335 | 1.1.1.1 | 1.1.1.0/24 
| US | ARIN | 2010-07-14 | Cloudflare, Inc.""" - - -# Ignore asyncio deprecation warning about loop -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_network_info(): - checks = ( - ("192.0.2.1", {"asn": "None", "rir": "Private Address"}), - ("127.0.0.1", {"asn": "None", "rir": "Loopback Address"}), - ("fe80:dead:beef::1", {"asn": "None", "rir": "Link Local Address"}), - ("2001:db8::1", {"asn": "None", "rir": "Private Address"}), - ("1.1.1.1", {"asn": "13335", "rir": "ARIN"}), - ) - for addr, fields in checks: - info = asyncio.run(network_info(addr)) - assert addr in info - for key, expected in fields.items(): - assert info[addr][key] == expected - - -# Ignore asyncio deprecation warning about loop -@pytest.mark.filterwarnings("ignore::DeprecationWarning") -def test_whois(): - addr = "192.0.2.1" - response = asyncio.run(run_whois([addr])) - assert isinstance(response, str) - assert response != "" - - -def test_whois_parser(): - addr = "1.1.1.1" - result = parse_whois(WHOIS_OUTPUT, [addr]) - assert isinstance(result, dict) - assert addr in result, "Address missing" - assert result[addr]["asn"] == "13335" - assert result[addr]["rir"] == "ARIN" - assert result[addr]["org"] == "Cloudflare, Inc." diff --git a/hyperglass/main.py b/hyperglass/main.py index 51d21da..5a5f0f0 100644 --- a/hyperglass/main.py +++ b/hyperglass/main.py @@ -12,6 +12,7 @@ import uvicorn # Local from .log import LibInterceptHandler, init_logger, enable_file_logging, enable_syslog_logging from .util import get_node_version +from .state import use_state from .constants import MIN_NODE_VERSION, MIN_PYTHON_VERSION, __version__ # Ensure the Python version meets the minimum requirements. 
diff --git a/hyperglass/models/config/structured.py b/hyperglass/models/config/structured.py index 486a34a..13195dc 100644 --- a/hyperglass/models/config/structured.py +++ b/hyperglass/models/config/structured.py @@ -38,8 +38,25 @@ class StructuredRpki(HyperglassModel): rpki_server_url: str = "" +class StructuredIpEnrichment(HyperglassModel): + """Control IP enrichment for structured data responses.""" + + enabled: bool = False + cache_timeout: int = 86400 # 24 hours in seconds (minimum) + enrich_next_hop: bool = False + enrich_traceroute: bool = True + + @field_validator("cache_timeout") + def validate_cache_timeout(cls, value: int) -> int: + """Ensure cache timeout is at least 24 hours (86400 seconds).""" + if value < 86400: + return 86400 + return value + + class Structured(HyperglassModel): """Control structured data responses.""" communities: StructuredCommunities = StructuredCommunities() rpki: StructuredRpki = StructuredRpki() + ip_enrichment: StructuredIpEnrichment = StructuredIpEnrichment() diff --git a/hyperglass/models/data/__init__.py b/hyperglass/models/data/__init__.py index d19d5e8..9e0a0be 100644 --- a/hyperglass/models/data/__init__.py +++ b/hyperglass/models/data/__init__.py @@ -5,11 +5,13 @@ from typing import Union # Local from .bgp_route import BGPRoute, BGPRouteTable +from .traceroute import TracerouteResult -OutputDataModel = Union[BGPRouteTable] +OutputDataModel = Union[BGPRouteTable, TracerouteResult] __all__ = ( "BGPRoute", "BGPRouteTable", + "TracerouteResult", "OutputDataModel", ) diff --git a/hyperglass/models/data/bgp_route.py b/hyperglass/models/data/bgp_route.py index 9187f4f..63e0d0e 100644 --- a/hyperglass/models/data/bgp_route.py +++ b/hyperglass/models/data/bgp_route.py @@ -11,6 +11,7 @@ from pydantic import ValidationInfo, field_validator # Project from hyperglass.state import use_state from hyperglass.external.rpki import rpki_state +from hyperglass.external.ip_enrichment import TargetDetail # Local from ..main import 
HyperglassModel @@ -35,6 +36,11 @@ class BGPRoute(HyperglassModel): peer_rid: str rpki_state: int + # IP enrichment data (optional) + next_hop_asn: t.Optional[str] = None + next_hop_org: t.Optional[str] = None + next_hop_country: t.Optional[str] = None + @field_validator("communities") def validate_communities(cls, value): """Filter returned communities against configured policy. @@ -116,6 +122,36 @@ class BGPRoute(HyperglassModel): return value + @property + def as_path_summary(self) -> str: + """Summary of AS path.""" + if not self.as_path: + return "Unknown" + return " -> ".join([f"AS{asn}" for asn in self.as_path]) + + async def get_as_path_detailed(self) -> str: + """Detailed AS path with organization names using IP enrichment.""" + if not self.as_path: + return "Unknown" + + try: + from hyperglass.external.ip_enrichment import lookup_asn_name + + detailed_path = [] + for asn in self.as_path: + try: + org_name = await lookup_asn_name(asn) + if org_name and org_name != f"AS{asn}": + detailed_path.append(f"AS{asn} ({org_name})") + else: + detailed_path.append(f"AS{asn}") + except Exception: + detailed_path.append(f"AS{asn}") + + return " -> ".join(detailed_path) + except Exception: + return self.as_path_summary + class BGPRouteTable(HyperglassModel): """Post-parsed BGP route table.""" @@ -124,6 +160,7 @@ class BGPRouteTable(HyperglassModel): count: int = 0 routes: t.List[BGPRoute] winning_weight: WinningWeight + asn_organizations: t.Dict[str, t.Dict[str, str]] = {} # ASN -> {name, country} def __init__(self, **kwargs): """Sort routes by prefix after validation.""" @@ -136,3 +173,71 @@ class BGPRouteTable(HyperglassModel): self.routes = sorted([*self.routes, *other.routes], key=lambda r: r.prefix) self.count = len(self.routes) return self + + async def enrich_with_ip_enrichment(self): + """Enrich BGP routes with next-hop information from IP enrichment.""" + from hyperglass.external.ip_enrichment import network_info + + # Extract unique next-hop IPs that need 
enrichment + next_hops_to_lookup = set() + for route in self.routes: + if route.next_hop and not route.next_hop_asn: # Only lookup if not already enriched + next_hops_to_lookup.add(route.next_hop) + + if not next_hops_to_lookup: + return + + # Bulk lookup next-hop information + network_data = await network_info(*list(next_hops_to_lookup)) + + # Enrich routes with the retrieved data + for route in self.routes: + if route.next_hop in network_data: + data: TargetDetail = network_data[route.next_hop] + # Handle ASN formatting + asn_raw = data.get("asn") + if asn_raw and asn_raw != "None": + route.next_hop_asn = f"AS{asn_raw}" + else: + route.next_hop_asn = None + + route.next_hop_org = data.get("org") if data.get("org") != "None" else None + route.next_hop_country = ( + data.get("country") if data.get("country") != "None" else None + ) + + async def enrich_as_path_organizations(self): + """Enrich AS path ASNs with organization names using bulk lookup.""" + from hyperglass.external.ip_enrichment import lookup_asns_bulk + from hyperglass.log import log + + _log = log.bind(source="bgp_asn_enrichment") + + # Collect all unique ASNs from AS paths + all_asns = set() + for route in self.routes: + all_asns.update(route.as_path) + + if not all_asns: + _log.debug("No AS paths found to enrich") + return + + # Convert to strings and bulk lookup + asn_strings = [str(asn) for asn in all_asns] + _log.warning( + f"🔍 BGP AS PATH ENRICHMENT STARTED - Looking up {len(asn_strings)} ASNs: {asn_strings}" + ) + + try: + asn_data = await lookup_asns_bulk(asn_strings) + _log.debug(f"Got ASN organization data: {asn_data}") + + # Store the ASN organization mapping for use by frontend + self.asn_organizations = asn_data + _log.warning( + f"🔍 BGP AS PATH ENRICHMENT SUCCESS - Enriched with {len(asn_data)} ASN organizations: {asn_data}" + ) + + except Exception as e: + _log.error(f"Failed to lookup ASN organizations: {e}") + self.asn_organizations = {} diff --git 
a/hyperglass/models/data/traceroute.py b/hyperglass/models/data/traceroute.py new file mode 100644 index 0000000..2ccde86 --- /dev/null +++ b/hyperglass/models/data/traceroute.py @@ -0,0 +1,251 @@ +"""Traceroute Data Models.""" + +# Standard Library +import typing as t +from ipaddress import ip_address, AddressValueError + +# Third Party +from pydantic import field_validator + +# Project +from hyperglass.external.ip_enrichment import TargetDetail + +# Local +from ..main import HyperglassModel + + +class TracerouteHop(HyperglassModel): + """Individual hop in a traceroute.""" + + hop_number: int + ip_address: t.Optional[str] = None + display_ip: t.Optional[str] = None # For truncated IPs that can't be validated + hostname: t.Optional[str] = None + rtt1: t.Optional[float] = None + rtt2: t.Optional[float] = None + rtt3: t.Optional[float] = None + + # MikroTik-specific statistics + loss_pct: t.Optional[int] = None + sent_count: t.Optional[int] = None + last_rtt: t.Optional[float] = None + best_rtt: t.Optional[float] = None + worst_rtt: t.Optional[float] = None + + # IP enrichment data + asn: t.Optional[str] = None + org: t.Optional[str] = None + prefix: t.Optional[str] = None + country: t.Optional[str] = None + rir: t.Optional[str] = None + allocated: t.Optional[str] = None + + @field_validator("ip_address") + def validate_ip_address(cls, value): + """Validate IP address format.""" + if value is not None: + # Handle truncated addresses (MikroTik sometimes truncates long IPv6 addresses with ...) 
+ if value.endswith("...") or value.endswith(".."): + return None # Invalid for BGP enrichment but kept in display_ip + try: + ip_address(value) + except AddressValueError: + return None + return value + + @property + def ip_display(self) -> t.Optional[str]: + """Get the IP address for display purposes (may be truncated).""" + return self.display_ip or self.ip_address + + @property + def avg_rtt(self) -> t.Optional[float]: + """Calculate average RTT from available measurements.""" + rtts = [rtt for rtt in [self.rtt1, self.rtt2, self.rtt3] if rtt is not None] + return sum(rtts) / len(rtts) if rtts else None + + @property + def is_timeout(self) -> bool: + """Check if this hop is a timeout (no IP and no RTTs).""" + return self.ip_address is None and all( + rtt is None for rtt in [self.rtt1, self.rtt2, self.rtt3] + ) + + @property + def asn_display(self) -> str: + """Display ASN - just the number, no AS prefix.""" + if self.asn and self.asn != "None": + if self.asn == "IXP": + # For IXPs, show "IXP" with org if available + if self.org and self.org != "None": + return f"IXP ({self.org})" + return "IXP" + else: + # For ASNs, show just the number with org if available + if self.org and self.org != "None": + return f"{self.asn} ({self.org})" + return self.asn + return "Unknown" + + +class TracerouteResult(HyperglassModel): + """Complete traceroute result.""" + + target: str + source: str + hops: t.List[TracerouteHop] + max_hops: int = 30 + packet_size: int = 60 + raw_output: t.Optional[str] = ( + None # Store cleaned/processed output for "Copy Raw" functionality (not original raw router output) + ) + asn_organizations: t.Dict[str, t.Dict[str, str]] = {} # ASN -> {name, country} + + @property + def hop_count(self) -> int: + """Total number of hops.""" + return len(self.hops) + + @property + def unique_asns(self) -> t.List[str]: + """List of unique ASNs encountered in the path.""" + asns = set() + for hop in self.hops: + if hop.asn and hop.asn != "None": + asns.add(hop.asn) 
+ return sorted(list(asns)) + + @property + def as_path_summary(self) -> str: + """Summary of AS path traversed.""" + as_path = [] + current_asn = None + + for hop in self.hops: + if hop.asn and hop.asn not in ["None", None] and hop.asn != current_asn: + current_asn = hop.asn + # hop.asn is now just number ("12345") or "IXP" - display as-is + as_path.append(hop.asn) + + return " -> ".join(as_path) if as_path else "Unknown" + + @property + def as_path_detailed(self) -> str: + """Detailed AS path with organization names.""" + as_path = [] + current_asn = None + current_org = None + + for hop in self.hops: + if hop.asn and hop.asn not in ["None", None] and hop.asn != current_asn: + current_asn = hop.asn # Just number ("12345") or "IXP" + current_org = hop.org + + # Format with org name if we have it + if current_org and current_org not in ["None", None]: + if current_asn == "IXP": + as_path.append(f"IXP ({current_org})") + else: + as_path.append(f"{current_asn} ({current_org})") + else: + as_path.append(current_asn) + + return " -> ".join(as_path) if as_path else "Unknown" + + @property + def as_path_data(self) -> t.List[t.Dict[str, t.Union[str, None]]]: + """AS path data as structured list for frontend visualization.""" + as_path_data = [] + current_asn = None + current_org = None + + for hop in self.hops: + if hop.asn and hop.asn not in ["None", None] and hop.asn != current_asn: + current_asn = hop.asn # Just number ("12345") or "IXP" + current_org = hop.org + + as_path_data.append( + { + "asn": current_asn, + "org": current_org if current_org and current_org != "None" else None, + } + ) + + return as_path_data + + async def enrich_with_ip_enrichment(self): + """Enrich traceroute hops with IP enrichment data.""" + from hyperglass.external.ip_enrichment import network_info + + # Extract all IP addresses that need enrichment + ips_to_lookup = [] + for hop in self.hops: + if hop.ip_address and hop.asn is None: # Only lookup if not already enriched + 
ips_to_lookup.append(hop.ip_address) + + if not ips_to_lookup: + return + + # Bulk lookup IP information + network_data = await network_info(*ips_to_lookup) + + # Enrich hops with the retrieved data + for hop in self.hops: + if hop.ip_address in network_data: + data: TargetDetail = network_data[hop.ip_address] + # ASN field is now just number string ("12345") or "IXP" + asn_value = data.get("asn") + if asn_value and asn_value != "None": + hop.asn = asn_value # Store as-is: "12345" or "IXP" + else: + hop.asn = None + + hop.org = data.get("org") if data.get("org") != "None" else None + hop.prefix = data.get("prefix") if data.get("prefix") != "None" else None + hop.country = data.get("country") if data.get("country") != "None" else None + hop.rir = data.get("rir") if data.get("rir") != "None" else None + hop.allocated = data.get("allocated") if data.get("allocated") != "None" else None + + async def enrich_asn_organizations(self): + """Enrich ASN organization names using bulk ASN lookup.""" + from hyperglass.external.ip_enrichment import lookup_asns_bulk + from hyperglass.log import log + + _log = log.bind(source="traceroute_asn_enrichment") + + # Collect all unique ASNs that need organization info + asns_to_lookup = [] + for hop in self.hops: + if hop.asn and hop.asn != "None" and hop.asn != "IXP": + asns_to_lookup.append(hop.asn) + _log.debug(f"Hop {hop.hop_number}: ASN={hop.asn}, current org='{hop.org}'") + + if not asns_to_lookup: + _log.debug("No ASNs to lookup") + return + + # Remove duplicates while preserving order + unique_asns = list(dict.fromkeys(asns_to_lookup)) + _log.info(f"Looking up organizations for {len(unique_asns)} unique ASNs: {unique_asns}") + + # Bulk lookup ASN organization data + asn_data = await lookup_asns_bulk(unique_asns) + _log.debug(f"Got ASN data: {asn_data}") + + # Apply the organization data to hops + for hop in self.hops: + if hop.asn and hop.asn in asn_data: + data = asn_data[hop.asn] + org_name = data.get("name") if 
data.get("name") != f"AS{hop.asn}" else None + + _log.debug( + f"Hop {hop.hop_number} ASN {hop.asn}: setting org='{org_name}' (was '{hop.org}')" + ) + + # Always update org from ASN data (more accurate than IP-based org) + hop.org = org_name + if not hop.country: # Only set country if not already set + hop.country = data.get("country") or None + + # Store the ASN organization mapping for frontend path visualization + self.asn_organizations = asn_data diff --git a/hyperglass/models/directive.py b/hyperglass/models/directive.py index 058eaae..a390e74 100644 --- a/hyperglass/models/directive.py +++ b/hyperglass/models/directive.py @@ -19,7 +19,7 @@ from .main import MultiModel, HyperglassModel, HyperglassUniqueModel from .fields import Action StringOrArray = t.Union[str, t.List[str]] -Condition = t.Union[IPvAnyNetwork, str] +Condition = t.Union[str, None] RuleValidation = t.Union[t.Literal["ipv4", "ipv6", "pattern"], None] PassedValidation = t.Union[bool, None] IPFamily = t.Literal["ipv4", "ipv6"] @@ -264,7 +264,7 @@ class Directive(HyperglassUniqueModel, unique_by=("id", "table_output")): id: str name: str rules: t.List[RuleType] = [RuleWithoutValidation()] - field: t.Union[Text, Select] + field: t.Union[Text, Select, None] info: t.Optional[FilePath] = None plugins: t.List[str] = [] table_output: t.Optional[str] = None @@ -282,15 +282,16 @@ class Directive(HyperglassUniqueModel, unique_by=("id", "table_output")): condition = rule.get("condition") if condition is None: out_rules.append(RuleWithoutValidation(**rule)) - try: - condition_net = ip_network(condition) - if condition_net.version == 4: - out_rules.append(RuleWithIPv4(**rule)) - if condition_net.version == 6: - out_rules.append(RuleWithIPv6(**rule)) - except ValueError: - out_rules.append(RuleWithPattern(**rule)) - if isinstance(rule, Rule): + else: + try: + condition_net = ip_network(condition) + if condition_net.version == 4: + out_rules.append(RuleWithIPv4(**rule)) + if condition_net.version == 6: + 
out_rules.append(RuleWithIPv6(**rule)) + except ValueError: + out_rules.append(RuleWithPattern(**rule)) + elif isinstance(rule, Rule): out_rules.append(rule) return out_rules @@ -306,7 +307,8 @@ class Directive(HyperglassUniqueModel, unique_by=("id", "table_output")): @property def field_type(self) -> t.Literal["text", "select", None]: """Get the linked field type.""" - + if self.field is None: + return None if self.field.is_select: return "select" if self.field.is_text or self.field.is_ip: @@ -337,7 +339,7 @@ class Directive(HyperglassUniqueModel, unique_by=("id", "table_output")): "name": self.name, "field_type": self.field_type, "groups": self.groups, - "description": self.field.description, + "description": self.field.description if self.field is not None else "", "info": None, } @@ -345,7 +347,7 @@ class Directive(HyperglassUniqueModel, unique_by=("id", "table_output")): with self.info.open() as md: value["info"] = md.read() - if self.field.is_select: + if self.field is not None and self.field.is_select: value["options"] = [o.export_dict() for o in self.field.options if o is not None] return value diff --git a/hyperglass/models/parsing/mikrotik.py b/hyperglass/models/parsing/mikrotik.py index 941cf5a..16e3b5c 100644 --- a/hyperglass/models/parsing/mikrotik.py +++ b/hyperglass/models/parsing/mikrotik.py @@ -307,3 +307,410 @@ class MikrotikBGPTable(MikrotikBase): routes=routes, winning_weight="low", ) + + +class MikrotikTracerouteTable(MikrotikBase): + """MikroTik Traceroute Table.""" + + target: str + source: str + hops: t.List["MikrotikTracerouteHop"] = [] + max_hops: int = 30 + packet_size: int = 60 + + @classmethod + def parse_text(cls, text: str, target: str, source: str) -> "MikrotikTracerouteTable": + """Parse MikroTik traceroute output. 
+ + MikroTik shows multiple complete tables over time as it builds the traceroute: + + Columns: ADDRESS, LOSS, SENT, LAST, AVG, BEST, WORST, STD-DEV + # ADDRESS LOSS SENT LAST AVG BEST WORST STD-DEV + 1 10.0.0.41 0% 1 0.5ms 0.5 0.5 0.5 0 + 2 185.73.201.193 0% 1 0.4ms 0.4 0.4 0.4 0 + 3 46.31.76.111 0% 1 0.5ms 0.5 0.5 0.5 0 + 4 0% 1 0ms + -- [Q quit|C-z pause] + Columns: ADDRESS, LOSS, SENT, LAST, AVG, BEST, WORST, STD-DEV + # ADDRESS LOSS SENT LAST AVG BEST WORST STD-DEV + 1 10.0.0.41 0% 1 0.5ms 0.5 0.5 0.5 0 + 2 185.73.201.193 0% 1 0.4ms 0.4 0.4 0.4 0 + ...more tables... + + We need to find the LAST/NEWEST table and use that as the final result. + """ + _log = log.bind(parser="MikrotikTracerouteTable") + + # DEBUG: Log the raw input + _log.debug(f"=== RAW MIKROTIK TRACEROUTE INPUT ===") + _log.debug(f"Target: {target}, Source: {source}") + _log.debug(f"Raw text length: {len(text)} characters") + _log.debug(f"Raw text:\n{repr(text)}") + _log.debug(f"=== END RAW INPUT ===") + + lines = text.strip().split("\n") + _log.debug(f"Split into {len(lines)} lines") + + # DEBUG: Log each line with line numbers + for i, line in enumerate(lines): + _log.debug(f"Line {i:2d}: {repr(line)}") + + # Find all table starts - handle both formats: + # Format 1: "Columns: ADDRESS, LOSS, SENT..." (newer format with hop numbers) + # Format 2: "ADDRESS LOSS SENT..." 
(older format, no hop numbers) + table_starts = [] + for i, line in enumerate(lines): + if ("Columns:" in line and "ADDRESS" in line) or ( + "ADDRESS" in line + and "LOSS" in line + and "SENT" in line + and not line.strip().startswith(("1", "2", "3", "4", "5", "6", "7", "8", "9")) + ): + table_starts.append(i) + _log.debug(f"Found table start at line {i}: {repr(line)}") + + if not table_starts: + _log.warning("No traceroute table headers found in output") + return MikrotikTracerouteTable(target=target, source=source, hops=[]) + + # Take the LAST table (newest/final results) + last_table_start = table_starts[-1] + _log.debug( + f"Found {len(table_starts)} tables, using the last one starting at line {last_table_start}" + ) + + # Determine format by checking the header line + header_line = lines[last_table_start].strip() + is_columnar_format = "Columns:" in header_line + _log.debug(f"Header line: {repr(header_line)}") + _log.debug(f"Is columnar format: {is_columnar_format}") + + # Parse only the last table + hops = [] + in_data_section = False + current_hop_number = 1 # Track the current hop number + hop_counter = 1 # For old format without hop numbers + + # Start from the last table header + for i in range(last_table_start, len(lines)): + original_line = lines[i] # Keep original line with whitespace + line = original_line.strip() # Stripped version for most processing + + # Skip empty lines + if not line: + _log.debug(f"Line {i}: EMPTY - skipping") + continue + + # Skip the column header lines + if ( + ("Columns:" in line) + or ("ADDRESS" in line and "LOSS" in line and "SENT" in line) + or line.startswith("#") + ): + in_data_section = True + _log.debug(f"Line {i}: HEADER - entering data section: {repr(line)}") + continue + + # Skip paging prompts + if "-- [Q quit|C-z pause]" in line: + _log.debug(f"Line {i}: PAGING PROMPT - breaking: {repr(line)}") + break # End of this table + + if in_data_section and line: + _log.debug(f"Line {i}: PROCESSING DATA LINE: 
{repr(line)}") + try: + # Define helper function for RTT parsing + def parse_rtt(rtt_str: str) -> t.Optional[float]: + if rtt_str in ("timeout", "-", "0ms"): + return None + # Remove 'ms' suffix and convert to float + rtt_clean = re.sub(r"ms$", "", rtt_str) + try: + return float(rtt_clean) + except ValueError: + return None + + # Check if this is a timeout/continuation line (starts with whitespace, has % and numbers) + # Use original_line to check for leading whitespace + if ( + (original_line.startswith(" ") or original_line.startswith("\t")) + and "%" in line + and ("timeout" in line or "0ms" in line) + ): + # This is a timeout/continuation hop + parts = line.split() + _log.debug(f"Line {i}: Timeout/continuation line, parts: {parts}") + + if len(parts) >= 2 and parts[0].endswith("%"): + ip_address = None + loss_pct = int(parts[0].rstrip("%")) + sent_count = int(parts[1]) + + if "timeout" in parts: + last_rtt_str = "timeout" + avg_rtt_str = "timeout" + best_rtt_str = "timeout" + worst_rtt_str = "timeout" + else: + last_rtt_str = parts[2] if len(parts) > 2 else "0ms" + avg_rtt_str = "0" + best_rtt_str = "0" + worst_rtt_str = "0" + + # Create timeout hop + hop = MikrotikTracerouteHop( + hop_number=current_hop_number, + ip_address=ip_address, + hostname=None, + loss_pct=loss_pct, + sent_count=sent_count, + last_rtt=parse_rtt(last_rtt_str), + avg_rtt=parse_rtt(avg_rtt_str), + best_rtt=parse_rtt(best_rtt_str), + worst_rtt=parse_rtt(worst_rtt_str), + ) + hops.append(hop) + current_hop_number += 1 + _log.debug(f"Line {i}: Created timeout hop {hop.hop_number}") + continue + + if is_columnar_format: + # New format: "1 10.0.0.41 0% 1 0.5ms 0.5 0.5 0.5 0" + parts = line.split() + _log.debug(f"Line {i}: Columnar format, parts: {parts}") + if len(parts) < 3: + _log.debug(f"Line {i}: Too few parts ({len(parts)}), skipping") + continue + + hop_number = int(parts[0]) + + # Check if there's an IP address or if it's empty (timeout hop) + if len(parts) >= 8 and not 
parts[1].endswith("%"): + # Normal hop with IP address + ip_address = parts[1] if parts[1] else None + loss_pct = int(parts[2].rstrip("%")) + sent_count = int(parts[3]) + last_rtt_str = parts[4] + avg_rtt_str = parts[5] + best_rtt_str = parts[6] + worst_rtt_str = parts[7] + elif len(parts) >= 4 and parts[1].endswith("%"): + # Timeout hop without IP address + ip_address = None + loss_pct = int(parts[1].rstrip("%")) + sent_count = int(parts[2]) + last_rtt_str = parts[3] if len(parts) > 3 else "timeout" + avg_rtt_str = "timeout" + best_rtt_str = "timeout" + worst_rtt_str = "timeout" + else: + _log.debug(f"Line {i}: Doesn't match columnar patterns, skipping") + continue + else: + # Old format: "196.60.8.198 0% 1 17.1ms 17.1 17.1 17.1 0" + # We need to deduplicate by taking the LAST occurrence of each IP + parts = line.split() + _log.debug(f"Line {i}: Old format, parts: {parts}") + if len(parts) < 6: + _log.debug(f"Line {i}: Too few parts ({len(parts)}), skipping") + continue + + ip_address = parts[0] if not parts[0].endswith("%") else None + + # Check for truncated IPv6 addresses + if ip_address and (ip_address.endswith("...") or ip_address.endswith("..")): + _log.warning( + f"Line {i}: Truncated IP address detected: {ip_address} - setting to None" + ) + ip_address = None + + if ip_address: + loss_pct = int(parts[1].rstrip("%")) + sent_count = int(parts[2]) + last_rtt_str = parts[3] + avg_rtt_str = parts[4] + best_rtt_str = parts[5] + worst_rtt_str = parts[6] if len(parts) > 6 else parts[5] + else: + # Timeout line or truncated address + if parts[0].endswith("%"): + # Normal timeout line + loss_pct = int(parts[0].rstrip("%")) + sent_count = int(parts[1]) + else: + # Truncated address - extract stats from remaining parts + loss_pct = int(parts[1].rstrip("%")) + sent_count = int(parts[2]) + last_rtt_str = "timeout" + avg_rtt_str = "timeout" + best_rtt_str = "timeout" + worst_rtt_str = "timeout" + + # Convert timing values + def parse_rtt(rtt_str: str) -> 
t.Optional[float]: + if rtt_str in ("timeout", "-", "0ms"): + return None + # Remove 'ms' suffix and convert to float + rtt_clean = re.sub(r"ms$", "", rtt_str) + try: + return float(rtt_clean) + except ValueError: + return None + + if is_columnar_format: + # Use hop number from the data and update our tracker + final_hop_number = hop_number + current_hop_number = max(current_hop_number, hop_number + 1) + else: + # Use sequential numbering for old format + final_hop_number = hop_counter + hop_counter += 1 + + hop_obj = MikrotikTracerouteHop( + hop_number=final_hop_number, + ip_address=ip_address, + hostname=None, # MikroTik doesn't do reverse DNS by default + loss_pct=loss_pct, + sent_count=sent_count, + last_rtt=parse_rtt(last_rtt_str), + avg_rtt=parse_rtt(avg_rtt_str), + best_rtt=parse_rtt(best_rtt_str), + worst_rtt=parse_rtt(worst_rtt_str), + ) + + hops.append(hop_obj) + _log.debug( + f"Line {i}: Created hop {final_hop_number}: {ip_address} - {loss_pct}% - {sent_count} sent" + ) + + except (ValueError, IndexError) as e: + _log.debug(f"Failed to parse line '{line}': {e}") + continue + + _log.debug(f"Before deduplication: {len(hops)} hops") + + # For old format, we need to deduplicate by IP and take only final stats + if not is_columnar_format and hops: + _log.debug(f"Old format detected - deduplicating {len(hops)} total entries") + + # Group by IP address and take the HIGHEST SENT count (final stats) + ip_to_final_hop = {} + ip_to_max_sent = {} + hop_order = [] + + for hop in hops: + # Use IP address if available, otherwise use hop position for timeouts + if hop.ip_address: + ip_key = hop.ip_address + else: + # No IP address means timeout hop + ip_key = f"timeout_{hop.hop_number}" + + # Track first appearance order + if ip_key not in hop_order: + hop_order.append(ip_key) + ip_to_max_sent[ip_key] = 0 + _log.debug(f"New IP discovered: {ip_key}") + + # Keep hop with highest SENT count (most recent/final stats) + if hop.sent_count and hop.sent_count >= 
ip_to_max_sent[ip_key]: + ip_to_max_sent[ip_key] = hop.sent_count + ip_to_final_hop[ip_key] = hop + _log.debug(f"Updated {ip_key}: SENT={hop.sent_count} (final stats)") + + _log.debug(f"IP order: {hop_order}") + _log.debug(f"Final IP stats: {[(ip, ip_to_max_sent[ip]) for ip in hop_order]}") + + # Rebuild hops list with final stats and correct hop numbers + final_hops = [] + for i, ip_key in enumerate(hop_order, 1): + final_hop = ip_to_final_hop[ip_key] + final_hop.hop_number = i # Correct hop numbering + final_hops.append(final_hop) + _log.debug( + f"Final hop {i}: {ip_key} - Loss: {final_hop.loss_pct}% - Sent: {final_hop.sent_count}" + ) + + hops = final_hops + _log.debug(f"Deduplication complete: {len(hops)} unique hops with final stats") + + _log.debug(f"After processing: {len(hops)} final hops") + for hop in hops: + _log.debug( + f"Final hop {hop.hop_number}: {hop.ip_address} - {hop.loss_pct}% loss - {hop.sent_count} sent" + ) + + result = MikrotikTracerouteTable(target=target, source=source, hops=hops) + _log.info(f"Parsed {len(hops)} hops from MikroTik traceroute final table") + return result + + def traceroute_result(self): + """Convert to TracerouteResult format.""" + from hyperglass.models.data.traceroute import TracerouteResult, TracerouteHop + + converted_hops = [] + for hop in self.hops: + # Handle truncated IP addresses + ip_address = hop.ip_address + display_ip = None + + if hop.ip_address and hop.ip_address.endswith("..."): + # For truncated IPs, store for display but set ip_address to None for validation + display_ip = hop.ip_address + ip_address = None + + converted_hops.append( + TracerouteHop( + hop_number=hop.hop_number, + ip_address=ip_address, # None for truncated IPs + display_ip=display_ip, # Truncated IP for display + hostname=hop.hostname, + rtt1=hop.best_rtt, + rtt2=hop.avg_rtt, + rtt3=hop.worst_rtt, + # MikroTik-specific statistics + loss_pct=hop.loss_pct, + sent_count=hop.sent_count, + last_rtt=hop.last_rtt, + best_rtt=hop.best_rtt, + 
worst_rtt=hop.worst_rtt, + # BGP enrichment fields will be populated by enrichment plugin + # For truncated IPs, these will remain None/empty + asn=None, + org=None, + prefix=None, + country=None, + rir=None, + allocated=None, + ) + ) + + return TracerouteResult( + target=self.target, + source=self.source, + hops=converted_hops, + max_hops=self.max_hops, + packet_size=self.packet_size, + raw_output=None, # Will be set by the plugin + ) + + +class MikrotikTracerouteHop(MikrotikBase): + """Individual MikroTik traceroute hop.""" + + hop_number: int + ip_address: t.Optional[str] = None + hostname: t.Optional[str] = None + + # MikroTik-specific statistics + loss_pct: t.Optional[int] = None + sent_count: t.Optional[int] = None + last_rtt: t.Optional[float] = None + avg_rtt: t.Optional[float] = None + best_rtt: t.Optional[float] = None + worst_rtt: t.Optional[float] = None + + @property + def is_timeout(self) -> bool: + """Check if this hop is a timeout.""" + return self.ip_address is None or self.loss_pct == 100 diff --git a/hyperglass/models/parsing/traceroute.py b/hyperglass/models/parsing/traceroute.py new file mode 100644 index 0000000..ec001cb --- /dev/null +++ b/hyperglass/models/parsing/traceroute.py @@ -0,0 +1,235 @@ +"""Example traceroute parsing module.""" + +# Standard Library +import re +import typing as t + +# Project +from hyperglass.log import log +from hyperglass.models.data.traceroute import TracerouteResult, TracerouteHop + +# Local +from ..main import HyperglassModel + + +class TracerouteParser(HyperglassModel): + """Base traceroute parser.""" + + @classmethod + def parse_text(cls, text: str, target: str, source: str) -> TracerouteResult: + """Parse traceroute text output into structured data.""" + _log = log.bind(parser="TracerouteParser") + + hops = [] + lines = text.strip().split("\n") + + # Common traceroute pattern: hop number, IP/hostname, RTT values + hop_pattern = re.compile(r"^\s*(\d+)\s+(?:(\S+)\s+\(([^)]+)\)|(\S+))\s+(.+)$") + + # RTT 
pattern to extract timing values + rtt_pattern = re.compile(r"(\d+(?:\.\d+)?)\s*ms") + + for line in lines: + line = line.strip() + if not line or line.startswith("traceroute"): + continue + + # Handle timeout lines (* * *) + if "*" in line and re.search(r"\d+\s+\*", line): + hop_match = re.match(r"^\s*(\d+)\s+\*", line) + if hop_match: + hop_number = int(hop_match.group(1)) + hops.append( + TracerouteHop( + hop_number=hop_number, + ip_address=None, + hostname=None, + rtt1=None, + rtt2=None, + rtt3=None, + ) + ) + continue + + # Parse normal hop lines + hop_match = hop_pattern.match(line) + if hop_match: + hop_number = int(hop_match.group(1)) + + # Extract hostname and IP + if hop_match.group(2) and hop_match.group(3): + # Format: hostname (ip) + hostname = hop_match.group(2) + ip_address = hop_match.group(3) + else: + # Format: ip or hostname only + hostname = None + ip_address = hop_match.group(4) + + # Extract RTT values + rtt_text = hop_match.group(5) + rtts = rtt_pattern.findall(rtt_text) + + # Pad with None if less than 3 RTT values + while len(rtts) < 3: + rtts.append(None) + + hops.append( + TracerouteHop( + hop_number=hop_number, + ip_address=ip_address, + hostname=hostname, + rtt1=float(rtts[0]) if rtts[0] else None, + rtt2=float(rtts[1]) if rtts[1] else None, + rtt3=float(rtts[2]) if rtts[2] else None, + ) + ) + + result = TracerouteResult(target=target, source=source, hops=hops) + + _log.info(f"Parsed {len(hops)} hops from traceroute output") + return result + + +class CiscoTracerouteParser(TracerouteParser): + """Cisco-specific traceroute parser.""" + + @classmethod + def parse_text(cls, text: str, target: str, source: str) -> TracerouteResult: + """Parse Cisco traceroute output.""" + _log = log.bind(parser="CiscoTracerouteParser") + + # Cisco traceroute often has format like: + # Type escape sequence to abort. 
+ # Tracing the route to 8.8.8.8 (8.8.8.8) + # 1 192.168.1.1 4 msec 8 msec 4 msec + + lines = text.strip().split("\n") + hops = [] + + for line in lines: + line = line.strip() + if not line or "escape sequence" in line.lower() or "tracing" in line.lower(): + continue + + # Cisco format: hop_number ip_or_hostname rtt1 msec rtt2 msec rtt3 msec + cisco_pattern = re.compile( + r"^\s*(\d+)\s+(\S+)\s+(\d+(?:\.\d+)?)\s*msec\s+(\d+(?:\.\d+)?)\s*msec\s+(\d+(?:\.\d+)?)\s*msec" + ) + + match = cisco_pattern.match(line) + if match: + hop_number = int(match.group(1)) + ip_or_hostname = match.group(2) + rtt1 = float(match.group(3)) + rtt2 = float(match.group(4)) + rtt3 = float(match.group(5)) + + # Try to determine if it's an IP or hostname + import ipaddress + + try: + ipaddress.ip_address(ip_or_hostname) + ip_address = ip_or_hostname + hostname = None + except ValueError: + ip_address = None + hostname = ip_or_hostname + + hops.append( + TracerouteHop( + hop_number=hop_number, + ip_address=ip_address, + hostname=hostname, + rtt1=rtt1, + rtt2=rtt2, + rtt3=rtt3, + ) + ) + + result = TracerouteResult(target=target, source=source, hops=hops) + + _log.info(f"Parsed {len(hops)} hops from Cisco traceroute output") + return result + + +class JuniperTracerouteParser(TracerouteParser): + """Juniper-specific traceroute parser.""" + + @classmethod + def parse_text(cls, text: str, target: str, source: str) -> TracerouteResult: + """Parse Juniper traceroute output.""" + _log = log.bind(parser="JuniperTracerouteParser") + + # Juniper format often like: + # traceroute to 8.8.8.8 (8.8.8.8), 30 hops max, 60 byte packets + # 1 192.168.1.1 (192.168.1.1) 1.234 ms 1.456 ms 1.678 ms + + lines = text.strip().split("\n") + hops = [] + + for line in lines: + line = line.strip() + if not line or line.startswith("traceroute to"): + continue + + # Juniper format often has hostname (ip) followed by RTTs + juniper_pattern = re.compile( + r"^\s*(\d+)\s+(?:(\S+)\s+\(([^)]+)\)|(\S+))\s+([\d\.\s\*ms]+)$" + ) + 
+ match = juniper_pattern.match(line) + if match: + hop_number = int(match.group(1)) + + if match.group(2) and match.group(3): + hostname = match.group(2) + ip_address = match.group(3) + else: + hostname = None + ip_address = match.group(4) + + # Extract RTT values + rtt_text = match.group(5) + rtts = re.findall(r"(\d+(?:\.\d+)?)\s*ms", rtt_text) + + # Handle * for timeouts + if "*" in rtt_text: + timeout_count = rtt_text.count("*") + rtts.extend([None] * timeout_count) + + # Ensure we have exactly 3 RTT values + while len(rtts) < 3: + rtts.append(None) + + hops.append( + TracerouteHop( + hop_number=hop_number, + ip_address=ip_address, + hostname=hostname, + rtt1=float(rtts[0]) if rtts[0] else None, + rtt2=float(rtts[1]) if rtts[1] else None, + rtt3=float(rtts[2]) if rtts[2] else None, + ) + ) + + result = TracerouteResult(target=target, source=source, hops=hops) + + _log.info(f"Parsed {len(hops)} hops from Juniper traceroute output") + return result + + +# Parser mapping by platform +TRACEROUTE_PARSERS = { + "cisco_ios": CiscoTracerouteParser, + "cisco_nxos": CiscoTracerouteParser, + "cisco_xr": CiscoTracerouteParser, + "juniper": JuniperTracerouteParser, + "juniper_junos": JuniperTracerouteParser, + "generic": TracerouteParser, # Fallback +} + + +def get_traceroute_parser(platform: str) -> t.Type[TracerouteParser]: + """Get the appropriate traceroute parser for a platform.""" + return TRACEROUTE_PARSERS.get(platform, TracerouteParser) diff --git a/hyperglass/models/ui.py b/hyperglass/models/ui.py index 3e2c106..f8a304c 100644 --- a/hyperglass/models/ui.py +++ b/hyperglass/models/ui.py @@ -19,7 +19,7 @@ class UIDirective(HyperglassModel): id: str name: str - field_type: str + field_type: t.Union[str, None] groups: t.List[str] description: str info: t.Optional[str] = None diff --git a/hyperglass/plugins/_builtin/__init__.py b/hyperglass/plugins/_builtin/__init__.py index dda2f8f..516ca8d 100644 --- a/hyperglass/plugins/_builtin/__init__.py +++ 
b/hyperglass/plugins/_builtin/__init__.py @@ -10,6 +10,10 @@ from .bgp_route_juniper import BGPRoutePluginJuniper from .mikrotik_garbage_output import MikrotikGarbageOutput from .bgp_routestr_mikrotik import BGPSTRRoutePluginMikrotik from .mikrotik_normalize_input import MikrotikTargetNormalizerInput +from .traceroute_ip_enrichment import ZTracerouteIpEnrichment +from .bgp_route_ip_enrichment import ZBgpRouteIpEnrichment +from .trace_route_mikrotik import TraceroutePluginMikrotik +from .trace_route_huawei import TraceroutePluginHuawei __all__ = ( "BGPRoutePluginArista", @@ -20,5 +24,9 @@ __all__ = ( "MikrotikGarbageOutput", "BGPSTRRoutePluginMikrotik", "MikrotikTargetNormalizerInput", + "ZTracerouteIpEnrichment", + "ZBgpRouteIpEnrichment", + "TraceroutePluginMikrotik", + "TraceroutePluginHuawei", "RemoveCommand", ) diff --git a/hyperglass/plugins/_builtin/bgp_route_ip_enrichment.py b/hyperglass/plugins/_builtin/bgp_route_ip_enrichment.py new file mode 100644 index 0000000..f0782a3 --- /dev/null +++ b/hyperglass/plugins/_builtin/bgp_route_ip_enrichment.py @@ -0,0 +1,114 @@ +"""IP enrichment for structured BGP route data - show path functionality.""" + +# Standard Library +import asyncio +import typing as t + +# Third Party +from pydantic import PrivateAttr + +# Project +from hyperglass.log import log +from hyperglass.plugins._output import OutputPlugin +from hyperglass.models.data.bgp_route import BGPRouteTable + +if t.TYPE_CHECKING: + from hyperglass.models.data import OutputDataModel + from hyperglass.models.api.query import Query + + +class ZBgpRouteIpEnrichment(OutputPlugin): + """Enrich structured BGP route output with IP enrichment for next-hop ASN/organization data.""" + + _hyperglass_builtin: bool = PrivateAttr(True) + platforms: t.Sequence[str] = ( + "mikrotik_routeros", + "mikrotik_switchos", + "mikrotik", + "cisco_ios", + "juniper_junos", + "arista_eos", + "frr", + "huawei", + "huawei_vrpv8", + ) + directives: t.Sequence[str] = ("bgp_route", 
"bgp_community") + common: bool = True + + async def _enrich_async(self, output: BGPRouteTable, enrich_next_hop: bool = True) -> None: + """Async helper to enrich BGP route data.""" + _log = log.bind(plugin=self.__class__.__name__) + + if enrich_next_hop: + try: + # First enrich with next-hop IP information (if enabled) + await output.enrich_with_ip_enrichment() + _log.debug("BGP next-hop IP enrichment completed") + except Exception as e: + _log.error(f"BGP next-hop IP enrichment failed: {e}") + else: + _log.debug("BGP next-hop IP enrichment skipped (disabled in config)") + + try: + # Always enrich AS path ASNs with organization names + await output.enrich_as_path_organizations() + _log.debug("BGP AS path organization enrichment completed") + except Exception as e: + _log.error(f"BGP AS path organization enrichment failed: {e}") + + def process(self, *, output: "OutputDataModel", query: "Query") -> "OutputDataModel": + """Enrich structured BGP route data with next-hop IP enrichment information.""" + + if not isinstance(output, BGPRouteTable): + return output + + _log = log.bind(plugin=self.__class__.__name__) + _log.warning(f"🔍 BGP ROUTE PLUGIN STARTED - Processing {len(output.routes)} BGP routes") + + # Check if IP enrichment is enabled in config + enrich_next_hop = True + try: + from hyperglass.state import use_state + + params = use_state("params") + if not params.structured.ip_enrichment.enabled: + _log.debug("IP enrichment disabled in configuration") + return output + + # Check next-hop enrichment setting but don't exit - we still want ASN org enrichment + enrich_next_hop = params.structured.ip_enrichment.enrich_next_hop + if not enrich_next_hop: + _log.debug( + "Next-hop enrichment disabled in configuration - will skip next-hop lookup but continue with ASN organization enrichment" + ) + except Exception as e: + _log.debug(f"Could not check IP enrichment config: {e}") + + # Use the built-in enrichment method from BGPRouteTable + try: + # Run async enrichment 
in sync context + loop = None + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + # If we're already in an event loop, create a new task + import concurrent.futures + + with concurrent.futures.ThreadPoolExecutor() as executor: + future = executor.submit( + asyncio.run, self._enrich_async(output, enrich_next_hop) + ) + future.result() + else: + loop.run_until_complete(self._enrich_async(output, enrich_next_hop)) + except RuntimeError: + # No event loop, create one + asyncio.run(self._enrich_async(output, enrich_next_hop)) + _log.warning( + f"🔍 BGP ROUTE PLUGIN COMPLETED - ASN organizations: {len(output.asn_organizations)}" + ) + except Exception as e: + _log.error(f"BGP route IP enrichment failed: {e}") + + _log.debug(f"Completed enrichment for BGP routes") + return output diff --git a/hyperglass/plugins/_builtin/bgptools_traceroute_enrichment.py b/hyperglass/plugins/_builtin/bgptools_traceroute_enrichment.py new file mode 100644 index 0000000..2f20ec6 --- /dev/null +++ b/hyperglass/plugins/_builtin/bgptools_traceroute_enrichment.py @@ -0,0 +1,115 @@ +"""IP enrichment for structured traceroute data.""" + +# Standard Library +import socket +import typing as t + +# Third Party +from pydantic import PrivateAttr + +# Project +from hyperglass.log import log +from hyperglass.plugins._output import OutputPlugin +from hyperglass.models.data.traceroute import TracerouteResult + +if t.TYPE_CHECKING: + from hyperglass.models.data import OutputDataModel + from hyperglass.models.api.query import Query + + +class ZBgpToolsTracerouteEnrichment(OutputPlugin): + """Enrich structured traceroute output with IP enrichment ASN/organization data and reverse DNS.""" + + _hyperglass_builtin: bool = PrivateAttr(True) + platforms: t.Sequence[str] = ( + "mikrotik_routeros", + "mikrotik_switchos", + "mikrotik", + "cisco_ios", + "juniper_junos", + ) + directives: t.Sequence[str] = ("traceroute", "MikroTik_Traceroute") + common: bool = True + + def _enrich_ip_with_bgptools(self, 
ip: str) -> t.Dict[str, t.Any]: + """Query BGP.tools whois interface for IP enrichment data.""" + try: + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(5) + sock.connect(("bgp.tools", 43)) + + query = f"begin\nverbose\n{ip}\nend\n" + sock.sendall(query.encode()) + + response = b"" + while True: + data = sock.recv(4096) + if not data: + break + response += data + sock.close() + + response_text = response.decode("utf-8", errors="ignore").strip() + log.debug(f"BGP.tools response for {ip}: {response_text}") + + if response_text and "|" in response_text: + lines = response_text.split("\n") + for line in lines: + if "|" in line and ip in line: + parts = [p.strip() for p in line.split("|")] + if len(parts) >= 7: + return { + "asn": parts[0] if parts[0] else None, + "org": parts[6] if len(parts) > 6 else None, + "prefix": parts[2] if parts[2] else None, + "country": parts[3] if parts[3] else None, + "rir": parts[4] if parts[4] else None, + "allocated": parts[5] if parts[5] else None, + } + + except Exception as e: + log.debug(f"BGP.tools enrichment failed for {ip}: {e}") + + return { + "asn": None, + "org": None, + "prefix": None, + "country": None, + "rir": None, + "allocated": None, + } + + def _reverse_dns_lookup(self, ip: str) -> t.Optional[str]: + """Perform reverse DNS lookup for IP address.""" + try: + hostname = socket.gethostbyaddr(ip)[0] + log.debug(f"Reverse DNS for {ip}: {hostname}") + return hostname + except (socket.herror, socket.gaierror, socket.timeout) as e: + log.debug(f"Reverse DNS lookup failed for {ip}: {e}") + return None + + def process(self, *, output: "OutputDataModel", query: "Query") -> "OutputDataModel": + """Enrich structured traceroute data with BGP.tools and reverse DNS information.""" + + if not isinstance(output, TracerouteResult): + return output + + _log = log.bind(plugin=self.__class__.__name__) + _log.debug(f"Starting enrichment for {len(output.hops)} traceroute hops") + + for hop in output.hops: + if 
hop.ip_address and hop.asn is None: + bgp_data = self._enrich_ip_with_bgptools(hop.ip_address) + hop.asn = bgp_data.get("asn") + hop.org = bgp_data.get("org") + hop.prefix = bgp_data.get("prefix") + hop.country = bgp_data.get("country") + hop.rir = bgp_data.get("rir") + hop.allocated = bgp_data.get("allocated") + + if hop.hostname is None: + hop.hostname = self._reverse_dns_lookup(hop.ip_address) + + _log.debug(f"Completed enrichment for traceroute to {output.target}") + return output diff --git a/hyperglass/plugins/_builtin/mikrotik_garbage_output.py b/hyperglass/plugins/_builtin/mikrotik_garbage_output.py index 8b48fb0..435cd84 100644 --- a/hyperglass/plugins/_builtin/mikrotik_garbage_output.py +++ b/hyperglass/plugins/_builtin/mikrotik_garbage_output.py @@ -24,18 +24,26 @@ class MikrotikGarbageOutput(OutputPlugin): _hyperglass_builtin: bool = PrivateAttr(True) platforms: t.Sequence[str] = ("mikrotik_routeros", "mikrotik_switchos", "mikrotik") - # Apply to ALL commands on MikroTik platforms - common: bool = True + # Only apply to MikroTik platforms, not all devices + common: bool = False def _clean_traceroute_output(self, raw_output: str) -> str: - """Clean MikroTik traceroute output specifically.""" + """Clean MikroTik traceroute output specifically. + + Important: Traceroute hops are sequential - each line represents a unique hop position. + We should NOT deduplicate by IP address as the same IP can appear at different hops. + Order matters for traceroute results. + + However, we can aggregate consecutive timeout lines at the END of the traceroute + to avoid showing 10+ meaningless timeout entries. 
+ """ if not raw_output or not raw_output.strip(): return "" lines = raw_output.splitlines() cleaned_lines = [] found_header = False - hop_data = {} # IP -> (line, sent_count) + data_lines = [] for line in lines: stripped = line.strip() @@ -59,54 +67,52 @@ class MikrotikGarbageOutput(OutputPlugin): found_header = True continue - # Only process data lines after we've found the header + # After finding header, collect all data lines if found_header and stripped: - # Try to extract IP address (IPv4 or IPv6) from the line - ipv4_match = re.match(r"^(\d+\.\d+\.\d+\.\d+)", stripped) - ipv6_match = re.match(r"^([0-9a-fA-F:]+)", stripped) if not ipv4_match else None + data_lines.append(line) - if ipv4_match or ipv6_match: - ip = ipv4_match.group(1) if ipv4_match else ipv6_match.group(1) + # Process data lines to aggregate trailing timeouts + if data_lines: + processed_lines = [] + trailing_timeout_count = 0 - # Extract the SENT count from the line (look for pattern like "0% 3" or "100% 2") - sent_match = re.search(r"\s+(\d+)%\s+(\d+)\s+", stripped) - sent_count = int(sent_match.group(2)) if sent_match else 0 + # Work backwards to count trailing timeouts + for i in range(len(data_lines) - 1, -1, -1): + line = data_lines[i] + if ( + "100%" in line.strip() + and "timeout" in line.strip() + and not line.strip().startswith( + ("1", "2", "3", "4", "5", "6", "7", "8", "9", "0") + ) + ): + # This is a timeout line (no IP address at start) + trailing_timeout_count += 1 + else: + # Found a non-timeout line, stop counting + break - # Keep the line with the highest SENT count (most complete data) - if ip not in hop_data or sent_count > hop_data[ip][1]: - hop_data[ip] = (line, sent_count) - elif ( - sent_count == hop_data[ip][1] - and "timeout" not in stripped - and "timeout" in hop_data[ip][0] - ): - # If SENT counts are equal, prefer non-timeout over timeout - hop_data[ip] = (line, sent_count) - elif "100%" in stripped and "timeout" in stripped: - # Skip standalone timeout lines 
without IP - continue + # Add non-trailing lines as-is + non_trailing_count = len(data_lines) - trailing_timeout_count + processed_lines.extend(data_lines[:non_trailing_count]) - # Reconstruct the output with only the best results - if found_header and hop_data: - result_lines = [cleaned_lines[0]] # Header + # Handle trailing timeouts + if trailing_timeout_count > 0: + if trailing_timeout_count <= 3: + # If 3 or fewer trailing timeouts, show them all + processed_lines.extend(data_lines[non_trailing_count:]) + else: + # If more than 3 trailing timeouts, show first 2 and aggregate the rest + processed_lines.extend(data_lines[non_trailing_count : non_trailing_count + 2]) + remaining_timeouts = trailing_timeout_count - 2 + # Add an aggregation line + processed_lines.append( + f" ... ({remaining_timeouts} more timeout hops)" + ) - # Sort by the order IPs first appeared, but use the best data for each - seen_ips = [] - for line in lines: - stripped = line.strip() - if found_header: - ipv4_match = re.match(r"^(\d+\.\d+\.\d+\.\d+)", stripped) - ipv6_match = re.match(r"^([0-9a-fA-F:]+)", stripped) if not ipv4_match else None + cleaned_lines.extend(processed_lines) - if ipv4_match or ipv6_match: - ip = ipv4_match.group(1) if ipv4_match else ipv6_match.group(1) - if ip not in seen_ips and ip in hop_data: - seen_ips.append(ip) - result_lines.append(hop_data[ip][0]) - - return "\n".join(result_lines) - - return raw_output + return "\n".join(cleaned_lines) def process(self, *, output: OutputType, query: "Query") -> Series[str]: """ diff --git a/hyperglass/plugins/_builtin/trace_route_huawei.py b/hyperglass/plugins/_builtin/trace_route_huawei.py new file mode 100644 index 0000000..bee4c45 --- /dev/null +++ b/hyperglass/plugins/_builtin/trace_route_huawei.py @@ -0,0 +1,253 @@ +"""Parse Huawei traceroute output to structured data.""" + +# Standard Library +import re +import typing as t + +# Third Party +from pydantic import PrivateAttr + +# Project +from hyperglass.log import log 
+from hyperglass.exceptions.private import ParsingError +from hyperglass.models.data.traceroute import TracerouteResult, TracerouteHop + +# Local +from .._output import OutputPlugin + +if t.TYPE_CHECKING: + from hyperglass.models.data import OutputDataModel + from hyperglass.models.api.query import Query + from .._output import OutputType + + +def _normalize_output(output: t.Union[str, t.Sequence[str]]) -> t.List[str]: + """Ensure the output is a list of strings.""" + if isinstance(output, str): + return [output] + return list(output) + + +def parse_huawei_traceroute( + output: t.Union[str, t.Sequence[str]], target: str, source: str +) -> "OutputDataModel": + """Parse a Huawei traceroute text response.""" + result = None + out_list = _normalize_output(output) + + _log = log.bind(plugin=TraceroutePluginHuawei.__name__) + combined_output = "\n".join(out_list) + + # DEBUG: Log the raw output we're about to parse + _log.debug(f"=== HUAWEI TRACEROUTE PLUGIN RAW INPUT ===") + _log.debug(f"Target: {target}, Source: {source}") + _log.debug(f"Output pieces: {len(out_list)}") + _log.debug(f"Combined output length: {len(combined_output)}") + _log.debug(f"First 500 chars: {repr(combined_output[:500])}") + _log.debug(f"=== END PLUGIN RAW INPUT ===") + + try: + result = HuaweiTracerouteTable.parse_text(combined_output, target, source) + except Exception as exc: + _log.error(f"Failed to parse Huawei traceroute: {exc}") + raise ParsingError(f"Failed to parse Huawei traceroute output: {exc}") from exc + + _log.debug(f"=== FINAL STRUCTURED TRACEROUTE RESULT ===") + _log.debug(f"Successfully parsed {len(result.hops)} traceroute hops") + _log.debug(f"Target: {target}, Source: {source}") + for hop in result.hops: + _log.debug(f"Hop {hop.hop_number}: {hop.ip_address or '*'} - RTT: {hop.rtt1 or 'timeout'}") + _log.debug(f"Raw output length: {len(combined_output)} characters") + _log.debug(f"=== END STRUCTURED RESULT ===") + + return result + + +class 
class HuaweiTracerouteTable(TracerouteResult):
    """Huawei traceroute table parser."""

    @classmethod
    def parse_text(cls, text: str, target: str, source: str) -> TracerouteResult:
        """Parse Huawei traceroute text output into structured data.

        Each non-empty line is matched against two shapes: a replying hop
        (``<hop> <address> <rtt> ms``) or a timed-out hop (``<hop> *``).
        Header lines and anything else are skipped. When more than five hops
        were parsed, at most three timeout hops are kept after the last
        replying hop.

        Args:
            text: Complete traceroute output.
            target: Traceroute destination, stored on the result.
            source: Name of the querying device, stored on the result.

        Returns:
            A ``TracerouteResult`` holding the parsed hops and ``text`` as raw output.
        """
        _log = log.bind(parser="HuaweiTracerouteTable")

        _log.debug("=== RAW HUAWEI TRACEROUTE INPUT ===")
        _log.debug(f"Target: {target}, Source: {source}")
        _log.debug(f"Raw text length: {len(text)} characters")
        _log.debug(f"Raw text:\n{repr(text)}")
        _log.debug("=== END RAW INPUT ===")

        hops = []
        lines = text.strip().split("\n")

        _log.debug(f"Split into {len(lines)} lines")

        # Pattern for normal hop: "1 172.24.165.197 1 ms"
        hop_pattern = re.compile(r"^\s*(\d+)\s+(\S+)\s+(\d+(?:\.\d+)?)\s*ms\s*$")

        # Pattern for timeout hop: "3 *"
        timeout_pattern = re.compile(r"^\s*(\d+)\s+\*\s*$")

        # Markers are lowercase because they are compared against the
        # lowercased line; a mixed-case marker could never match.
        header_markers = ("traceroute to", "max hops", "press ctrl_c")

        for i, line in enumerate(lines):
            line = line.strip()
            _log.debug(f"Line {i:2d}: {repr(line)}")

            if not line:
                continue

            # Skip header lines
            lowered = line.lower()
            if any(marker in lowered for marker in header_markers):
                _log.debug(f"Line {i:2d}: SKIPPING HEADER")
                continue

            # Try to match normal hop
            hop_match = hop_pattern.match(line)
            if hop_match:
                hop_number = int(hop_match.group(1))
                ip_address = hop_match.group(2)
                rtt = float(hop_match.group(3))

                _log.debug(f"Line {i:2d}: NORMAL HOP - {hop_number}: {ip_address} {rtt}ms")

                hops.append(
                    TracerouteHop(
                        hop_number=hop_number,
                        ip_address=ip_address,
                        display_ip=None,  # Huawei doesn't truncate IPs like MikroTik
                        hostname=None,  # Will be populated by IP enrichment
                        rtt1=rtt,
                        rtt2=None,  # Huawei shows only one RTT per line
                        rtt3=None,
                        # MikroTik-specific statistics (populate for consistency)
                        sent_count=1,  # Huawei sends 1 ping per hop
                        last_rtt=rtt,  # Same as the only RTT
                        best_rtt=rtt,  # Same as the only RTT
                        worst_rtt=rtt,  # Same as the only RTT
                        loss_pct=0,  # No loss if we got a response
                        # BGP enrichment fields (will be populated by enrichment plugin)
                        asn=None,
                        org=None,
                        prefix=None,
                        country=None,
                        rir=None,
                        allocated=None,
                    )
                )
                continue

            # Try to match timeout hop
            timeout_match = timeout_pattern.match(line)
            if timeout_match:
                hop_number = int(timeout_match.group(1))

                _log.debug(f"Line {i:2d}: TIMEOUT HOP - {hop_number}: *")

                hops.append(
                    TracerouteHop(
                        hop_number=hop_number,
                        ip_address=None,
                        display_ip=None,
                        hostname=None,
                        rtt1=None,
                        rtt2=None,
                        rtt3=None,
                        # MikroTik-specific statistics for timeout
                        sent_count=1,  # Still sent 1 ping, just timed out
                        last_rtt=None,
                        best_rtt=None,
                        worst_rtt=None,
                        loss_pct=100,  # 100% loss for timeout
                        # BGP enrichment fields (all None for timeout)
                        asn=None,
                        org=None,
                        prefix=None,
                        country=None,
                        rir=None,
                        allocated=None,
                    )
                )
                continue

            _log.debug(f"Line {i:2d}: UNMATCHED - skipping")

        _log.debug(f"Before cleanup: {len(hops)} hops")

        # Trim excessive trailing timeouts. Short traces (five hops or fewer)
        # and traces with no replying hop at all are left untouched.
        if len(hops) > 5:
            last_real_hop = next(
                (idx for idx in range(len(hops) - 1, -1, -1) if not hops[idx].is_timeout),
                -1,
            )

            if last_real_hop >= 0:
                # Everything after the last replying hop is a timeout by
                # definition, so keeping "at most 3" is a plain slice.
                max_timeouts = 3
                keep_until = last_real_hop + 1 + max_timeouts
                for dropped in hops[keep_until:]:
                    _log.debug(f"Removing excessive timeout hop {dropped.hop_number}")
                hops = hops[:keep_until]

        _log.debug(f"After cleanup: {len(hops)} hops")

        for hop in hops:
            if hop.is_timeout:
                _log.debug(f"Final hop {hop.hop_number}: * (timeout)")
            else:
                _log.debug(f"Final hop {hop.hop_number}: {hop.ip_address} - {hop.rtt1}ms")

        _log.info(f"Parsed {len(hops)} hops from Huawei traceroute")

        return TracerouteResult(
            target=target,
            source=source,
            hops=hops,
            max_hops=64,  # Default for Huawei
            packet_size=40,  # From the header in sample output
            raw_output=text,
            asn_organizations={},
        )


class TraceroutePluginHuawei(OutputPlugin):
    """Parse Huawei traceroute output."""

    _hyperglass_builtin: bool = PrivateAttr(True)
    platforms: t.Sequence[str] = ("huawei", "huawei_vrpv8")
    directives: t.Sequence[str] = ("__hyperglass_huawei_traceroute__",)
    common: bool = False

    def process(self, output: "OutputType", query: "Query") -> "OutputType":
        """Process Huawei traceroute output.

        The target is the string form of ``query.query_target`` (or "unknown"
        when empty). The source is the device's ``display_name``, falling back
        to its ``name``, then to "unknown".
        """
        # Extract target and source with fallbacks
        target = str(query.query_target) if query.query_target else "unknown"
        source = "unknown"

        if hasattr(query, "device") and query.device:
            source = getattr(query.device, "display_name", None) or getattr(
                query.device, "name", "unknown"
            )

        return parse_huawei_traceroute(
            output=output,
            target=target,
            source=source,
        )
def _normalize_output(output: t.Union[str, t.Sequence[str]]) -> t.List[str]:
    """Ensure the output is a list of strings.

    A bare string is wrapped in a one-element list; any other sequence is
    copied into a new list.
    """
    if isinstance(output, str):
        return [output]
    return list(output)


def parse_mikrotik_traceroute(
    output: t.Union[str, t.Sequence[str]], target: str, source: str
) -> "OutputDataModel":
    """Parse a MikroTik traceroute text response.

    The output pieces are joined with newlines, parsed in one pass by
    ``MikrotikTracerouteTable.parse_text`` and converted with
    ``traceroute_result()``. The joined text is stored on the result as
    ``raw_output``.

    Args:
        output: Device output, either one string or a sequence of strings.
        target: Traceroute destination, passed through to the parser.
        source: Name of the querying device, passed through to the parser.

    Returns:
        The structured traceroute result.

    Raises:
        ParsingError: On a pydantic ``ValidationError`` or any other exception
            raised while parsing; the original exception is chained.
    """
    out_list = _normalize_output(output)

    _log = log.bind(plugin=TraceroutePluginMikrotik.__name__)
    combined_output = "\n".join(out_list)

    # DEBUG: Log the raw output we're about to parse
    _log.debug("=== MIKROTIK TRACEROUTE PLUGIN RAW INPUT ===")
    _log.debug(f"Target: {target}, Source: {source}")
    _log.debug(f"Output pieces: {len(out_list)}")
    for i, piece in enumerate(out_list):
        _log.debug(f"Output piece {i}: {repr(piece[:200])}...")  # Truncate for readability
    _log.debug(f"Combined output length: {len(combined_output)}")

    # Check if this looks like cleaned or raw output
    contains_paging = "-- [Q quit|C-z pause]" in combined_output
    contains_multiple_tables = combined_output.count("ADDRESS") > 1
    _log.debug(f"Contains paging prompts: {contains_paging}")
    _log.debug(f"Contains multiple ADDRESS headers: {contains_multiple_tables}")
    _log.debug(f"First 500 chars: {repr(combined_output[:500])}")
    _log.debug("=== END PLUGIN RAW INPUT ===")

    try:
        # Pass the entire combined output to the parser at once
        validated = MikrotikTracerouteTable.parse_text(combined_output, target, source)
        result = validated.traceroute_result()

        # Store the CLEANED output (after garbage removal) for "Copy Raw" functionality
        # This is the processed output from MikrotikGarbageOutput plugin, not the original raw router output
        result.raw_output = combined_output

        # DEBUG: Log the final structured result
        _log.debug("=== FINAL STRUCTURED TRACEROUTE RESULT ===")
        _log.debug(f"Successfully parsed {len(validated.hops)} traceroute hops")
        _log.debug(f"Target: {result.target}, Source: {result.source}")
        for hop in result.hops:
            _log.debug(
                f"Hop {hop.hop_number}: {hop.ip_address} - Loss: {hop.loss_pct}% - Sent: {hop.sent_count}"
            )
        _log.debug(f"AS Path: {result.as_path_summary}")
        _log.debug(
            f"Cleaned raw output length: {len(result.raw_output) if result.raw_output else 0} characters"
        )
        _log.debug("Copy button will show CLEANED output (after MikrotikGarbageOutput processing)")
        _log.debug("=== END STRUCTURED RESULT ===")

    except ValidationError as err:
        _log.critical(err)
        raise ParsingError(err) from err
    except Exception as err:
        _log.bind(error=str(err)).critical("Failed to parse MikroTik traceroute output")
        raise ParsingError("Error parsing traceroute response data") from err

    return result


def _query_target_text(query: "Query") -> str:
    """Return the query's target as display text, or "unknown" if absent.

    ``query_target`` is preferred because that is the attribute the sibling
    Huawei traceroute plugin reads; the older ``target`` attribute is still
    honoured as a fallback. A list or tuple of targets is joined with ", ".
    """
    raw = getattr(query, "query_target", None)
    if raw is None:
        raw = getattr(query, "target", None)
    if not raw:
        return "unknown"
    if isinstance(raw, (list, tuple)):
        return ", ".join(str(item) for item in raw)
    return str(raw)


class TraceroutePluginMikrotik(OutputPlugin):
    """Convert MikroTik traceroute output to structured format."""

    _hyperglass_builtin: bool = PrivateAttr(True)
    platforms: t.Sequence[str] = ("mikrotik_routeros", "mikrotik_switchos", "mikrotik")
    directives: t.Sequence[str] = ("__hyperglass_mikrotik_traceroute__",)

    def process(self, *, output: "OutputType", query: "Query") -> "OutputDataModel":
        """Process the MikroTik traceroute output.

        The target comes from the query (see ``_query_target_text``). The
        source is ``query.source`` if present, overridden by the device name
        when the query carries a device, otherwise "unknown".
        """
        target = _query_target_text(query)
        source = getattr(query, "source", "unknown")

        if hasattr(query, "device") and query.device:
            source = getattr(query.device, "name", source)

        return parse_mikrotik_traceroute(output, target, source)
class ZTracerouteIpEnrichment(OutputPlugin):
    """Enrich structured traceroute output with IP enrichment ASN/organization data and reverse DNS."""

    _hyperglass_builtin: bool = PrivateAttr(True)
    platforms: t.Sequence[str] = (
        "mikrotik_routeros",
        "mikrotik_switchos",
        "mikrotik",
        "cisco_ios",
        "juniper_junos",
        "huawei",
        "huawei_vrpv8",
    )
    directives: t.Sequence[str] = ("traceroute", "MikroTik_Traceroute")
    common: bool = True

    def _reverse_dns_lookup(self, ip: str) -> t.Optional[str]:
        """Perform reverse DNS lookup for an IP address.

        Returns the primary hostname, or ``None`` when the lookup fails.
        """
        try:
            hostname = socket.gethostbyaddr(ip)[0]
        except OSError as e:
            # socket.herror, socket.gaierror and socket.timeout are all
            # OSError subclasses; the lookup is best-effort, so any resolver
            # failure simply yields no hostname.
            log.debug(f"Reverse DNS lookup failed for {ip}: {e}")
            return None
        log.debug(f"Reverse DNS for {ip}: {hostname}")
        return hostname

    def _fill_missing_hostnames(self, output: TracerouteResult) -> None:
        """Set ``hostname`` via reverse DNS on every hop that has an IP but no hostname."""
        for hop in output.hops:
            if hop.ip_address and hop.hostname is None:
                hop.hostname = self._reverse_dns_lookup(hop.ip_address)

    async def _enrich_async(self, output: TracerouteResult) -> None:
        """Async helper to enrich traceroute data."""
        # First enrich with IP information (ASN numbers)
        await output.enrich_with_ip_enrichment()

        # Then enrich ASN numbers with organization names
        await output.enrich_asn_organizations()

    def _run_enrichment(self, output: TracerouteResult) -> None:
        """Run the async enrichment to completion from synchronous code.

        Loop detection is kept separate from the enrichment call so that a
        ``RuntimeError`` raised by the enrichment itself is not mistaken for
        "no event loop" and does not trigger a second enrichment run.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: drive the coroutine directly.
            asyncio.run(self._enrich_async(output))
            return

        # A loop is already running in this thread and cannot be re-entered,
        # so run the coroutine on a fresh loop in a worker thread and wait.
        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
            executor.submit(asyncio.run, self._enrich_async(output)).result()

    def process(self, *, output: "OutputDataModel", query: "Query") -> "OutputDataModel":
        """Enrich structured traceroute data with IP enrichment and reverse DNS information.

        Output that is not a ``TracerouteResult`` is returned untouched. When
        IP enrichment is disabled in the configuration only reverse DNS is
        performed. Enrichment failures are logged and do not prevent the
        reverse DNS pass.
        """

        if not isinstance(output, TracerouteResult):
            return output

        _log = log.bind(plugin=self.__class__.__name__)
        _log.debug(f"Starting IP enrichment for {len(output.hops)} traceroute hops")

        # Check if IP enrichment is enabled in config. Only the config read is
        # guarded; if it cannot be read, enrichment is attempted.
        enrichment_enabled = True
        try:
            from hyperglass.state import use_state

            params = use_state("params")
            enrichment_enabled = bool(params.structured.ip_enrichment.enabled)
        except Exception as e:
            _log.debug(f"Could not check IP enrichment config: {e}")

        if not enrichment_enabled:
            _log.debug("IP enrichment disabled in configuration")
            # Still do reverse DNS if enrichment is disabled
            self._fill_missing_hostnames(output)
            return output

        # Use the built-in enrichment method from TracerouteResult
        try:
            self._run_enrichment(output)
            _log.debug("IP enrichment completed successfully")
        except Exception as e:
            _log.error(f"IP enrichment failed: {e}")

        # Add reverse DNS lookups for any hops that don't have hostnames
        self._fill_missing_hostnames(output)

        _log.debug(f"Completed enrichment for traceroute to {output.target}")
        return output
{ @@ -200,7 +204,11 @@ export const LookingGlassForm = (): JSX.Element => { - + {directive !== null && ( { + const { data, rawData } = props; + const cellId = data.column.id as keyof TracerouteHop; + + // DEBUG: Log row values to see what's available + console.log('TracerouteCell debug:', { + cellId, + value: data.value, + rowValues: data.row?.values, + rowOriginal: data.row?.original + }); + + // For IP address field, prefer display_ip if available (for truncated IPv6) + const getIPValue = () => { + if (cellId === 'ip_address') { + const hop = data.row?.original as TracerouteHop | undefined; + if (hop && hop.display_ip) { + return hop.display_ip; + } + if (hop && hop.ip_address) { + return hop.ip_address; + } + } + return data.value; + }; + + const component = { + hop_number: , + ip_address: , + display_ip: , // For truncated IPv6 display + hostname: , + loss_pct: , + sent_count: , + last_rtt: , + avg_rtt: , + best_rtt: , + worst_rtt: , + asn: , + org: null, // Hidden, displayed as part of ASN + prefix: , + country: , + rir: , + allocated: , + rtt1: null, // Not displayed directly in table + rtt2: null, // Not displayed directly in table + rtt3: null, // Not displayed directly in table + }; + + return component[cellId] ?? 
; +}; \ No newline at end of file diff --git a/hyperglass/ui/components/output/traceroute-fields.tsx b/hyperglass/ui/components/output/traceroute-fields.tsx new file mode 100644 index 0000000..eaa8484 --- /dev/null +++ b/hyperglass/ui/components/output/traceroute-fields.tsx @@ -0,0 +1,169 @@ +import { Text, Tooltip, Badge } from '@chakra-ui/react'; +import { Else, If, Then } from 'react-if'; +import { useColorValue } from '~/hooks'; + +import type { TextProps } from '@chakra-ui/react'; + +interface MonoFieldProps extends TextProps { + v: React.ReactNode; +} + +interface ASNFieldProps extends TextProps { + asn: string | null; + org: string | null; +} + +interface HostnameFieldProps extends TextProps { + hostname: string | null; +} + +interface LatencyFieldProps extends TextProps { + rtt: number | null; +} + +interface LossFieldProps extends TextProps { + loss: number | null; +} + +export const MonoField = (props: MonoFieldProps): JSX.Element => { + const { v, ...rest } = props; + + // Handle empty, null, undefined values and timeout indicators + if (v === null || v === undefined || (typeof v === 'string' && (v.trim() === '' || v === 'None'))) { + return ( + + — + + ); + } + + return ( + + {v} + + ); +}; + +export const ASNField = (props: ASNFieldProps): JSX.Element => { + const { asn, org, ...rest } = props; + + if (!asn || asn === 'None' || asn === 'null') { + return ( + + — + + ); + } + + // Display ASN as-is (no prefix added since backend now sends clean format) + const asnDisplay = asn; // Just use the value directly: "12345" or "IXP" + const tooltipLabel = org && org !== 'None' ? 
`${asnDisplay} - ${org}` : asnDisplay; + + return ( + + + {asnDisplay} + + + ); +}; + +export const HostnameField = (props: HostnameFieldProps): JSX.Element => { + const { hostname, ...rest } = props; + + if (!hostname || hostname === 'None' || hostname === 'null') { + return ( + + — + + ); + } + + return ( + + + {hostname} + + + ); +}; + +export const LatencyField = (props: LatencyFieldProps): JSX.Element => { + const { rtt, ...rest } = props; + + if (rtt === null || rtt === undefined) { + return ( + + * + + ); + } + + // Color-code latency: green < 50ms, yellow < 200ms, red >= 200ms + const getLatencyColor = (latency: number) => { + if (latency < 50) return 'green.500'; + if (latency < 200) return 'yellow.500'; + return 'red.500'; + }; + + return ( + + {rtt.toFixed(1)}ms + + ); +}; + +export const LossField = (props: LossFieldProps): JSX.Element => { + const { loss, ...rest } = props; + + if (loss === null || loss === undefined) { + return ( + + — + + ); + } + + // Color-code loss: green = 0%, yellow < 50%, red >= 50% + const getLossColor = (lossPercent: number) => { + if (lossPercent === 0) return 'green.500'; + if (lossPercent < 50) return 'yellow.500'; + return 'red.500'; + }; + + const bgColor = useColorValue( + loss === 0 ? 'green.50' : loss < 50 ? 'yellow.50' : 'red.50', + loss === 0 ? 'green.900' : loss < 50 ? 
'yellow.900' : 'red.900' + ); + + return ( + + {loss}% + + ); +}; \ No newline at end of file diff --git a/hyperglass/ui/components/output/traceroute-table.tsx b/hyperglass/ui/components/output/traceroute-table.tsx new file mode 100644 index 0000000..90a9464 --- /dev/null +++ b/hyperglass/ui/components/output/traceroute-table.tsx @@ -0,0 +1,38 @@ +import { Flex } from '@chakra-ui/react'; +import { Table } from '~/components'; +import { TracerouteCell } from './traceroute-cell'; + +import type { FlexProps } from '@chakra-ui/react'; +import type { TracerouteTableColumn, TracerouteCellRenderProps } from '~/types'; + +type TracerouteTableProps = Swap; + +// Column definition for the traceroute table using BGP table structure +// Format: "Hop | IP | HostName (reverse dns) | ASN | Loss | Sent | Last | AVG | BEST | Worst" +const tracerouteColumns: TracerouteTableColumn[] = [ + { Header: 'Hop', accessor: 'hop_number', align: 'center', hidden: false }, + { Header: 'IP Address', accessor: 'ip_address', align: 'left', hidden: false }, + { Header: 'Hostname', accessor: 'hostname', align: 'left', hidden: false }, + { Header: 'ASN', accessor: 'asn', align: 'center', hidden: false }, + { Header: 'Loss', accessor: 'loss_pct', align: 'center', hidden: false }, + { Header: 'Sent', accessor: 'sent_count', align: 'center', hidden: false }, + { Header: 'Last', accessor: 'last_rtt', align: 'right', hidden: false }, + { Header: 'AVG', accessor: 'avg_rtt', align: 'right', hidden: false }, + { Header: 'Best', accessor: 'best_rtt', align: 'right', hidden: false }, + { Header: 'Worst', accessor: 'worst_rtt', align: 'right', hidden: false }, +]; + +export const TracerouteTable = (props: TracerouteTableProps): JSX.Element => { + const { children: data, ...rest } = props; + + return ( + + + columns={tracerouteColumns as any} + bordersHorizontal + data={data.hops} + Cell={(d: TracerouteCellRenderProps) => } + /> + + ); +}; \ No newline at end of file diff --git 
a/hyperglass/ui/components/path/chart.tsx b/hyperglass/ui/components/path/chart.tsx index e5e95f3..a839f18 100644 --- a/hyperglass/ui/components/path/chart.tsx +++ b/hyperglass/ui/components/path/chart.tsx @@ -1,4 +1,4 @@ -import { Badge, Box, Flex, SkeletonText, VStack } from '@chakra-ui/react'; +import { Badge, Box, Flex, VStack } from '@chakra-ui/react'; import { useMemo } from 'react'; import ReactFlow, { Background, @@ -9,14 +9,14 @@ import ReactFlow, { isEdge, } from 'reactflow'; import { useConfig } from '~/context'; -import { useASNDetail, useColorToken, useColorValue } from '~/hooks'; +import { useColorToken, useColorValue } from '~/hooks'; import { Controls } from './controls'; import { useElements } from './use-elements'; import type { NodeProps as ReactFlowNodeProps } from 'reactflow'; interface ChartProps { - data: StructuredResponse; + data: AllStructuredResponses; } interface NodeProps extends Omit { @@ -70,23 +70,13 @@ const ASNode = (props: NodeProps): JSX.Element => { const color = useColorValue('black', 'white'); const bg = useColorValue('white', 'whiteAlpha.200'); - const { data: asnData, isError, isLoading } = useASNDetail(String(asn)); - return ( <> {hasChildren && } - {isLoading ? ( - - - - ) : !isError && asnData?.data?.asn.organization?.orgName ? 
/**
 * Build the React Flow nodes and edges for the AS path chart.
 *
 * The result is memoized and only rebuilt when `base` or `data` changes.
 */
export function useElements(base: BasePath, data: AllStructuredResponses): FlowElement[] {
  return useMemo(() => [...buildElements(base, data)], [base, data]);
}

/**
 * True when `value` is a non-null object whose `key` property is an array.
 *
 * The object check comes first because the `in` operator and property access
 * throw a TypeError on `null`/`undefined`, which a response that has not
 * arrived yet can be at runtime.
 */
function hasArrayProperty(value: unknown, key: string): boolean {
  return (
    typeof value === 'object' &&
    value !== null &&
    Array.isArray((value as Record<string, unknown>)[key])
  );
}

/**
 * Check if data contains BGP routes
 */
function isBGPData(data: AllStructuredResponses): data is BGPStructuredOutput {
  return hasArrayProperty(data, 'routes');
}

/**
 * Check if data contains traceroute hops
 */
function isTracerouteData(data: AllStructuredResponses): data is TracerouteStructuredOutput {
  return hasArrayProperty(data, 'hops');
}
* @see https://github.com/MrBlenny/react-flow-chart/issues/61 */ function* buildElements( base: BasePath, - data: StructuredResponse, + data: AllStructuredResponses, ): Generator> { - const { routes } = data; - // Eliminate empty AS paths & deduplicate non-empty AS paths. Length should be same as count minus empty paths. - const asPaths = routes.filter(r => r.as_path.length !== 0).map(r => [...new Set(r.as_path)]); + let asPaths: string[][] = []; + let asnOrgs: Record = {}; + + if (isBGPData(data)) { + // Handle BGP routes with AS paths + const { routes } = data; + asPaths = routes + .filter(r => r.as_path.length !== 0) + .map(r => { + const uniqueAsns = [...new Set(r.as_path.map(asn => String(asn)))]; + // Remove the base ASN if it's the first hop to avoid duplication + return uniqueAsns[0] === base.asn ? uniqueAsns.slice(1) : uniqueAsns; + }) + .filter(path => path.length > 0); // Remove empty paths + + // Get ASN organization mapping if available + asnOrgs = (data as any).asn_organizations || {}; + + // Debug: Log BGP ASN organization data + if (Object.keys(asnOrgs).length > 0) { + console.debug('BGP ASN organizations loaded:', asnOrgs); + } else { + console.warn('BGP ASN organizations not found or empty'); + } + } else if (isTracerouteData(data)) { + // Handle traceroute hops - build AS path from hop ASNs + const hopAsns: string[] = []; + let currentAsn = ''; + + for (const hop of data.hops) { + if (hop.asn && hop.asn !== 'None' && hop.asn !== currentAsn) { + currentAsn = hop.asn; + hopAsns.push(hop.asn); + } + } + + if (hopAsns.length > 0) { + // Remove the base ASN if it's the first hop to avoid duplication + const filteredAsns = hopAsns[0] === base.asn ? 
hopAsns.slice(1) : hopAsns; + if (filteredAsns.length > 0) { + asPaths = [filteredAsns]; + } + } + + // Get ASN organization mapping if available + asnOrgs = (data as any).asn_organizations || {}; + + // Debug: Log traceroute ASN organization data + if (Object.keys(asnOrgs).length > 0) { + console.debug('Traceroute ASN organizations loaded:', asnOrgs); + } else { + console.warn('Traceroute ASN organizations not found or empty'); + } + } + + if (asPaths.length === 0) { + return; + } const totalPaths = asPaths.length - 1; @@ -95,7 +164,12 @@ function* buildElements( id: base.asn, type: 'ASNode', position: { x, y }, - data: { asn: base.asn, name: base.name, hasChildren: true, hasParents: false }, + data: { + asn: base.asn, + name: asnOrgs[base.asn]?.name || base.name, + hasChildren: true, + hasParents: false + }, }; for (const [groupIdx, pathGroup] of asPaths.entries()) { @@ -114,7 +188,7 @@ function* buildElements( position: { x, y }, data: { asn: `${asn}`, - name: `AS${asn}`, + name: asn === 'IXP' ? 'IXP' : asnOrgs[asn]?.name || (asn === '0' ? 
'Private/Unknown' : `AS${asn}`), hasChildren: idx < endIdx, hasParents: true, }, diff --git a/hyperglass/ui/components/results/individual.tsx b/hyperglass/ui/components/results/individual.tsx index a316dd3..7c276b7 100644 --- a/hyperglass/ui/components/results/individual.tsx +++ b/hyperglass/ui/components/results/individual.tsx @@ -16,7 +16,7 @@ import startCase from 'lodash/startCase'; import { forwardRef, memo, useEffect, useMemo, useState } from 'react'; import isEqual from 'react-fast-compare'; import { Else, If, Then } from 'react-if'; -import { BGPTable, Path, TextOutput } from '~/components'; +import { BGPTable, TracerouteTable, Path, TextOutput } from '~/components'; import { useConfig } from '~/context'; import { Countdown, DynamicIcon } from '~/elements'; import { @@ -28,7 +28,7 @@ import { useStrf, useTableToString, } from '~/hooks'; -import { isStringOutput, isStructuredOutput } from '~/types'; +import { isStringOutput, isStructuredOutput, isBGPStructuredOutput, isTracerouteStructuredOutput } from '~/types'; import { CopyButton } from './copy-button'; import { FormattedError } from './formatted-error'; import { isFetchError, isLGError, isLGOutputOrError, isStackError } from './guards'; @@ -153,9 +153,15 @@ const _Result: React.ForwardRefRenderFunction = ( let copyValue = data?.output as string; + // Always create formatData hook for both BGP and Traceroute outputs const formatData = useTableToString(form.queryTarget, data, [data?.format]); + const isBGPData = isBGPStructuredOutput(data); + const isTracerouteData = isTracerouteStructuredOutput(data); - if (data?.format === 'application/json') { + if (data?.format === 'application/json' && isBGPData) { + copyValue = formatData(); + } else if (data?.format === 'application/json' && isTracerouteData) { + // For structured traceroute, use formatted table output for copy functionality copyValue = formatData(); } @@ -244,8 +250,10 @@ const _Result: React.ForwardRefRenderFunction = ( - {isStructuredOutput(data) 
&& data.level === 'success' && tableComponent ? ( + {isBGPStructuredOutput(data) && data.level === 'success' && tableComponent ? ( {data.output} + ) : isTracerouteStructuredOutput(data) && data.level === 'success' && tableComponent ? ( + {data.output} ) : isStringOutput(data) && data.level === 'success' && !tableComponent ? ( {data.output} ) : isStringOutput(data) && data.level !== 'success' ? ( diff --git a/hyperglass/ui/components/table/main.tsx b/hyperglass/ui/components/table/main.tsx index fd1eeef..54695c8 100644 --- a/hyperglass/ui/components/table/main.tsx +++ b/hyperglass/ui/components/table/main.tsx @@ -16,19 +16,19 @@ import { PageSelect } from './page-select'; import type { TableOptions, PluginHook } from 'react-table'; import type { Theme, TableColumn, CellRenderProps } from '~/types'; -interface TableProps { - data: Route[]; +interface TableProps { + data: T[]; striped?: boolean; - columns: TableColumn[]; + columns: TableColumn[] | any[]; // Allow more flexible column types heading?: React.ReactNode; bordersVertical?: boolean; bordersHorizontal?: boolean; - Cell?: React.FC; - rowHighlightProp?: keyof Route; + Cell?: React.FC; // More flexible cell render props + rowHighlightProp?: keyof T; rowHighlightBg?: Theme.ColorNames; } -export const Table = (props: TableProps): JSX.Element => { +export const Table = (props: TableProps): JSX.Element => { const { data, columns, @@ -61,12 +61,15 @@ export const Table = (props: TableProps): JSX.Element => { columns, defaultColumn, data, - initialState: { hiddenColumns }, - } as TableOptions; + initialState: { + hiddenColumns, + pageSize: 50 // Default to 50 rows instead of default 10 + }, + } as TableOptions; - const plugins = [useSortBy, usePagination] as PluginHook[]; + const plugins = [useSortBy, usePagination] as PluginHook[]; - const instance = useTable(options, ...plugins); + const instance = useTable(options, ...plugins); const { page, diff --git a/hyperglass/ui/hooks/use-asn-detail.ts 
b/hyperglass/ui/hooks/use-asn-detail.ts index 57f1e63..ab21ba1 100644 --- a/hyperglass/ui/hooks/use-asn-detail.ts +++ b/hyperglass/ui/hooks/use-asn-detail.ts @@ -16,29 +16,38 @@ interface ASNQuery { }; } -const query: QueryFunction = async (ctx: QueryFunctionContext) => { - const asn = ctx.queryKey; - const res = await fetch('https://api.asrank.caida.org/v2/graphql', { - mode: 'cors', - method: 'POST', - headers: { 'content-type': 'application/json' }, - /* eslint no-useless-escape: 0 */ - body: JSON.stringify({ query: `{ asn(asn:\"${asn}\"){ organization { orgName } } }` }), - }); - return await res.json(); -}; +// Disabled - we now get org names from our own IP enrichment system +// const query: QueryFunction = async (ctx: QueryFunctionContext) => { +// const asn = ctx.queryKey; +// const res = await fetch('https://api.asrank.caida.org/v2/graphql', { +// mode: 'cors', +// method: 'POST', +// headers: { 'content-type': 'application/json' }, +// /* eslint no-useless-escape: 0 */ +// body: JSON.stringify({ query: `{ asn(asn:\"${asn}\"){ organization { orgName } } }` }), +// }); +// return await res.json(); +// }; /** - * Query the Caida AS Rank API to get an ASN's organization name for the AS Path component. - * @see https://api.asrank.caida.org/v2/docs + * Stub function - we no longer need external CAIDA calls since we have ASN org data + * from our IP enrichment system. This hook is kept for compatibility but returns empty data. 
+ * @deprecated Use as_path_data from traceroute response instead of external CAIDA calls */ export function useASNDetail(asn: string): QueryObserverResult { return useQuery({ queryKey: [asn], - queryFn: query, + queryFn: async () => ({ + data: { + asn: { + organization: null, // No external fetch - org data comes from IP enrichment + } + } + }), refetchOnWindowFocus: false, - refetchInterval: false, + refetchInterval: false, refetchOnMount: false, cacheTime: Infinity, + enabled: false, // Disable the query entirely }); } diff --git a/hyperglass/ui/hooks/use-table-to-string.ts b/hyperglass/ui/hooks/use-table-to-string.ts index 4388a8e..dcee771 100644 --- a/hyperglass/ui/hooks/use-table-to-string.ts +++ b/hyperglass/ui/hooks/use-table-to-string.ts @@ -3,7 +3,7 @@ import dayjs from 'dayjs'; import relativeTimePlugin from 'dayjs/plugin/relativeTime'; import utcPlugin from 'dayjs/plugin/utc'; import { useConfig } from '~/context'; -import { isStructuredOutput } from '~/types'; +import { isStructuredOutput, isBGPStructuredOutput, isTracerouteStructuredOutput } from '~/types'; type TableToStringFormatter = | ((v: string) => string) @@ -93,35 +93,136 @@ export function useTableToString( let result = messages.noOutput; try { if (typeof data !== 'undefined' && isStructuredOutput(data)) { - const tableStringParts = [ - `Routes For: ${target.join(', ')}`, - `Timestamp: ${data.timestamp} UTC`, - ]; - for (const route of data.output.routes) { - for (const field of parsedDataFields) { - const [header, accessor, align] = field; - if (align !== null) { - let value = route[accessor]; - - // Handle fields that should be hidden when empty/not available - if ((accessor === 'source_rid' || accessor === 'age') && - (value === null || value === undefined || - (typeof value === 'string' && value.trim() === '') || - (accessor === 'age' && value === -1))) { - continue; // Skip this field entirely - } - - const fmtFunc = getFmtFunc(accessor) as (v: typeof value) => string; - value = 
fmtFunc(value); - if (accessor === 'prefix') { - tableStringParts.push(` - ${header}: ${value}`); - } else { - tableStringParts.push(` - ${header}: ${value}`); + + // Handle BGP data + if (isBGPStructuredOutput(data)) { + // Check if this is BGP data with routes + if (!('routes' in data.output) || !Array.isArray(data.output.routes)) { + return messages.noOutput; // Not BGP data, return early + } + + const tableStringParts = [ + `Routes For: ${target.join(', ')}`, + `Timestamp: ${data.timestamp} UTC`, + ]; + for (const route of data.output.routes) { + for (const field of parsedDataFields) { + const [header, accessor, align] = field; + if (align !== null) { + let value = route[accessor]; + + // Handle fields that should be hidden when empty/not available + if ((accessor === 'source_rid' || accessor === 'age') && + (value === null || value === undefined || + (typeof value === 'string' && value.trim() === '') || + (accessor === 'age' && value === -1))) { + continue; // Skip this field entirely + } + + const fmtFunc = getFmtFunc(accessor) as (v: typeof value) => string; + value = fmtFunc(value); + if (accessor === 'prefix') { + tableStringParts.push(` - ${header}: ${value}`); + } else { + tableStringParts.push(` - ${header}: ${value}`); + } } } } + result = tableStringParts.join('\n'); + } + + // Handle Traceroute data + else if (isTracerouteStructuredOutput(data)) { + if (!('hops' in data.output) || !Array.isArray(data.output.hops)) { + return messages.noOutput; // Not traceroute data, return early + } + + const formatRTT = (rtt: number | null | undefined): string => { + if (rtt === null || rtt === undefined) return '*'; + return `${rtt.toFixed(1)}ms`; + }; + + const formatIP = (hop: any): string => { + if (hop.display_ip) return hop.display_ip; // For truncated IPv6 + if (hop.ip_address) return hop.ip_address; + return '*'; + }; + + const formatASN = (hop: any): string => { + if (hop.asn) return `AS${hop.asn}`; + return '*'; + }; + + const formatHostname = (hop: any): 
string => { + if (hop.hostname && hop.hostname !== 'None' && hop.hostname !== 'null') return hop.hostname; + return '*'; + }; + + // Create a nicely formatted text table with proper column alignment + const header = `Traceroute to ${data.output.target} from ${data.output.source}`; + const timestamp = `Timestamp: ${data.timestamp} UTC`; + const separator = '=' .repeat(header.length); + + // Calculate optimal column widths by examining all data + const columnWidths = { + hop: Math.max(3, ...data.output.hops.map(h => h.hop_number.toString().length)), + ip: Math.max(10, ...data.output.hops.map(h => formatIP(h).length)), + hostname: Math.max(8, ...data.output.hops.map(h => formatHostname(h).length)), + asn: Math.max(3, ...data.output.hops.map(h => formatASN(h).length)), + loss: 4, // "100%" is max + sent: Math.max(4, ...data.output.hops.map(h => (h.sent_count || 0).toString().length)), + last: Math.max(4, ...data.output.hops.map(h => formatRTT(h.last_rtt).length)), + avg: Math.max(3, ...data.output.hops.map(h => formatRTT(h.avg_rtt).length)), + best: Math.max(4, ...data.output.hops.map(h => formatRTT(h.best_rtt).length)), + worst: Math.max(5, ...data.output.hops.map(h => formatRTT(h.worst_rtt).length)), + }; + + // Create header row with proper spacing + const headerRow = [ + 'Hop'.padEnd(columnWidths.hop), + 'IP Address'.padEnd(columnWidths.ip), + 'Hostname'.padEnd(columnWidths.hostname), + 'ASN'.padEnd(columnWidths.asn), + 'Loss'.padEnd(columnWidths.loss), + 'Sent'.padEnd(columnWidths.sent), + 'Last'.padEnd(columnWidths.last), + 'AVG'.padEnd(columnWidths.avg), + 'Best'.padEnd(columnWidths.best), + 'Worst' + ].join(' '); + + const totalWidth = headerRow.length; + + const tableLines = [ + header, + timestamp, + separator, + '', + headerRow, + '-'.repeat(totalWidth), + ]; + + // Format data rows with consistent column widths + for (const hop of data.output.hops) { + const row = [ + hop.hop_number.toString().padEnd(columnWidths.hop), + 
formatIP(hop).padEnd(columnWidths.ip), + formatHostname(hop).padEnd(columnWidths.hostname), + formatASN(hop).padEnd(columnWidths.asn), + `${hop.loss_pct || 0}%`.padEnd(columnWidths.loss), + (hop.sent_count || 0).toString().padEnd(columnWidths.sent), + formatRTT(hop.last_rtt).padEnd(columnWidths.last), + formatRTT(hop.avg_rtt).padEnd(columnWidths.avg), + formatRTT(hop.best_rtt).padEnd(columnWidths.best), + formatRTT(hop.worst_rtt) + ].join(' '); + + tableLines.push(row); + } + + result = tableLines.join('\n'); } - result = tableStringParts.join('\n'); } return result; } catch (err) { diff --git a/hyperglass/ui/types/globals.d.ts b/hyperglass/ui/types/globals.d.ts index f920e59..0cb209d 100644 --- a/hyperglass/ui/types/globals.d.ts +++ b/hyperglass/ui/types/globals.d.ts @@ -32,7 +32,40 @@ export declare global { rpki_state: RPKIState; }; + type TracerouteHop = { + hop_number: number; + ip_address: string | null; + display_ip: string | null; + hostname: string | null; + rtt1: number | null; + rtt2: number | null; + rtt3: number | null; + loss_pct: number | null; + sent_count: number | null; + last_rtt: number | null; + avg_rtt: number | null; + best_rtt: number | null; + worst_rtt: number | null; + asn: string | null; + org: string | null; + prefix: string | null; + country: string | null; + rir: string | null; + allocated: string | null; + }; + + type TracerouteResult = { + target: string; + source: string; + hops: TracerouteHop[]; + max_hops: number; + packet_size: number; + raw_output: string | null; + }; + type RouteField = { [K in keyof Route]: Route[K] }; + + type TracerouteHopField = { [K in keyof TracerouteHop]: TracerouteHop[K] }; type StructuredResponse = { vrf: string; @@ -41,6 +74,20 @@ export declare global { winning_weight: 'high' | 'low'; }; + type TracerouteStructuredOutput = { + vrf: string; + target: string; + source: string; + hops: TracerouteHop[]; + max_hops: number; + packet_size: number; + raw_output: string | null; + }; + + type 
BGPStructuredOutput = StructuredResponse; + + type AllStructuredResponses = BGPStructuredOutput | TracerouteStructuredOutput; + type QueryResponse = { random: string; cached: boolean; diff --git a/hyperglass/ui/types/guards.ts b/hyperglass/ui/types/guards.ts index 3e587f6..227b7a8 100644 --- a/hyperglass/ui/types/guards.ts +++ b/hyperglass/ui/types/guards.ts @@ -19,6 +19,14 @@ export function isStructuredOutput(data: unknown): data is StringTableData { return isObject(data) && 'output' in data; } +export function isBGPStructuredOutput(data: unknown): data is StringTableData & { output: StructuredResponse } { + return isStructuredOutput(data) && isObject((data as StringTableData).output) && 'routes' in (data as StringTableData).output; +} + +export function isTracerouteStructuredOutput(data: unknown): data is StringTableData & { output: TracerouteResult } { + return isStructuredOutput(data) && isObject((data as StringTableData).output) && 'hops' in (data as StringTableData).output; +} + export function isStringOutput(data: unknown): data is StringQueryResponse { return ( isObject(data) && 'output' in data && typeof (data as { output: unknown }).output === 'string' diff --git a/hyperglass/ui/types/table.ts b/hyperglass/ui/types/table.ts index fe20e0b..2687f73 100644 --- a/hyperglass/ui/types/table.ts +++ b/hyperglass/ui/types/table.ts @@ -7,8 +7,21 @@ export interface TableColumn { hidden: boolean; } +export interface TracerouteTableColumn { + Header: string; + accessor: keyof TracerouteHop; + align: string; + hidden: boolean; +} + export type CellRenderProps = { column: CellProps['column']; row: CellProps['row']; value: CellProps['value']; }; + +export type TracerouteCellRenderProps = { + column: CellProps['column']; + row: CellProps['row']; + value: CellProps['value']; +};