diff --git a/hyperglass/defaults/directives/frr.py b/hyperglass/defaults/directives/frr.py index eb6baaf..3d7fb69 100644 --- a/hyperglass/defaults/directives/frr.py +++ b/hyperglass/defaults/directives/frr.py @@ -101,12 +101,12 @@ FRRouting_Traceroute = BuiltinDirective( RuleWithIPv4( condition="0.0.0.0/0", action="permit", - command="traceroute -4 -w 1 -q 1 -s {source4} {target}", + command="traceroute -4 -I -n -q 1 -w 1 -s {source4} {target}", ), RuleWithIPv6( condition="::/0", action="permit", - command="traceroute -6 -w 1 -q 1 -s {source6} {target}", + command="traceroute -6 -I -n -q 1 -w 1 -s {source6} {target}", ), ], field=Text(description="IP Address, Prefix, or Hostname"), diff --git a/hyperglass/plugins/_builtin/trace_route_frr.py b/hyperglass/plugins/_builtin/trace_route_frr.py index fd4ec6e..c745cd3 100644 --- a/hyperglass/plugins/_builtin/trace_route_frr.py +++ b/hyperglass/plugins/_builtin/trace_route_frr.py @@ -3,6 +3,7 @@ # Standard Library import re import typing as t +from ipaddress import ip_address as validate_ip # Third Party from pydantic import PrivateAttr @@ -79,422 +80,195 @@ class FrrTracerouteTable(TracerouteResult): _log.debug(f"=== END RAW INPUT ===") hops = [] - lines = text.strip().split("\n") - + + # Parse the traceroute output line by line + lines = text.strip().split('\n') _log.debug(f"Split into {len(lines)} lines") - - # Pattern for normal hop: " 1 bdr2.std.douala-ix.net (196.49.84.34) 0.520 ms 0.451 ms 0.418 ms" - hop_pattern = re.compile( - r"^\s*(\d+)\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms(?:\s+(\d+(?:\.\d+)?)\s*ms)?(?:\s+(\d+(?:\.\d+)?)\s*ms)?" - ) - - # Pattern for timeout hop: " 3 * * *" - timeout_pattern = re.compile(r"^\s*(\d+)\s+\*\s*\*\s*\*") - + + # Patterns for different line formats + # Pattern for regular hop: " 1 102.216.76.37 0.221 ms" + simple_pattern = re.compile(r"^\s*(\d+)\s+([\d\.:a-fA-F]+)\s+([\d.]+)\s+ms") + + # Pattern for timeout hop: " 9 *" + timeout_pattern = re.compile(r"^\s*(\d+)\s+\*\s*$") + + # Pattern for hop with hostname: " 2 hostname.example.com (192.168.1.1) 15.234 ms" + hostname_pattern = re.compile(r"^\s*(\d+)\s+(\S+)\s+\(([\d\.:a-fA-F]+)\)\s+([\d.]+)\s+ms") + + # Pattern for multiple RTTs: " 3 192.168.1.1 15.234 ms 16.123 ms 14.567 ms" + multi_rtt_pattern = re.compile(r"^\s*(\d+)\s+([\d\.:a-fA-F]+)\s+([\d.]+)\s+ms(?:\s+([\d.]+)\s+ms)?(?:\s+([\d.]+)\s+ms)?") + # Pattern for partial timeout: " 7 port-channel4.core4.mrs1.he.net (184.105.81.30) 132.624 ms 132.589 ms *" partial_timeout_pattern = re.compile( - r"^\s*(\d+)\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms(?:\s+(\d+(?:\.\d+)?)\s*ms)?\s+\*" + r"^\s*(\d+)\s+(\S+)\s+\(([\d\.:a-fA-F]+)\)\s+(?:([\d.]+)\s+ms\s+)?(?:([\d.]+)\s+ms\s+)?\*" ) - - # Pattern for IP-only hop: "15 72.251.0.8 (72.251.0.8) 360.370 ms 352.170 ms 354.132 ms" - ip_only_pattern = re.compile( - r"^\s*(\d+)\s+([0-9a-fA-F:.]+)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms(?:\s+(\d+(?:\.\d+)?)\s*ms)?(?:\s+(\d+(?:\.\d+)?)\s*ms)?" - ) - - # Complex multi-IP patterns for load balancing scenarios - # Pattern 1: "18 * 2001:41d0:0:50::7:1009 (2001:41d0:0:50::7:1009) 353.548 ms 351.516 ms" - partial_multi_pattern = re.compile( - r"^\s*(\d+)\s+\*\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms(?:\s+(\d+(?:\.\d+)?)\s*ms)?" - ) - - # Pattern 2: "12 2001:41d0:aaaa:100::3 (2001:41d0:aaaa:100::3) 274.418 ms 2001:41d0:aaaa:100::5 (2001:41d0:aaaa:100::5) 269.972 ms 282.653 ms" - dual_ip_pattern = re.compile( - r"^\s*(\d+)\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms(?:\s+(\d+(?:\.\d+)?)\s*ms)?" - ) - - # Pattern 3: More complex multi-IP lines (3 or more IPs) - # "19 2001:41d0:0:50::3:211b (2001:41d0:0:50::3:211b) 351.213 ms 2001:41d0:0:50::7:100f (2001:41d0:0:50::7:100f) 351.090 ms 2001:41d0:0:50::7:100b (2001:41d0:0:50::7:100b) 351.282 ms" - multi_ip_pattern = re.compile( - r"^\s*(\d+)\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms\s+(.+?)\s+\(([^)]+)\)\s+(\d+(?:\.\d+)?)\s*ms" - ) - + for i, line in enumerate(lines): line = line.strip() - _log.debug(f"Line {i:2d}: {repr(line)}") - if not line: continue - + + _log.debug(f"Line {i:2}: {repr(line)}") + # Skip header lines - if ( - "traceroute to" in line.lower() - or "hops max" in line.lower() - or "byte packets" in line.lower() - ): - _log.debug(f"Line {i:2d}: SKIPPING HEADER") + if "traceroute to" in line.lower() or "hops max" in line.lower(): + _log.debug(f"Line {i:2}: SKIPPING HEADER") continue - + + hop_number = None + ip_address = None + hostname = None + rtt1 = None + rtt2 = None + rtt3 = None + # Try to match timeout hop first timeout_match = timeout_pattern.match(line) if timeout_match: hop_number = int(timeout_match.group(1)) - - _log.debug(f"Line {i:2d}: TIMEOUT HOP - {hop_number}: * * *") - - hops.append( - TracerouteHop( - hop_number=hop_number, - ip_address=None, - display_ip=None, - hostname=None, - rtt1=None, - rtt2=None, - rtt3=None, - sent_count=3, # FRR sends 3 pings per hop - last_rtt=None, - best_rtt=None, - worst_rtt=None, - loss_pct=100, # 100% loss for timeout - # BGP enrichment fields (all None for timeout) - asn=None, - org=None, - prefix=None, - country=None, - rir=None, - allocated=None, - ) - ) - continue - - # Try to match multi-IP pattern (3 IPs) - multi_match = multi_ip_pattern.match(line) - if multi_match: + _log.debug(f"Line {i:2}: TIMEOUT HOP - {hop_number}") + + # Try to match partial timeout + elif partial_timeout_pattern.match(line): + partial_match = partial_timeout_pattern.match(line) + hop_number = int(partial_match.group(1)) + hostname = partial_match.group(2) + ip_address = partial_match.group(3) + rtt1 = float(partial_match.group(4)) if partial_match.group(4) else None + rtt2 = float(partial_match.group(5)) if partial_match.group(5) else None + _log.debug(f"Line {i:2}: PARTIAL TIMEOUT HOP - {hop_number}: {hostname} ({ip_address}) {rtt1} {rtt2} *") + + # Try to match hostname pattern + elif hostname_pattern.match(line): + hostname_match = hostname_pattern.match(line) + hop_number = int(hostname_match.group(1)) + hostname = hostname_match.group(2) + ip_address = hostname_match.group(3) + rtt1 = float(hostname_match.group(4)) + _log.debug(f"Line {i:2}: HOSTNAME HOP - {hop_number}: {hostname} ({ip_address}) {rtt1} ms") + + # Try to match multiple RTT pattern + elif multi_rtt_pattern.match(line): + multi_match = multi_rtt_pattern.match(line) hop_number = int(multi_match.group(1)) - hostname1 = multi_match.group(2).strip() - ip1 = multi_match.group(3) - rtt1 = float(multi_match.group(4)) - hostname2 = multi_match.group(5).strip() - ip2 = multi_match.group(6) - rtt2 = float(multi_match.group(7)) - hostname3 = multi_match.group(8).strip() - ip3 = multi_match.group(9) - rtt3 = float(multi_match.group(10)) - - _log.debug(f"Line {i:2d}: MULTI-IP HOP (3 IPs) - {hop_number}: {ip1}, {ip2}, {ip3}") - - # Use the first IP as primary, combine hostnames - display_hostname = f"{hostname1} / {hostname2} / {hostname3}" - if hostname1 == ip1: - display_hostname = None # All IP-only - - hops.append( - TracerouteHop( - hop_number=hop_number, - ip_address=ip1, - display_ip=None, - hostname=display_hostname, - rtt1=rtt1, - rtt2=rtt2, - rtt3=rtt3, - sent_count=3, - last_rtt=rtt3, - best_rtt=min(rtt1, rtt2, rtt3), - worst_rtt=max(rtt1, rtt2, rtt3), - loss_pct=0, # No loss if we got responses - # BGP enrichment fields (will be populated by enrichment plugin) - asn=None, - org=None, - prefix=None, - country=None, - rir=None, - allocated=None, - ) - ) - continue - - # Try to match dual-IP pattern - dual_match = dual_ip_pattern.match(line) - if dual_match: - hop_number = int(dual_match.group(1)) - hostname1 = dual_match.group(2).strip() - ip1 = dual_match.group(3) - rtt1 = float(dual_match.group(4)) - hostname2 = dual_match.group(5).strip() - ip2 = dual_match.group(6) - rtt2 = float(dual_match.group(7)) - rtt3 = float(dual_match.group(8)) if dual_match.group(8) else None - - _log.debug(f"Line {i:2d}: DUAL-IP HOP - {hop_number}: {ip1} and {ip2}") - - # Use the first IP as primary, combine hostnames - display_hostname = f"{hostname1} / {hostname2}" - if hostname1 == ip1: - display_hostname = None # Both IP-only - - rtts = [x for x in [rtt1, rtt2, rtt3] if x is not None] - - hops.append( - TracerouteHop( - hop_number=hop_number, - ip_address=ip1, - display_ip=None, - hostname=display_hostname, - rtt1=rtt1, - rtt2=rtt2, - rtt3=rtt3, - sent_count=len(rtts), - last_rtt=rtts[-1] if rtts else None, - best_rtt=min(rtts) if rtts else None, - worst_rtt=max(rtts) if rtts else None, - loss_pct=0, # No loss if we got responses - # BGP enrichment fields (will be populated by enrichment plugin) - asn=None, - org=None, - prefix=None, - country=None, - rir=None, - allocated=None, - ) - ) - continue - - # Try to match partial multi pattern (* hostname) - partial_multi_match = partial_multi_pattern.match(line) - if partial_multi_match: - hop_number = int(partial_multi_match.group(1)) - hostname = partial_multi_match.group(2).strip() - ip_address = partial_multi_match.group(3) - rtt1 = float(partial_multi_match.group(4)) - rtt2 = float(partial_multi_match.group(5)) if partial_multi_match.group(5) else None - - _log.debug( - f"Line {i:2d}: PARTIAL-MULTI HOP - {hop_number}: * {hostname} ({ip_address})" - ) - - rtts = [x for x in [rtt1, rtt2] if x is not None] - - hops.append( - TracerouteHop( - hop_number=hop_number, - ip_address=ip_address, - display_ip=None, - hostname=hostname if hostname != ip_address else None, - rtt1=rtt1, - rtt2=rtt2, - rtt3=None, - sent_count=3, # Still sent 3, but one timed out - last_rtt=rtts[-1] if rtts else None, - best_rtt=min(rtts) if rtts else None, - worst_rtt=max(rtts) if rtts else None, - loss_pct=33.33, # 1 out of 3 packets lost - # BGP enrichment fields (will be populated by enrichment plugin) - asn=None, - org=None, - prefix=None, - country=None, - rir=None, - allocated=None, - ) - ) - continue - - # Try to match partial timeout (hostname with one *) - partial_timeout_match = partial_timeout_pattern.match(line) - if partial_timeout_match: - hop_number = int(partial_timeout_match.group(1)) - hostname = partial_timeout_match.group(2).strip() - ip_address = partial_timeout_match.group(3) - rtt1 = float(partial_timeout_match.group(4)) - rtt2 = ( - float(partial_timeout_match.group(5)) - if partial_timeout_match.group(5) - else None - ) - - _log.debug( - f"Line {i:2d}: PARTIAL-TIMEOUT HOP - {hop_number}: {hostname} ({ip_address}) with timeout" - ) - - rtts = [x for x in [rtt1, rtt2] if x is not None] - - hops.append( - TracerouteHop( - hop_number=hop_number, - ip_address=ip_address, - display_ip=None, - hostname=hostname if hostname != ip_address else None, - rtt1=rtt1, - rtt2=rtt2, - rtt3=None, - sent_count=3, # Still sent 3, but one timed out - last_rtt=rtts[-1] if rtts else None, - best_rtt=min(rtts) if rtts else None, - worst_rtt=max(rtts) if rtts else None, - loss_pct=33.33, # 1 out of 3 packets lost - # BGP enrichment fields (will be populated by enrichment plugin) - asn=None, - org=None, - prefix=None, - country=None, - rir=None, - allocated=None, - ) - ) - continue - - # Try to match normal hop with hostname - hop_match = hop_pattern.match(line) - if hop_match: - hop_number = int(hop_match.group(1)) - hostname = hop_match.group(2).strip() - ip_address = hop_match.group(3) - rtt1 = float(hop_match.group(4)) - rtt2 = float(hop_match.group(5)) if hop_match.group(5) else None - rtt3 = float(hop_match.group(6)) if hop_match.group(6) else None - - _log.debug( - f"Line {i:2d}: NORMAL HOP - {hop_number}: {hostname} ({ip_address}) RTTs: {rtt1}, {rtt2}, {rtt3}" - ) - - rtts = [x for x in [rtt1, rtt2, rtt3] if x is not None] - - hops.append( - TracerouteHop( - hop_number=hop_number, - ip_address=ip_address, - display_ip=None, - hostname=hostname if hostname != ip_address else None, - rtt1=rtt1, - rtt2=rtt2, - rtt3=rtt3, - sent_count=len(rtts), - last_rtt=rtts[-1] if rtts else None, - best_rtt=min(rtts) if rtts else None, - worst_rtt=max(rtts) if rtts else None, - loss_pct=0, # No loss if we got a response - # BGP enrichment fields (will be populated by enrichment plugin) - asn=None, - org=None, - prefix=None, - country=None, - rir=None, - allocated=None, - ) - ) - continue - - # Try to match IP-only hop (no hostname) - ip_match = ip_only_pattern.match(line) - if ip_match: - hop_number = int(ip_match.group(1)) - ip_display = ip_match.group(2).strip() # The IP shown before parentheses - ip_address = ip_match.group(3) # The IP in parentheses - rtt1 = float(ip_match.group(4)) - rtt2 = float(ip_match.group(5)) if ip_match.group(5) else None - rtt3 = float(ip_match.group(6)) if ip_match.group(6) else None - - _log.debug( - f"Line {i:2d}: IP-ONLY HOP - {hop_number}: {ip_address} RTTs: {rtt1}, {rtt2}, {rtt3}" - ) - - rtts = [x for x in [rtt1, rtt2, rtt3] if x is not None] - - hops.append( - TracerouteHop( - hop_number=hop_number, - ip_address=ip_address, - display_ip=None, - hostname=None, # No hostname for IP-only hops - rtt1=rtt1, - rtt2=rtt2, - rtt3=rtt3, - sent_count=len(rtts), - last_rtt=rtts[-1] if rtts else None, - best_rtt=min(rtts) if rtts else None, - worst_rtt=max(rtts) if rtts else None, - loss_pct=0, # No loss if we got a response - # BGP enrichment fields (will be populated by enrichment plugin) - asn=None, - org=None, - prefix=None, - country=None, - rir=None, - allocated=None, - ) - ) - continue - - _log.debug(f"Line {i:2d}: UNMATCHED - skipping") - - _log.debug(f"Before cleanup: {len(hops)} hops") - - # Clean up consecutive timeout hops at the end - # Keep only the first few timeouts, remove excessive trailing timeouts - if len(hops) > 5: - # Find the last non-timeout hop - last_real_hop = -1 - for i in range(len(hops) - 1, -1, -1): - if not hops[i].is_timeout: - last_real_hop = i - break - - if last_real_hop >= 0: - # Keep at most 3 timeout hops after the last real hop - max_timeouts = 3 - timeout_count = 0 - cleaned_hops = hops[: last_real_hop + 1] # Keep all hops up to last real hop - - for hop in hops[last_real_hop + 1 :]: - if hop.is_timeout: - timeout_count += 1 - if timeout_count <= max_timeouts: - cleaned_hops.append(hop) - else: - _log.debug(f"Removing excessive timeout hop {hop.hop_number}") - else: - # If we find another real hop after timeouts, keep it - cleaned_hops.append(hop) - timeout_count = 0 - - hops = cleaned_hops - - _log.debug(f"After cleanup: {len(hops)} hops") - - for hop in hops: - if hop.is_timeout: - _log.debug(f"Final hop {hop.hop_number}: * (timeout)") + ip_address = multi_match.group(2) + rtt1 = float(multi_match.group(3)) + rtt2 = float(multi_match.group(4)) if multi_match.group(4) else None + rtt3 = float(multi_match.group(5)) if multi_match.group(5) else None + _log.debug(f"Line {i:2}: MULTI RTT HOP - {hop_number}: {ip_address} {rtt1} {rtt2} {rtt3}") + + # Try to match simple pattern + elif simple_pattern.match(line): + simple_match = simple_pattern.match(line) + hop_number = int(simple_match.group(1)) + ip_address = simple_match.group(2) + rtt1 = float(simple_match.group(3)) + _log.debug(f"Line {i:2}: SIMPLE IP HOP - {hop_number}: {ip_address} {rtt1} ms") + else: - _log.debug( - f"Final hop {hop.hop_number}: {hop.ip_address} ({hop.hostname or 'no-hostname'}) - RTTs: {hop.rtt1}/{hop.rtt2}/{hop.rtt3}" + _log.debug(f"Line {i:2}: NO MATCH - skipping") + continue + + # Create hop object if we have valid data + if hop_number is not None: + try: + # Validate IP address if present + if ip_address: + validate_ip(ip_address) + except ValueError: + # If IP validation fails, treat as hostname + if not hostname: + hostname = ip_address + ip_address = None + + hop = TracerouteHop( + hop_number=hop_number, + ip_address=ip_address, + display_ip=None, + hostname=hostname, + rtt1=rtt1, + rtt2=rtt2, + rtt3=rtt3, + sent_count=None, + last_rtt=rtt1, + best_rtt=min(filter(None, [rtt1, rtt2, rtt3])) if any([rtt1, rtt2, rtt3]) else None, + worst_rtt=max(filter(None, [rtt1, rtt2, rtt3])) if any([rtt1, rtt2, rtt3]) else None, + loss_pct=None, + # BGP enrichment fields (will be populated by enrichment plugin) + asn=None, + org=None, + prefix=None, + country=None, + rir=None, + allocated=None, ) + hops.append(hop) - _log.info(f"Parsed {len(hops)} hops from FRR traceroute") + # Clean up hops - remove duplicates and sort by hop number + _log.debug(f"Before cleanup: {len(hops)} hops") + + # Group hops by hop number and merge data + hop_dict = {} + for hop in hops: + if hop.hop_number in hop_dict: + # Merge data for same hop number + existing = hop_dict[hop.hop_number] + # Keep the first non-None value for each field + hop_dict[hop.hop_number] = TracerouteHop( + hop_number=hop.hop_number, + ip_address=existing.ip_address or hop.ip_address, + display_ip=existing.display_ip or hop.display_ip, + hostname=existing.hostname or hop.hostname, + rtt1=existing.rtt1 or hop.rtt1, + rtt2=existing.rtt2 or hop.rtt2, + rtt3=existing.rtt3 or hop.rtt3, + sent_count=existing.sent_count or hop.sent_count, + last_rtt=existing.last_rtt or hop.last_rtt, + best_rtt=existing.best_rtt or hop.best_rtt, + worst_rtt=existing.worst_rtt or hop.worst_rtt, + loss_pct=existing.loss_pct or hop.loss_pct, + asn=existing.asn or hop.asn, + org=existing.org or hop.org, + prefix=existing.prefix or hop.prefix, + country=existing.country or hop.country, + rir=existing.rir or hop.rir, + allocated=existing.allocated or hop.allocated, + ) + else: + hop_dict[hop.hop_number] = hop + + # Convert back to sorted list + final_hops = [hop_dict[hop_num] for hop_num in sorted(hop_dict.keys())] + _log.debug(f"After cleanup: {len(final_hops)} hops") - # Extract packet size and max hops from header if available - max_hops = 30 # Default from your examples - packet_size = 60 # Default from your examples (IPv4) + # Debug final hop list + for hop in final_hops: + hostname_display = hop.hostname or "no-hostname" + _log.debug(f"Final hop {hop.hop_number}: {hop.ip_address} ({hostname_display}) - RTTs: {hop.rtt1}/{hop.rtt2}/{hop.rtt3}") - for line in text.split("\n"): - if "hops max" in line and "byte packets" in line: - # Example: "traceroute to syd.proof.ovh.net (51.161.209.134), 30 hops max, 60 byte packets" - parts = line.split() - for i, part in enumerate(parts): - if part == "hops": - try: - max_hops = int(parts[i - 1]) - except (ValueError, IndexError): - pass - elif part == "byte": - try: - packet_size = int(parts[i - 1]) - except (ValueError, IndexError): - pass - break + _log.info(f"Parsed {len(final_hops)} hops from FRR traceroute") + + # Extract packet information from traceroute output + max_hops = 30 # Default + packet_size = 60 # FRR default + + # Try to extract from header line + for line in lines: + if "hops max" in line.lower(): + try: + # Look for pattern like "30 hops max, 60 byte packets" + match = re.search(r"(\d+)\s+hops\s+max.*?(\d+)\s+byte", line) + if match: + max_hops = int(match.group(1)) + packet_size = int(match.group(2)) + break + except (ValueError, AttributeError): + pass return TracerouteResult( target=target, source=source, - hops=hops, + hops=final_hops, max_hops=max_hops, packet_size=packet_size, raw_output=text, @@ -521,6 +295,10 @@ class TraceroutePluginFrr(OutputPlugin): query.device, "name", "unknown" ) + # Logging + _log = log.bind(plugin="TraceroutePluginFrr") + _log.info(f"Processing Traceroute for {target} from {source}") + device = getattr(query, "device", None) if device is not None: if not getattr(device, "structured_output", False): @@ -545,8 +323,9 @@ class TraceroutePluginFrr(OutputPlugin): if getattr(params.structured, "enable_for_traceroute", None) is False: return output + # Parse traceroute output return parse_frr_traceroute( output=output, target=target, source=source, - ) + ) \ No newline at end of file