From df54d27710192c26f3e88570ee15f91c76abe5e8 Mon Sep 17 00:00:00 2001 From: Miguel da Costa Martins Marcelino Date: Wed, 15 Apr 2026 22:40:23 +0000 Subject: [PATCH] TUN-10385: Add connectivity checks foundation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the foundation for the new cloudflared pre-checks by creating a new prechecks package. This adds the following: * types.go: Status, CheckResult, Report, Config (add IPVersion allregions.ConfigIPVersion field to Config) * interfaces.go: DNSResolver, TCPDialer, QUICDialer, ManagementDialer --- prechecks/interfaces.go | 82 +++++++++++++++++++++++++++++++++++ prechecks/types.go | 94 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 prechecks/interfaces.go create mode 100644 prechecks/types.go diff --git a/prechecks/interfaces.go b/prechecks/interfaces.go new file mode 100644 index 00000000..d8e97b01 --- /dev/null +++ b/prechecks/interfaces.go @@ -0,0 +1,82 @@ +package prechecks + +import ( + "context" + "crypto/tls" + "net" + "net/netip" + "time" + + "github.com/quic-go/quic-go" + "github.com/rs/zerolog" + + "github.com/cloudflare/cloudflared/edgediscovery/allregions" +) + +// DNSResolver abstracts edge DNS discovery used by DNS probes. +// +// The production implementation wraps allregions.EdgeDiscovery +// (edgediscovery/allregions/discovery.go), which performs an SRV lookup for +// _v2-origintunneld._tcp.argotunnel.com, falls back to DNS-over-TLS when the +// system resolver fails, and resolves each discovered hostname via +// net.LookupIP. The returned slice already has each address tagged with +// .IPVersion = V4 or V6. +// +// Note: allregions.EdgeDiscovery must be exported (currently unexported as +// edgeDiscovery) before a production adapter can be wired up. 
+type DNSResolver interface {
+	// Resolve performs edge discovery for region ("" selects the global
+	// endpoints; "us" / "fed" select regional ones). Each element of the outer
+	// slice holds the addresses resolved for one CNAME target, mirroring the
+	// structure returned by allregions.EdgeDiscovery.
+	Resolve(region string) ([][]*allregions.EdgeAddr, error)
+}
+
+// TCPDialer abstracts the TCP + TLS handshake used by HTTP/2 connectivity probes.
+//
+// The production implementation wraps edgediscovery.DialEdge
+// (edgediscovery/dial.go), which is the same function supervisor/tunnel.go
+// uses for production HTTP/2 connections. Reusing it ensures the pre-check
+// validates the identical dial path the tunnel will take.
+type TCPDialer interface {
+	// DialEdge dials the edge TCP address addr and performs the TLS handshake
+	// with tlsConfig, respecting timeout, and returns the established
+	// connection. The caller is responsible for closing the connection.
+	DialEdge(ctx context.Context, timeout time.Duration, tlsConfig *tls.Config, addr *net.TCPAddr, localIP net.IP) (net.Conn, error)
+}
+
+// QUICDialer abstracts the UDP + QUIC handshake used by QUIC connectivity probes.
+//
+// The production implementation wraps connection.DialQuic
+// (connection/quic.go), which is the same function supervisor/tunnel.go uses
+// for production QUIC connections. The pre-check performs a handshake only —
+// no streams are opened and no RPC frames are sent — to avoid triggering the
+// OTD registration timeout described in TUN-6732.
+type QUICDialer interface {
+	// DialQuic performs a QUIC handshake to the given edge address and returns
+	// the established connection. The caller is responsible for closing the
+	// connection. connIndex is used for UDP port reuse bookkeeping consistent
+	// with the production dial path.
+	DialQuic(
+		ctx context.Context,
+		quicConfig *quic.Config,
+		tlsConfig *tls.Config,
+		addr netip.AddrPort,
+		localAddr net.IP,
+		connIndex uint8,
+		logger *zerolog.Logger,
+	) (quic.Connection, error)
+}
+
+// ManagementDialer abstracts the TCP dial to api.cloudflare.com:443 used by
+// the Management API probe.
+//
+// A successful TCP connection (no TLS handshake required) is sufficient to
+// confirm that port 443 is reachable. This probe is always a soft failure:
+// the tunnel can run without it, but automatic software updates will be
+// unavailable.
+type ManagementDialer interface {
+	// DialContext opens a TCP connection to addr; the signature matches
+	// (*net.Dialer).DialContext. The caller must close the connection.
+	DialContext(ctx context.Context, network, addr string) (net.Conn, error)
+}
diff --git a/prechecks/types.go b/prechecks/types.go
new file mode 100644
index 00000000..ee81c7e9
--- /dev/null
+++ b/prechecks/types.go
@@ -0,0 +1,94 @@
+package prechecks
+
+import (
+	"time"
+
+	"github.com/cloudflare/cloudflared/connection"
+	"github.com/cloudflare/cloudflared/edgediscovery/allregions"
+)
+
+// Status represents the outcome of a single connectivity pre-check.
+type Status int
+
+const (
+	// Pass indicates the check completed successfully (zero value of Status).
+	Pass Status = iota
+	// Warn indicates a soft failure: cloudflared can still run but in a
+	// degraded state (e.g. one transport blocked, API unreachable).
+	Warn
+	// Fail indicates a check failure that the user should act on (e.g.
+	// DNS unresolvable, both transports blocked). cloudflared still starts;
+	// this status is purely informational.
+	Fail
+	// Skip indicates the check was not executed because a prerequisite
+	// check (typically DNS) failed first.
+	Skip
+)
+
+// String returns "PASS", "WARN", "FAIL" or "SKIP"; any other value yields "UNKNOWN".
+func (s Status) String() string {
+	// Values outside the declared Pass..Skip range have no display name.
+	if s < Pass || s > Skip {
+		return "UNKNOWN"
+	}
+	// Pass..Skip are consecutive iota values, so they index the table directly.
+	names := [...]string{
+		Pass: "PASS",
+		Warn: "WARN",
+		Fail: "FAIL",
+		Skip: "SKIP",
+	}
+	return names[s]
+}
+
+// CheckResult records what a single connectivity probe found.
+type CheckResult struct {
+	// Component is the human-readable category of the probe as shown in the
+	// report table, e.g. "DNS Resolution", "QUIC Connectivity".
+	Component string
+
+	// Target identifies the address or resource that was probed, e.g.
+	// "region1.v2.argotunnel.com" or "Port 7844 (QUIC)".
+	Target string
+
+	// ProbeStatus is the probe's outcome; its zero value is Pass.
+	ProbeStatus Status
+
+	// Details is a brief description of the outcome for display in the table,
+	// e.g. "Resolved successfully" or "Handshake failed".
+	Details string
+
+	// Action carries a human-readable remediation instruction and is
+	// non-empty when ProbeStatus is Warn or Fail, e.g.
+	// "Allow outbound QUIC on port 7844."
+	Action string
+}
+
+// Report collects every CheckResult produced by one Run() invocation.
+// Pre-checks execute in parallel with tunnel initialization and are purely
+// diagnostic: the Report is shown to the user but never gates startup.
+type Report struct {
+	// Results holds one entry per executed probe, in the order the results
+	// were collected.
+	Results []CheckResult
+
+	// SuggestedProtocol is the connection protocol recommended by the
+	// pre-checks, derived from the transport probe results.
+	SuggestedProtocol connection.Protocol
+}
+
+// Config holds the settings for a pre-check Run().
+type Config struct {
+	// Region is the value of the optional cloudflared --region flag. When set,
+	// the regional edge hostnames are probed instead of the global ones.
+	Region string
+
+	// Timeout bounds the wall-clock time the entire pre-check suite may take
+	// to complete.
+	Timeout time.Duration
+
+	// IPVersion selects which address families the transport checks probe.
+	// It mirrors the --edge-ip-version CLI flag so that the pre-check
+	// exercises the same code paths the tunnel itself will use.
+	IPVersion allregions.ConfigIPVersion
+}