Files
cloudflared/prechecks/probes.go
T
Miguel da Costa Martins Marcelino 9978cfd0d5 TUN-10388 Implement dialers for connectivity checks
This PR implements all the dialers and resolvers needed to make pre-checks happen. So this task focuses on the following:

1. Implement the DNS probe: call DNSResolver.Resolve(region)
2. Implement the QUIC probe: call QUICDialer.DialQuic (handshake only, no stream opened) and record the result.
3. Implement the HTTP/2 probe: call TCPDialer.DialEdge (TCP + TLS handshake only, no frames sent) and record the result.
4. Implement the Management API probe: call ManagementDialer.DialContext to api.cloudflare.com:443 and record the result.
5. Export edgeDiscovery as EdgeDiscovery in edgediscovery/allregions/discovery.go so the pre-check can reuse the production DNS path.

This sets up the main components to implement the checker.
2026-04-30 15:15:25 +00:00

339 lines
10 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package prechecks
import (
"context"
"crypto/tls"
"fmt"
"math"
"net"
"net/netip"
"time"
"github.com/quic-go/quic-go"
"github.com/rs/zerolog"
"github.com/cloudflare/cloudflared/connection/dialopts"
"github.com/cloudflare/cloudflared/connection"
edgedial "github.com/cloudflare/cloudflared/edgediscovery"
"github.com/cloudflare/cloudflared/edgediscovery/allregions"
)
// Package-level constants shared by the connectivity probes in this file.
const (
// perProbeDialTimeout bounds a single probe's dial. It is used as the context
// timeout in probeQUIC and probeManagementAPI and as the dial timeout handed
// to DialEdge in probeHTTP2.
perProbeDialTimeout = 5 * time.Second
// Action messages: remediation hints placed in CheckResult.Action when a
// probe fails. actionDNSFail is a format string with two %s verbs; probeDNS
// fills both with the target hostname.
actionDNSFail = "Ensure your DNS resolver can resolve '%s'. Run: dig A %s @1.1.1.1. If that fails, contact your network administrator."
actionQUICBlocked = "QUIC traffic failed to connect to port 7844."
actionHTTP2Blocked = "Allow outbound TCP on port 7844."
actionAPIUnreachable = "cloudflared will still run, but automatic software updates are unavailable. " +
"Ensure port 443 TCP to api.cloudflare.com is open if you want auto-updates."
// Component names placed in CheckResult.Component.
componentDNSResolution = "DNS Resolution"
componentUDPConnectivity = "UDP Connectivity"
componentTCPConnectivity = "TCP Connectivity"
componentCloudflareAPI = "Cloudflare API"
// Target identifiers placed in CheckResult.Target. targetAPI is also the
// address dialed by probeManagementAPI.
targetPortQUIC = "Port 7844 (QUIC)"
targetPortHTTP2 = "Port 7844 (HTTP/2)"
targetAPI = "api.cloudflare.com:443"
// Details messages placed in CheckResult.Details.
detailsNoAddressesReturned = "No addresses returned"
detailsResolvedSuccessfully = "Resolved successfully"
detailsHandshakeFailed = "Handshake failed"
detailsHandshakeSuccessful = "Handshake successful"
detailsBlockedOrUnreachable = "Blocked or unreachable"
detailsTLSHandshakeSuccessful = "TLS handshake successful"
detailsConnectionFailed = "Connection failed"
detailsTCPPortReachable = "TCP port reachable (TLS not validated)"
detailsDNSPrerequisiteFailed = "DNS prerequisite failed"
// Region hostnames returned by regionTargets: the global pair is the default,
// the US pair is used for region "us" and the Fed pair for region "fed".
region1Global = "region1.v2.argotunnel.com"
region2Global = "region2.v2.argotunnel.com"
region1US = "us-region1.v2.argotunnel.com"
region2US = "us-region2.v2.argotunnel.com"
region1Fed = "fed-region1.v2.argotunnel.com"
region2Fed = "fed-region2.v2.argotunnel.com"
)
// EdgeDNSResolver looks up edge addresses through allregions.EdgeDiscovery.
// Its Resolve method has the shape probeDNS calls on a DNSResolver.
type EdgeDNSResolver struct {
	// Log is the logger handed to allregions.EdgeDiscovery.
	Log *zerolog.Logger
}

// Resolve converts region into a service name with
// allregions.RegionalServiceName and returns whatever
// allregions.EdgeDiscovery reports for that name: the address groups and
// the error, both unmodified.
func (r *EdgeDNSResolver) Resolve(region string) ([][]*allregions.EdgeAddr, error) {
	serviceName := allregions.RegionalServiceName(region)
	return allregions.EdgeDiscovery(r.Log, serviceName)
}
// EdgeTCPDialer is a stateless adapter around edgediscovery.DialEdge. Its
// DialEdge method has the shape probeHTTP2 calls on a TCPDialer.
type EdgeTCPDialer struct{}

// DialEdge forwards every argument, unchanged and in the same order, to
// edgediscovery.DialEdge and returns its connection and error as-is.
func (*EdgeTCPDialer) DialEdge(
	ctx context.Context,
	timeout time.Duration,
	tlsConfig *tls.Config,
	addr *net.TCPAddr,
	localIP net.IP,
) (net.Conn, error) {
	conn, err := edgedial.DialEdge(ctx, timeout, tlsConfig, addr, localIP)
	return conn, err
}
// EdgeQUICDialer is a stateless adapter around connection.DialQuic. Its
// DialQuic method has the shape probeQUIC calls on a QUICDialer.
type EdgeQUICDialer struct{}

// DialQuic forwards every argument, unchanged and in the same order, to
// connection.DialQuic and returns its connection and error as-is.
func (*EdgeQUICDialer) DialQuic(
	ctx context.Context,
	quicConfig *quic.Config,
	tlsConfig *tls.Config,
	addr netip.AddrPort,
	localAddr net.IP,
	connIndex uint8,
	logger *zerolog.Logger,
	opts dialopts.DialOpts,
) (quic.Connection, error) {
	conn, err := connection.DialQuic(
		ctx,
		quicConfig,
		tlsConfig,
		addr,
		localAddr,
		connIndex,
		logger,
		opts,
	)
	return conn, err
}
// NetManagementDialer adapts a standard-library net.Dialer to the
// DialContext call that probeManagementAPI makes on a ManagementDialer.
type NetManagementDialer struct {
	// Dialer performs the actual dial; its zero value is usable.
	Dialer net.Dialer
}

// DialContext dials addr on the given network using the embedded
// net.Dialer and returns its connection and error unchanged.
func (d *NetManagementDialer) DialContext(ctx context.Context, network, addr string) (net.Conn, error) {
	underlying := &d.Dialer
	return underlying.DialContext(ctx, network, addr)
}
// probeDNS asks resolver for the edge address groups of region and turns the
// answer into DNS CheckResults.
//
// When the resolver returns an error or no groups at all, the returned
// address slice is nil and two Fail results are produced, one per hostname
// from regionTargets. Their Details hold the resolver's error text, or
// detailsNoAddressesReturned when there was no error.
//
// Otherwise the resolver's groups are returned untouched together with one
// result per group: Fail (with a remediation Action) for an empty group and
// Pass for a non-empty one. The first two groups are labelled with the
// hostnames from regionTargets; any further group is labelled
// "region<N>.v2.argotunnel.com".
// NOTE(review): that fallback label carries no "us-"/"fed-" prefix even for
// regional lookups; confirm whether more than two groups can occur there.
func probeDNS(
	resolver DNSResolver,
	region string,
) ([][]*allregions.EdgeAddr, []CheckResult) {
	// dnsFailure builds the Fail result shared by every failure path.
	dnsFailure := func(hostname, details string) CheckResult {
		return CheckResult{
			Type:        ProbeTypeDNS,
			Component:   componentDNSResolution,
			Target:      hostname,
			ProbeStatus: Fail,
			Details:     details,
			Action:      fmt.Sprintf(actionDNSFail, hostname, hostname),
		}
	}

	resolved, resolveErr := resolver.Resolve(region)
	first, second := regionTargets(region)

	switch {
	case resolveErr != nil:
		reason := resolveErr.Error()
		return nil, []CheckResult{dnsFailure(first, reason), dnsFailure(second, reason)}
	case len(resolved) == 0:
		return nil, []CheckResult{
			dnsFailure(first, detailsNoAddressesReturned),
			dnsFailure(second, detailsNoAddressesReturned),
		}
	}

	known := [...]string{first, second}
	checks := make([]CheckResult, len(resolved))
	for idx := range resolved {
		var hostname string
		if idx < len(known) {
			hostname = known[idx]
		} else {
			hostname = fmt.Sprintf("region%d.v2.argotunnel.com", idx+1)
		}

		if len(resolved[idx]) == 0 {
			checks[idx] = dnsFailure(hostname, detailsNoAddressesReturned)
			continue
		}
		checks[idx] = CheckResult{
			Type:        ProbeTypeDNS,
			Component:   componentDNSResolution,
			Target:      hostname,
			ProbeStatus: Pass,
			Details:     detailsResolvedSuccessfully,
		}
	}
	return resolved, checks
}
// probeQUIC dials a single edge address over QUIC and reports whether the
// dial succeeded. On success the connection is closed straight away; this
// function opens no streams and sends nothing further. The original
// rationale is to avoid triggering the OTD registration timeout (TUN-6732).
//
// The TLS server name and ALPN list come from
// connection.QUIC.ProbeTLSSettings(); per the original documentation this is
// a dedicated probe SNI (probe.cftunnel.com) rather than the production
// quic.cftunnel.com, to prevent OTD log noise.
//
// The dial runs under a child context limited to perProbeDialTimeout so one
// blocked handshake cannot use up the parent context's whole budget.
//
// addr must be non-nil. logger is passed to the dialer and is used to log a
// failed close at debug level.
func probeQUIC(
	ctx context.Context,
	dialer QUICDialer,
	addr *allregions.EdgeAddr,
	logger *zerolog.Logger,
) CheckResult {
	// Fields common to both outcomes; status, details and action are set below.
	outcome := CheckResult{
		Type:      ProbeTypeQUIC,
		Component: componentUDPConnectivity,
		Target:    targetPortQUIC,
	}

	probeCtx, stop := context.WithTimeout(ctx, perProbeDialTimeout)
	defer stop()

	settings := connection.QUIC.ProbeTLSSettings()

	// connIndex is a placeholder (math.MaxUint8). The original author notes
	// that the probe path bypasses the connIndex check, so the value is not
	// meaningful.
	// NOTE(review): that note refers to an isProbe flag, while this call only
	// sets SkipPortReuse; confirm against connection.DialQuic.
	session, dialErr := dialer.DialQuic(
		probeCtx,
		&quic.Config{},
		&tls.Config{
			ServerName:       settings.ServerName,
			NextProtos:       settings.NextProtos,
			MinVersion:       tls.VersionTLS13,
			CurvePreferences: []tls.CurveID{tls.CurveP256},
		},
		addr.UDP.AddrPort(),
		nil,
		math.MaxUint8,
		logger,
		dialopts.DialOpts{SkipPortReuse: true},
	)
	if dialErr != nil {
		outcome.ProbeStatus = Fail
		outcome.Details = detailsHandshakeFailed
		outcome.Action = actionQUICBlocked
		return outcome
	}

	// A close failure does not change the verdict; it is only logged.
	if closeErr := session.CloseWithError(0, "precheck complete"); closeErr != nil {
		logger.Debug().Err(closeErr).Msg("Failed to close QUIC connection after successful handshake")
	}

	outcome.ProbeStatus = Pass
	outcome.Details = detailsHandshakeSuccessful
	return outcome
}
// probeHTTP2 dials a single edge address through dialer.DialEdge and reports
// whether the dial succeeded. On success the connection is closed straight
// away; this function sends no HTTP/2 frames, keeping the probe minimal.
//
// The TLS server name comes from connection.HTTP2.ProbeTLSSettings(); per
// the original documentation this is a dedicated probe SNI
// (probe.cftunnel.com) rather than the production h2.cftunnel.com, to
// prevent OTD log noise.
//
// perProbeDialTimeout is passed as the dial timeout so one blocked dial
// cannot use up the whole suite budget. addr must be non-nil.
func probeHTTP2(ctx context.Context, dialer TCPDialer, addr *allregions.EdgeAddr) CheckResult {
	// Fields common to both outcomes; status, details and action are set below.
	outcome := CheckResult{
		Type:      ProbeTypeHTTP2,
		Component: componentTCPConnectivity,
		Target:    targetPortHTTP2,
	}

	settings := connection.HTTP2.ProbeTLSSettings()
	edgeConn, dialErr := dialer.DialEdge(
		ctx,
		perProbeDialTimeout,
		&tls.Config{
			ServerName:       settings.ServerName,
			MinVersion:       tls.VersionTLS12,
			CurvePreferences: []tls.CurveID{tls.CurveP256},
		},
		addr.TCP,
		nil,
	)
	if dialErr != nil {
		outcome.ProbeStatus = Fail
		outcome.Details = detailsBlockedOrUnreachable
		outcome.Action = actionHTTP2Blocked
		return outcome
	}

	// The verdict is already known; a close error is deliberately ignored.
	_ = edgeConn.Close()

	outcome.ProbeStatus = Pass
	outcome.Details = detailsTLSHandshakeSuccessful
	return outcome
}
// probeManagementAPI checks that a TCP connection to targetAPI
// (api.cloudflare.com:443) can be established. Only the dial is performed;
// this function does no TLS handshake, so a Pass means the port is
// reachable and nothing more.
//
// The dial runs under a child context limited to perProbeDialTimeout. Per
// the original documentation a failure here is a soft one: the tunnel can
// run without the API, but automatic software updates will be unavailable.
func probeManagementAPI(ctx context.Context, dialer ManagementDialer) CheckResult {
	// Fields common to both outcomes; status, details and action are set below.
	outcome := CheckResult{
		Type:      ProbeTypeManagementAPI,
		Component: componentCloudflareAPI,
		Target:    targetAPI,
	}

	probeCtx, stop := context.WithTimeout(ctx, perProbeDialTimeout)
	defer stop()

	apiConn, dialErr := dialer.DialContext(probeCtx, "tcp", targetAPI)
	if dialErr != nil {
		outcome.ProbeStatus = Fail
		outcome.Details = detailsConnectionFailed
		outcome.Action = actionAPIUnreachable
		return outcome
	}

	// The verdict is already known; a close error is deliberately ignored.
	_ = apiConn.Close()

	outcome.ProbeStatus = Pass
	outcome.Details = detailsTCPPortReachable
	return outcome
}
// skipResult builds a CheckResult with status Skip for the given probe type,
// component and target. Details is always detailsDNSPrerequisiteFailed and
// no Action is set.
func skipResult(probeType ProbeType, component, target string) CheckResult {
	var skipped CheckResult
	skipped.Type = probeType
	skipped.Component = component
	skipped.Target = target
	skipped.ProbeStatus = Skip
	skipped.Details = detailsDNSPrerequisiteFailed
	return skipped
}
// regionTargets returns the human-readable hostnames for region1 and region2
// based on the optional region flag value.
func regionTargets(region string) (string, string) {
switch region {
case "us":
return region1US, region2US
case "fed":
return region1Fed, region2Fed
default:
return region1Global, region2Global
}
}
// addrsByFamily picks at most one IPv4 and one IPv6 address from group. For
// each family it builds an allregions.Region restricted to that family and
// takes GetAnyAddress from it; the original documentation states this keeps
// the IP-version preference logic identical to production.
//
// A family is skipped, and its result left nil, when ipVersion excludes it:
// v4 is nil for allregions.IPv6Only and v6 is nil for allregions.IPv4Only.
// NOTE(review): a result may presumably also be nil when group holds no
// address of that family; confirm against Region.GetAnyAddress.
func addrsByFamily(group []*allregions.EdgeAddr, ipVersion allregions.ConfigIPVersion) (v4, v6 *allregions.EdgeAddr) {
	wantV4 := ipVersion != allregions.IPv6Only
	wantV6 := ipVersion != allregions.IPv4Only

	if wantV4 {
		v4 = allregions.NewRegion(group, allregions.IPv4Only).GetAnyAddress()
	}
	if wantV6 {
		v6 = allregions.NewRegion(group, allregions.IPv6Only).GetAnyAddress()
	}
	return v4, v6
}