From 8811dc82bd69cb3687e98ec470af0e64425d49d1 Mon Sep 17 00:00:00 2001 From: Amir Raminfar Date: Mon, 1 Jun 2026 07:59:19 -0700 Subject: [PATCH] fix(cloud): resolve read-only container tools in one shot (no extra LLM round-trip) (#4767) Co-authored-by: Claude Opus 4.8 (1M context) --- internal/cloud/tools.go | 2 +- internal/cloud/tools_containers.go | 6 +- internal/cloud/tools_logs.go | 15 +- internal/cloud/tools_resolve.go | 263 +++++++++++++++--- internal/cloud/tools_resolve_readpath_test.go | 239 ++++++++++++++++ internal/cloud/tools_resolve_test.go | 66 +++++ internal/cloud/tools_stream.go | 15 +- 7 files changed, 568 insertions(+), 38 deletions(-) create mode 100644 internal/cloud/tools_resolve_readpath_test.go diff --git a/internal/cloud/tools.go b/internal/cloud/tools.go index c9cf0671..9b557ed9 100644 --- a/internal/cloud/tools.go +++ b/internal/cloud/tools.go @@ -67,7 +67,7 @@ var ( Properties: map[string]paramProperty{}, }) - containerIDParam = paramProperty{Type: "string", Description: "Container name or ID. You can pass the container name directly (as shown in logs, events, and find_containers) — it does not need to be the opaque ID. Resolved by exact name first, then ID, then a unique name substring. If the name matches more than one container the call fails with the list of candidates so you can disambiguate."} + containerIDParam = paramProperty{Type: "string", Description: "Container name or ID. You can pass the container name directly (as shown in logs, events, and find_containers) — it does not need to be the opaque ID. Resolved by exact name first, then ID, then a unique name substring. When a name matches several containers (e.g. a Swarm service with stopped task corpses, or multiple replicas), read-only tools (inspect/logs) resolve to the most relevant one — the running replica, or the most-recently-active if all are stopped — and tell you which one (and its siblings) in the result, so you don't need to look up the ID first. Write tools (start/stop/restart/remove/update) never guess between live containers: an ambiguous name fails with the candidate list so you can re-issue with an exact ID."} hostIDParam = paramProperty{Type: "string", Description: "Host name or ID (from list_hosts or find_containers). Optional — omit it when the container name is unique across all hosts; supply it (name or ID) only to scope to a specific host when a name is ambiguous."} boolFalse = false diff --git a/internal/cloud/tools_containers.go b/internal/cloud/tools_containers.go index c0c41809..da9cb825 100644 --- a/internal/cloud/tools_containers.go +++ b/internal/cloud/tools_containers.go @@ -168,7 +168,11 @@ func executeInspectContainer(argsJSON string, deps ToolDeps) (*pb.CallToolRespon if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { return nil, fmt.Errorf("failed to parse arguments: %w", err) } - hostID, containerID, err := resolveContainerRef(args.ContainerID, args.Host, deps) + // Read-only: resolve an ambiguous name in one shot rather than erroring and + // forcing a find_containers round-trip. The note is discarded here because + // inspect already returns the concrete id/name/state, which is itself the + // disambiguation — no need to mangle those structured fields with prose. + hostID, containerID, _, err := resolveContainerRefRead(args.ContainerID, args.Host, deps) if err != nil { return nil, err } diff --git a/internal/cloud/tools_logs.go b/internal/cloud/tools_logs.go index 3a783ad4..f6ada309 100644 --- a/internal/cloud/tools_logs.go +++ b/internal/cloud/tools_logs.go @@ -27,7 +27,7 @@ func executeFetchContainerLogs(ctx context.Context, argsJSON string, deps ToolDe if err := json.Unmarshal([]byte(argsJSON), &args); err != nil { return nil, fmt.Errorf("failed to parse arguments: %w", err) } - hostID, containerID, err := resolveContainerRef(args.ContainerID, args.Host, deps) + hostID, containerID, note, err := resolveContainerRefRead(args.ContainerID, args.Host, deps) if err != nil { return nil, err } @@ -93,6 +93,17 @@ func executeFetchContainerLogs(ctx context.Context, argsJSON string, deps ToolDe return &pb.CallToolResponse{ Success: true, - Result: &pb.CallToolResponse_FetchLogs{FetchLogs: &pb.FetchLogsResult{ContainerName: cs.Container.Name, Entries: entries}}, + Result: &pb.CallToolResponse_FetchLogs{FetchLogs: &pb.FetchLogsResult{ContainerName: withResolutionNote(cs.Container.Name, note), Entries: entries}}, }, nil } + +// withResolutionNote appends the read-path resolution note (if any) to a +// model-visible identity string, so the model learns which container an +// ambiguous name resolved to — and that siblings exist — in the same turn, +// without a round-trip. Empty note returns name unchanged. +func withResolutionNote(name, note string) string { + if note == "" { + return name + } + return name + " " + note +} diff --git a/internal/cloud/tools_resolve.go b/internal/cloud/tools_resolve.go index e3d655df..5855892b 100644 --- a/internal/cloud/tools_resolve.go +++ b/internal/cloud/tools_resolve.go @@ -11,6 +11,14 @@ import ( // id) plus an optional host reference (a name OR an id) into the concrete // (hostID, containerID) pair that HostService.FindContainer expects. // +// This is the STRICT (write) resolver used by the destructive tools +// (start/stop/restart/remove/update). It NEVER picks between multiple live +// containers — acting on the wrong one is irreversible — so a name that matches +// more than one container resolves only when exactly one candidate is running +// (the others being stopped task corpses); otherwise it fails with the +// candidate listing. Read-only tools use resolveContainerRefRead, which is +// allowed to pick the most-relevant container in one shot (see that function). +// // LLMs almost always have only the container name (it is what logs, events and // listings surface), so every container-scoped tool funnels through here rather // than passing the raw reference straight to FindContainer — which only matches @@ -23,32 +31,128 @@ import ( // 3. unique substring of the name // // The first tier that yields any candidates wins. An id match is always unique, -// so it is never treated as ambiguous. If a name tier yields more than one -// container the call fails with an error listing every candidate (name + id + -// host) so the caller can disambiguate — we NEVER silently pick one. This is -// what makes the write tools (stop/restart/remove/update) safe to drive by name. +// so it is never treated as ambiguous. When a name tier yields more than one +// container we try to break the tie by running state: if exactly one candidate +// is running it is the unambiguous live referent (the others are stopped task +// corpses left by Swarm redeploys / restart churn) and we resolve to it. Only +// when the tie is real — zero running, or several live containers — does the +// call fail with an error listing every candidate (name + id + state + host) so +// the caller can disambiguate. We NEVER pick between multiple *live* containers, +// which is what keeps the write tools (stop/restart/remove/update) safe by name. // // hostRef is optional: when empty the container must resolve unambiguously // across all hosts; when supplied it scopes the search to that host (matched by // id or name). func resolveContainerRef(containerRef, hostRef string, deps ToolDeps) (hostID, containerID string, err error) { + tier, trimmedHostRef, scopedHostID, hostNames, err := matchContainerTier(containerRef, hostRef, deps) + if err != nil { + return "", "", err + } + + switch len(tier) { + case 0: + // No tier matched. When an explicit host was given (and resolved to a + // real host id) but the listing produced no match — e.g. a host returned + // a partial error and was omitted from ListAllContainers — pass the + // reference straight through to FindContainer's direct lookup, exactly as + // before this resolver existed. This guarantees id-based callers never + // regress. + if scopedHostID != "" { + return scopedHostID, strings.TrimSpace(containerRef), nil + } + return "", "", fmt.Errorf("no container matching %q found across all connected hosts; call find_containers to list available containers", strings.TrimSpace(containerRef)) + case 1: + return tier[0].Host, tier[0].ID, nil + default: + // Multiple matches. The usual benign cause is Docker Swarm (or plain + // restart churn) leaving stopped task containers behind across redeploys + // — the short name "svc.1" substring-matches every historical + // "svc.1." corpse alongside the one live task. When exactly one + // candidate is running it is the unambiguous live referent, so resolve to + // it rather than make the caller hunt for an id. We still refuse to + // choose between multiple *live* containers — that is the real ambiguity + // the write tools must never guess at. + if live := runningContainers(tier); len(live) == 1 { + return live[0].Host, live[0].ID, nil + } + return "", "", ambiguousError(strings.TrimSpace(containerRef), trimmedHostRef, tier, hostNames) + } +} + +// resolveContainerRefRead is the PERMISSIVE (read-only) resolver used by the +// non-destructive tools (inspect_container, fetch_container_logs, stream_logs). +// +// It matches identically to resolveContainerRef, but when a name is ambiguous +// it does NOT error — it resolves to the single most-relevant candidate in one +// shot, eliminating the find-then-act round-trip that otherwise costs a whole +// extra LLM call on the investigation path. The pick is safe precisely because +// the tool is read-only: reading the wrong replica's logs is cheap and +// recoverable, and same-service replicas are interchangeable for diagnosis, +// whereas restarting the wrong one is not. The strict resolver above keeps the +// write tools honest. +// +// Selection (bestCandidate): running containers beat stopped ones, then newest +// StartedAt, then most-recently-finished/created. For the crash-loop case — +// every "svc.1.*" task exited — this lands on the corpse that died most +// recently, which is what the investigation is usually about. +// +// When it picks among several candidates it returns a one-line note naming the +// chosen container and its siblings. The caller surfaces that note in the tool +// RESULT (not via a round-trip), so the model gets the disambiguation context +// for free in the same turn and can re-call with an explicit id if it genuinely +// wants a different replica — but it is never forced to. note is empty when the +// match was unique (nothing to disclose). +func resolveContainerRefRead(containerRef, hostRef string, deps ToolDeps) (hostID, containerID, note string, err error) { + tier, _, scopedHostID, hostNames, err := matchContainerTier(containerRef, hostRef, deps) + if err != nil { + return "", "", "", err + } + + switch len(tier) { + case 0: + if scopedHostID != "" { + return scopedHostID, strings.TrimSpace(containerRef), "", nil + } + return "", "", "", fmt.Errorf("no container matching %q found across all connected hosts; call find_containers to list available containers", strings.TrimSpace(containerRef)) + case 1: + return tier[0].Host, tier[0].ID, "", nil + default: + // Exactly one running candidate is unambiguous — the corpses are never a + // valid target — so resolve cleanly with no note, identical to the strict + // resolver. A note is only warranted when the read path actually exercises + // its relaxation: zero running (pick the newest corpse) or several running + // replicas (pick the newest live one). + if live := runningContainers(tier); len(live) == 1 { + return live[0].Host, live[0].ID, "", nil + } + best := bestCandidate(tier) + note = resolutionNote(strings.TrimSpace(containerRef), best, tier, hostNames) + return best.Host, best.ID, note, nil + } +} + +// matchContainerTier runs the shared scoping + tiered matching used by both +// resolvers and returns the winning tier (the first non-empty of exact-id, +// exact-name, substring). It also returns the trimmed hostRef, the resolved +// scoped host id (empty when no host was supplied), and the host-name map for +// building human-readable notes/errors. An empty returned tier means no match. +func matchContainerTier(containerRef, hostRef string, deps ToolDeps) (tier []container.Container, trimmedHostRef, scopedHostID string, hostNames map[string]string, err error) { containerRef = strings.TrimSpace(containerRef) if containerRef == "" { - return "", "", fmt.Errorf("container_id is required") + return nil, "", "", nil, fmt.Errorf("container_id is required") } containers, errs := deps.HostService.ListAllContainers(deps.Labels) logHostErrors(errs) - hostNames := buildHostNameMap(deps.HostService) + hostNames = buildHostNameMap(deps.HostService) // Scope to a host if one was supplied. The host reference may be an id or a // name; an unknown host is an explicit error rather than a silent no-match. - hostRef = strings.TrimSpace(hostRef) - var scopedHostID string - if hostRef != "" { - scopedHostID, err = resolveHostRef(hostRef, deps) + trimmedHostRef = strings.TrimSpace(hostRef) + if trimmedHostRef != "" { + scopedHostID, err = resolveHostRef(trimmedHostRef, deps) if err != nil { - return "", "", err + return nil, trimmedHostRef, "", hostNames, err } filtered := containers[:0:0] for _, c := range containers { @@ -75,27 +179,12 @@ func resolveContainerRef(containerRef, hostRef string, deps ToolDeps) (hostID, c } } - for _, tier := range [][]container.Container{exactID, exactName, substring} { - switch len(tier) { - case 0: - continue - case 1: - return tier[0].Host, tier[0].ID, nil - default: - return "", "", ambiguousError(containerRef, hostRef, tier, hostNames) + for _, t := range [][]container.Container{exactID, exactName, substring} { + if len(t) > 0 { + return t, trimmedHostRef, scopedHostID, hostNames, nil } } - - // Legacy fallback: when an explicit host was given (and resolved to a real - // host id) but the listing produced no match — e.g. a host returned a - // partial error and was omitted from ListAllContainers — pass the reference - // straight through to FindContainer's direct lookup, exactly as before this - // resolver existed. This guarantees id-based callers never regress. - if scopedHostID != "" { - return scopedHostID, containerRef, nil - } - - return "", "", fmt.Errorf("no container matching %q found across all connected hosts; call find_containers to list available containers", containerRef) + return nil, trimmedHostRef, scopedHostID, hostNames, nil } // matchesID reports whether ref identifies the container id — either the full id @@ -152,7 +241,7 @@ func ambiguousError(containerRef, hostRef string, candidates []container.Contain parts := make([]string, len(candidates)) sameHost := true for i, c := range candidates { - parts[i] = fmt.Sprintf("%s (id %s on host %s)", c.Name, shortID(c.ID), resolveHostName(c.Host, hostNames)) + parts[i] = fmt.Sprintf("%s (id %s, %s, on host %s)", c.Name, shortID(c.ID), describeState(c), resolveHostName(c.Host, hostNames)) if c.Host != candidates[0].Host { sameHost = false } @@ -170,6 +259,118 @@ func ambiguousError(containerRef, hostRef string, candidates []container.Contain return fmt.Errorf("%q matches multiple containers: %s. To act on the right one, %s", containerRef, strings.Join(parts, "; "), hint) } +// runningContainers returns only the candidates in the running state. It breaks +// a multi-match tie down to the single live container when every other candidate +// is a stopped corpse (the typical Swarm-redeploy / restart-churn case), which +// is the one situation where picking among matches is unambiguous and safe. +func runningContainers(cs []container.Container) []container.Container { + live := make([]container.Container, 0, len(cs)) + for _, c := range cs { + if strings.EqualFold(c.State, "running") { + live = append(live, c) + } + } + return live +} + +// bestCandidate picks the single most-relevant container from an ambiguous name +// match for the read-only path. Ordering: running beats stopped, then newest +// StartedAt, then most-recently-finished, then most-recently-created. For the +// crash-loop case (every "svc.1.*" task exited) this lands on the corpse that +// died most recently — the one the investigation is almost always about. The +// caller guarantees len(cs) > 0. +func bestCandidate(cs []container.Container) container.Container { + best := cs[0] + for _, c := range cs[1:] { + if moreRelevant(c, best) { + best = c + } + } + return best +} + +// moreRelevant reports whether a should be preferred over b as the read-path +// referent. Running always wins; among the same running-ness, the more-recently +// active container wins (StartedAt, then FinishedAt, then Created). +func moreRelevant(a, b container.Container) bool { + aRunning, bRunning := isRunning(a), isRunning(b) + if aRunning != bRunning { + return aRunning + } + if !a.StartedAt.Equal(b.StartedAt) { + return a.StartedAt.After(b.StartedAt) + } + if !a.FinishedAt.Equal(b.FinishedAt) { + return a.FinishedAt.After(b.FinishedAt) + } + return a.Created.After(b.Created) +} + +func isRunning(c container.Container) bool { + return strings.EqualFold(c.State, "running") +} + +// resolutionNote renders the one-line transparency note the read-only tools +// surface in their result when a name resolved to one of several candidates. It +// names the chosen container (state-annotated), summarizes how many siblings of +// each running-ness exist, and lists the sibling names — so the model can re-call +// with an explicit id if it wants a different replica, without ever being forced +// to. Returned as a parenthetical prefix the tools prepend to a human-visible +// field. +func resolutionNote(containerRef string, chosen container.Container, candidates []container.Container, hostNames map[string]string) string { + running := len(runningContainers(candidates)) + + // Annotate sibling names with their host only when the candidates span more + // than one host — that is the case where the model needs the host to re-target + // a sibling; on a single host it would just be noise. + multiHost := false + for _, c := range candidates { + if c.Host != chosen.Host { + multiHost = true + break + } + } + + siblings := make([]string, 0, len(candidates)-1) + for _, c := range candidates { + if c.ID == chosen.ID { + continue + } + if multiHost { + siblings = append(siblings, fmt.Sprintf("%s on host %s", c.Name, resolveHostName(c.Host, hostNames))) + } else { + siblings = append(siblings, c.Name) + } + } + + var summary string + switch { + case running > 1 && isRunning(chosen): + summary = fmt.Sprintf("newest of %d running replicas", running) + case running == 0: + summary = fmt.Sprintf("most recently active of %d stopped containers", len(candidates)) + default: + summary = fmt.Sprintf("1 of %d matching containers", len(candidates)) + } + + return fmt.Sprintf("(resolved %q → %s [%s], %s; others: %s)", + containerRef, chosen.Name, describeState(chosen), summary, strings.Join(siblings, ", ")) +} + +// describeState renders a candidate's state (with health when known) for the +// ambiguity listing, so the caller can tell a live replica from a stopped corpse +// and pick the right one. A blank state is reported as "unknown". +func describeState(c container.Container) string { + state := c.State + if state == "" { + state = "unknown" + } + if c.Health != "" { + return fmt.Sprintf("%s (%s)", state, c.Health) + } + return state +} + // shortID trims a full docker id to its conventional 12-character form for // display in errors. func shortID(id string) string { diff --git a/internal/cloud/tools_resolve_readpath_test.go b/internal/cloud/tools_resolve_readpath_test.go new file mode 100644 index 00000000..14bcb0dd --- /dev/null +++ b/internal/cloud/tools_resolve_readpath_test.go @@ -0,0 +1,239 @@ +package cloud + +import ( + "context" + "testing" + "time" + + "github.com/amir20/dozzle/internal/container" + container_support "github.com/amir20/dozzle/internal/support/container" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" +) + +// The read path collapses the find-then-act round-trip: when a name matches +// several same-service replicas/corpses, a read-only tool resolves to the +// single most-relevant container (running first, then newest active) in one +// shot instead of erroring and forcing the LLM to call find_containers and +// re-issue. The write path stays strict — these tests lock both halves in. + +// --- resolver unit tests: read mode (resolveContainerRefRead) --- + +func TestResolveRead_AllStopped_PicksMostRecentlyActiveCorpse(t *testing.T) { + // The crash-loop case: every "svc.1.*" task has exited. There is no live + // container, but a read-only inspect/logs call still has an obvious target — + // the corpse that died most recently, which is what the investigation is + // about. Resolve to it rather than erroring. + t0 := time.Date(2026, 6, 1, 10, 0, 0, 0, time.UTC) + deps := resolverDeps([]container.Container{ + {ID: "old", Name: "svc.1.aaa", Host: "local", State: "exited", StartedAt: t0, FinishedAt: t0.Add(1 * time.Minute)}, + {ID: "newest", Name: "svc.1.bbb", Host: "local", State: "exited", StartedAt: t0.Add(5 * time.Minute), FinishedAt: t0.Add(6 * time.Minute)}, + {ID: "mid", Name: "svc.1.ccc", Host: "local", State: "exited", StartedAt: t0.Add(2 * time.Minute), FinishedAt: t0.Add(3 * time.Minute)}, + }) + + host, id, note, err := resolveContainerRefRead("svc.1", "", deps) + assert.NoError(t, err) + assert.Equal(t, "local", host) + assert.Equal(t, "newest", id) + // The pick is transparent: the note names the chosen task and that siblings + // exist, so the model can re-call with an explicit id if it wants another. + assert.NotEmpty(t, note) + assert.Contains(t, note, "svc.1.bbb") +} + +func TestResolveRead_MultipleRunning_PicksNewestRunningWithNote(t *testing.T) { + // Several live replicas of one service. For a read (logs/inspect) the + // replicas are interchangeable, so resolve to the newest running one and say + // so — never force a round-trip. + t0 := time.Date(2026, 6, 1, 10, 0, 0, 0, time.UTC) + deps := resolverDeps([]container.Container{ + {ID: "r-old", Name: "svc.1.aaa", Host: "local", State: "running", StartedAt: t0}, + {ID: "r-new", Name: "svc.1.bbb", Host: "local", State: "running", StartedAt: t0.Add(10 * time.Minute)}, + {ID: "dead", Name: "svc.1.ccc", Host: "local", State: "exited", StartedAt: t0.Add(20 * time.Minute)}, + }) + + host, id, note, err := resolveContainerRefRead("svc.1", "", deps) + assert.NoError(t, err) + assert.Equal(t, "local", host) + // Running beats a newer-but-dead container; among running, newest StartedAt. + assert.Equal(t, "r-new", id) + assert.NotEmpty(t, note) + assert.Contains(t, note, "svc.1.bbb") + assert.Contains(t, note, "running") +} + +func TestResolveRead_SingleRunning_NoNote(t *testing.T) { + // Exactly one running among corpses — same outcome as the write path, and + // since the choice is unambiguous there is nothing to disclose: no note. + deps := resolverDeps([]container.Container{ + {ID: "dead", Name: "svc.1.aaa", Host: "local", State: "exited"}, + {ID: "live", Name: "svc.1.bbb", Host: "local", State: "running"}, + }) + + host, id, note, err := resolveContainerRefRead("svc.1", "", deps) + assert.NoError(t, err) + assert.Equal(t, "local", host) + assert.Equal(t, "live", id) + assert.Empty(t, note) +} + +func TestResolveRead_UniqueName_NoNote(t *testing.T) { + // A plain unique match resolves exactly as before with no note — the read + // relaxation only adds behavior to the previously-erroring ambiguous case. + deps := resolverDeps([]container.Container{ + {ID: "abc123def456", Name: "nginx", Host: "local", State: "running"}, + {ID: "fff999", Name: "redis", Host: "local", State: "running"}, + }) + + host, id, note, err := resolveContainerRefRead("nginx", "", deps) + assert.NoError(t, err) + assert.Equal(t, "local", host) + assert.Equal(t, "abc123def456", id) + assert.Empty(t, note) +} + +func TestResolveRead_AmbiguousAcrossHosts_PicksNewestRunning(t *testing.T) { + // Same name on two hosts. The write path refuses (host_id needed). The read + // path is allowed to pick the newest running one across hosts — reads are + // safe and the note discloses the cross-host siblings. + t0 := time.Date(2026, 6, 1, 10, 0, 0, 0, time.UTC) + deps := resolverDeps([]container.Container{ + {ID: "a", Name: "nginx", Host: "host-a", State: "running", StartedAt: t0}, + {ID: "b", Name: "nginx", Host: "host-b", State: "running", StartedAt: t0.Add(1 * time.Minute)}, + }) + + host, id, note, err := resolveContainerRefRead("nginx", "", deps) + assert.NoError(t, err) + assert.Equal(t, "host-b", host) + assert.Equal(t, "b", id) + assert.NotEmpty(t, note) +} + +func TestResolveRead_NotFound_StillErrors(t *testing.T) { + // No match at all is still an error — the relaxation never invents a target. + deps := resolverDeps([]container.Container{ + {ID: "id1", Name: "nginx", Host: "local"}, + }) + + _, _, _, err := resolveContainerRefRead("does-not-exist", "", deps) + assert.Error(t, err) + assert.Contains(t, err.Error(), "no container matching") +} + +func TestResolveRead_HostScoped_StillRespectsHost(t *testing.T) { + // An explicit host still scopes the search even on the read path. + deps := resolverDeps([]container.Container{ + {ID: "id1", Name: "nginx", Host: "host-a", State: "running"}, + {ID: "id2", Name: "nginx", Host: "host-b", State: "running"}, + }) + + host, id, _, err := resolveContainerRefRead("nginx", "host-b", deps) + assert.NoError(t, err) + assert.Equal(t, "host-b", host) + assert.Equal(t, "id2", id) +} + +// --- write path stays strict: the relaxation must NOT leak into writes --- + +func TestResolveWrite_AllStopped_StillRefuses(t *testing.T) { + // The read path now resolves this; the write path must still refuse, because + // picking a destructive target among corpses is exactly the ambiguity write + // tools must never guess at. + deps := resolverDeps([]container.Container{ + {ID: "s1", Name: "svc.1.aaa", Host: "local", State: "exited"}, + {ID: "s2", Name: "svc.1.bbb", Host: "local", State: "exited"}, + }) + + _, _, err := resolveContainerRef("svc.1", "", deps) + assert.Error(t, err) + assert.Contains(t, err.Error(), "matches multiple containers") +} + +func TestResolveWrite_MultipleRunning_StillRefuses(t *testing.T) { + deps := resolverDeps([]container.Container{ + {ID: "r1", Name: "svc.1.aaa", Host: "local", State: "running"}, + {ID: "r2", Name: "svc.1.bbb", Host: "local", State: "running"}, + }) + + _, _, err := resolveContainerRef("svc.1", "", deps) + assert.Error(t, err) + assert.Contains(t, err.Error(), "matches multiple containers") +} + +// --- end-to-end through ExecuteTool: read tools resolve in one shot --- + +// closedLogsClientService is a MockClientService whose LogsBetweenDates returns +// an already-closed channel, so executeFetchContainerLogs's range loop drains +// immediately instead of blocking on the bare mock's nil channel. Embeds +// MockClientService so it satisfies the full ClientService interface unchanged. +type closedLogsClientService struct { + MockClientService +} + +func (c *closedLogsClientService) LogsBetweenDates(_ context.Context, _ container.Container, _ time.Time, _ time.Time, _ container.StdType) (<-chan *container.LogEvent, error) { + ch := make(chan *container.LogEvent) + close(ch) + return ch, nil +} + +func TestExecuteTool_FetchLogs_AllStopped_ResolvesNewestCorpseWithNote(t *testing.T) { + // fetch_container_logs on a crash-looped "svc.1" (every task exited) must + // succeed against the most-recently-active corpse and surface the pick in + // the result, not fail and force a find_containers round-trip. + t0 := time.Date(2026, 6, 1, 10, 0, 0, 0, time.UTC) + mockHost := &MockHostService{} + withResolver(mockHost, + container.Container{ID: "old", Name: "svc.1.aaa", Host: "local", State: "exited", StartedAt: t0}, + container.Container{ID: "newest", Name: "svc.1.bbb", Host: "local", State: "exited", StartedAt: t0.Add(5 * time.Minute)}, + ) + newestC := container.Container{ID: "newest", Name: "svc.1.bbb", Host: "local", State: "exited", StartedAt: t0.Add(5 * time.Minute)} + cs := container_support.NewContainerService(&closedLogsClientService{}, newestC) + mockHost.On("FindContainer", "local", "newest", container.ContainerLabels(nil)).Return(cs, nil) + + resp := ExecuteTool(context.Background(), "fetch_container_logs", `{"container_id":"svc.1"}`, ToolDeps{HostService: mockHost}) + assert.True(t, resp.Success) + // The returned container_name discloses the resolution so the model gets the + // sibling context for free in the same turn. + name := resp.GetFetchLogs().ContainerName + assert.Contains(t, name, "svc.1.bbb") + assert.Contains(t, name, "svc.1") // mentions the ambiguous ref it resolved + mockHost.AssertCalled(t, "FindContainer", "local", "newest", container.ContainerLabels(nil)) +} + +func TestExecuteTool_InspectContainer_AllStopped_ResolvesNewestCorpse(t *testing.T) { + // inspect_container on the same crash-loop resolves to the newest corpse in + // one shot. Inspect returns the concrete id/name/state (itself + // disambiguating), so we don't mangle its structured fields with a note. + t0 := time.Date(2026, 6, 1, 10, 0, 0, 0, time.UTC) + mockHost := &MockHostService{} + withResolver(mockHost, + container.Container{ID: "old", Name: "svc.1.aaa", Host: "local", State: "exited", StartedAt: t0}, + container.Container{ID: "newest", Name: "svc.1.bbb", Host: "local", State: "exited", StartedAt: t0.Add(5 * time.Minute)}, + ) + newestC := container.Container{ID: "newest", Name: "svc.1.bbb", Host: "local", State: "exited", StartedAt: t0.Add(5 * time.Minute)} + cs := container_support.NewContainerService(&MockClientService{}, newestC) + mockHost.On("FindContainer", "local", "newest", container.ContainerLabels(nil)).Return(cs, nil) + + resp := ExecuteTool(context.Background(), "inspect_container", `{"container_id":"svc.1"}`, ToolDeps{HostService: mockHost}) + assert.True(t, resp.Success) + assert.Equal(t, "newest", resp.GetInspectContainer().Id) + assert.Equal(t, "svc.1.bbb", resp.GetInspectContainer().Name) +} + +func TestExecuteTool_RestartContainer_AllStopped_StillRefuses(t *testing.T) { + // The destructive counterpart of the fetch_logs test above: the SAME + // ambiguous all-stopped ref that reads now resolve must still refuse for a + // write tool, and must never reach FindContainer / ContainerAction. + mockClient := &MockClientService{} + mockHost := &MockHostService{} + withResolver(mockHost, + container.Container{ID: "s1", Name: "svc.1.aaa", Host: "local", State: "exited"}, + container.Container{ID: "s2", Name: "svc.1.bbb", Host: "local", State: "exited"}, + ) + + resp := ExecuteTool(context.Background(), "restart_container", `{"container_id":"svc.1"}`, ToolDeps{HostService: mockHost, EnableActions: true}) + assert.False(t, resp.Success) + assert.Contains(t, resp.Error, "matches multiple containers") + mockHost.AssertNotCalled(t, "FindContainer", mock.Anything, mock.Anything, mock.Anything) + mockClient.AssertNotCalled(t, "ContainerAction", mock.Anything, mock.Anything, mock.Anything) +} diff --git a/internal/cloud/tools_resolve_test.go b/internal/cloud/tools_resolve_test.go index 9fe0223d..3912cf72 100644 --- a/internal/cloud/tools_resolve_test.go +++ b/internal/cloud/tools_resolve_test.go @@ -135,6 +135,52 @@ func TestResolveContainerRef_AmbiguousSubstring(t *testing.T) { assert.NotContains(t, err.Error(), "host_id") } +func TestResolveContainerRef_PrefersRunningAmongStoppedCorpses(t *testing.T) { + // The motivating case: Docker Swarm leaves stopped task containers behind + // across redeploys, so the short task name "svc.1" substring-matches every + // historical task — but only the current one is running. The single live + // container is the unambiguous referent; resolve to it instead of erroring. + deps := resolverDeps([]container.Container{ + {ID: "old1", Name: "svc.1.aaa", Host: "local", State: "exited"}, + {ID: "old2", Name: "svc.1.bbb", Host: "local", State: "exited"}, + {ID: "live", Name: "svc.1.ccc", Host: "local", State: "running"}, + {ID: "old3", Name: "svc.1.ddd", Host: "local", State: "exited"}, + }) + + host, id, err := resolveContainerRef("svc.1", "", deps) + assert.NoError(t, err) + assert.Equal(t, "local", host) + assert.Equal(t, "live", id) +} + +func TestResolveContainerRef_MultipleRunningStillAmbiguous(t *testing.T) { + // Two genuinely live replicas — we must NOT guess, and the listing must + // surface each candidate's state so the caller can pick the right one. + deps := resolverDeps([]container.Container{ + {ID: "r1", Name: "svc.1.aaa", Host: "host-a", State: "running"}, + {ID: "r2", Name: "svc.2.bbb", Host: "host-b", State: "running"}, + }) + + _, _, err := resolveContainerRef("svc", "", deps) + assert.Error(t, err) + assert.Contains(t, err.Error(), "matches multiple containers") + assert.Contains(t, err.Error(), "running") +} + +func TestResolveContainerRef_AllStoppedStillAmbiguous(t *testing.T) { + // No live container to prefer — refuse and list the candidates with their + // state rather than picking an arbitrary corpse. + deps := resolverDeps([]container.Container{ + {ID: "s1", Name: "svc.1.aaa", Host: "local", State: "exited"}, + {ID: "s2", Name: "svc.1.bbb", Host: "local", State: "exited"}, + }) + + _, _, err := resolveContainerRef("svc.1", "", deps) + assert.Error(t, err) + assert.Contains(t, err.Error(), "matches multiple containers") + assert.Contains(t, err.Error(), "exited") +} + func TestResolveContainerRef_HostDisambiguates(t *testing.T) { // Same name on two hosts; supplying the host id resolves cleanly. deps := resolverDeps([]container.Container{ @@ -272,3 +318,23 @@ func TestExecuteTool_RestartContainer_ByName_HostInferred(t *testing.T) { assert.Equal(t, "id1", resp.GetAction().ContainerId) mockClient.AssertCalled(t, "ContainerAction", mock.Anything, mock.Anything, container.Restart) } + +func TestExecuteTool_RestartContainer_PrefersRunningReplica(t *testing.T) { + // A name matching one running task and several stopped corpses resolves to + // the live task even for a write tool — the corpses are never a valid target, + // so picking the single running container is safe (and what the user means). + mockClient := &MockClientService{} + mockClient.On("ContainerAction", mock.Anything, mock.Anything, container.Restart).Return(nil) + cs := container_support.NewContainerService(mockClient, container.Container{ID: "live", Name: "svc.1.ccc", Host: "local", State: "running"}) + + mockHost := &MockHostService{} + withResolver(mockHost, + container.Container{ID: "dead", Name: "svc.1.aaa", Host: "local", State: "exited"}, + container.Container{ID: "live", Name: "svc.1.ccc", Host: "local", State: "running"}, + ) + mockHost.On("FindContainer", "local", "live", container.ContainerLabels(nil)).Return(cs, nil) + + resp := ExecuteTool(context.Background(), "restart_container", `{"container_id":"svc.1"}`, ToolDeps{HostService: mockHost, EnableActions: true}) + assert.True(t, resp.Success) + assert.Equal(t, "live", resp.GetAction().ContainerId) +} diff --git a/internal/cloud/tools_stream.go b/internal/cloud/tools_stream.go index 20e80a23..ab9b5a79 100644 --- a/internal/cloud/tools_stream.go +++ b/internal/cloud/tools_stream.go @@ -68,7 +68,11 @@ func executeStreamLogs(ctx context.Context, requestID string, argsJSON string, d return err } - hostID, containerID, err := resolveContainerRef(args.ContainerID, args.Host, deps) + // Read-only: resolve an ambiguous name in one shot instead of erroring. note + // is non-empty when the name resolved to one of several candidates; it is + // surfaced once, on the first emitted batch, so the model learns the pick and + // its siblings without a round-trip and without repeating on every batch. + hostID, containerID, note, err := resolveContainerRefRead(args.ContainerID, args.Host, deps) if err != nil { return err } @@ -87,10 +91,16 @@ func executeStreamLogs(ctx context.Context, requestID string, argsJSON string, d } }() + noteSent := false sendBatch := func(entries []*pb.LogEntry, endStream bool) error { if len(entries) == 0 && !endStream { return nil } + name := cs.Container.Name + if !noteSent { + name = withResolutionNote(name, note) + noteSent = true + } resp := &pb.ToolResponse{ RequestId: requestID, Type: &pb.ToolResponse_CallTool{ @@ -100,7 +110,7 @@ func executeStreamLogs(ctx context.Context, requestID string, argsJSON string, d EndStream: endStream, Result: &pb.CallToolResponse_FetchLogs{ FetchLogs: &pb.FetchLogsResult{ - ContainerName: cs.Container.Name, + ContainerName: name, Entries: entries, }, }, @@ -162,4 +172,3 @@ func executeStreamLogs(ctx context.Context, requestID string, argsJSON string, d } } } -