From f17b6b9fc3cdf32b046e96d6aa1e85398d91c9a4 Mon Sep 17 00:00:00 2001 From: silverwind <2021+silverwind@noreply.gitea.com> Date: Fri, 29 May 2026 22:33:44 +0000 Subject: [PATCH] fix(container): re-validate cached container id before reuse (#1003) `containerReference.id` was cached from `Create()` and never re-validated, so a container torn down out-of-band (AutoRemove on an unexpected exit, daemon-side cleanup, sibling-job race in a parallel matrix) left a stale id behind. The next `Copy`/`Exec` then hit the daemon with that dead id and failed the otherwise-successful job with `Could not find the file /var/run/act/ in container `. `find()` now `ContainerInspect`s the cached id and clears it only on a definitive `NotFound`; transient errors trust the cache so cleanup pipelines don't abort on a daemon blip. Operations that need a live container (`copyContent`/`copyDir`/`CopyTarStream`/`exec`/`GetContainerArchive`) fail fast with a clear `container "" does not exist` instead of the daemon's generic empty-id error. --- This PR was written with the help of Claude Opus 4.7 --------- Co-authored-by: Nicolas Reviewed-on: https://gitea.com/gitea/runner/pulls/1003 Reviewed-by: Nicolas Co-authored-by: silverwind <2021+silverwind@noreply.gitea.com> Co-committed-by: silverwind <2021+silverwind@noreply.gitea.com> --- act/container/docker_run.go | 40 ++++++++++- act/container/docker_run_test.go | 110 +++++++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 2 deletions(-) diff --git a/act/container/docker_run.go b/act/container/docker_run.go index b141bd62..15a62479 100644 --- a/act/container/docker_run.go +++ b/act/container/docker_run.go @@ -26,6 +26,7 @@ import ( "dario.cat/mergo" "github.com/Masterminds/semver" + cerrdefs "github.com/containerd/errdefs" "github.com/docker/cli/cli/compose/loader" "github.com/docker/cli/cli/connhelper" "github.com/go-git/go-billy/v5/helper/polyfill" @@ -152,6 +153,8 @@ func (cr *containerReference) Copy(destPath string, files ...*FileEntry) common. func (cr *containerReference) CopyDir(destPath, srcPath string, useGitIgnore bool) common.Executor { return common.NewPipelineExecutor( common.NewInfoExecutor("docker cp src=%s dst=%s", srcPath, destPath), + cr.connect(), + cr.find(), cr.copyDir(destPath, srcPath, useGitIgnore), func(ctx context.Context) error { // If this fails, then folders have wrong permissions on non root container @@ -167,6 +170,16 @@ func (cr *containerReference) GetContainerArchive(ctx context.Context, srcPath s if common.Dryrun(ctx) { return nil, errors.New("DRYRUN is not supported in GetContainerArchive") } + // Direct entry point (no pipeline) — revalidate cr.id ourselves. + if err := cr.connect()(ctx); err != nil { + return nil, err + } + if err := cr.find()(ctx); err != nil { + return nil, err + } + if cr.id == "" { + return nil, cr.missingContainerError("get archive %s", srcPath) + } result, err := cr.cli.CopyFromContainer(ctx, cr.id, client.CopyFromContainerOptions{SourcePath: srcPath}) if err != nil { return nil, err @@ -314,10 +327,22 @@ func (cr *containerReference) Close() common.Executor { } } +// missingContainerError is the shared "container X does not exist" error +// used by ops that need a live cr.id. +func (cr *containerReference) missingContainerError(format string, args ...any) error { + return fmt.Errorf("container %q does not exist; cannot "+format, append([]any{cr.input.Name}, args...)...) +} + func (cr *containerReference) find() common.Executor { return func(ctx context.Context) error { if cr.id != "" { - return nil + // Validate cached id; clear only on definitive NotFound so a + // transient daemon error doesn't abort cleanup pipelines. + _, err := cr.cli.ContainerInspect(ctx, cr.id, client.ContainerInspectOptions{}) + if !cerrdefs.IsNotFound(err) { + return nil + } + cr.id = "" } containers, err := cr.cli.ContainerList(ctx, client.ContainerListOptions{ All: true, @@ -335,7 +360,6 @@ func (cr *containerReference) find() common.Executor { } } - cr.id = "" return nil } } @@ -592,6 +616,9 @@ func (cr *containerReference) extractFromImageEnv(env *map[string]string) common func (cr *containerReference) exec(cmd []string, env map[string]string, user, workdir string) common.Executor { return func(ctx context.Context) error { + if cr.id == "" { + return cr.missingContainerError("exec %v", cmd) + } logger := common.Logger(ctx) // Fix slashes when running on Windows if runtime.GOOS == "windows" { @@ -746,6 +773,9 @@ func (cr *containerReference) waitForCommand(ctx context.Context, isTerminal boo } func (cr *containerReference) CopyTarStream(ctx context.Context, destPath string, tarStream io.Reader) error { + if cr.id == "" { + return cr.missingContainerError("copy to %s", destPath) + } // Mkdir buf := &bytes.Buffer{} tw := tar.NewWriter(buf) @@ -779,6 +809,9 @@ func (cr *containerReference) CopyTarStream(ctx context.Context, destPath string func (cr *containerReference) copyDir(dstPath, srcPath string, useGitIgnore bool) common.Executor { return func(ctx context.Context) error { + if cr.id == "" { + return cr.missingContainerError("copy directory to %s", dstPath) + } logger := common.Logger(ctx) tarFile, err := os.CreateTemp("", "act") if err != nil { @@ -853,6 +886,9 @@ func (cr *containerReference) copyDir(dstPath, srcPath string, useGitIgnore bool func (cr *containerReference) copyContent(dstPath string, files ...*FileEntry) common.Executor { return func(ctx context.Context) error { + if cr.id == "" { + return cr.missingContainerError("copy to %s", dstPath) + } logger := common.Logger(ctx) var buf bytes.Buffer tw := tar.NewWriter(&buf) diff --git a/act/container/docker_run_test.go b/act/container/docker_run_test.go index 39f406aa..7f2c5226 100644 --- a/act/container/docker_run_test.go +++ b/act/container/docker_run_test.go @@ -19,6 +19,7 @@ import ( "gitea.com/gitea/runner/act/common" + cerrdefs "github.com/containerd/errdefs" "github.com/moby/moby/api/types/container" mobyclient "github.com/moby/moby/client" "github.com/sirupsen/logrus/hooks/test" @@ -98,6 +99,16 @@ func (m *mockDockerClient) CopyToContainer(ctx context.Context, id string, optio return args.Get(0).(mobyclient.CopyToContainerResult), args.Error(1) } +func (m *mockDockerClient) ContainerInspect(ctx context.Context, id string, opts mobyclient.ContainerInspectOptions) (mobyclient.ContainerInspectResult, error) { + args := m.Called(ctx, id, opts) + return args.Get(0).(mobyclient.ContainerInspectResult), args.Error(1) +} + +func (m *mockDockerClient) ContainerList(ctx context.Context, opts mobyclient.ContainerListOptions) (mobyclient.ContainerListResult, error) { + args := m.Called(ctx, opts) + return args.Get(0).(mobyclient.ContainerListResult), args.Error(1) +} + type endlessReader struct { io.Reader } @@ -298,6 +309,105 @@ func TestDockerCopyTarStreamErrorInMkdir(t *testing.T) { client.AssertExpectations(t) } +// find() must drop a stale cached id so later Copy/Exec don't hit the +// daemon with a torn-down container. +func TestFindRevalidatesStaleID(t *testing.T) { + ctx := context.Background() + notFound := cerrdefs.ErrNotFound.WithMessage("No such container") + boom := errors.New("daemon unreachable") + newCR := func(id string) (*containerReference, *mockDockerClient) { + client := &mockDockerClient{} + return &containerReference{id: id, cli: client, input: &NewContainerInput{Name: "job-1"}}, client + } + listOpts := mobyclient.ContainerListOptions{All: true} + inspectOpts := mobyclient.ContainerInspectOptions{} + + t.Run("stale id cleared, name lookup empty", func(t *testing.T) { + cr, client := newCR("stale") + client.On("ContainerInspect", ctx, "stale", inspectOpts).Return(mobyclient.ContainerInspectResult{}, notFound) + client.On("ContainerList", ctx, listOpts).Return(mobyclient.ContainerListResult{}, nil) + require.NoError(t, cr.find()(ctx)) + assert.Empty(t, cr.id) + client.AssertExpectations(t) + }) + + t.Run("stale id cleared, name lookup repopulates", func(t *testing.T) { + cr, client := newCR("stale") + client.On("ContainerInspect", ctx, "stale", inspectOpts).Return(mobyclient.ContainerInspectResult{}, notFound) + client.On("ContainerList", ctx, listOpts).Return(mobyclient.ContainerListResult{Items: []container.Summary{ + {ID: "other", Names: []string{"/somebody-else"}}, + {ID: "fresh", Names: []string{"/job-1"}}, + }}, nil) + require.NoError(t, cr.find()(ctx)) + assert.Equal(t, "fresh", cr.id) + client.AssertExpectations(t) + }) + + t.Run("live id kept", func(t *testing.T) { + cr, client := newCR("live") + client.On("ContainerInspect", ctx, "live", inspectOpts).Return(mobyclient.ContainerInspectResult{}, nil) + require.NoError(t, cr.find()(ctx)) + assert.Equal(t, "live", cr.id) + client.AssertExpectations(t) + }) + + t.Run("transient inspect error trusts cache", func(t *testing.T) { + cr, client := newCR("live") + client.On("ContainerInspect", ctx, "live", inspectOpts).Return(mobyclient.ContainerInspectResult{}, boom) + require.NoError(t, cr.find()(ctx)) + assert.Equal(t, "live", cr.id) + client.AssertExpectations(t) + }) + + t.Run("list error propagates", func(t *testing.T) { + cr, client := newCR("") + client.On("ContainerList", ctx, listOpts).Return(mobyclient.ContainerListResult{}, boom) + require.ErrorIs(t, cr.find()(ctx), boom) + client.AssertExpectations(t) + }) +} + +// Every daemon entry point fails fast with a clear, container-named +// error when no live cr.id is known. +func TestRejectsMissingContainer(t *testing.T) { + ctx := context.Background() + client := &mockDockerClient{} + client.On("ContainerList", ctx, mobyclient.ContainerListOptions{All: true}).Return(mobyclient.ContainerListResult{}, nil) + cr := &containerReference{cli: client, input: &NewContainerInput{Name: "job-1"}} + check := func(op string, err error) { + t.Helper() + require.Error(t, err, op) + assert.Contains(t, err.Error(), `container "job-1" does not exist`, op) + } + check("copyContent", cr.copyContent("/var/run/act", &FileEntry{Name: "x", Mode: 0o644})(ctx)) + check("copyDir", cr.copyDir("/var/run/act", "/src", false)(ctx)) + check("CopyTarStream", cr.CopyTarStream(ctx, "/var/run/act", &bytes.Buffer{})) + check("exec", cr.exec([]string{"echo"}, nil, "", "")(ctx)) + _, err := cr.GetContainerArchive(ctx, "/var/run/act/x") + check("GetContainerArchive", err) +} + +// End-to-end: a stale cr.id is cleared, repopulated from name lookup, +// and the Copy completes against the fresh id. +func TestPublicCopyPipelineHandlesStaleID(t *testing.T) { + ctx := context.Background() + client := &mockDockerClient{} + client.On("ContainerInspect", ctx, "stale", mobyclient.ContainerInspectOptions{}). + Return(mobyclient.ContainerInspectResult{}, cerrdefs.ErrNotFound.WithMessage("gone")) + client.On("ContainerList", ctx, mobyclient.ContainerListOptions{All: true}). + Return(mobyclient.ContainerListResult{Items: []container.Summary{ + {ID: "fresh", Names: []string{"/job-1"}}, + }}, nil) + client.On("CopyToContainer", ctx, "fresh", mock.MatchedBy(func(opts mobyclient.CopyToContainerOptions) bool { + return opts.DestinationPath == "/var/run/act" + })).Return(mobyclient.CopyToContainerResult{}, nil) + + cr := &containerReference{id: "stale", cli: client, input: &NewContainerInput{Name: "job-1"}} + require.NoError(t, cr.Copy("/var/run/act", &FileEntry{Name: "x", Mode: 0o644})(ctx)) + assert.Equal(t, "fresh", cr.id) + client.AssertExpectations(t) +} + // TestDockerCopyToSymlinkPath is a regression test for gitea/runner#981. Most base images // symlink /var/run to /run, so copying into /var/run/act traverses that symlink. The broken // docker 29.5.1 daemon fails the extraction with "mkdirat var/run: file exists" (fixed in