diff --git a/act/runner/run_context.go b/act/runner/run_context.go index 70919361..37aa7e52 100644 --- a/act/runner/run_context.go +++ b/act/runner/run_context.go @@ -601,10 +601,34 @@ func (rc *RunContext) interpolateOutputs() common.Executor { func (rc *RunContext) startContainer() common.Executor { return func(ctx context.Context) error { + var err error if rc.IsHostEnv(ctx) { - return rc.startHostEnvironment()(ctx) + err = rc.startHostEnvironment()(ctx) + } else { + err = rc.startJobContainer()(ctx) } - return rc.startJobContainer()(ctx) + if err != nil { + // The job executor's teardown only runs after a successful start, so a failed + // start would otherwise leak the per-job network and container. + rc.cleanupFailedStart(ctx) + } + return err + } +} + +func (rc *RunContext) cleanupFailedStart(ctx context.Context) { + if rc.cleanUpJobContainer == nil { + return + } + cleanCtx := ctx + if ctx.Err() != nil { + // the start likely failed because ctx was cancelled, detach so teardown still runs + var cancel context.CancelFunc + cleanCtx, cancel = context.WithTimeout(common.WithLogger(context.Background(), common.Logger(ctx)), time.Minute) + defer cancel() + } + if err := rc.cleanUpJobContainer(cleanCtx); err != nil { + common.Logger(ctx).Errorf("Error while cleaning up after failed container start for job %s: %v", rc.JobName, err) } } diff --git a/act/runner/run_context_test.go b/act/runner/run_context_test.go index b2e2e087..22e993c9 100644 --- a/act/runner/run_context_test.go +++ b/act/runner/run_context_test.go @@ -19,6 +19,7 @@ import ( log "github.com/sirupsen/logrus" assert "github.com/stretchr/testify/assert" + require "github.com/stretchr/testify/require" yaml "go.yaml.in/yaml/v4" ) @@ -659,3 +660,53 @@ func TestPrintStartJobContainerGroupGolden(t *testing.T) { }, "\n") assert.Equal(t, want, buf.String()) } + +func TestRunContext_cleanupFailedStart(t *testing.T) { + type ctxKey string + const sentinel = ctxKey("sentinel") + + // the fresh context is cancelled via defer on return, so capture state inside the stub + type capture struct { + calls int + err error + sentinel any + } + newRC := func(c *capture) *RunContext { + return &RunContext{ + JobName: "job", + cleanUpJobContainer: func(ctx context.Context) error { + c.calls++ + c.err = ctx.Err() + c.sentinel = ctx.Value(sentinel) + return nil + }, + } + } + + t.Run("runs teardown on the live context", func(t *testing.T) { + var c capture + ctx := context.WithValue(context.Background(), sentinel, "v") + + newRC(&c).cleanupFailedStart(ctx) + + assert.Equal(t, 1, c.calls) + require.NoError(t, c.err) + assert.Equal(t, "v", c.sentinel) + }) + + t.Run("falls back to a fresh context when the input is done", func(t *testing.T) { + var c capture + ctx, cancel := context.WithCancel(context.WithValue(context.Background(), sentinel, "v")) + cancel() + + newRC(&c).cleanupFailedStart(ctx) + + assert.Equal(t, 1, c.calls) + require.NoError(t, c.err) + assert.Nil(t, c.sentinel) + }) + + t.Run("no-op when there is nothing to clean up", func(t *testing.T) { + assert.NotPanics(t, func() { (&RunContext{}).cleanupFailedStart(context.Background()) }) + }) +}