3 Commits

Author SHA1 Message Date
silverwind
594c9ade7c Align step failure log output with GitHub Actions (#927)
Fixes #926.

Before:

<img src="/attachments/a5ae9221-eee2-410a-964e-6103ce126df4" alt="image.png" width="400">

After:

<img width="400" alt="image.png" src="attachments/2f2d67c4-6080-4ec3-9ae5-df33e6479920">

Also gets rid of a bunch of emojis in the logging and the obsolete link to `nektos/act`, and aligns some other error messages.

---
This PR was written with the help of Claude Opus 4.7

---------

Co-authored-by: Nicolas <bircni@icloud.com>
Reviewed-on: https://gitea.com/gitea/runner/pulls/927
Reviewed-by: Nicolas <bircni@icloud.com>
Co-authored-by: silverwind <me@silverwind.io>
Co-committed-by: silverwind <me@silverwind.io>
2026-05-05 20:17:32 +00:00
Nicolas
2a4d56c650 feat: add startup janitor for stale bind-workdir task workspaces (#870)
- Add idle-time cleanup for stale bind-workdir task directories instead of cleaning them on the task execution path.
- Make cleanup behavior configurable with `runner.workdir_cleanup_age` as the stale-age threshold (default: `24h`) and `runner.idle_cleanup_interval` as the idle cleanup cadence (default: `10m`).
- Restrict cleanup scope to numeric task directory names only, to avoid touching operator-managed folders.
- Document the cleanup settings in `config.example.yaml` and `README.md`.
- Add tests for stale-directory cleanup, idle cleanup throttling, and config default/override parsing.

## Why

When a runner or host crashes, normal per-task cleanup may not run, leaving stale task directories under the bind-workdir root. Running this cleanup only while the runner is idle recovers that disk space without adding overhead to active job execution.

If you want, I can also tighten the wording around `startup_cleanup_age`, since the key name now reads a bit misleadingly relative to the actual behavior.

---------

Co-authored-by: silverwind <me@silverwind.io>
Reviewed-on: https://gitea.com/gitea/runner/pulls/870
Reviewed-by: silverwind <2021+silverwind@noreply.gitea.com>
2026-05-05 20:11:44 +00:00
Nicolas
a22119cf88 fix(host): correct host workspace cleanup on Windows (#883)
## Summary
- Fix host-mode cleanup to remove the job **workspace** directory after a run (instead of leaving checkouts behind).
- On Windows, track step process PIDs and terminate remaining process trees during teardown before attempting workspace deletion (prevents file-lock failures).
- Skip workspace deletion when `bind_workdir` is enabled to avoid conflicting with runner-level task directory cleanup.

## Implementation details
- `HostEnvironment` now records PIDs for started commands and best-effort terminates them on Windows during `Remove()`.
- Workspace removal uses a small retry loop on Windows to handle transient locks.
- `BindWorkdir` is propagated into `HostEnvironment` so cleanup behavior matches runner configuration.

---------

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: silverwind <2021+silverwind@noreply.gitea.com>
Reviewed-on: https://gitea.com/gitea/runner/pulls/883
Reviewed-by: silverwind <2021+silverwind@noreply.gitea.com>
2026-05-05 18:28:12 +00:00
16 changed files with 835 additions and 40 deletions

View File

@@ -132,6 +132,12 @@ Besides `GITEA_INSTANCE_URL` and `GITEA_RUNNER_REGISTRATION_TOKEN`, the image en
For a fuller container-oriented walkthrough, see [examples/docker](examples/docker/README.md).
When `container.bind_workdir` is enabled, stale task workspace directories can be cleaned while the runner is idle:
- directories older than `runner.workdir_cleanup_age` are removed (default: `24h`; set `0` to disable)
- cleanup runs every `runner.idle_cleanup_interval` (default: `10m`; set `0` to disable)
- only purely numeric subdirectories under `container.workdir_parent` are treated as task workspaces and may be removed
- cleanup assumes `container.workdir_parent` is not shared across multiple runners
### Example Deployments
Check out the [examples](examples) directory for sample deployment types.

View File

@@ -6,6 +6,7 @@ package container
import (
"context"
"fmt"
"io"
"gitea.com/gitea/runner/act/common"
@@ -13,6 +14,13 @@ import (
"github.com/docker/go-connections/nat"
)
// ExitCodeError reports a non-zero process exit code from a container command.
type ExitCodeError int
func (e ExitCodeError) Error() string {
return fmt.Sprintf("Process completed with exit code %d.", int(e))
}
// NewContainerInput the input for the New function
type NewContainerInput struct {
Image string

View File

@@ -633,14 +633,10 @@ func (cr *containerReference) exec(cmd []string, env map[string]string, user, wo
return fmt.Errorf("failed to inspect exec: %w", err)
}
switch inspectResp.ExitCode {
case 0:
if inspectResp.ExitCode == 0 {
return nil
case 127:
return fmt.Errorf("exitcode '%d': command not found, please refer to https://github.com/nektos/act/issues/107 for more information", inspectResp.ExitCode)
default:
return fmt.Errorf("exitcode '%d': failure", inspectResp.ExitCode)
}
return ExitCodeError(inspectResp.ExitCode)
}
}
@@ -930,7 +926,7 @@ func (cr *containerReference) wait() common.Executor {
return nil
}
return fmt.Errorf("exit with `FAILURE`: %v", statusCode)
return ExitCodeError(statusCode)
}
}

View File

@@ -23,6 +23,7 @@ import (
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
)
func TestDocker(t *testing.T) {
@@ -85,6 +86,11 @@ func (m *mockDockerClient) ContainerExecInspect(ctx context.Context, execID stri
return args.Get(0).(types.ContainerExecInspect), args.Error(1)
}
func (m *mockDockerClient) ContainerWait(ctx context.Context, containerID string, condition container.WaitCondition) (<-chan container.WaitResponse, <-chan error) {
args := m.Called(ctx, containerID, condition)
return args.Get(0).(<-chan container.WaitResponse), args.Get(1).(<-chan error)
}
func (m *mockDockerClient) CopyToContainer(ctx context.Context, id, path string, content io.Reader, options types.CopyToContainerOptions) error {
args := m.Called(ctx, id, path, content, options)
return args.Error(0)
@@ -174,12 +180,43 @@ func TestDockerExecFailure(t *testing.T) {
}
err := cr.exec([]string{""}, map[string]string{}, "user", "workdir")(ctx)
assert.Error(t, err, "exit with `FAILURE`: 1") //nolint:testifylint // pre-existing issue from nektos/act
var exitErr ExitCodeError
require.ErrorAs(t, err, &exitErr)
assert.Equal(t, ExitCodeError(1), exitErr)
assert.Equal(t, "Process completed with exit code 1.", err.Error())
conn.AssertExpectations(t)
client.AssertExpectations(t)
}
func TestDockerWaitFailure(t *testing.T) {
ctx := context.Background()
statusCh := make(chan container.WaitResponse, 1)
statusCh <- container.WaitResponse{StatusCode: 2}
errCh := make(chan error, 1)
client := &mockDockerClient{}
client.On("ContainerWait", ctx, "123", container.WaitConditionNotRunning).
Return((<-chan container.WaitResponse)(statusCh), (<-chan error)(errCh))
cr := &containerReference{
id: "123",
cli: client,
input: &NewContainerInput{
Image: "image",
},
}
err := cr.wait()(ctx)
var exitErr ExitCodeError
require.ErrorAs(t, err, &exitErr)
assert.Equal(t, ExitCodeError(2), exitErr)
assert.Equal(t, "Process completed with exit code 2.", err.Error())
client.AssertExpectations(t)
}
func TestDockerCopyTarStream(t *testing.T) {
ctx := context.Background()

View File

@@ -16,7 +16,9 @@ import (
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
"gitea.com/gitea/runner/act/common"
@@ -34,9 +36,15 @@ type HostEnvironment struct {
TmpDir string
ToolCache string
Workdir string
ActPath string
CleanUp func()
StdOut io.Writer
// BindWorkdir is true when the app runner mounts the workspace on the host and
// deletes the task directory after the job; host teardown must not remove Workdir.
BindWorkdir bool
ActPath string
CleanUp func()
StdOut io.Writer
mu sync.Mutex
runningPIDs map[int]struct{}
}
func (e *HostEnvironment) Create(_, _ []string) common.Executor {
@@ -344,8 +352,30 @@ func (e *HostEnvironment) exec(ctx context.Context, command []string, cmdline st
if ppty != nil {
go writeKeepAlive(ppty)
}
err = cmd.Run()
// Split Start/Wait so the PID can be registered before the process can exit;
// cmd.Run() would block until exit, by which time the PID may have been reused.
if err := cmd.Start(); err != nil {
return err
}
if cmd.Process != nil {
e.mu.Lock()
if e.runningPIDs == nil {
e.runningPIDs = map[int]struct{}{}
}
e.runningPIDs[cmd.Process.Pid] = struct{}{}
e.mu.Unlock()
defer func(pid int) {
e.mu.Lock()
delete(e.runningPIDs, pid)
e.mu.Unlock()
}(cmd.Process.Pid)
}
err = cmd.Wait()
if err != nil {
var exitErr *exec.ExitError
if errors.As(err, &exitErr) {
return ExitCodeError(exitErr.ExitCode())
}
return err
}
if tty != nil {
@@ -385,12 +415,83 @@ func (e *HostEnvironment) UpdateFromEnv(srcPath string, env *map[string]string)
return parseEnvFile(e, srcPath, env)
}
func removePathWithRetry(ctx context.Context, path string) error {
if path == "" {
return nil
}
attempts := 1
delay := time.Duration(0)
if runtime.GOOS == "windows" {
attempts = 5
delay = 200 * time.Millisecond
}
var lastErr error
for i := 0; i < attempts; i++ {
if i > 0 {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(delay):
}
}
lastErr = os.RemoveAll(path)
if lastErr == nil {
return nil
}
}
return lastErr
}
func (e *HostEnvironment) terminateRunningProcesses(ctx context.Context) {
if runtime.GOOS != "windows" {
return
}
e.mu.Lock()
pids := make([]int, 0, len(e.runningPIDs))
for pid := range e.runningPIDs {
pids = append(pids, pid)
}
e.mu.Unlock()
if len(pids) == 0 {
return
}
logger := common.Logger(ctx)
for _, pid := range pids {
// Best-effort: forcibly terminate process tree to release file handles
// so that workspace cleanup can succeed on Windows.
cmd := exec.CommandContext(ctx, "taskkill", "/PID", strconv.Itoa(pid), "/T", "/F")
out, err := cmd.CombinedOutput()
if err != nil {
logger.Debugf("taskkill failed for pid=%d: %v output=%s", pid, err, strings.TrimSpace(string(out)))
}
}
}
func (e *HostEnvironment) Remove() common.Executor {
return func(ctx context.Context) error {
// Ensure any lingering child processes are ended before attempting
// to remove the workspace (Windows file locks otherwise prevent cleanup).
e.terminateRunningProcesses(ctx)
// Only removes per-job misc state. Must not remove the cache/toolcache root.
if e.CleanUp != nil {
e.CleanUp()
}
return os.RemoveAll(e.Path)
logger := common.Logger(ctx)
var errs []error
if err := removePathWithRetry(ctx, e.Path); err != nil {
logger.Warnf("failed to remove host misc state %s: %v", e.Path, err)
errs = append(errs, err)
}
if !e.BindWorkdir && e.Workdir != "" {
if err := removePathWithRetry(ctx, e.Workdir); err != nil {
logger.Warnf("failed to remove host workspace %s: %v", e.Workdir, err)
errs = append(errs, err)
}
}
return errors.Join(errs...)
}
}

View File

@@ -11,9 +11,14 @@ import (
"os"
"path"
"path/filepath"
"runtime"
"testing"
"gitea.com/gitea/runner/act/common"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Type assert HostEnvironment implements ExecutionsEnvironment
@@ -69,3 +74,76 @@ func TestGetContainerArchive(t *testing.T) {
_, err = reader.Next()
assert.ErrorIs(t, err, io.EOF)
}
func TestHostEnvironmentExecExitCode(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("uses POSIX shell")
}
dir := t.TempDir()
ctx := context.Background()
e := &HostEnvironment{
Path: filepath.Join(dir, "path"),
TmpDir: filepath.Join(dir, "tmp"),
ToolCache: filepath.Join(dir, "tool_cache"),
ActPath: filepath.Join(dir, "act_path"),
StdOut: io.Discard,
Workdir: filepath.Join(dir, "path"),
}
for _, p := range []string{e.Path, e.TmpDir, e.ToolCache, e.ActPath} {
assert.NoError(t, os.MkdirAll(p, 0o700)) //nolint:testifylint // test setup
}
err := e.Exec([]string{"sh", "-c", "exit 3"}, map[string]string{"PATH": os.Getenv("PATH")}, "", "")(ctx)
var exitErr ExitCodeError
require.ErrorAs(t, err, &exitErr)
assert.Equal(t, ExitCodeError(3), exitErr)
assert.Equal(t, "Process completed with exit code 3.", err.Error())
}
func TestHostEnvironmentRemoveCleansWorkdir(t *testing.T) {
logger := logrus.New()
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
base := t.TempDir()
miscRoot := filepath.Join(base, "misc")
path := filepath.Join(miscRoot, "hostexecutor")
require.NoError(t, os.MkdirAll(path, 0o700))
workdir := filepath.Join(base, "workspace", "owner", "repo")
require.NoError(t, os.MkdirAll(workdir, 0o700))
e := &HostEnvironment{
Path: path,
Workdir: workdir,
BindWorkdir: false,
CleanUp: func() {
_ = os.RemoveAll(miscRoot)
},
StdOut: os.Stdout,
}
require.NoError(t, e.Remove()(ctx))
_, err := os.Stat(workdir)
assert.ErrorIs(t, err, os.ErrNotExist)
}
func TestHostEnvironmentRemoveSkipsWorkdirWhenBindWorkdir(t *testing.T) {
logger := logrus.New()
ctx := common.WithLogger(context.Background(), logrus.NewEntry(logger))
base := t.TempDir()
miscRoot := filepath.Join(base, "misc")
path := filepath.Join(miscRoot, "hostexecutor")
require.NoError(t, os.MkdirAll(path, 0o700))
workdir := filepath.Join(base, "workspace", "123", "owner", "repo")
require.NoError(t, os.MkdirAll(workdir, 0o700))
e := &HostEnvironment{
Path: path,
Workdir: workdir,
BindWorkdir: true,
CleanUp: func() {
_ = os.RemoveAll(miscRoot)
},
StdOut: os.Stdout,
}
require.NoError(t, e.Remove()(ctx))
_, err := os.Stat(workdir)
require.NoError(t, err)
}

View File

@@ -24,6 +24,13 @@ type jobInfo interface {
result(result string)
}
// reportStepError emits the GitHub Actions ##[error] annotation and records
// the error against the job so the job is reported as failed.
func reportStepError(ctx context.Context, err error) {
common.Logger(ctx).Errorf("##[error]%v", err)
common.SetJobError(ctx, err)
}
func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executor {
steps := make([]common.Executor, 0)
preSteps := make([]common.Executor, 0)
@@ -32,7 +39,7 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
steps = append(steps, func(ctx context.Context) error {
logger := common.Logger(ctx)
if len(info.matrix()) > 0 {
logger.Infof("\U0001F9EA Matrix: %v", info.matrix())
logger.Infof("Matrix: %v", info.matrix())
}
return nil
})
@@ -75,33 +82,36 @@ func newJobExecutor(info jobInfo, sf stepFactory, rc *RunContext) common.Executo
preExec := step.pre()
preSteps = append(preSteps, useStepLogger(rc, stepModel, stepStagePre, func(ctx context.Context) error {
logger := common.Logger(ctx)
preErr := preExec(ctx)
if preErr != nil {
logger.Errorf("%v", preErr)
common.SetJobError(ctx, preErr)
reportStepError(ctx, preErr)
} else if ctx.Err() != nil {
logger.Errorf("%v", ctx.Err())
common.SetJobError(ctx, ctx.Err())
reportStepError(ctx, ctx.Err())
}
return preErr
}))
stepExec := step.main()
steps = append(steps, useStepLogger(rc, stepModel, stepStageMain, func(ctx context.Context) error {
logger := common.Logger(ctx)
err := stepExec(ctx)
if err != nil {
logger.Errorf("%v", err)
common.SetJobError(ctx, err)
reportStepError(ctx, err)
} else if ctx.Err() != nil {
logger.Errorf("%v", ctx.Err())
common.SetJobError(ctx, ctx.Err())
reportStepError(ctx, ctx.Err())
}
return nil
}))
postExec := useStepLogger(rc, stepModel, stepStagePost, step.post())
postFn := step.post()
postExec := useStepLogger(rc, stepModel, stepStagePost, func(ctx context.Context) error {
err := postFn(ctx)
if err != nil {
reportStepError(ctx, err)
} else if ctx.Err() != nil {
reportStepError(ctx, ctx.Err())
}
return err
})
if postExecutor != nil {
// run the post executor in reverse order
postExecutor = postExec.Finally(postExecutor)
@@ -196,7 +206,7 @@ func setJobResult(ctx context.Context, info jobInfo, rc *RunContext, success boo
jobResultMessage = "failed"
}
logger.WithField("jobResult", jobResult).Infof("\U0001F3C1 Job %s", jobResultMessage)
logger.WithField("jobResult", jobResult).Infof("Job %s", jobResultMessage)
}
func setJobOutputs(ctx context.Context, rc *RunContext) {

View File

@@ -220,11 +220,12 @@ func (rc *RunContext) startHostEnvironment() common.Executor {
}
toolCache := filepath.Join(cacheDir, "tool_cache")
rc.JobContainer = &container.HostEnvironment{
Path: path,
TmpDir: runnerTmp,
ToolCache: toolCache,
Workdir: rc.Config.Workdir,
ActPath: actPath,
Path: path,
TmpDir: runnerTmp,
ToolCache: toolCache,
Workdir: rc.Config.Workdir,
BindWorkdir: rc.Config.BindWorkdir,
ActPath: actPath,
CleanUp: func() {
os.RemoveAll(miscpath)
},
@@ -729,7 +730,7 @@ func (rc *RunContext) isEnabled(ctx context.Context) (bool, error) {
jobType, jobTypeErr := job.Type()
if runJobErr != nil {
return false, fmt.Errorf(" \u274C Error in if-expression: \"if: %s\" (%s)", job.If.Value, runJobErr)
return false, fmt.Errorf("if-expression %q evaluation failed: %s", job.If.Value, runJobErr)
}
if jobType == model.JobTypeInvalid {

View File

@@ -107,7 +107,7 @@ func runStepExecutor(step step, stage stepStage, executor common.Executor) commo
if strings.Contains(stepString, "::add-mask::") {
stepString = "add-mask command"
}
logger.Infof("\u2B50 Run %s %s", stage, stepString)
logger.Infof("Run %s %s", stage, stepString)
// Prepare and clean Runner File Commands
actPath := rc.JobContainer.GetActPath()
@@ -158,7 +158,7 @@ func runStepExecutor(step step, stage stepStage, executor common.Executor) commo
err = executor(timeoutctx)
if err == nil {
logger.WithField("stepResult", stepResult.Outcome).Infof(" \u2705 Success - %s %s", stage, stepString)
logger.WithField("stepResult", stepResult.Outcome).Infof("Success - %s %s", stage, stepString)
} else {
stepResult.Outcome = model.StepStatusFailure
@@ -169,6 +169,7 @@ func runStepExecutor(step step, stage stepStage, executor common.Executor) commo
}
if continueOnError {
logger.Errorf("##[error]%v", err)
logger.Infof("Failed but continue next step")
err = nil
stepResult.Conclusion = model.StepStatusSuccess
@@ -176,7 +177,9 @@ func runStepExecutor(step step, stage stepStage, executor common.Executor) commo
stepResult.Conclusion = model.StepStatusFailure
}
logger.WithField("stepResult", stepResult.Outcome).Errorf(" \u274C Failure - %s %s", stage, stepString)
// Infof: Errorf entries are promoted to the user log by the reporter,
// which would duplicate the ##[error] annotation emitted elsewhere.
logger.WithField("stepResult", stepResult.Outcome).Infof("Failure - %s %s", stage, stepString)
}
// Process Runner File Commands
orgerr := err
@@ -268,7 +271,7 @@ func isStepEnabled(ctx context.Context, expr string, step step, stage stepStage)
runStep, err := EvalBool(ctx, rc.NewStepExpressionEvaluator(ctx, step), expr, defaultStatusCheck)
if err != nil {
return false, fmt.Errorf(" \u274C Error in if-expression: \"if: %s\" (%s)", expr, err)
return false, fmt.Errorf("if-expression %q evaluation failed: %s", expr, err)
}
return runStep, nil
@@ -284,7 +287,7 @@ func isContinueOnError(ctx context.Context, expr string, step step, _ stepStage)
continueOnError, err := EvalBool(ctx, rc.NewStepExpressionEvaluator(ctx, step), expr, exprparser.DefaultStatusCheckNone)
if err != nil {
return false, fmt.Errorf(" \u274C Error in continue-on-error-expression: \"continue-on-error: %s\" (%s)", expr, err)
return false, fmt.Errorf("continue-on-error expression %q evaluation failed: %s", expr, err)
}
return continueOnError, nil

View File

@@ -27,6 +27,11 @@ type TaskRunner interface {
Run(ctx context.Context, task *runnerv1.Task) error
}
// IdleRunner can run maintenance while the poller is idle.
type IdleRunner interface {
OnIdle(ctx context.Context)
}
type Poller struct {
client client.Client
runner TaskRunner
@@ -95,6 +100,7 @@ func (p *Poller) Poll() {
task, ok := p.fetchTask(p.pollingCtx, s)
if !ok {
p.runIdleMaintenance()
<-sem
if !p.waitBackoff(s) {
return
@@ -119,6 +125,7 @@ func (p *Poller) PollOnce() {
for {
task, ok := p.fetchTask(p.pollingCtx, s)
if !ok {
p.runIdleMaintenance()
if !p.waitBackoff(s) {
return
}
@@ -130,6 +137,12 @@ func (p *Poller) PollOnce() {
}
}
func (p *Poller) runIdleMaintenance() {
if idleRunner, ok := p.runner.(IdleRunner); ok {
idleRunner.OnIdle(p.jobsCtx)
}
}
func (p *Poller) Shutdown(ctx context.Context) error {
p.shutdownPolling()

View File

@@ -125,6 +125,11 @@ type mockRunner struct {
totalCompleted atomic.Int64
}
type idleAwareRunner struct {
mockRunner
idleCalls atomic.Int64
}
func (m *mockRunner) Run(ctx context.Context, _ *runnerv1.Task) error {
atomicMax(&m.maxConcurrent, m.running.Add(1))
select {
@@ -136,6 +141,78 @@ func (m *mockRunner) Run(ctx context.Context, _ *runnerv1.Task) error {
return nil
}
func TestPollerRunIdleMaintenance(t *testing.T) {
runner := &idleAwareRunner{}
p := &Poller{runner: runner, jobsCtx: context.Background()}
p.runIdleMaintenance()
assert.Equal(t, int64(1), runner.idleCalls.Load())
}
func (m *idleAwareRunner) OnIdle(_ context.Context) {
m.idleCalls.Add(1)
}
func TestPollerPollCallsOnIdle(t *testing.T) {
cli := mocks.NewClient(t)
cli.On("FetchTask", mock.Anything, mock.Anything).Return(
func(_ context.Context, _ *connect_go.Request[runnerv1.FetchTaskRequest]) (*connect_go.Response[runnerv1.FetchTaskResponse], error) {
return connect_go.NewResponse(&runnerv1.FetchTaskResponse{}), nil
},
)
cfg, err := config.LoadDefault("")
require.NoError(t, err)
cfg.Runner.Capacity = 1
cfg.Runner.FetchInterval = 10 * time.Millisecond
cfg.Runner.FetchIntervalMax = 10 * time.Millisecond
runner := &idleAwareRunner{}
poller := New(cfg, cli, runner)
var wg sync.WaitGroup
wg.Go(poller.Poll)
require.Eventually(t, func() bool {
return runner.idleCalls.Load() > 0
}, time.Second, 10*time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
require.NoError(t, poller.Shutdown(ctx))
wg.Wait()
}
func TestPollerPollOnceCallsOnIdle(t *testing.T) {
cli := mocks.NewClient(t)
cli.On("FetchTask", mock.Anything, mock.Anything).Return(
func(_ context.Context, _ *connect_go.Request[runnerv1.FetchTaskRequest]) (*connect_go.Response[runnerv1.FetchTaskResponse], error) {
return connect_go.NewResponse(&runnerv1.FetchTaskResponse{}), nil
},
)
cfg, err := config.LoadDefault("")
require.NoError(t, err)
cfg.Runner.FetchInterval = 10 * time.Millisecond
cfg.Runner.FetchIntervalMax = 10 * time.Millisecond
runner := &idleAwareRunner{}
poller := New(cfg, cli, runner)
var wg sync.WaitGroup
wg.Go(poller.PollOnce)
require.Eventually(t, func() bool {
return runner.idleCalls.Load() > 0
}, time.Second, 10*time.Millisecond)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
require.NoError(t, poller.Shutdown(ctx))
wg.Wait()
}
// TestPoller_ConcurrencyLimitedByCapacity verifies that with capacity=3 and
// 6 available tasks, at most 3 tasks run concurrently, and FetchTask is
// never called concurrently (single poller).

View File

@@ -7,11 +7,14 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"maps"
"net/http"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
@@ -45,8 +48,10 @@ type Runner struct {
envs map[string]string
cacheHandler *artifactcache.Handler
runningTasks sync.Map
runningCount atomic.Int64
runningTasks sync.Map
runningCount atomic.Int64
lastIdleCleanupUnixNano atomic.Int64
now func() time.Time
}
func NewRunner(cfg *config.Config, reg *config.Registration, cli client.Client) *Runner {
@@ -89,13 +94,94 @@ func NewRunner(cfg *config.Config, reg *config.Registration, cli client.Client)
envs["GITEA_ACTIONS"] = "true"
envs["GITEA_ACTIONS_RUNNER_VERSION"] = ver.Version()
return &Runner{
runner := &Runner{
name: reg.Name,
cfg: cfg,
client: cli,
labels: ls,
envs: envs,
cacheHandler: cacheHandler,
now: time.Now,
}
return runner
}
// OnIdle performs lightweight maintenance during polling idle windows.
// It runs synchronously on the poller goroutine; shouldRunIdleCleanup
// throttles invocations to runner.idle_cleanup_interval so the impact on
// poll cadence is bounded even when the workdir root is large.
func (r *Runner) OnIdle(ctx context.Context) {
if !r.shouldRunIdleCleanup() {
return
}
workdirParent := strings.TrimLeft(r.cfg.Container.WorkdirParent, "/")
workdirRoot := filepath.FromSlash("/" + workdirParent)
r.cleanupStaleTaskDirs(ctx, workdirRoot)
}
func (r *Runner) shouldRunIdleCleanup() bool {
if !r.cfg.Container.BindWorkdir {
return false
}
if r.cfg.Runner.WorkdirCleanupAge <= 0 || r.cfg.Runner.IdleCleanupInterval <= 0 {
return false
}
if r.RunningCount() != 0 {
return false
}
now := r.now()
interval := r.cfg.Runner.IdleCleanupInterval
for {
last := r.lastIdleCleanupUnixNano.Load()
if last != 0 && now.Sub(time.Unix(0, last)) < interval {
return false
}
if r.lastIdleCleanupUnixNano.CompareAndSwap(last, now.UnixNano()) {
return true
}
}
}
func (r *Runner) cleanupStaleTaskDirs(ctx context.Context, workdirRoot string) {
entries, err := os.ReadDir(workdirRoot)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return
}
log.Warnf("failed to list task workspace root %s for stale cleanup: %v", workdirRoot, err)
return
}
// A task may begin between shouldRunIdleCleanup's running-count check and
// the loop below. That is safe because new task dirs are created with the
// current mtime and therefore fall on the keep side of cutoff.
cutoff := r.now().Add(-r.cfg.Runner.WorkdirCleanupAge)
for _, entry := range entries {
if err := ctx.Err(); err != nil {
return
}
if !entry.IsDir() {
continue
}
// Task workspaces are indexed by numeric task IDs; skip any other
// directories to avoid deleting operator-managed data under workdir_root.
if _, err := strconv.ParseUint(entry.Name(), 10, 64); err != nil {
continue
}
info, err := entry.Info()
if err != nil {
log.Warnf("failed to stat task workspace %s: %v", filepath.Join(workdirRoot, entry.Name()), err)
continue
}
if info.ModTime().After(cutoff) {
continue
}
taskDir := filepath.Join(workdirRoot, entry.Name())
if err := os.RemoveAll(taskDir); err != nil {
log.Warnf("failed to clean stale task workspace %s: %v", taskDir, err)
continue
}
log.Infof("cleaned stale task workspace %s", taskDir)
}
}
@@ -238,6 +324,13 @@ func (r *Runner) run(ctx context.Context, task *runnerv1.Task, reporter *report.
workdirParent = fmt.Sprintf("%s/%d", workdirParent, task.Id)
}
workdir := filepath.FromSlash(fmt.Sprintf("/%s/%s", workdirParent, preset.Repository))
if runtime.GOOS == "windows" {
if abs, err := filepath.Abs(workdir); err == nil {
workdir = abs
}
}
// Without bind_workdir, the workspace path omits the task id; concurrent host-mode jobs
// for the same repository would share this directory and can race with per-job cleanup.
runnerConfig := &runner.Config{
// On Linux, Workdir will be like "/<parent_directory>/<owner>/<repo>"

View File

@@ -0,0 +1,247 @@
// Copyright 2026 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package run
import (
"context"
"os"
"path/filepath"
"strconv"
"testing"
"time"
"gitea.com/gitea/runner/internal/pkg/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestRunnerCleanupStaleTaskDirs(t *testing.T) {
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
workdirRoot := filepath.Join(t.TempDir(), "workspace")
require.NoError(t, os.MkdirAll(workdirRoot, 0o700))
oldTask := filepath.Join(workdirRoot, "1001")
freshTask := filepath.Join(workdirRoot, "1002")
nonTask := filepath.Join(workdirRoot, "shared")
alphaNumericTask := filepath.Join(workdirRoot, "123abc")
for _, path := range []string{oldTask, freshTask, nonTask, alphaNumericTask} {
require.NoError(t, os.MkdirAll(path, 0o700))
}
require.NoError(t, os.Chtimes(oldTask, now.Add(-3*time.Hour), now.Add(-3*time.Hour)))
require.NoError(t, os.Chtimes(freshTask, now.Add(-30*time.Minute), now.Add(-30*time.Minute)))
require.NoError(t, os.Chtimes(nonTask, now.Add(-5*time.Hour), now.Add(-5*time.Hour)))
require.NoError(t, os.Chtimes(alphaNumericTask, now.Add(-5*time.Hour), now.Add(-5*time.Hour)))
r := &Runner{
cfg: &config.Config{
Runner: config.Runner{
WorkdirCleanupAge: 2 * time.Hour,
},
},
now: func() time.Time { return now },
}
r.cleanupStaleTaskDirs(context.Background(), workdirRoot)
assert.NoDirExists(t, oldTask)
assert.DirExists(t, freshTask)
assert.DirExists(t, nonTask)
assert.DirExists(t, alphaNumericTask)
}
func TestRunnerCleanupStaleTaskDirsMissingRoot(t *testing.T) {
r := &Runner{
cfg: &config.Config{
Runner: config.Runner{WorkdirCleanupAge: time.Hour},
},
now: time.Now,
}
// Must be a silent no-op rather than a warning or panic when the root
// has not yet been created (e.g. the runner has never executed a task).
r.cleanupStaleTaskDirs(context.Background(), filepath.Join(t.TempDir(), "missing"))
}
func TestRunnerCleanupStaleTaskDirsHonorsContext(t *testing.T) {
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
workdirRoot := filepath.Join(t.TempDir(), "workspace")
require.NoError(t, os.MkdirAll(workdirRoot, 0o700))
for i := 1001; i <= 1003; i++ {
dir := filepath.Join(workdirRoot, strconv.Itoa(i))
require.NoError(t, os.MkdirAll(dir, 0o700))
require.NoError(t, os.Chtimes(dir, now.Add(-3*time.Hour), now.Add(-3*time.Hour)))
}
ctx, cancel := context.WithCancel(context.Background())
cancel()
r := &Runner{
cfg: &config.Config{
Runner: config.Runner{WorkdirCleanupAge: time.Hour},
},
now: func() time.Time { return now },
}
r.cleanupStaleTaskDirs(ctx, workdirRoot)
for i := 1001; i <= 1003; i++ {
assert.DirExists(t, filepath.Join(workdirRoot, strconv.Itoa(i)))
}
}
func TestRunnerShouldRunIdleCleanupThrottles(t *testing.T) {
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
r := &Runner{
cfg: &config.Config{
Container: config.Container{
BindWorkdir: true,
},
Runner: config.Runner{
WorkdirCleanupAge: 24 * time.Hour,
IdleCleanupInterval: time.Hour,
},
},
now: func() time.Time { return now },
}
assert.True(t, r.shouldRunIdleCleanup())
now = now.Add(30 * time.Minute)
assert.False(t, r.shouldRunIdleCleanup())
now = now.Add(31 * time.Minute)
assert.True(t, r.shouldRunIdleCleanup())
}
func TestRunnerShouldRunIdleCleanupSkipsWhenJobRunning(t *testing.T) {
r := &Runner{
cfg: &config.Config{
Container: config.Container{
BindWorkdir: true,
},
Runner: config.Runner{
WorkdirCleanupAge: 24 * time.Hour,
IdleCleanupInterval: time.Minute,
},
},
now: time.Now,
}
r.runningCount.Store(1)
assert.False(t, r.shouldRunIdleCleanup())
}
func TestRunnerShouldRunIdleCleanupSkipsWhenBindWorkdirDisabled(t *testing.T) {
r := &Runner{
cfg: &config.Config{
Runner: config.Runner{
WorkdirCleanupAge: 24 * time.Hour,
IdleCleanupInterval: time.Minute,
},
},
now: time.Now,
}
assert.False(t, r.shouldRunIdleCleanup())
}
func TestRunnerShouldRunIdleCleanupSkipsWhenDisabled(t *testing.T) {
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
t.Run("cleanup age disabled", func(t *testing.T) {
r := &Runner{
cfg: &config.Config{
Container: config.Container{
BindWorkdir: true,
},
Runner: config.Runner{
WorkdirCleanupAge: -1,
IdleCleanupInterval: time.Minute,
},
},
now: func() time.Time { return now },
}
assert.False(t, r.shouldRunIdleCleanup())
})
t.Run("idle interval disabled", func(t *testing.T) {
r := &Runner{
cfg: &config.Config{
Container: config.Container{
BindWorkdir: true,
},
Runner: config.Runner{
WorkdirCleanupAge: 24 * time.Hour,
IdleCleanupInterval: -1,
},
},
now: func() time.Time { return now },
}
assert.False(t, r.shouldRunIdleCleanup())
})
}
// TestRunnerOnIdleIntegratesCleanup wires the full OnIdle entry point and
// confirms it walks workdir_parent (after the leading-slash trim that
// matches the production path construction) and removes stale numeric dirs.
func TestRunnerOnIdleIntegratesCleanup(t *testing.T) {
now := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
root := t.TempDir()
stale := filepath.Join(root, "1234")
require.NoError(t, os.MkdirAll(stale, 0o700))
require.NoError(t, os.Chtimes(stale, now.Add(-48*time.Hour), now.Add(-48*time.Hour)))
r := &Runner{
cfg: &config.Config{
Container: config.Container{
BindWorkdir: true,
WorkdirParent: root, // leading slash absent, OnIdle reattaches it
},
Runner: config.Runner{
WorkdirCleanupAge: 24 * time.Hour,
IdleCleanupInterval: time.Minute,
},
},
now: func() time.Time { return now },
}
r.OnIdle(context.Background())
assert.NoDirExists(t, stale)
}
// TestRunnerOnIdleSkipsWhenAlreadyCancelled verifies a pre-cancelled ctx
// short-circuits cleanup before any directory entry is touched.
func TestRunnerOnIdleSkipsWhenAlreadyCancelled(t *testing.T) {
	fixed := time.Date(2026, time.April, 29, 20, 0, 0, 0, time.UTC)
	parent := t.TempDir()

	// Seed a stale numeric directory that would be removed by a normal pass.
	staleDir := filepath.Join(parent, "1234")
	require.NoError(t, os.MkdirAll(staleDir, 0o700))
	aged := fixed.Add(-48 * time.Hour)
	require.NoError(t, os.Chtimes(staleDir, aged, aged))

	r := &Runner{
		cfg: &config.Config{
			Container: config.Container{
				BindWorkdir:   true,
				WorkdirParent: parent,
			},
			Runner: config.Runner{
				WorkdirCleanupAge:   24 * time.Hour,
				IdleCleanupInterval: time.Minute,
			},
		},
		now: func() time.Time { return fixed },
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // the context is already done before OnIdle runs

	r.OnIdle(ctx)
	// Cleanup must have bailed out, leaving the stale directory in place.
	assert.DirExists(t, staleDir)
}

View File

@@ -40,6 +40,12 @@ runner:
# The runner uses exponential backoff when idle, increasing the interval up to this maximum.
# Set to 0 or same as fetch_interval to disable backoff.
fetch_interval_max: 5s
# While idle, remove stale bind-workdir task directories older than this duration.
# Setting either workdir_cleanup_age or idle_cleanup_interval to 0 (or any
# non-positive value) disables workdir cleanup entirely.
workdir_cleanup_age: 24h
# How often the runner performs the stale bind-workdir cleanup pass while it is idle.
idle_cleanup_interval: 10m
# The base interval for periodic log flush to the Gitea instance.
# Logs may be sent earlier if the buffer reaches log_report_batch_size
# or if log_report_max_latency expires after the first buffered row.
@@ -107,6 +113,7 @@ container:
# If the path starts with '/', the '/' will be trimmed.
# For example, if the parent directory is /path/to/my/dir, workdir_parent should be path/to/my/dir
# If it's empty, /workspace will be used.
# Purely numeric subdirectories under this path are reserved for task workspaces and may be removed by idle cleanup.
workdir_parent:
# Volumes (including bind mounts) can be mounted to containers. Glob syntax is supported, see https://github.com/gobwas/glob
# You can specify multiple volumes. If the sequence is empty, no volumes can be mounted.

View File

@@ -33,6 +33,8 @@ type Runner struct {
FetchTimeout time.Duration `yaml:"fetch_timeout"` // FetchTimeout specifies the timeout duration for fetching resources.
FetchInterval time.Duration `yaml:"fetch_interval"` // FetchInterval specifies the interval duration for fetching resources.
FetchIntervalMax time.Duration `yaml:"fetch_interval_max"` // FetchIntervalMax specifies the maximum backoff interval when idle.
WorkdirCleanupAge time.Duration `yaml:"workdir_cleanup_age"` // WorkdirCleanupAge removes stale bind-workdir task directories older than this duration during idle cleanup.
IdleCleanupInterval time.Duration `yaml:"idle_cleanup_interval"` // IdleCleanupInterval runs stale bind-workdir cleanup periodically while the runner is idle. Set to 0 to disable cleanup cadence.
LogReportInterval time.Duration `yaml:"log_report_interval"` // LogReportInterval specifies the base interval for periodic log flush.
LogReportMaxLatency time.Duration `yaml:"log_report_max_latency"` // LogReportMaxLatency specifies the max time a log row can wait before being sent.
LogReportBatchSize int `yaml:"log_report_batch_size"` // LogReportBatchSize triggers immediate log flush when buffer reaches this size.
@@ -92,6 +94,7 @@ type Config struct {
// If file is not empty, it will be used to load the configuration.
func LoadDefault(file string) (*Config, error) {
cfg := &Config{}
definedRunnerKeys := map[string]bool{}
if file != "" {
content, err := os.ReadFile(file)
if err != nil {
@@ -100,6 +103,10 @@ func LoadDefault(file string) (*Config, error) {
if err := yaml.Unmarshal(content, cfg); err != nil {
return nil, fmt.Errorf("parse config file %q: %w", file, err)
}
definedRunnerKeys, err = definedRunnerConfigKeys(content)
if err != nil {
return nil, fmt.Errorf("parse config file %q for defaults metadata: %w", file, err)
}
}
compatibleWithOldEnvs(file != "", cfg)
@@ -157,6 +164,12 @@ func LoadDefault(file string) (*Config, error) {
if cfg.Runner.FetchIntervalMax <= 0 {
cfg.Runner.FetchIntervalMax = 5 * time.Second
}
if cfg.Runner.WorkdirCleanupAge == 0 && !definedRunnerKeys["workdir_cleanup_age"] {
cfg.Runner.WorkdirCleanupAge = 24 * time.Hour
}
if cfg.Runner.IdleCleanupInterval == 0 && !definedRunnerKeys["idle_cleanup_interval"] {
cfg.Runner.IdleCleanupInterval = 10 * time.Minute
}
if cfg.Runner.LogReportInterval <= 0 {
cfg.Runner.LogReportInterval = 5 * time.Second
}
@@ -199,3 +212,30 @@ func LoadDefault(file string) (*Config, error) {
return cfg, nil
}
// definedRunnerConfigKeys reports which keys appear explicitly under the
// top-level "runner" mapping of the YAML config content. LoadDefault uses
// this to distinguish "key absent" (apply the default) from "key set to a
// zero value" (user explicitly disabled the feature), which plain struct
// unmarshalling cannot express for zero-valued durations.
func definedRunnerConfigKeys(content []byte) (map[string]bool, error) {
	var root yaml.Node
	if err := yaml.Unmarshal(content, &root); err != nil {
		return nil, err
	}
	defined := map[string]bool{}
	// An empty document has no content nodes at all.
	if len(root.Content) == 0 {
		return defined, nil
	}
	doc := root.Content[0]
	// Guard against non-mapping document roots (e.g. a top-level sequence
	// or scalar): their Content is not laid out as key/value pairs, so
	// iterating it in steps of two would misread items as keys.
	if doc.Kind != yaml.MappingNode {
		return defined, nil
	}
	// Mapping nodes store keys and values as alternating Content entries.
	for i := 0; i+1 < len(doc.Content); i += 2 {
		key := doc.Content[i]
		value := doc.Content[i+1]
		if key.Value != "runner" || value.Kind != yaml.MappingNode {
			continue
		}
		for j := 0; j+1 < len(value.Content); j += 2 {
			defined[value.Content[j].Value] = true
		}
		break
	}
	return defined, nil
}

View File

@@ -7,6 +7,7 @@ import (
"os"
"path/filepath"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -39,3 +40,80 @@ cache:
_, err := LoadDefault(path)
require.NoError(t, err)
}
// TestLoadDefault_DefaultsWorkdirCleanupAge checks that loading with no
// config file applies the documented cleanup defaults (24h age, 10m cadence).
func TestLoadDefault_DefaultsWorkdirCleanupAge(t *testing.T) {
	loaded, err := LoadDefault("")
	require.NoError(t, err)
	assert.Equal(t, 24*time.Hour, loaded.Runner.WorkdirCleanupAge)
	assert.Equal(t, 10*time.Minute, loaded.Runner.IdleCleanupInterval)
}
// TestLoadDefault_UsesConfiguredWorkdirCleanupAge checks that an explicit
// non-zero workdir_cleanup_age wins over the 24h default.
func TestLoadDefault_UsesConfiguredWorkdirCleanupAge(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "config.yaml")
	require.NoError(t, os.WriteFile(cfgPath, []byte(`
runner:
  workdir_cleanup_age: 2h30m
`), 0o600))

	loaded, err := LoadDefault(cfgPath)
	require.NoError(t, err)
	assert.Equal(t, 150*time.Minute, loaded.Runner.WorkdirCleanupAge)
}
// TestLoadDefault_UsesConfiguredIdleCleanupInterval checks that an explicit
// non-zero idle_cleanup_interval wins over the 10m default.
func TestLoadDefault_UsesConfiguredIdleCleanupInterval(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "config.yaml")
	require.NoError(t, os.WriteFile(cfgPath, []byte(`
runner:
  idle_cleanup_interval: 45m
`), 0o600))

	loaded, err := LoadDefault(cfgPath)
	require.NoError(t, err)
	assert.Equal(t, 45*time.Minute, loaded.Runner.IdleCleanupInterval)
}
// TestLoadDefault_AllowsDisablingWorkdirCleanup checks that explicit zero
// values survive loading instead of being replaced by the defaults —
// i.e. a user can turn cleanup off with 0s.
func TestLoadDefault_AllowsDisablingWorkdirCleanup(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "config.yaml")
	require.NoError(t, os.WriteFile(cfgPath, []byte(`
runner:
  workdir_cleanup_age: 0s
  idle_cleanup_interval: 0s
`), 0o600))

	loaded, err := LoadDefault(cfgPath)
	require.NoError(t, err)
	assert.Equal(t, time.Duration(0), loaded.Runner.WorkdirCleanupAge)
	assert.Equal(t, time.Duration(0), loaded.Runner.IdleCleanupInterval)
}
// TestLoadDefault_AllowsNegativeWorkdirCleanupValues checks that negative
// durations pass through untouched; any non-positive value disables cleanup.
func TestLoadDefault_AllowsNegativeWorkdirCleanupValues(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "config.yaml")
	require.NoError(t, os.WriteFile(cfgPath, []byte(`
runner:
  workdir_cleanup_age: -1s
  idle_cleanup_interval: -1s
`), 0o600))

	loaded, err := LoadDefault(cfgPath)
	require.NoError(t, err)
	assert.Equal(t, -1*time.Second, loaded.Runner.WorkdirCleanupAge)
	assert.Equal(t, -1*time.Second, loaded.Runner.IdleCleanupInterval)
}
// TestLoadDefault_MalformedYAMLReturnsParseError pins the error surfaced for
// invalid YAML to the canonical "parse config file" message rather than the
// "for defaults metadata" variant — i.e. the main yaml.Unmarshal runs first.
func TestLoadDefault_MalformedYAMLReturnsParseError(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "config.yaml")
	broken := []byte("runner:\n capacity: [unterminated\n")
	require.NoError(t, os.WriteFile(cfgPath, broken, 0o600))

	_, err := LoadDefault(cfgPath)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "parse config file")
	assert.NotContains(t, err.Error(), "defaults metadata")
}