From 5271c6decad42f1a2bc653b978295fdb2bef1af4 Mon Sep 17 00:00:00 2001 From: Stavros kois Date: Wed, 11 Jan 2023 16:40:01 +0200 Subject: [PATCH] some changes to adapt to job and tests --- .../tests/jobs/jobTemplate_test.yaml | 198 ++++++++++++++++++ library/common-test/values.yaml | 24 --- .../templates/lib/container/_security.tpl | 47 +++-- .../templates/lib/container/_termination.tpl | 11 +- .../1.0.0/templates/lib/job/_jobPod.tpl | 48 +++-- .../common/1.0.0/templates/lib/pod/_dns.tpl | 3 +- .../common/1.0.0/templates/lib/pod/_pod.tpl | 2 +- .../templates/lib/pod/_podSecurityContext.tpl | 3 +- .../templates/lib/pod/_runtimeClassName.tpl | 53 +++-- library/common/1.0.0/values.yaml | 2 + 10 files changed, 306 insertions(+), 85 deletions(-) diff --git a/library/common-test/tests/jobs/jobTemplate_test.yaml b/library/common-test/tests/jobs/jobTemplate_test.yaml index 38c556b196..108ea9e2ed 100644 --- a/library/common-test/tests/jobs/jobTemplate_test.yaml +++ b/library/common-test/tests/jobs/jobTemplate_test.yaml @@ -140,6 +140,63 @@ tests: - failedTemplate: errorMessage: At least one container in is required in (job-name). + - it: should pass with job values changed + documentIndex: *deploymentDoc + set: + jobs: + job-name: + enabled: true + podSpec: + dnsPolicy: None + hostname: some-hostname + priorityClassName: some-priorityClass + schedulerName: some-schedulerName + serviceAccountName: some-serviceAccountName + runtimeClassName: some-runtimeClassName + hostNetwork: true + enableServiceLinks: true + restartPolicy: OnFailure + podSecurityContext: + fsGroup: 0 + fsGroupChangePolicy: Always + supplementalGroups: + - 50 + containers: + main: + imageSelector: image + asserts: + - isKind: + of: Deployment + - equal: + path: spec.template.spec.serviceAccountName + value: default + - equal: + path: spec.template.spec.dnsPolicy + value: ClusterFirst + - isNull: + path: spec.template.spec.hostname + - isNull: + path: spec.template.spec.priorityClassName + - isNull: + path: spec.template.spec.runtimeClassName + - isNull: + path: spec.template.spec.schedulerName + - equal: + path: spec.template.spec.hostNetwork + value: false + - equal: + path: spec.template.spec.enableServiceLinks + value: false + - equal: + path: spec.template.spec.restartPolicy + value: Always + - equal: + path: spec.template.spec.securityContext + value: + fsGroup: 568 + fsGroupChangePolicy: OnRootMismatch + supplementalGroups: [] + - it: should pass with default in job documentIndex: &jobDoc 3 set: @@ -427,6 +484,45 @@ tests: content: schedulerName: somename + - it: should pass with inherit set in termination.gracePeriodSeconds + documentIndex: *jobDoc + set: + termination: + gracePeriodSeconds: 35 + jobs: + job-name: + enabled: true + podSpec: + termination: + gracePeriodSeconds: inherit + containers: + main: + imageSelector: image + asserts: + - isSubset: + path: spec.template.spec + content: + terminationGracePeriodSeconds: 35 + + - it: should pass with custom termination.gracePeriodSeconds + documentIndex: *jobDoc + set: + key: 50 + jobs: + job-name: + enabled: true + podSpec: + termination: + gracePeriodSeconds: 50 + containers: + main: + imageSelector: image + asserts: + - isSubset: + path: spec.template.spec + content: + terminationGracePeriodSeconds: 50 + - it: should pass with inherit set for hostNetwork documentIndex: *jobDoc set: @@ -553,6 +649,46 @@ tests: content: priorityClassName: somename + - it: should pass with inherit set in runtimeClassName + documentIndex: *jobDoc + set: + global: + ixChartContext: + addNvidiaRuntimeClass: true + nvidiaRuntimeClassName: nvidia-runtime + scaleGPU: + gpu: somegpu + jobs: + job-name: + enabled: true + podSpec: + runtimeClassName: inherit + containers: + main: + imageSelector: image + asserts: + - isSubset: + path: spec.template.spec + content: + runtimeClassName: nvidia-runtime + + - it: should pass with custom runtimeClassName + documentIndex: *jobDoc + set: + jobs: + job-name: + enabled: true + podSpec: + runtimeClassName: some-runtime + containers: + main: + imageSelector: image + asserts: + - isSubset: + path: spec.template.spec + content: + runtimeClassName: some-runtime + - it: should pass with inherit set in hostname documentIndex: *jobDoc set: @@ -625,3 +761,65 @@ tests: path: spec.template.spec content: dnsPolicy: None + + - it: should pass with volumes added + documentIndex: *jobDoc + set: + persistence: + some-volume: + enabled: true + type: nfs + server: some-server + path: /some/path + mountPath: /some/path + jobs: + job-name: + enabled: true + podSpec: + containers: + main: + imageSelector: image + asserts: + - isKind: + of: Job + - isSubset: + path: spec.template.spec + content: + volumes: + - name: some-volume + nfs: + server: some-server + path: /some/path + + - it: should pass with security Context changed and added gpu + documentIndex: *jobDoc + set: + global: + ixChartContext: + addNvidiaRuntimeClass: true + nvidiaRuntimeClassName: nvidia-runtime + jobs: + job-name: + enabled: true + podSpec: + podSecurityContext: + fsGroup: 0 + fsGroupChangePolicy: Always + supplementalGroups: + - 50 + containers: + main: + imageSelector: image + scaleGPU: + gpu: nvidia + asserts: + - isSubset: + path: spec.template.spec + content: + runtimeClassName: nvidia-runtime + securityContext: + fsGroup: 0 + fsGroupChangePolicy: Always + supplementalGroups: + - 50 + - 44 diff --git a/library/common-test/values.yaml b/library/common-test/values.yaml index a13fb2bf9a..67cebbb6ff 100644 --- a/library/common-test/values.yaml +++ b/library/common-test/values.yaml @@ -3,27 +3,3 @@ service: ports: main: port: 65535 -# jobs: -# jobname: -# enabled: true -# nameOverride: "" -# cron: -# enabled: false -# schedule: "* *" -# timezone: -# concurrencyPolicy: -# failedJobsHistoryLimit: 0 -# successfulJobsHistoryLimit: 0 -# startingDeadlineSeconds: -# annotations: -# labels: -# backoffLimit: 5 -# ttlSecondsAfterFinished: 100 -# activeDeadlineSeconds: 100 -# parallelism: 1 -# completions: 1 -# completionMode: Indexed -# podSpec: -# containers: -# asdf: -# imageSelector: image diff --git a/library/common/1.0.0/templates/lib/container/_security.tpl b/library/common/1.0.0/templates/lib/container/_security.tpl index 16e9aa20a2..07c04bd3a3 100644 --- a/library/common/1.0.0/templates/lib/container/_security.tpl +++ b/library/common/1.0.0/templates/lib/container/_security.tpl @@ -28,7 +28,9 @@ The reason is not splitted, is that on one of the places needs a combo of all va {{- end -}} {{/* Overwrite from values that user/dev passed on this container */}} - {{- $returnValue = mustMergeOverwrite $returnValue $secCont -}} + {{- if $secCont -}} {{/* If secCont is empty don't try to merge */}} + {{- $returnValue = mustMergeOverwrite $returnValue $secCont -}} + {{- end -}} {{- $isPrivilegedPort := false -}} @@ -106,6 +108,7 @@ The reason is not splitted, is that on one of the places needs a combo of all va {{- define "ix.v1.common.lib.podSecurityContext" -}} {{- $root := .root -}} + {{- $isJob := .isJob -}} {{- $podSecCont := .podSecCont -}} {{/* Initialiaze Values */}} @@ -128,23 +131,37 @@ The reason is not splitted, is that on one of the places needs a combo of all va {{/* If at least one of those is not true, lets make sure it's not needed by any other container */}} {{- if or (not $appendDeviceGroups) (not $appendGPUGroup) -}} + {{- if not $isJob -}} + {{- range $key := (list "initContainers" "installContainers" "upgradeContainers" "additionalContainers") -}} + {{/* If they have containers defined... */}} + {{- if (get $root.Values $key) -}} - {{- range $key := (list "initContainers" "installContainers" "upgradeContainers" "additionalContainers") -}} - {{/* If they have containers defined... */}} - {{- if (get $root.Values $key) -}} - - {{/* Go over the containers */}} - {{- range $containerName, $container := (get $root.Values $key) -}} - {{/* If the container has deviceList */}} - {{- if hasKey $container "deviceList" -}} - {{- if $container.deviceList -}} - {{- $appendDeviceGroups = true -}} + {{/* Go over the containers */}} + {{- range $containerName, $container := (get $root.Values $key) -}} + {{/* If the container has deviceList */}} + {{- if hasKey $container "deviceList" -}} + {{- if $container.deviceList -}} + {{- $appendDeviceGroups = true -}} + {{- end -}} + {{- end -}} + {{/* If the container has scaleGPU */}} + {{- if hasKey $container "scaleGPU" -}} + {{- if $container.scaleGPU -}} + {{- $appendGPUGroup = true -}} + {{- end -}} {{- end -}} {{- end -}} - {{/* If the container has scaleGPU */}} - {{- if hasKey $container "scaleGPU" -}} - {{- if $container.scaleGPU -}} - {{- $appendGPUGroup = true -}} + {{- end -}} + {{- end -}} + {{- else -}} + {{- range $jobName, $job := $root.Values.jobs -}} + {{- if $job.enabled -}} + {{- range $name, $container := $job.podSpec.containers -}} + {{- if hasKey $container "scaleGPU" -}} + {{- if $container.scaleGPU -}} + {{/* If at least 1 container has GPU... */}} + {{- $appendGPUGroup = true -}} + {{- end -}} {{- end -}} {{- end -}} {{- end -}} diff --git a/library/common/1.0.0/templates/lib/container/_termination.tpl b/library/common/1.0.0/templates/lib/container/_termination.tpl index e8486e168e..b75fbef47a 100644 --- a/library/common/1.0.0/templates/lib/container/_termination.tpl +++ b/library/common/1.0.0/templates/lib/container/_termination.tpl @@ -2,8 +2,9 @@ {{- define "ix.v1.common.container.termination.messagePath" -}} {{- $msgPath := .msgPath -}} {{- $root := .root -}} - - {{- tpl $msgPath $root -}} + {{- if $msgPath -}} + {{- tpl $msgPath $root -}} + {{- end -}} {{- end -}} {{/* Returns the terminationMessagePolicy for the container */}} @@ -11,7 +12,11 @@ {{- $msgPolicy := .msgPolicy -}} {{- $root := .root -}} - {{- $policy := (tpl $msgPolicy $root) -}} + {{- $policy := "" -}} + {{- if $msgPolicy -}} + {{- $policy = (tpl $msgPolicy $root) -}} + {{- end -}} + {{- with $policy -}} {{- if not (mustHas . (list "File" "FallbackToLogsOnError")) }} {{- fail (printf "Not valid option for messagePolicy (%s). Valid options are FallbackToLogsOnError and File" $policy) -}} diff --git a/library/common/1.0.0/templates/lib/job/_jobPod.tpl b/library/common/1.0.0/templates/lib/job/_jobPod.tpl index 55b816c169..e9f81a5637 100644 --- a/library/common/1.0.0/templates/lib/job/_jobPod.tpl +++ b/library/common/1.0.0/templates/lib/job/_jobPod.tpl @@ -52,11 +52,11 @@ {{- $dnsPolicy := "" -}} {{- with $values.dnsPolicy -}} {{- if eq . $inherit -}} - {{- with (include "ix.v1.common.dnsPolicy" (dict "dnsPolicy" $root.Values.dnsPolicy "hostNetwork" $root.Values.hostNetwork) | trim ) -}} + {{- with (include "ix.v1.common.dnsPolicy" (dict "dnsPolicy" $root.Values.dnsPolicy "hostNetwork" $root.Values.hostNetwork "root" $root) | trim ) -}} {{- $dnsPolicy = . -}} {{- end -}} {{- else -}} - {{- with (include "ix.v1.common.dnsPolicy" (dict "dnsPolicy" $values.dnsPolicy "hostNetwork" ($values.hostNetwork | default $root.Values.hostNetwork)) | trim ) -}} + {{- with (include "ix.v1.common.dnsPolicy" (dict "dnsPolicy" $values.dnsPolicy "hostNetwork" ($values.hostNetwork | default $root.Values.hostNetwork) "root" $root) | trim ) -}} {{- $dnsPolicy = . -}} {{- end -}} {{- end -}} @@ -146,23 +146,25 @@ {{- $runtimeClassName = . -}} {{- end -}} {{- else -}} - {{- with (include "ix.v1.common.runtimeClassName" (dict "root" $root "runtime" $root.Values.runtimeClassName) | trim) -}} - {{- $runtimeClassName = . -}} {{/* TODO: fix scaleGPU here */}} - {{- end -}} + {{- $runtimeClassName = . -}} {{- end -}} {{- else -}} - {{/* If we ever have value in global.defaults */}} + {{- with (include "ix.v1.common.runtimeClassName" (dict "root" $root "runtime" $root.Values.runtimeClassName "isJob" true) | trim) -}} + {{- $runtimeClassName = . -}} + {{- end -}} {{- end -}} -{{- $termSeconds := "" }} -{{- with $values.termination -}} - {{- if eq (toString .) $inherit -}} - {{- with $root.Values.termination.gracePeriodSeconds }} - {{- $termSeconds = . }} - {{- end -}} - {{- else -}} - {{- with $values.termination.gracePeriodSeconds }} - {{- $termSeconds = . }} +{{- $termSeconds := "" -}} +{{- if (hasKey $values "termination") -}} + {{- with $values.termination.gracePeriodSeconds -}} + {{- if eq (toString .) $inherit -}} + {{- with $root.Values.termination.gracePeriodSeconds -}} + {{- $termSeconds = . -}} + {{- end -}} + {{- else -}} + {{- with $values.termination.gracePeriodSeconds -}} + {{- $termSeconds = . -}} + {{- end -}} {{- end -}} {{- end -}} {{- else -}} @@ -170,14 +172,18 @@ {{- end -}} {{- $secCont := dict -}} -{{- with $values.securityContext -}} - {{- if eq (toString .) $inherit -}} - {{- with (include "ix.v1.common.container.podSecurityContext" (dict "podSecCont" $root.Values.podSecurityContext "root" $root) | trim) -}} +{{- with $values.podSecurityContext -}} + {{- if eq (toString .) $inherit -}} {{/* If inherti is set, use the main podSecCont */}} + {{- with (include "ix.v1.common.container.podSecurityContext" (dict "podSecCont" $root.Values.podSecurityContext "root" $root "isJob" true) | trim) -}} + {{- $secCont = . -}} + {{- end -}} + {{- else -}} {{/* Otherwise use the job's podpodSecCont values */}} + {{- with (include "ix.v1.common.container.podSecurityContext" (dict "podSecCont" $values.podSecurityContext "root" $root "isJob" true) | trim) -}} {{- $secCont = . -}} {{- end -}} {{- end -}} -{{- else -}} - {{- with (include "ix.v1.common.container.podSecurityContext" (dict "podSecCont" $values.podSecurityContext "root" $root) | trim) -}} +{{- else -}} {{/* Otherwise use the job's podSecCont values (if empty, will use the global defaults) */}} + {{- with (include "ix.v1.common.container.podSecurityContext" (dict "podSecCont" $values.podSecurityContext "root" $root "isJob" true) | trim) -}} {{- $secCont = . -}} {{- end -}} {{- end -}} @@ -253,7 +259,7 @@ imagePullSecrets: {{- end -}} {{- with $runtimeClassName }} -runtimeClassname: {{ . }} +runtimeClassName: {{ . }} {{- end -}} {{- with $termSeconds }} diff --git a/library/common/1.0.0/templates/lib/pod/_dns.tpl b/library/common/1.0.0/templates/lib/pod/_dns.tpl index fbb6130b1a..4ba325d22f 100644 --- a/library/common/1.0.0/templates/lib/pod/_dns.tpl +++ b/library/common/1.0.0/templates/lib/pod/_dns.tpl @@ -2,8 +2,9 @@ {{- define "ix.v1.common.dnsPolicy" -}} {{- $dnsPolicy := .dnsPolicy -}} {{- $hostNetwork := .hostNetwork -}} + {{- $root := .root -}} - {{- $policy := "ClusterFirst" -}} + {{- $policy := $root.Values.global.defaults.dnsPolicy -}} {{- if $dnsPolicy -}} {{- if not (mustHas $dnsPolicy (list "Default" "ClusterFirst" "ClusterFirstWithHostNet" "None")) -}} {{- fail (printf "Not valid dnsPolicy (%s). Valid options are ClusterFirst, Default, ClusterFirstWithHostNet, None" $dnsPolicy) -}} diff --git a/library/common/1.0.0/templates/lib/pod/_pod.tpl b/library/common/1.0.0/templates/lib/pod/_pod.tpl index cddf6cc744..d0122a3aaf 100644 --- a/library/common/1.0.0/templates/lib/pod/_pod.tpl +++ b/library/common/1.0.0/templates/lib/pod/_pod.tpl @@ -20,7 +20,7 @@ priorityClassName: {{ . }} hostname: {{ . }} {{- end -}} -{{- with (include "ix.v1.common.dnsPolicy" (dict "dnsPolicy" $root.Values.dnsPolicy "hostNetwork" $root.Values.hostNetwork) | trim ) }} +{{- with (include "ix.v1.common.dnsPolicy" (dict "dnsPolicy" $root.Values.dnsPolicy "hostNetwork" $root.Values.hostNetwork "root" $root) | trim ) }} dnsPolicy: {{ . }} {{- end -}} diff --git a/library/common/1.0.0/templates/lib/pod/_podSecurityContext.tpl b/library/common/1.0.0/templates/lib/pod/_podSecurityContext.tpl index 8d67d81bf1..d7cda26d38 100644 --- a/library/common/1.0.0/templates/lib/pod/_podSecurityContext.tpl +++ b/library/common/1.0.0/templates/lib/pod/_podSecurityContext.tpl @@ -1,10 +1,11 @@ {{/* A dict podSecContext is expected with keys like fsGroup */}} {{- define "ix.v1.common.container.podSecurityContext" -}} {{- $podSecCont := .podSecCont -}} + {{- $isJob := .isJob -}} {{- $root := .root -}} {{/* Calculate all security values */}} - {{- $security := (include "ix.v1.common.lib.podSecurityContext" (dict "root" $root "podSecCont" $podSecCont) | fromJson) }} + {{- $security := (include "ix.v1.common.lib.podSecurityContext" (dict "root" $root "podSecCont" $podSecCont "isJob" $isJob) | fromJson) }} fsGroup: {{ $security.fsGroup }} {{- with $security.supplementalGroups }} supplementalGroups: diff --git a/library/common/1.0.0/templates/lib/pod/_runtimeClassName.tpl b/library/common/1.0.0/templates/lib/pod/_runtimeClassName.tpl index 052b0a2912..44c660a831 100644 --- a/library/common/1.0.0/templates/lib/pod/_runtimeClassName.tpl +++ b/library/common/1.0.0/templates/lib/pod/_runtimeClassName.tpl @@ -1,7 +1,7 @@ {{- define "ix.v1.common.runtimeClassName" -}} {{- $root := .root -}} {{- $runtime := .runtime -}} - + {{- $isJob := .isJob -}} {{/* Override previous if a runtime is passed from global defaults */}} {{- $runtimeName := "" -}} {{- with $root.Values.global.defaults.runtimeClassName -}} @@ -13,31 +13,46 @@ {{- $runtimeName = . -}} {{- end -}} - {{/* Override all previous if running in Scale and it's defined */}} + {{/* Override all previous if running in Scale and it's defined */}} {{- if hasKey $root.Values.global "ixChartContext" -}} {{- if $root.Values.global.ixChartContext.addNvidiaRuntimeClass -}} {{- $nvidiaRunTime := false -}} - {{/* If main container has GPU... */}} - {{- if $root.Values.scaleGPU -}} - {{- $nvidiaRunTime = true -}} - {{- end -}} - - {{- $containers := dict -}} - {{/* Append containers if exist, to the $containers dict */}} - {{- range $key := (list "initContainers" "installContainers" "upgradeContainers" "additionalContainers") -}} - {{- if (get $root.Values $key) -}} - {{- $containers = mustMerge $containers (get $root.Values $key) -}} + {{- if not $isJob -}} + {{/* If main container has GPU... */}} + {{- if $root.Values.scaleGPU -}} + {{- $nvidiaRunTime = true -}} {{- end -}} - {{- end -}} - {{/* Check containers if they have GPU assigned */}} - {{- range $name, $container := $containers -}} - {{- if hasKey $container "scaleGPU" -}} - {{- if $container.scaleGPU -}} - {{/* If at least 1 container has GPU... */}} - {{- $nvidiaRunTime = true -}} + {{- $containers := dict -}} + {{/* Append containers if exist, to the $containers dict */}} + {{- range $key := (list "initContainers" "installContainers" "upgradeContainers" "additionalContainers") -}} + {{- if (get $root.Values $key) -}} + {{- $containers = mustMerge $containers (get $root.Values $key) -}} + {{- end -}} + {{- end -}} + + {{/* Check containers if they have GPU assigned */}} + {{- range $name, $container := $containers -}} + {{- if hasKey $container "scaleGPU" -}} + {{- if $container.scaleGPU -}} + {{/* If at least 1 container has GPU... */}} + {{- $nvidiaRunTime = true -}} + {{- end -}} + {{- end -}} + {{- end -}} + {{- else -}} + {{- range $jobName, $job := $root.Values.jobs -}} + {{- if $job.enabled -}} + {{- range $name, $container := $job.podSpec.containers -}} + {{- if hasKey $container "scaleGPU" -}} + {{- if $container.scaleGPU -}} + {{/* If at least 1 container has GPU... */}} + {{- $nvidiaRunTime = true -}} + {{- end -}} + {{- end -}} + {{- end -}} {{- end -}} {{- end -}} {{- end -}} diff --git a/library/common/1.0.0/values.yaml b/library/common/1.0.0/values.yaml index 2f074af941..52853db372 100644 --- a/library/common/1.0.0/values.yaml +++ b/library/common/1.0.0/values.yaml @@ -103,6 +103,8 @@ global: useRevokedCerts: false # If not defined on the the cert item, assume this useExpiredCerts: false + # If not defined on the pod, assume this + dnsPolicy: ClusterFirst # If no restart Policy is defined, assume this restartPolicy: Always # If no restart Policy for job is defined, assume this