Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions assets/gpu-feature-discovery/0500_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,20 @@ spec:
- name: toolkit-validation
image: "FILLED BY THE OPERATOR"
command: ['sh', '-c']
args: ["until [ -f /run/nvidia/validations/toolkit-ready ]; do echo waiting for nvidia container stack to be setup; sleep 5; done"]
args: ["nvidia-validator"]
env:
- name: NVIDIA_VISIBLE_DEVICES
value: "all"
- name: WITH_WAIT
value: "true"
- name: COMPONENT
value: toolkit
securityContext:
privileged: true
volumeMounts:
- name: run-nvidia
mountPath: /run/nvidia
mountPropagation: HostToContainer
mountPropagation: Bidirectional
- name: config-manager-init
image: "FILLED BY THE OPERATOR"
command: ["config-manager"]
Expand Down
4 changes: 4 additions & 0 deletions assets/state-container-toolkit/0500_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ spec:
fieldPath: metadata.namespace
imagePullPolicy: IfNotPresent
name: nvidia-container-toolkit-ctr
lifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "rm -f /run/nvidia/validations/toolkit-ready"]
securityContext:
privileged: true
seLinuxOptions:
Expand Down
13 changes: 10 additions & 3 deletions assets/state-dcgm-exporter/0800_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@ spec:
- name: toolkit-validation
image: "FILLED BY THE OPERATOR"
command: ['sh', '-c']
args: ["until [ -f /run/nvidia/validations/toolkit-ready ]; do echo waiting for nvidia container stack to be setup; sleep 5; done"]
args: ["nvidia-validator"]
env:
- name: NVIDIA_VISIBLE_DEVICES
value: "all"
- name: WITH_WAIT
value: "true"
- name: COMPONENT
value: toolkit
securityContext:
privileged: true
volumeMounts:
- name: run-nvidia
mountPath: "/run/nvidia"
mountPropagation: HostToContainer
mountPath: /run/nvidia
mountPropagation: Bidirectional
containers:
- image: "FILLED BY THE OPERATOR"
name: nvidia-dcgm-exporter
Expand Down
11 changes: 9 additions & 2 deletions assets/state-dcgm/0400_dcgm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@ spec:
- name: toolkit-validation
image: "FILLED BY THE OPERATOR"
command: ['sh', '-c']
args: ["until [ -f /run/nvidia/validations/toolkit-ready ]; do echo waiting for nvidia container stack to be setup; sleep 5; done"]
args: ["nvidia-validator"]
env:
- name: NVIDIA_VISIBLE_DEVICES
value: "all"
- name: WITH_WAIT
value: "true"
- name: COMPONENT
value: toolkit
securityContext:
privileged: true
volumeMounts:
- name: run-nvidia
mountPath: /run/nvidia
mountPropagation: HostToContainer
mountPropagation: Bidirectional
containers:
- image: "FILLED BY THE OPERATOR"
name: nvidia-dcgm-ctr
Expand Down
11 changes: 9 additions & 2 deletions assets/state-device-plugin/0500_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@ spec:
- image: "FILLED BY THE OPERATOR"
name: toolkit-validation
command: ['sh', '-c']
args: ["until [ -f /run/nvidia/validations/toolkit-ready ]; do echo waiting for nvidia container stack to be setup; sleep 5; done"]
args: ["nvidia-validator"]
env:
- name: NVIDIA_VISIBLE_DEVICES
value: "all"
- name: WITH_WAIT
value: "true"
- name: COMPONENT
value: toolkit
securityContext:
privileged: true
volumeMounts:
- name: run-nvidia-validations
mountPath: /run/nvidia/validations
mountPropagation: HostToContainer
mountPropagation: Bidirectional
- image: "FILLED BY THE OPERATOR"
name: config-manager-init
command: ["config-manager"]
Expand Down
2 changes: 1 addition & 1 deletion assets/state-driver/0500_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ spec:
lifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "rm -f /run/nvidia/validations/.driver-ctr-ready"]
command: ["/bin/sh", "-c", "rm -f /run/nvidia/validations/.driver-ctr-ready /run/nvidia/validations/driver-ready"]
- image: "FILLED BY THE OPERATOR"
imagePullPolicy: IfNotPresent
name: nvidia-peermem-ctr
Expand Down
11 changes: 9 additions & 2 deletions assets/state-mig-manager/0600_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@ spec:
- name: toolkit-validation
image: "FILLED BY THE OPERATOR"
command: ['sh', '-c']
args: ["until [ -f /run/nvidia/validations/toolkit-ready ]; do echo waiting for nvidia container toolkit to be setup; sleep 5; done"]
args: ["nvidia-validator"]
env:
- name: NVIDIA_VISIBLE_DEVICES
value: "all"
- name: WITH_WAIT
value: "true"
- name: COMPONENT
value: toolkit
securityContext:
privileged: true
volumeMounts:
- name: run-nvidia-validations
mountPath: /run/nvidia/validations
mountPropagation: HostToContainer
mountPropagation: Bidirectional
containers:
- name: nvidia-mig-manager
image: "FILLED BY THE OPERATOR"
Expand Down
11 changes: 9 additions & 2 deletions assets/state-mps-control-daemon/0400_daemonset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,20 @@ spec:
- image: "FILLED BY THE OPERATOR"
name: toolkit-validation
command: ['sh', '-c']
args: ["until [ -f /run/nvidia/validations/toolkit-ready ]; do echo waiting for nvidia container stack to be setup; sleep 5; done"]
args: ["nvidia-validator"]
env:
- name: NVIDIA_VISIBLE_DEVICES
value: "all"
- name: WITH_WAIT
value: "true"
- name: COMPONENT
value: toolkit
securityContext:
privileged: true
volumeMounts:
- name: run-nvidia
mountPath: /run/nvidia
mountPropagation: HostToContainer
mountPropagation: Bidirectional
- image: "FILLED BY THE OPERATOR"
name: mps-control-daemon-mounts
command: [mps-control-daemon, mount-shm]
Expand Down
Loading