Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrading autopilot to version v2.1.0 #646

Merged
merged 2 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions autopilot/base/clusterrolebindings/autopilot-privileged.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Binds the `autopilot` ServiceAccount (namespace `autopilot`) to the
# `system:openshift:scc:privileged` ClusterRole.
# NOTE(review): presumably this is what allows the autopilot pods to run with
# `privileged: true` under OpenShift's privileged SCC -- confirm against the
# DaemonSet that uses this ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBinding is a cluster-scoped kind, so metadata.namespace is
  # intentionally omitted; only the subject below carries a namespace.
  name: autopilot-privileged
subjects:
  - kind: ServiceAccount
    name: autopilot
    # Required for ServiceAccount subjects: the namespace the account lives in.
    namespace: autopilot
roleRef:
  kind: ClusterRole
  name: system:openshift:scc:privileged
  apiGroup: rbac.authorization.k8s.io
1 change: 1 addition & 0 deletions autopilot/base/clusterrolebindings/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- autopilot.yaml
- autopilot-privileged.yaml
- prometheus-k8s-autopilot.yaml
50 changes: 23 additions & 27 deletions autopilot/base/daemonsets/autopilot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,47 @@ spec:
app: autopilot
template:
metadata:
annotations:
null
labels:
app: autopilot
spec:
tolerations:
- effect: NoSchedule
key: nvidia.com/gpu.product
operator: Equal
value: NVIDIA-A100-SXM4-40GB
- effect: NoSchedule
key: nvidia.com/gpu.product
operator: Equal
value: Tesla-V100-PCIE-32GB
nodeSelector:
nvidia.com/gpu.present: 'true'
serviceAccountName: autopilot
securityContext: {}
initContainers:
- args:
- |
until [ -f /usr/bin/nvidia-smi ]; do echo waiting for nvidia device plug-in to be setup; sleep 5 && exit -1; done
- until [ -f /usr/bin/nvidia-smi ]; do echo waiting for nvidia device plug-in to be setup; sleep 5 && exit -1; done
command:
- sh
- -c
image: quay.io/autopilot/autopilot:v1.9.0
image: quay.io/autopilot/autopilot:v2.1.0
imagePullPolicy: Always
name: device-plugin-validation
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
runAsNonRoot: true
privileged: true
containers:
- image: quay.io/autopilot/autopilot:v1.9.0
- image: quay.io/autopilot/autopilot:v2.1.0
command:
- sh
- -c
- |
iperf3 -s -p 6310 -D
/usr/local/bin/autopilot --port 3333 --loglevel=2 --bw 4 --w 1 --invasive-check-timer 4
- sh
- -c
- |
/usr/local/bin/autopilot --port 3333 --loglevel=2 --bw 4 --w 1 --invasive-check-timer 4
imagePullPolicy: Always
name: autopilot
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
runAsNonRoot: true
env:
- name: PERIODIC_CHECKS
value: pciebw,remapped,dcgm,ping,gpupower
- name: PVC_TEST_STORAGE_CLASS
value:
value: ''
- name: "NODE_NAME"
valueFrom:
fieldRef:
Expand Down Expand Up @@ -91,8 +87,8 @@ spec:
- nvidia-smi
resources:
limits:
nvidia.com/gpu: 0
nvidia.com/gpu: '0'
requests:
nvidia.com/gpu: 0
volumeMounts: []
volumes: []
nvidia.com/gpu: '0'
securityContext:
privileged: true
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ spec:
selector:
matchLabels:
app: autopilot
service: autopilot-metrics-service
1 change: 1 addition & 0 deletions autopilot/base/services/autopilot-healthchecks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ kind: Service
metadata:
labels:
app: autopilot
service: autopilot-healthchecks
name: autopilot-healthchecks
namespace: autopilot
annotations:
Expand Down
1 change: 1 addition & 0 deletions autopilot/base/services/autopilot-metrics-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ kind: Service
metadata:
labels:
app: autopilot
service: autopilot-metrics-service
name: autopilot-metrics-service
namespace: autopilot
spec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ kind: Service
metadata:
labels:
app: autopilot
service: autopilot-readinessprobe
name: autopilot-readinessprobe
namespace: autopilot
spec:
Expand Down
2 changes: 1 addition & 1 deletion autopilot/base/services/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ kind: Kustomization
resources:
- autopilot-metrics-service.yaml
- autopilot-healthchecks.yaml
- autopilot.yaml
- autopilot-readinessprobe.yaml
Loading