Skip to content

Commit

Permalink
Merge pull request #646 from computate/autopilot-v2.1.0
Browse files (browse the repository at this point in the history)
Upgrading autopilot to version v2.1.0
  • Loading branch information
computate authored Feb 7, 2025
2 parents 56cd8d4 + c54c640 commit b0a22d5
Show file tree
Hide file tree
Showing 9 changed files with 815 additions and 553 deletions.
13 changes: 13 additions & 0 deletions autopilot/base/clusterrolebindings/autopilot-privileged.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Binds the `autopilot` ServiceAccount (namespace `autopilot`) to the
# `system:openshift:scc:privileged` ClusterRole, which the DaemonSet relies on
# to run containers with `securityContext.privileged: true`.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBinding is a cluster-scoped kind, so it carries no
  # metadata.namespace; the namespace belongs only on the subject below.
  name: autopilot-privileged
subjects:
  - kind: ServiceAccount
    name: autopilot
    namespace: autopilot
roleRef:
  kind: ClusterRole
  name: system:openshift:scc:privileged
  apiGroup: rbac.authorization.k8s.io
1 change: 1 addition & 0 deletions autopilot/base/clusterrolebindings/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- autopilot.yaml
- autopilot-privileged.yaml
- prometheus-k8s-autopilot.yaml
50 changes: 23 additions & 27 deletions autopilot/base/daemonsets/autopilot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,51 +11,47 @@ spec:
app: autopilot
template:
metadata:
annotations:
null
labels:
app: autopilot
spec:
tolerations:
- effect: NoSchedule
key: nvidia.com/gpu.product
operator: Equal
value: NVIDIA-A100-SXM4-40GB
- effect: NoSchedule
key: nvidia.com/gpu.product
operator: Equal
value: Tesla-V100-PCIE-32GB
nodeSelector:
nvidia.com/gpu.present: 'true'
serviceAccountName: autopilot
securityContext: {}
initContainers:
- args:
- |
until [ -f /usr/bin/nvidia-smi ]; do echo waiting for nvidia device plug-in to be setup; sleep 5 && exit -1; done
- until [ -f /usr/bin/nvidia-smi ]; do echo waiting for nvidia device plug-in to be setup; sleep 5 && exit -1; done
command:
- sh
- -c
image: quay.io/autopilot/autopilot:v1.9.0
image: quay.io/autopilot/autopilot:v2.1.0
imagePullPolicy: Always
name: device-plugin-validation
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
runAsNonRoot: true
privileged: true
containers:
- image: quay.io/autopilot/autopilot:v1.9.0
- image: quay.io/autopilot/autopilot:v2.1.0
command:
- sh
- -c
- |
iperf3 -s -p 6310 -D
/usr/local/bin/autopilot --port 3333 --loglevel=2 --bw 4 --w 1 --invasive-check-timer 4
- sh
- -c
- |
/usr/local/bin/autopilot --port 3333 --loglevel=2 --bw 4 --w 1 --invasive-check-timer 4
imagePullPolicy: Always
name: autopilot
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
runAsNonRoot: true
env:
- name: PERIODIC_CHECKS
value: pciebw,remapped,dcgm,ping,gpupower
- name: PVC_TEST_STORAGE_CLASS
value:
value: ''
- name: "NODE_NAME"
valueFrom:
fieldRef:
Expand Down Expand Up @@ -91,8 +87,8 @@ spec:
- nvidia-smi
resources:
limits:
nvidia.com/gpu: 0
nvidia.com/gpu: '0'
requests:
nvidia.com/gpu: 0
volumeMounts: []
volumes: []
nvidia.com/gpu: '0'
securityContext:
privileged: true
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ spec:
selector:
matchLabels:
app: autopilot
service: autopilot-metrics-service
1 change: 1 addition & 0 deletions autopilot/base/services/autopilot-healthchecks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ kind: Service
metadata:
labels:
app: autopilot
service: autopilot-healthchecks
name: autopilot-healthchecks
namespace: autopilot
annotations:
Expand Down
1 change: 1 addition & 0 deletions autopilot/base/services/autopilot-metrics-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ kind: Service
metadata:
labels:
app: autopilot
service: autopilot-metrics-service
name: autopilot-metrics-service
namespace: autopilot
spec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ kind: Service
metadata:
labels:
app: autopilot
service: autopilot-readinessprobe
name: autopilot-readinessprobe
namespace: autopilot
spec:
Expand Down
2 changes: 1 addition & 1 deletion autopilot/base/services/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ kind: Kustomization
resources:
- autopilot-metrics-service.yaml
- autopilot-healthchecks.yaml
- autopilot.yaml
- autopilot-readinessprobe.yaml
Loading

0 comments on commit b0a22d5

Please sign in to comment.