File tree Expand file tree Collapse file tree 2 files changed +22
-3
lines changed
charts/amazon-cloudwatch-observability/templates Expand file tree Collapse file tree 2 files changed +22
-3
lines changed Original file line number Diff line number Diff line change @@ -248,6 +248,26 @@ Get the current recommended neuron-monitor image for a region
248248{ {- printf " %s/%s:%s" $imageDomain .Values.neuronMonitor.image.repository .Values.neuronMonitor.image.tag -} }
249249{ {- end -} }
250250
{{/*
Emit the DCGM_EXPORTER_INTERVAL environment variable entry for dcgmExporter.

Input: the root chart context (reads .Values.agents and .Values.agent).

For every entry in .Values.agents, the entry is merged with a copy of
.Values.agent, and its `config` (falling back to `defaultConfig`) is inspected for
  logs.metrics_collected.kubernetes.accelerated_compute_gpu_metrics_collection_interval
If several agents define the key, the last one iterated wins.

Output: a single YAML list item (`- name: ... / value: "1000"`) when the key was
found and its value is below 60; otherwise an empty string. Callers are expected
to indent the result themselves (e.g. `| nindent 2`).

NOTE(review): "1000" is presumably a collection interval in milliseconds as
understood by dcgm-exporter -- confirm against the exporter documentation.
*/}}
{{- define "dcgm-exporter.env" -}}
{{- $intervalFound := false -}}
{{- $intervalValue := 0 -}}
{{- range .Values.agents -}}
{{- $agent := merge . (deepCopy $.Values.agent) -}}
{{- $agentConfig := $agent.config | default $agent.defaultConfig -}}
{{- /* Nested ifs instead of a single `and`: `and` only short-circuits on Go >= 1.18, and hasKey fails on a missing intermediate map. */ -}}
{{- if hasKey $agentConfig "logs" -}}
{{- if hasKey $agentConfig.logs "metrics_collected" -}}
{{- if hasKey $agentConfig.logs.metrics_collected "kubernetes" -}}
{{- if hasKey $agentConfig.logs.metrics_collected.kubernetes "accelerated_compute_gpu_metrics_collection_interval" -}}
{{- $intervalFound = true -}}
{{- $intervalValue = $agentConfig.logs.metrics_collected.kubernetes.accelerated_compute_gpu_metrics_collection_interval -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{- if $intervalFound -}}
{{- /* Coerce to int first: values may arrive as float64, int64 or string, and `lt` rejects mixed float/int operands. */ -}}
{{- if lt (int $intervalValue) 60 -}}
- name: DCGM_EXPORTER_INTERVAL
  value: "1000"
{{- end -}}
{{- end -}}
{{- end -}}
270+
251271{ {/*
252272Get the current recommended auto instrumentation java image
253273*/} }
@@ -407,5 +427,3 @@ Get namespaceSelector value for admission webhooks
407427{ {- end -} }
408428{ {- end -} }
409429{ {- end -} }
410-
411-
Original file line number Diff line number Diff line change 3131 valueFrom :
3232 fieldRef :
3333 fieldPath : spec.nodeName
34+ {{- include "dcgm-exporter.env" . | nindent 2 }}
3435 ports :
3536 - name : " metrics"
3637 port : {{ .Values.dcgmExporter.service.port }}
6970 cert_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.crt
7071 key_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.key
7172 {{- dict "component" .Values.dcgmExporter "context" . | include "amazon-cloudwatch-observability.common.tolerations" | nindent 2 }}
72- {{- end }}
73+ {{- end }}
You can’t perform that action at this time.
0 commit comments