From bb6f0ba63db403837693467dd0d501014951edb7 Mon Sep 17 00:00:00 2001 From: Guy Ziv Date: Thu, 20 Nov 2025 09:19:38 +0200 Subject: [PATCH] feat: Add OpenShift deployment for Grafana dashboards - Add grafana/openshift/ directory with deployment manifests and scripts - Implement auto-generation of ConfigMaps from dashboard JSON files - Add deploy-grafana-dashboards.sh for one-command deployment - Add grafana-instance.yaml, grafana-datasource.yaml, and dashboard CRDs - Fix datasource configuration to properly reference PostgreSQL secret - Update .gitignore to exclude generated ConfigMaps and local files - Remove obsolete import-dashboards.sh and setup-datasource.sh scripts - Add comprehensive README with deployment and troubleshooting guides --- .gitignore | 3 + grafana/dashboards/mlops-batch-metrics.json | 281 ++++++++++++++++-- grafana/dashboards/mlops-job-details.json | 201 +++++++------ grafana/import-dashboards.sh | 35 --- grafana/openshift/README.md | 200 +++++++++++++ .../openshift/deploy-grafana-dashboards.sh | 91 ++++++ .../generate-dashboard-configmaps.sh | 60 ++++ .../grafana-dashboard-batch-metrics.yaml | 13 + .../grafana-dashboard-job-details.yaml | 13 + grafana/openshift/grafana-datasource.yaml | 30 ++ grafana/openshift/grafana-instance.yaml | 41 +++ grafana/setup-datasource.sh | 56 ---- 12 files changed, 816 insertions(+), 208 deletions(-) delete mode 100755 grafana/import-dashboards.sh create mode 100644 grafana/openshift/README.md create mode 100755 grafana/openshift/deploy-grafana-dashboards.sh create mode 100755 grafana/openshift/generate-dashboard-configmaps.sh create mode 100644 grafana/openshift/grafana-dashboard-batch-metrics.yaml create mode 100644 grafana/openshift/grafana-dashboard-job-details.yaml create mode 100644 grafana/openshift/grafana-datasource.yaml create mode 100644 grafana/openshift/grafana-instance.yaml delete mode 100755 grafana/setup-datasource.sh diff --git a/.gitignore b/.gitignore index 5785759..a270159 100644 --- 
a/.gitignore +++ b/.gitignore @@ -57,3 +57,6 @@ charts/*.tar.gz CLAUDE.md agent-configs-orchestrator/ orchestrator-ai-assistant-rules/ +grafana/openshift/.env +grafana/openshift/configmap-*.yaml +grafana/openshift/fix-datasource-password.sh diff --git a/grafana/dashboards/mlops-batch-metrics.json b/grafana/dashboards/mlops-batch-metrics.json index 1c4920f..d76103c 100644 --- a/grafana/dashboards/mlops-batch-metrics.json +++ b/grafana/dashboards/mlops-batch-metrics.json @@ -20,7 +20,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -50,7 +50,7 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 6, "w": 6, "x": 0, "y": 0 @@ -100,7 +100,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -130,7 +130,7 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 6, "w": 6, "x": 6, "y": 0 @@ -164,7 +164,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -194,7 +194,7 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 6, "w": 6, "x": 12, "y": 0 @@ -228,7 +228,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -258,7 +258,7 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 6, "w": 6, "x": 18, "y": 0 @@ -292,7 +292,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -345,7 +345,7 @@ "h": 8, "w": 24, "x": 0, - "y": 4 + "y": 6 }, "id": 5, "options": { @@ -376,7 +376,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": 
"c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -489,7 +489,7 @@ "h": 8, "w": 12, "x": 0, - "y": 12 + "y": 14 }, "id": 6, "options": { @@ -508,7 +508,7 @@ { "format": "table", "rawQuery": true, - "rawSql": "SELECT\n batch_id AS \"Batch ID\",\n testing_data_nvrs_version AS \"Data Version\",\n prompts_version AS \"Prompt Version\",\n total_tp AS \"True Positives\",\n total_fp AS \"False Positives\",\n total_tn AS \"True Negatives\",\n total_fn AS \"False Negatives\",\n batch_accuracy AS \"Accuracy\"\nFROM mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\nORDER BY submitted_at DESC\nLIMIT 10;", + "rawSql": "SELECT\n batch_id AS \"Batch ID\",\n testing_data_nvrs_version AS \"Data Version\",\n prompts_version AS \"Prompt Version\",\n known_non_issues_version AS \"Known Non-Issues Version\",\n container_image AS \"Container Image\",\n total_tp AS \"True Positives\",\n total_fp AS \"False Positives\",\n total_tn AS \"True Negatives\",\n total_fn AS \"False Negatives\",\n batch_accuracy AS \"Accuracy\"\nFROM mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\nORDER BY submitted_at DESC\nLIMIT 10;", "refId": "A" } ], @@ -518,7 +518,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -615,7 +615,7 @@ "h": 8, "w": 12, "x": 12, - "y": 12 + "y": 14 }, "id": 7, "options": { @@ -639,7 +639,7 @@ { "format": "table", "rawQuery": true, - "rawSql": "SELECT\n batch_id AS \"Batch ID\",\n submitted_at AS \"Submitted At\",\n testing_data_nvrs_version AS \"Data Version\",\n prompts_version AS \"Prompt Version\",\n total_jobs AS \"Total Jobs\",\n jobs_with_metrics AS \"Jobs w/ Metrics\",\n ROUND(avg_accuracy::NUMERIC, 4) AS \"Avg Accuracy\",\n ROUND(batch_accuracy::NUMERIC, 4) AS \"Batch Accuracy\",\n ROUND(avg_f1_score::NUMERIC, 4) AS \"Avg F1\",\n ROUND(batch_f1_score::NUMERIC, 4) AS \"Batch F1\"\nFROM 
mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\nORDER BY submitted_at DESC;", + "rawSql": "SELECT\n batch_id AS \"Batch ID\",\n submitted_at AS \"Submitted At\",\n testing_data_nvrs_version AS \"Data Version\",\n prompts_version AS \"Prompt Version\",\n known_non_issues_version AS \"Known Non-Issues Version\",\n container_image AS \"Container Image\",\n total_jobs AS \"Total Jobs\",\n jobs_with_metrics AS \"Jobs w/ Metrics\",\n ROUND(avg_accuracy::NUMERIC, 4) AS \"Avg Accuracy\",\n ROUND(batch_accuracy::NUMERIC, 4) AS \"Batch Accuracy\",\n ROUND(avg_f1_score::NUMERIC, 4) AS \"Avg F1\",\n ROUND(batch_f1_score::NUMERIC, 4) AS \"Batch F1\"\nFROM mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\nORDER BY submitted_at DESC;", "refId": "A" } ], @@ -649,7 +649,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -702,7 +702,7 @@ "h": 8, "w": 24, "x": 0, - "y": 20 + "y": 22 }, "id": 8, "options": { @@ -723,12 +723,180 @@ { "format": "table", "rawQuery": true, - "rawSql": "SELECT\n prompts_version,\n AVG(batch_accuracy) AS avg_accuracy,\n AVG(batch_precision) AS avg_precision,\n AVG(batch_recall) AS avg_recall,\n AVG(batch_f1_score) AS avg_f1_score,\n COUNT(*) AS batch_count\nFROM mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\nGROUP BY prompts_version\nORDER BY prompts_version;", + "rawSql": "SELECT\n prompts_version,\n AVG(batch_accuracy) AS avg_accuracy,\n AVG(batch_precision) AS avg_precision,\n AVG(batch_recall) AS avg_recall,\n AVG(batch_f1_score) AS avg_f1_score,\n COUNT(*) AS batch_count\nFROM mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\n AND (ARRAY_LENGTH(ARRAY[${prompt_versions:singlequote}], 1) IS NULL OR prompts_version IN (${prompt_versions:singlequote}))\nGROUP BY prompts_version\nORDER BY prompts_version;", "refId": "A" } ], "title": "Performance by Prompt Version", "type": "barchart" + 
}, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "format": "table", + "rawQuery": true, + "rawSql": "SELECT\n known_non_issues_version,\n AVG(batch_accuracy) AS avg_accuracy,\n AVG(batch_precision) AS avg_precision,\n AVG(batch_recall) AS avg_recall,\n AVG(batch_f1_score) AS avg_f1_score,\n COUNT(*) AS batch_count\nFROM mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\n AND (ARRAY_LENGTH(ARRAY[${known_non_issues_versions:singlequote}], 1) IS NULL OR known_non_issues_version IN (${known_non_issues_versions:singlequote}))\nGROUP BY known_non_issues_version\nORDER BY known_non_issues_version;", + "refId": "A" + } + ], + "title": "Performance by Known Non-Issues Version", + "type": "barchart" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "mean", + "max", + "min" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi" + } + }, + "targets": [ + { + "format": "table", + "rawQuery": true, + "rawSql": "SELECT\n container_image,\n AVG(batch_accuracy) AS avg_accuracy,\n AVG(batch_precision) AS avg_precision,\n AVG(batch_recall) AS avg_recall,\n AVG(batch_f1_score) AS avg_f1_score,\n COUNT(*) AS batch_count\nFROM mlops_batch_metrics_view\nWHERE $__timeFilter(submitted_at)\n AND (ARRAY_LENGTH(ARRAY[${container_images:singlequote}], 1) IS NULL OR container_image IN (${container_images:singlequote}))\nGROUP BY container_image\nORDER BY container_image;", + "refId": "A" + } + ], + "title": "Performance by Container Image", + "type": "barchart" } ], "refresh": "30s", @@ -741,7 +909,80 @@ "sast-ai" ], "templating": { - "list": [] + "list": [ + { + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "P413637974B2AAB20" + }, + "definition": "SELECT DISTINCT prompts_version FROM mlops_batch_metrics_view ORDER BY prompts_version;", + "hide": 0, + "includeAll": true, + "label": "Prompt Versions", + "multi": true, + 
"name": "prompt_versions", + "options": [], + "query": "SELECT DISTINCT prompts_version FROM mlops_batch_metrics_view ORDER BY prompts_version;", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "P413637974B2AAB20" + }, + "definition": "SELECT DISTINCT known_non_issues_version FROM mlops_batch_metrics_view ORDER BY known_non_issues_version;", + "hide": 0, + "includeAll": true, + "label": "Known Non-Issues Versions", + "multi": true, + "name": "known_non_issues_versions", + "options": [], + "query": "SELECT DISTINCT known_non_issues_version FROM mlops_batch_metrics_view ORDER BY known_non_issues_version;", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "P413637974B2AAB20" + }, + "definition": "SELECT DISTINCT container_image FROM mlops_batch_metrics_view ORDER BY container_image;", + "hide": 0, + "includeAll": true, + "label": "Container Images", + "multi": true, + "name": "container_images", + "options": [], + "query": "SELECT DISTINCT container_image FROM mlops_batch_metrics_view ORDER BY container_image;", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { "from": "now-30d", diff --git a/grafana/dashboards/mlops-job-details.json b/grafana/dashboards/mlops-job-details.json index 23131d1..9a4aeb8 100644 --- a/grafana/dashboards/mlops-job-details.json +++ b/grafana/dashboards/mlops-job-details.json @@ -10,7 +10,7 @@ { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -84,17 +84,17 @@ { "format": "table", 
"rawQuery": true, - "rawSql": "SELECT\n jm.mlops_job_id AS metric,\n jm.accuracy,\n jm.precision,\n jm.recall,\n jm.f1_score,\n j.package_name\nFROM mlops_job_metrics jm\nJOIN mlops_job j ON jm.mlops_job_id = j.id\nWHERE j.mlops_batch_id = $batch_id\nORDER BY jm.created_at DESC;", + "rawSql": "SELECT\n j.package_name,\n jm.accuracy,\n jm.precision,\n jm.recall,\n jm.f1_score\nFROM mlops_job_metrics jm\nJOIN mlops_job j ON jm.mlops_job_id = j.id\nWHERE j.mlops_batch_id = $batch_id\n AND (ARRAY_LENGTH(ARRAY[${packages:singlequote}], 1) IS NULL OR j.package_name IN (${packages:singlequote}))\nORDER BY j.package_name\nLIMIT 5;", "refId": "A" } ], - "title": "Job-Level Metrics Distribution (Select Batch ID Below)", + "title": "Job-Level Metrics Distribution", "type": "barchart" }, { "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "fieldConfig": { "defaults": { @@ -132,9 +132,23 @@ "value": "color-background" }, { - "id": "color", + "id": "thresholds", "value": { - "mode": "continuous-GrYlRd" + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.7 + }, + { + "color": "green", + "value": 0.85 + } + ] } } ] @@ -154,9 +168,23 @@ "value": "color-background" }, { - "id": "color", + "id": "thresholds", "value": { - "mode": "continuous-GrYlRd" + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.7 + }, + { + "color": "green", + "value": 0.85 + } + ] } } ] @@ -176,9 +204,23 @@ "value": "color-background" }, { - "id": "color", + "id": "thresholds", "value": { - "mode": "continuous-GrYlRd" + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.7 + }, + { + "color": "green", + "value": 0.85 + } + ] } } ] @@ -198,9 +240,23 @@ "value": "color-background" }, { - "id": "color", + "id": "thresholds", "value": { - 
"mode": "continuous-GrYlRd" + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 0.7 + }, + { + "color": "green", + "value": 0.85 + } + ] } } ] @@ -235,91 +291,12 @@ { "format": "table", "rawQuery": true, - "rawSql": "SELECT\n j.id AS \"Job ID\",\n j.package_name AS \"Package Name\",\n j.package_nvr AS \"Package NVR\",\n j.status AS \"Status\",\n jm.accuracy AS \"Accuracy\",\n jm.precision AS \"Precision\",\n jm.recall AS \"Recall\",\n jm.f1_score AS \"F1 Score\",\n jm.cm_tp AS \"TP\",\n jm.cm_fp AS \"FP\",\n jm.cm_tn AS \"TN\",\n jm.cm_fn AS \"FN\",\n j.tekton_url AS \"Tekton URL\",\n j.created_at AS \"Created\",\n j.completed_at AS \"Completed\"\nFROM mlops_job j\nLEFT JOIN mlops_job_metrics jm ON j.id = jm.mlops_job_id\nWHERE j.mlops_batch_id = $batch_id\nORDER BY j.created_at DESC;", + "rawSql": "SELECT\n j.id AS \"Job ID\",\n j.package_name AS \"Package Name\",\n j.package_nvr AS \"Package NVR\",\n j.status AS \"Status\",\n jm.accuracy AS \"Accuracy\",\n jm.precision AS \"Precision\",\n jm.recall AS \"Recall\",\n jm.f1_score AS \"F1 Score\",\n jm.cm_tp AS \"Correct Non-Issue\",\n jm.cm_fp AS \"Missed Real Issue\",\n jm.cm_tn AS \"Correct Real Issue\",\n jm.cm_fn AS \"Missed Non-Issue\",\n j.tekton_url AS \"Tekton URL\",\n j.created_at AS \"Created\",\n j.completed_at AS \"Completed\"\nFROM mlops_job j\nLEFT JOIN mlops_job_metrics jm ON j.id = jm.mlops_job_id\nWHERE j.mlops_batch_id = $batch_id\nORDER BY j.created_at DESC;", "refId": "A" } ], "title": "Detailed Job Metrics", "type": "table" - }, - { - "datasource": { - "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "targets": [ - { - "format": "time_series", - "rawQuery": true, - "rawSql": "SELECT\n j.completed_at AS time,\n COUNT(*) AS \"Jobs Completed\"\nFROM mlops_job j\nWHERE j.mlops_batch_id = $batch_id\n AND j.completed_at IS NOT NULL\n AND $__timeFilter(j.completed_at)\nGROUP BY time\nORDER BY time;", - "refId": "A" - } - ], - "title": "Job Completion Timeline", - "type": "timeseries" } ], "refresh": "", @@ -335,13 +312,13 @@ "list": [ { "current": { - "selected": false, - "text": "", - "value": "" + "selected": true, + "text": "6", + "value": "6" }, "datasource": { "type": "grafana-postgresql-datasource", - "uid": "af3woqo2g8gzkd" + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" }, "definition": "SELECT id FROM mlops_batch ORDER BY submitted_at DESC;", "hide": 0, @@ -349,13 +326,43 @@ "label": "Batch ID", "multi": false, "name": "batch_id", - "options": [], + "options": [ + { + "selected": true, + "text": "6", + "value": "6" + } + ], "query": "SELECT id FROM mlops_batch ORDER BY id DESC;", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query" + }, + { + "current": { + "selected": false, + "text": [], + "value": [] + }, + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "c3340f63-6cc1-4a9e-9cd2-72876665193b" + }, + "definition": "SELECT DISTINCT package_name FROM mlops_job WHERE mlops_batch_id = $batch_id ORDER 
BY package_name;", + "hide": 0, + "includeAll": false, + "label": "Packages (max 5)", + "multi": true, + "name": "packages", + "options": [], + "query": "SELECT DISTINCT package_name FROM mlops_job WHERE mlops_batch_id = $batch_id ORDER BY package_name;", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } ] }, diff --git a/grafana/import-dashboards.sh b/grafana/import-dashboards.sh deleted file mode 100755 index ca090a8..0000000 --- a/grafana/import-dashboards.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash -# Script to import Grafana dashboards via API -# Can be used for manual setup or CI/CD automation - -set -e - -# Configuration -GRAFANA_URL="${GRAFANA_URL:-http://localhost:3000}" -GRAFANA_USER="${GRAFANA_USER:-admin}" -GRAFANA_PASSWORD="${GRAFANA_PASSWORD:-admin}" -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -echo "Importing dashboards to Grafana at ${GRAFANA_URL}..." - -# Import MLOps Batch Metrics dashboard -echo "Importing MLOps Batch Metrics Overview..." -DASHBOARD_JSON=$(cat "${SCRIPT_DIR}/dashboards/mlops-batch-metrics.json") -curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \ - -X POST -H "Content-Type: application/json" \ - -d "{\"dashboard\": ${DASHBOARD_JSON}, \"overwrite\": true}" \ - "${GRAFANA_URL}/api/dashboards/db" | \ - python3 -c "import sys, json; r=json.load(sys.stdin); print(f\" ✓ {r.get('title', 'Dashboard')} imported: {r.get('url', 'N/A')}\")" - -# Import MLOps Job Details dashboard -echo "Importing MLOps Job Details..." -DASHBOARD_JSON2=$(cat "${SCRIPT_DIR}/dashboards/mlops-job-details.json") -curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \ - -X POST -H "Content-Type: application/json" \ - -d "{\"dashboard\": ${DASHBOARD_JSON2}, \"overwrite\": true}" \ - "${GRAFANA_URL}/api/dashboards/db" | \ - python3 -c "import sys, json; r=json.load(sys.stdin); print(f\" ✓ {r.get('title', 'Dashboard')} imported: {r.get('url', 'N/A')}\")" - -echo "" -echo "✓ All dashboards imported successfully!" 
-echo " Access them at: ${GRAFANA_URL}" \ No newline at end of file diff --git a/grafana/openshift/README.md b/grafana/openshift/README.md new file mode 100644 index 0000000..09aaf93 --- /dev/null +++ b/grafana/openshift/README.md @@ -0,0 +1,200 @@ +# Grafana Dashboards for OpenShift + +This directory contains manifests and scripts to deploy Grafana dashboards to OpenShift using the Grafana Operator. + +## Prerequisites + +- OpenShift cluster with Grafana Operator installed +- `oc` CLI tool installed and configured +- Access to the target namespace (default: `sast-ai-mlops`) +- PostgreSQL database running with the orchestrator application + +## Quick Start + +Login to OpenShift and run the deployment script: + +```bash +oc login +./deploy-grafana-dashboards.sh +``` + +This script will: +1. Auto-generate dashboard ConfigMaps from JSON files in `../dashboards/` +2. Create/verify Grafana instance +3. Deploy dashboard ConfigMaps +4. Create PostgreSQL datasource +5. Deploy dashboard references + +## Access Grafana + +After deployment, access Grafana at: + +``` +http://sast-ai-grafana-service-sast-ai-mlops.apps.YOUR_CLUSTER_DOMAIN +``` + +Login with credentials: **admin/admin** (configured in `grafana-instance.yaml`) + +## Available Dashboards + +1. **MLOps Batch Metrics Overview** - Aggregate metrics across all batches + - Latest batch metrics (accuracy, precision, recall, F1) + - Performance trends over time + - Confusion matrix for recent batches + - Comparison by prompt version, known non-issues version, and container image + +2.
**MLOps Job Details** - Job-level metrics for individual packages + - Job-level metrics distribution + - Detailed job metrics table with confusion matrix values + - Filterable by batch, job, and package name + +## Architecture + +### Dashboard Source of Truth +- Dashboard JSON files are stored in `../dashboards/` +- ConfigMaps are **auto-generated** during deployment (not committed to git) +- Edit dashboards in Grafana UI, export JSON, and save to `../dashboards/` + +### How It Works +1. `generate-dashboard-configmaps.sh` reads JSON files from `../dashboards/` +2. Creates temporary ConfigMap YAML files: `configmap-*.yaml` +3. `deploy-grafana-dashboards.sh` applies these ConfigMaps to OpenShift +4. Dashboard CRDs (`grafana-dashboard-*.yaml`) reference these ConfigMaps +5. Grafana Operator syncs dashboards to Grafana instance + +## Files Overview + +### Committed Files +- `README.md` - This documentation +- `deploy-grafana-dashboards.sh` - Main deployment orchestrator +- `generate-dashboard-configmaps.sh` - Auto-generates ConfigMaps from JSON +- `grafana-instance.yaml` - Grafana instance CRD (Grafana app deployment) +- `grafana-datasource.yaml` - PostgreSQL datasource CRD +- `grafana-dashboard-batch-metrics.yaml` - Batch metrics dashboard reference +- `grafana-dashboard-job-details.yaml` - Job details dashboard reference + +### Generated/Local Files (gitignored) +- `configmap-*.yaml` - Auto-generated from `../dashboards/*.json` +- `.env` - Local credentials (only needed for troubleshooting script) +- `fix-datasource-password.sh` - Emergency troubleshooting script + +## Troubleshooting + +### Dashboards Show "No Data" + +**First, verify the datasource:** +1. Log into Grafana +2. Go to Configuration → Data sources +3. Click on "SAST-AI-PostgreSQL" +4. Click "Test" button +5. 
Should show "Database Connection OK" + +**If the connection test fails:** + +The Grafana Operator v5 has a known bug where datasource passwords may not persist properly during reconciliation cycles. If you encounter this issue repeatedly: + +1. Create a local `.env` file with your credentials: + ```bash + # Grafana credentials + GRAFANA_USERNAME=admin + GRAFANA_PASSWORD=admin + + # PostgreSQL database credentials + DB_USERNAME=quarkus + DB_PASSWORD=quarkus + + # OpenShift namespace + NAMESPACE=sast-ai-mlops + + # Grafana datasource UID (do not change unless recreating datasource) + DATASOURCE_UID=c3340f63-6cc1-4a9e-9cd2-72876665193b + ``` + +2. Use the troubleshooting script (not committed to git): + ```bash + ./fix-datasource-password.sh + ``` + + This script will: + - Port-forward to Grafana service + - Update datasource password via Grafana API + - Test the database connection + +**Note:** We've applied a fix to `grafana-datasource.yaml` to address the operator bug. If you still need to run the fix script frequently, please report this as it indicates the operator issue persists. + +### Verify Deployment Status + +```bash +# Check Grafana instance +oc get grafana -n sast-ai-mlops + +# Check datasource +oc get grafanadatasource -n sast-ai-mlops + +# Check dashboards +oc get grafanadashboard -n sast-ai-mlops + +# Check dashboard ConfigMaps +oc get configmap -n sast-ai-mlops | grep dashboard +``` + +### Redeploy Everything + +If you need to start fresh: + +```bash +# Delete all Grafana resources +oc delete grafanadashboard --all -n sast-ai-mlops +oc delete grafanadatasource --all -n sast-ai-mlops +oc delete grafana --all -n sast-ai-mlops +oc delete configmap -l app=sast-ai-orchestrator -n sast-ai-mlops + +# Redeploy +./deploy-grafana-dashboards.sh +``` + +## Making Dashboard Changes + +1. **Edit in Grafana UI:** + - Make your changes in the Grafana web interface + - Test thoroughly + +2. 
**Export the dashboard:** + - Click dashboard settings (gear icon) + - Click "JSON Model" + - Copy the JSON + +3. **Update the source file:** + - Save to `../dashboards/mlops-batch-metrics.json` or `mlops-job-details.json` + - Commit the JSON file to git + +4. **Redeploy:** + ```bash + ./deploy-grafana-dashboards.sh + ``` + +## Security Notes + +- Default credentials (admin/admin) are configured in `grafana-instance.yaml` +- Database credentials are pulled from the existing Kubernetes secret: `sast-ai-orchestrator-postgresql` +- Change Grafana admin password in production environments +- The `.env` file (if created) contains credentials and is gitignored + +## Technical Details + +### Grafana Operator +- Version: v5.20.0 (check with `oc get csv -n openshift-operators | grep grafana`) +- Custom Resource Definitions (CRDs): + - `Grafana` - Grafana instance + - `GrafanaDatasource` - Data source connections + - `GrafanaDashboard` - Dashboard references + +### Database Connection +- **Service**: `sast-ai-orchestrator-postgresql:5432` +- **Database**: `sast-ai` +- **User**: `quarkus` +- **Password**: Pulled from secret `sast-ai-orchestrator-postgresql` +- **SSL Mode**: Disabled (cluster-internal connection) + +### Datasource Configuration +The datasource uses the Grafana Operator's `valuesFrom` feature to securely inject the password from a Kubernetes secret. The password is never stored in plaintext in the YAML files. 
\ No newline at end of file
diff --git a/grafana/openshift/deploy-grafana-dashboards.sh b/grafana/openshift/deploy-grafana-dashboards.sh
new file mode 100755
index 0000000..ba27fb8
--- /dev/null
+++ b/grafana/openshift/deploy-grafana-dashboards.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+# Deploy Grafana dashboards to OpenShift using Grafana Operator
+#
+# Usage: [NAMESPACE=my-namespace] ./deploy-grafana-dashboards.sh
+# Needs the oc CLI and an active 'oc login' session. NAMESPACE defaults to
+# sast-ai-mlops; it is created when missing and becomes the current project.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+NAMESPACE="${NAMESPACE:-sast-ai-mlops}"
+
+echo "================================================"
+echo "Deploying Grafana Dashboards to OpenShift"
+echo "================================================"
+echo ""
+
+# Generate ConfigMaps from dashboard JSON files
+echo "Generating dashboard ConfigMaps from source files..."
+"${SCRIPT_DIR}/generate-dashboard-configmaps.sh"
+
+echo ""
+echo "Namespace: ${NAMESPACE}"
+echo ""
+
+# Check if oc is available
+if ! command -v oc &> /dev/null; then
+    echo "Error: oc CLI not found. Please install OpenShift CLI." >&2
+    exit 1
+fi
+
+# Check if logged in to OpenShift
+if ! oc whoami &> /dev/null; then
+    echo "Error: Not logged in to OpenShift. Please run 'oc login' first." >&2
+    exit 1
+fi
+
+# Check if namespace exists, create if not
+if ! oc get namespace "${NAMESPACE}" &> /dev/null; then
+    echo "Creating namespace: ${NAMESPACE}"
+    oc create namespace "${NAMESPACE}"
+fi
+
+# Switch to namespace; the oc commands below pass no -n and rely on it
+oc project "${NAMESPACE}"
+
+echo ""
+echo "Step 1/5: Creating Grafana instance..."
+# An existing instance is left as-is (not re-applied, no readiness wait)
+if oc get grafana sast-ai-grafana &> /dev/null; then
+    echo " Grafana instance already exists"
+else
+    oc apply -f "${SCRIPT_DIR}/grafana-instance.yaml"
+    echo " ✓ Grafana instance created"
+    echo " Waiting for Grafana to be ready..."
+    oc wait --for=condition=Ready grafana/sast-ai-grafana --timeout=300s
+fi
+echo "✓ Grafana instance ready"
+
+echo ""
+echo "Step 2/5: Creating dashboard ConfigMaps..."
+# generate-dashboard-configmaps.sh writes configmap-<dashboard>.yaml, where
+# <dashboard> is the JSON basename (mlops-batch-metrics, mlops-job-details),
+# so the file names here must include the "mlops-" prefix.
+oc apply -f "${SCRIPT_DIR}/configmap-mlops-batch-metrics.yaml"
+oc apply -f "${SCRIPT_DIR}/configmap-mlops-job-details.yaml"
+echo "✓ ConfigMaps created"
+
+echo ""
+echo "Step 3/5: Creating Grafana datasource..."
+oc apply -f "${SCRIPT_DIR}/grafana-datasource.yaml"
+echo "✓ Datasource created"
+
+echo ""
+echo "Step 4/5: Waiting for datasource to be synced..."
+# Fixed grace period only; the actual sync status is not checked here
+sleep 5
+echo "✓ Datasource synced"
+
+echo ""
+echo "Step 5/5: Creating Grafana dashboards..."
+oc apply -f "${SCRIPT_DIR}/grafana-dashboard-batch-metrics.yaml"
+oc apply -f "${SCRIPT_DIR}/grafana-dashboard-job-details.yaml"
+echo "✓ Dashboards created"
+
+echo ""
+echo "================================================"
+echo "✓ Deployment completed successfully!"
+echo "================================================"
+echo ""
+echo "Next steps:"
+echo "1. Ensure your Grafana instance has the label 'dashboards: grafana'"
+echo "2. Ensure PostgreSQL service is accessible at 'sast-ai-orchestrator-postgresql:5432'"
+echo "3. Verify datasource connection in Grafana UI"
+echo "4. Access your dashboards in Grafana"
+echo ""
+echo "To check deployment status:"
+echo " oc get grafanadatasource -n ${NAMESPACE}"
+echo " oc get grafanadashboard -n ${NAMESPACE}"
+echo ""
\ No newline at end of file
diff --git a/grafana/openshift/generate-dashboard-configmaps.sh b/grafana/openshift/generate-dashboard-configmaps.sh
new file mode 100755
index 0000000..5ee2220
--- /dev/null
+++ b/grafana/openshift/generate-dashboard-configmaps.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# Script to auto-generate Kubernetes ConfigMaps from Grafana dashboard JSON files
+# This ensures dashboards are the single source of truth
+#
+# For every ../dashboards/<name>.json this writes configmap-<name>.yaml next to
+# this script, defining ConfigMap "dashboard-<name>" with data key "<name>.json".
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+DASHBOARDS_DIR="$SCRIPT_DIR/../dashboards"
+OUTPUT_DIR="$SCRIPT_DIR"
+
+echo "=== Generating Dashboard ConfigMaps ==="
+echo ""
+
+# Check if dashboards directory exists
+if [[ ! -d "$DASHBOARDS_DIR" ]]; then
+    echo "Error: Dashboards directory not found at $DASHBOARDS_DIR" >&2
+    exit 1
+fi
+
+# Generate ConfigMap for each dashboard JSON file
+for dashboard_file in "$DASHBOARDS_DIR"/*.json; do
+    # An unmatched glob stays literal, so a non-file entry means no dashboards
+    if [[ ! -f "$dashboard_file" ]]; then
+        echo "No dashboard files found in $DASHBOARDS_DIR" >&2
+        exit 1
+    fi
+
+    dashboard_name=$(basename "$dashboard_file" .json)
+    configmap_name="dashboard-${dashboard_name}"
+    output_file="$OUTPUT_DIR/configmap-${dashboard_name}.yaml"
+
+    echo "Generating ConfigMap: $configmap_name"
+    echo " Source: $dashboard_file"
+    echo " Output: $output_file"
+
+    # Create ConfigMap YAML (unquoted EOF, so the variables below are expanded)
+    cat > "$output_file" << EOF
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: $configmap_name
+  labels:
+    app: sast-ai-orchestrator
+data:
+  ${dashboard_name}.json: |-
+EOF
+
+    # Append dashboard JSON with proper indentation (4 spaces)
+    sed 's/^/    /' "$dashboard_file" >> "$output_file"
+
+    echo " ✓ Generated"
+    echo ""
+done
+
+echo "=== ConfigMaps generated successfully ==="
+echo ""
+echo "Generated files:"
+ls -lh "$OUTPUT_DIR"/configmap-*.yaml
+echo ""
+echo "Note: These are temporary files. Do not commit them to git."
diff --git a/grafana/openshift/grafana-dashboard-batch-metrics.yaml b/grafana/openshift/grafana-dashboard-batch-metrics.yaml
new file mode 100644
index 0000000..5fb02da
--- /dev/null
+++ b/grafana/openshift/grafana-dashboard-batch-metrics.yaml
@@ -0,0 +1,13 @@
+apiVersion: grafana.integreatly.org/v1beta1
+kind: GrafanaDashboard
+metadata:
+  name: mlops-batch-metrics
+  labels:
+    app: sast-ai-orchestrator
+spec:
+  instanceSelector:
+    matchLabels:
+      dashboards: "grafana"
+  configMapRef:
+    name: dashboard-mlops-batch-metrics
+    key: mlops-batch-metrics.json
diff --git a/grafana/openshift/grafana-dashboard-job-details.yaml b/grafana/openshift/grafana-dashboard-job-details.yaml
new file mode 100644
index 0000000..fbdc5b9
--- /dev/null
+++ b/grafana/openshift/grafana-dashboard-job-details.yaml
@@ -0,0 +1,13 @@
+apiVersion: grafana.integreatly.org/v1beta1
+kind: GrafanaDashboard
+metadata:
+  name: mlops-job-details
+  labels:
+    app: sast-ai-orchestrator
+spec:
+  instanceSelector:
+    matchLabels:
+      dashboards: "grafana"
+  configMapRef:
+    name: dashboard-mlops-job-details
+    key: mlops-job-details.json
diff --git a/grafana/openshift/grafana-datasource.yaml b/grafana/openshift/grafana-datasource.yaml
new file mode 100644
index 0000000..20f6b01
--- /dev/null
+++ b/grafana/openshift/grafana-datasource.yaml
@@ -0,0 +1,36 @@
+apiVersion: grafana.integreatly.org/v1beta1
+kind: GrafanaDatasource
+metadata:
+  name: sast-ai-postgresql
+  labels:
+    app: sast-ai-orchestrator
+spec:
+  instanceSelector:
+    matchLabels:
+      dashboards: "grafana"
+  datasource:
+    # Pinned to the datasource uid referenced by the panels in
+    # grafana/dashboards/mlops-batch-metrics.json
+    uid: c3340f63-6cc1-4a9e-9cd2-72876665193b
+    name: SAST-AI-PostgreSQL
+    type: postgres
+    access: proxy
+    # PostgreSQL service in sast-ai-mlops namespace
+    url: sast-ai-orchestrator-postgresql:5432
+    database: sast-ai
+    user: quarkus
+    isDefault: true
+    editable: false
+    jsonData:
+      sslmode: disable
+      postgresVersion: 1400
+      timescaledb: false
+    secureJsonData:
+      # Placeholder substituted from the secret key named in valuesFrom below
+      password: "${password}"
+  valuesFrom:
+    - targetPath: secureJsonData.password
+      valueFrom:
+        secretKeyRef:
+          name: sast-ai-orchestrator-postgresql
+          key: password
diff --git a/grafana/openshift/grafana-instance.yaml b/grafana/openshift/grafana-instance.yaml
new file mode 100644
index 0000000..7d4f14a
--- /dev/null
+++ b/grafana/openshift/grafana-instance.yaml
@@ -0,0 +1,43 @@
+apiVersion: grafana.integreatly.org/v1beta1
+kind: Grafana
+metadata:
+  name: sast-ai-grafana
+  labels:
+    dashboards: "grafana"
+    app: sast-ai-orchestrator
+spec:
+  config:
+    log:
+      mode: "console"
+    auth:
+      disable_login_form: "false"
+    security:
+      # NOTE(review): default admin/admin credentials on an instance exposed
+      # through the ingress below; override them before any shared deployment.
+      admin_user: "admin"
+      admin_password: "admin"
+  deployment:
+    spec:
+      replicas: 1
+      template:
+        spec:
+          containers:
+            - name: grafana
+              image: grafana/grafana:11.0.0
+  service:
+    metadata:
+      annotations:
+        service.beta.openshift.io/serving-cert-secret-name: grafana-tls
+    spec:
+      ports:
+        - name: grafana
+          port: 3000
+          protocol: TCP
+          targetPort: 3000
+  ingress:
+    metadata:
+      annotations:
+        route.openshift.io/termination: edge
+    spec:
+      hostname: sast-ai-grafana-sast-ai-mlops.apps.cluster.example.com
+      # Update this hostname to match your cluster's domain
diff --git a/grafana/setup-datasource.sh b/grafana/setup-datasource.sh
deleted file mode 100755
index 8e96ae1..0000000
--- a/grafana/setup-datasource.sh
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/bin/bash
-# Script to configure Grafana PostgreSQL datasource via API
-# Can be used for manual setup or CI/CD automation
-
-set -e
-
-# Configuration
-GRAFANA_URL="${GRAFANA_URL:-http://localhost:3000}"
-GRAFANA_USER="${GRAFANA_USER:-admin}"
-GRAFANA_PASSWORD="${GRAFANA_PASSWORD:-admin}"
-
-# Database configuration (update these for your environment)
-DB_HOST="${DB_HOST:-postgres}"
-DB_PORT="${DB_PORT:-5432}"
-DB_NAME="${DB_NAME:-sast-ai}"
-DB_USER="${DB_USER:-quarkus}"
-DB_PASSWORD="${DB_PASSWORD:-quarkus}"
-
-echo "Waiting for Grafana to be ready..."
-until curl -s "${GRAFANA_URL}/api/health" > /dev/null 2>&1; do
-  sleep 2
-done
-
-echo "Grafana is ready. Configuring PostgreSQL datasource..."
-
-# Delete existing datasource if it exists (optional)
-curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \
-  -X DELETE "${GRAFANA_URL}/api/datasources/name/SAST-AI-PostgreSQL" || true
-
-# Create datasource with proper password in secureJsonData
-curl -s -u "${GRAFANA_USER}:${GRAFANA_PASSWORD}" \
-  -X POST -H "Content-Type: application/json" \
-  -d "{
-    \"name\": \"SAST-AI-PostgreSQL\",
-    \"type\": \"grafana-postgresql-datasource\",
-    \"access\": \"proxy\",
-    \"url\": \"${DB_HOST}:${DB_PORT}\",
-    \"database\": \"${DB_NAME}\",
-    \"user\": \"${DB_USER}\",
-    \"secureJsonData\": {
-      \"password\": \"${DB_PASSWORD}\"
-    },
-    \"jsonData\": {
-      \"sslmode\": \"disable\",
-      \"postgresVersion\": 1400,
-      \"timescaledb\": false
-    },
-    \"isDefault\": true
-  }" \
-  "${GRAFANA_URL}/api/datasources"
-
-echo ""
-echo "✓ PostgreSQL datasource configured successfully!"
-echo " - Name: SAST-AI-PostgreSQL"
-echo " - URL: ${DB_HOST}:${DB_PORT}"
-echo " - Database: ${DB_NAME}"
\ No newline at end of file