Skip to content

[CI] Add dispatch_job script #526

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 179 additions & 0 deletions premerge/buildbot/dispatch_job.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
"""Dispatches a job to the k8s cluster.

This script takes in a commit SHA to test along with the platform, spawns a job
to test it, and then streams the logs from the job. We read logs from the job
every so often using the kuberntes logging API rather than directly executing
commands inside the container and streaming the output. This is to work
around https://github.com/kubernetes-sigs/apiserver-network-proxy/issues/748.
"""

import sys
import logging
import time
import dateutil
import datetime
import json

import kubernetes

PLATFORM_TO_NAMESPACE = {"Linux": "llvm-premerge-linux-buildbot"}
LOG_SECONDS_TO_QUERY = 10
SECONDS_QUERY_LOGS_EVERY = 5


def start_build_linux(commit_sha: str, k8s_client) -> str:
"""Spawns a pod to build/test LLVM at the specified SHA.

Args:
commit_sha: The commit SHA to build/run the tests at.
k8s_client: The kubernetes client instance to use for spawning the pod.

Returns:
A string containing the name of the pod.
"""
pod_name = f"build-{commit_sha}"
commands = [
"git clone --depth 100 https://github.com/llvm/llvm-project",
"cd llvm-project",
f"git checkout ${commit_sha}",
"export CC=clang",
"export CXX=clang++",
'./.ci/monolithic-linux.sh "bolt;clang;clang-tools-extra;flang;libclc;lld;lldb;llvm;mlir;polly" "check-bolt check-clang check-clang-cir check-clang-tools check-flang check-lld check-lldb check-llvm check-mlir check-polly" "compiler-rt;libc;libcxx;libcxxabi;libunwind" "check-compiler-rt check-libc" "check-cxx check-cxxabi check-unwind" "OFF"'
"echo BUILD FINISHED",
]
pod_definition = {
"apiVersion": "v1",
"kind": "Pod",
"metadata": {
"name": pod_name,
"namespace": PLATFORM_TO_NAMESPACE["Linux"],
},
"spec": {
"containers": [
{
"name": "build",
"image": "ghcr.io/llvm/ci-ubuntu-24.04",
"command": ["/bin/bash", "-c", ";".join(commands)],
}
],
"restartPolicy": "Never",
},
}
kubernetes.utils.create_from_dict(k8s_client, pod_definition)
return pod_name


def read_logs(pod_name: str, namespace: str, v1_api) -> list[str]:
"""Reads logs from the specified pod.

Reads logs using the k8s API and returns a nicely formatted list of
strings.

Args:
pod_name: The name of the pod to read logs from.
namespace: The namespace the pod is in.
v1_api: The kubernetes API instance to use for querying logs.

Returns:
A list of strings representing the log lines.
"""
logs = v1_api.read_namespaced_pod_log(
name=pod_name,
namespace=namespace,
timestamps=True,
since_seconds=LOG_SECONDS_TO_QUERY,
)
return logs.split("\n")[:-1]


def get_logs_to_print(
logs: list[str], latest_time: datetime.datetime
) -> tuple[datetime.datetime, list[str]]:
"""Get the logs that we should be printing.

This function takes in a raw list of logs along with the timestamp of the
last log line to be printed and returns the new log lines that should be
printed.

Args:
logs: The raw list of log lines.
latest_time: The timestamp from the last log line that was printed.

Returns:
A tuple containing the timestamp of the last log line returned and a list
of strings containing the log lines that should be printed.
"""
first_new_index = 0
time_stamp = latest_time
for log_line in logs:
time_stamp_str = log_line.split(" ")[0]
time_stamp = dateutil.parser.parse(time_stamp_str[:-1])
if time_stamp > latest_time:
break
first_new_index += 1
last_time_stamp = latest_time
if logs:
last_time_stamp_str = logs[-1].split(" ")[0]
last_time_stamp = dateutil.parser.parse(last_time_stamp_str[:-1])
return (last_time_stamp, logs[first_new_index:])


def print_logs(
pod_name: str, namespace: str, v1_api, lastest_time: datetime.datetime
) -> tuple[bool, datetime.datetime]:
"""Queries the pod and prints the relevant log lines.

Args:
pod_name: The pod to print the logs for.
namespace: The namespace the log is in.
v1_api: The kubernetes API client instance to use for querying the logs.
latest_time: The timestamp of the last log line to be printed.

Returns:
A tuple containing a boolean representing whether or not the pod has
finished executing and the timestamp of the last log line printed.
"""
logs = read_logs(pod_name, namespace, v1_api)
new_time_stamp, logs_to_print = get_logs_to_print(logs, lastest_time)
pod_finished = False
for log_line in logs_to_print:
print(log_line.split("\r")[-1])
if "BUILD FINISHED" in log_line:
pod_finished = True

return (pod_finished, new_time_stamp)


def main(commit_sha: str, platform: str):
kubernetes.config.load_kube_config()
k8s_client = kubernetes.client.ApiClient()
if platform == "Linux":
pod_name = start_build_linux(commit_sha, k8s_client)
else:
raise ValueError("Unrecognized platform.")
namespace = PLATFORM_TO_NAMESPACE[platform]
latest_time = datetime.datetime.min
v1_api = kubernetes.client.CoreV1Api()
while True:
try:
pod_finished, latest_time = print_logs(
pod_name, namespace, v1_api, latest_time
)
if pod_finished:
break
except kubernetes.client.exceptions.ApiException as log_exception:
if "ContainerCreating" in json.loads(log_exception.body)["message"]:
logging.warning(
"Cannot yet read logs from the pod: waiting for the container to start."
)
else:
logging.warning(f"Failed to get logs from the pod: {log_exception}")
time.sleep(SECONDS_QUERY_LOGS_EVERY)
v1_api.delete_namespaced_pod(pod_name, namespace)


if __name__ == "__main__":
if len(sys.argv) != 3:
logging.fatal("Expected usage is dispatch_job.py {commit SHA} {platform}")
sys.exit(1)
main(sys.argv[1], sys.argv[2])
86 changes: 86 additions & 0 deletions premerge/buildbot/dispatch_job_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Tests for the dispatch_job.py script."""

import unittest
import datetime
import dateutil

import dispatch_job


class TestDispatchJobs(unittest.TestCase):
def test_get_logs_first_time(self):
"""Test we return the correct logs if we have not seen any before."""
log_lines = [
"2025-07-29T15:48:00.259595535Z test1",
"2025-07-29T15:48:00.383251277Z test2",
]
current_timestamp = datetime.datetime.min
latest_timestamp, lines_to_print = dispatch_job.get_logs_to_print(
log_lines, current_timestamp
)
self.assertSequenceEqual(
lines_to_print,
[
"2025-07-29T15:48:00.259595535Z test1",
"2025-07-29T15:48:00.383251277Z test2",
],
)
self.assertEqual(
latest_timestamp, dateutil.parser.parse("2025-07-29T15:48:00.383251277")
)

def test_get_logs_nonoverlapping(self):
"""Test we return the correct logs for non-overlapping ranges.

Test that if the timestamp of the last log that we have printed is
less than the current set returned by kubernetes, we return the correct
lines.
"""
log_lines = [
"2025-07-29T15:48:01.787177054Z test1",
"2025-07-29T15:48:03.074715108Z test2",
]
current_timestamp = dateutil.parser.parse("2025-07-29T15:48:00.383251277")
latest_timestamp, lines_to_print = dispatch_job.get_logs_to_print(
log_lines, current_timestamp
)
self.assertSequenceEqual(
lines_to_print,
[
"2025-07-29T15:48:01.787177054Z test1",
"2025-07-29T15:48:03.074715108Z test2",
],
)
self.assertEqual(
latest_timestamp, dateutil.parser.parse("2025-07-29T15:48:03.074715108")
)

def test_get_logs_overlapping(self):
"""Test we return the correct logs for overlapping ranges.

Test that if the last line to be printed is contained within the logs
kubernetes returned, we skip the lines that have already been printed.
"""
log_lines = [
"2025-07-29T15:48:00.383251277Z test1",
"2025-07-29T15:48:01.787177054Z test2",
"2025-07-29T15:48:03.074715108Z test3",
]
current_timestamp = dateutil.parser.parse("2025-07-29T15:48:00.383251277")
latest_timestamp, lines_to_print = dispatch_job.get_logs_to_print(
log_lines, current_timestamp
)
self.assertSequenceEqual(
lines_to_print,
[
"2025-07-29T15:48:01.787177054Z test2",
"2025-07-29T15:48:03.074715108Z test3",
],
)
self.assertEqual(
latest_timestamp, dateutil.parser.parse("2025-07-29T15:48:03.074715108")
)


if __name__ == "__main__":
unittest.main()
54 changes: 54 additions & 0 deletions premerge/buildbot/requirements.lock.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#
# This file is autogenerated by pip-compile with Python 3.12
# by the following command:
#
# pip-compile --output-file=requirements.lock.txt requirements.txt
#
cachetools==5.5.2
# via google-auth
certifi==2025.7.14
# via
# kubernetes
# requests
charset-normalizer==3.4.2
# via requests
durationpy==0.10
# via kubernetes
google-auth==2.40.3
# via kubernetes
idna==3.10
# via requests
kubernetes==33.1.0
# via -r requirements.txt
oauthlib==3.3.1
# via
# kubernetes
# requests-oauthlib
pyasn1==0.6.1
# via
# pyasn1-modules
# rsa
pyasn1-modules==0.4.2
# via google-auth
python-dateutil==2.9.0.post0
# via kubernetes
pyyaml==6.0.2
# via kubernetes
requests==2.32.4
# via
# kubernetes
# requests-oauthlib
requests-oauthlib==2.0.0
# via kubernetes
rsa==4.9.1
# via google-auth
six==1.17.0
# via
# kubernetes
# python-dateutil
urllib3==2.5.0
# via
# kubernetes
# requests
websocket-client==1.8.0
# via kubernetes
1 change: 1 addition & 0 deletions premerge/buildbot/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
kubernetes==33.1.0