|  | 
|  | 1 | +/* Windowed per-build table (UTC), incl. PR & main builds, with per-queue run hours, P90 queue wait, cost, and is_main_branch. | 
|  | 2 | +   WAIT: only attempts with started_at IS NOT NULL contribute wait (runnable → started); waits are clipped to the window and reported as P90 over positive waits. | 
|  | 3 | +   RUN: clip to [w_start, w_end]; 1-day zombie guard for open 'running' attempts. | 
|  | 4 | +   COST: 1.3232 * gpu_1_queue_run_hours + 4.602 * gpu_4_queue_run_hours (fixed). | 
|  | 5 | +*/ | 
|  | 6 | + | 
|  | 7 | +WITH /* scalar aliases shared by every CTE below */ | 
|  | 8 | +  parseDateTime64BestEffort({startTime:String}, 3) AS w_start,   -- window start, inclusive (UTC); parsed from the startTime query parameter | 
|  | 9 | +  parseDateTime64BestEffort({stopTime:String},  3) AS w_end,     -- window end, exclusive (UTC); parsed from the stopTime query parameter | 
|  | 10 | +  toDateTime64(now(), 3)                       AS now64, /* current time at millisecond precision; NOTE(review): not referenced anywhere else in this query */ | 
|  | 11 | +  (w_end - INTERVAL 1 DAY)                     AS zombie_cutoff, /* open 'running' attempts whose last event is older than this are treated as zombies in runs_scored */ | 
|  | 12 | +  toDateTime64('2100-01-01 00:00:00', 3)       AS FAR_FUTURE, /* stand-in for a NULL finished_at in the base_agent window filter */ | 
|  | 13 | +  ['gpu_1_queue','gpu_4_queue']                AS QUEUES /* only these queue keys are scored for run time and wait time */ | 
|  | 14 | + | 
|  | 15 | +/* 1) One row per build whose build.created_at falls in [w_start, w_end), plus branch/PR context. Build-level fields are read with argMax(..., job.created_at), i.e. from the job row with the greatest job.created_at. */ | 
|  | 16 | +, builds_window AS ( | 
|  | 17 | +  SELECT | 
|  | 18 | +    tupleElement(build,'id') AS build_id, /* grouping key */ | 
|  | 19 | + | 
|  | 20 | +    argMax(tupleElement(build,'number'),    tupleElement(job,'created_at')) AS build_number, | 
|  | 21 | +    argMax(tupleElement(build,'web_url'),   tupleElement(job,'created_at')) AS build_url, | 
|  | 22 | +    concat(argMax(tupleElement(build,'web_url'), tupleElement(job,'created_at')), '/steps/table') AS steps_table_url, /* build_url with '/steps/table' appended */ | 
|  | 23 | +    argMax(tupleElement(build,'commit'),    tupleElement(job,'created_at')) AS commit_sha, | 
|  | 24 | + | 
|  | 25 | +    /* robust start/finish: use the build-level timestamp, falling back to the earliest job start / latest job finish when the build-level field is NULL */ | 
|  | 26 | +    coalesce(argMax(tupleElement(build,'started_at'),  tupleElement(job,'created_at')), | 
|  | 27 | +             min(tupleElement(job,'started_at')))  AS robust_start, | 
|  | 28 | +    coalesce(argMax(tupleElement(build,'finished_at'), tupleElement(job,'created_at')), | 
|  | 29 | +             max(tupleElement(job,'finished_at'))) AS robust_finish, | 
|  | 30 | + | 
|  | 31 | +    countDistinct(tupleElement(job,'id')) AS steps_count, /* number of distinct job ids seen for the build */ | 
|  | 32 | +    argMax(tupleElement(build,'state'), tupleElement(job,'created_at')) AS latest_build_state, | 
|  | 33 | + | 
|  | 34 | +    /* repo + PR mapping: repo_slug is the first non-empty "owner/name" capture, tried in order: pipeline repository after github.com, PR repository after github.com, PR repository without the github.com prefix. The name part stops at the first dot or slash. */ | 
|  | 35 | +    coalesce( | 
|  | 36 | +      nullIf(extract(argMax(tupleElement(pipeline,'repository'),           tupleElement(job,'created_at')), 'github\\.com[:/]+([^/]+/[^/.]+)'), ''), | 
|  | 37 | +      nullIf(extract(argMax(tupleElement(build,'pull_request').repository, tupleElement(job,'created_at')), 'github\\.com[:/]+([^/]+/[^/.]+)'), ''), | 
|  | 38 | +      nullIf(extract(argMax(tupleElement(build,'pull_request').repository, tupleElement(job,'created_at')), '([^/]+/[^/.]+)'), '') | 
|  | 39 | +    ) AS repo_slug, | 
|  | 40 | +    coalesce( /* PR number: pull_request.id when it parses as an integer, otherwise the digits after 'pull/' in the branch name; NULL when neither applies */ | 
|  | 41 | +      toInt64OrNull(argMax(tupleElement(build,'pull_request').id, tupleElement(job,'created_at'))), | 
|  | 42 | +      toInt64OrNull(extract(argMax(tupleElement(build,'branch'), tupleElement(job,'created_at')), 'pull/([0-9]+)')) | 
|  | 43 | +    ) AS pr_number, | 
|  | 44 | + | 
|  | 45 | +    argMax(tupleElement(build,'created_at'), tupleElement(job,'created_at')) AS build_created_at_utc, | 
|  | 46 | +    argMax(tupleElement(build,'branch'),     tupleElement(job,'created_at')) AS branch_name | 
|  | 47 | +  FROM vllm.vllm_buildkite_jobs | 
|  | 48 | +  GROUP BY tupleElement(build,'id') | 
|  | 49 | +  HAVING build_created_at_utc >= w_start AND build_created_at_utc < w_end /* half-open window test, applied to the aggregated build creation time */ | 
|  | 50 | +) | 
|  | 51 | + | 
|  | 52 | +/* 2) Job rows of type 'script' or 'command' belonging to the builds selected in builds_window, with their timestamps and the queue name taken from agent_query_rules */ | 
|  | 53 | +, base_agent AS ( | 
|  | 54 | +  SELECT | 
|  | 55 | +    tupleElement(build,'id')        AS build_id, | 
|  | 56 | +    tupleElement(job,'id')          AS job_id, | 
|  | 57 | +    tupleElement(job,'created_at')  AS created_at, | 
|  | 58 | +    tupleElement(job,'state')       AS state, | 
|  | 59 | +    tupleElement(job,'runnable_at') AS runnable_at, | 
|  | 60 | +    tupleElement(job,'started_at')  AS started_at, | 
|  | 61 | +    tupleElement(job,'finished_at') AS finished_at, | 
|  | 62 | +    replaceOne(arrayFirst(x -> startsWith(x,'queue='), | 
|  | 63 | +                          tupleElement(job,'agent_query_rules')), 'queue=', '') AS queue_key /* first rule starting with 'queue=', with that prefix removed */ | 
|  | 64 | +  FROM vllm.vllm_buildkite_jobs | 
|  | 65 | +  INNER JOIN builds_window b ON tupleElement(build,'id') = b.build_id /* restricts rows to builds created inside the window */ | 
|  | 66 | +  WHERE tupleElement(job,'type') IN ('script','command') | 
|  | 67 | +    AND ( /* NOTE(review): the three conditions are OR-ed, so a row is dropped only when finished_at is before w_start and neither runnable_at nor started_at is before w_end — confirm this is the intended overlap test */ | 
|  | 68 | +      tupleElement(job,'runnable_at') < w_end OR | 
|  | 69 | +      tupleElement(job,'started_at')  < w_end OR | 
|  | 70 | +      ifNull(tupleElement(job,'finished_at'), FAR_FUTURE) >= w_start | 
|  | 71 | +    ) | 
|  | 72 | +) | 
|  | 73 | + | 
|  | 74 | +/* 3) Collapse base_agent to one row per (build_id, job_id) and collect the distinct run and wait attempts as (queue, from, to) tuples */ | 
|  | 75 | +, jobs_by_build AS ( | 
|  | 76 | +  SELECT | 
|  | 77 | +    build_id, | 
|  | 78 | +    job_id, | 
|  | 79 | +    argMax(state, created_at) AS latest_state, /* state of the row with the greatest created_at */ | 
|  | 80 | +    max(created_at)           AS last_event_at, /* greatest created_at among the job's rows; used by the zombie guard in runs_scored */ | 
|  | 81 | + | 
|  | 82 | +    /* RUN attempts: (queue, start, finish) — kept only when started_at is set; finish is not filtered here and its NULL case is handled in runs_scored */ | 
|  | 83 | +    arrayDistinct(arrayFilter(t -> t.2 IS NOT NULL, | 
|  | 84 | +      groupArray((queue_key, started_at, finished_at)) | 
|  | 85 | +    )) AS run_triplets, | 
|  | 86 | + | 
|  | 87 | +    /* WAIT attempts: (queue, runnable, start) — ONLY attempts that actually started, i.e. both runnable_at and started_at are set */ | 
|  | 88 | +    arrayDistinct(arrayFilter(t -> t.2 IS NOT NULL AND t.3 IS NOT NULL, | 
|  | 89 | +      groupArray((queue_key, runnable_at, started_at)) | 
|  | 90 | +    )) AS wait_triplets | 
|  | 91 | +  FROM base_agent | 
|  | 92 | +  GROUP BY build_id, job_id | 
|  | 93 | +) | 
|  | 94 | + | 
|  | 95 | +/* 4) RUN attempts → one row per run attempt on a tracked queue, with start and end clipped to [w_start, w_end] (zombie guard for open runs) */ | 
|  | 96 | +, runs_scored AS ( | 
|  | 97 | +  SELECT | 
|  | 98 | +    build_id, | 
|  | 99 | +    tupleElement(rt, 1) AS queue_key, | 
|  | 100 | +    greatest(tupleElement(rt, 2), w_start) AS s_clip, /* attempt start, never earlier than w_start */ | 
|  | 101 | +    least( /* attempt end, never later than w_end */ | 
|  | 102 | +      ifNull( | 
|  | 103 | +        tupleElement(rt, 3), /* finished_at when present; otherwise the open-attempt rule below applies */ | 
|  | 104 | +        if(latest_state = 'running' AND last_event_at < zombie_cutoff, /* zombie: still 'running' but last event is older than zombie_cutoff */ | 
|  | 105 | +           least(last_event_at + INTERVAL 1 MINUTE, w_end), /* zombie is ended 1 minute after its last event */ | 
|  | 106 | +           w_end) /* any other open attempt is counted up to w_end */ | 
|  | 107 | +      ), | 
|  | 108 | +      w_end | 
|  | 109 | +    ) AS e_clip | 
|  | 110 | +  FROM jobs_by_build | 
|  | 111 | +  ARRAY JOIN run_triplets AS rt | 
|  | 112 | +  WHERE tupleElement(rt, 1) IN QUEUES /* drop attempts on queues that are not tracked */ | 
|  | 113 | +) | 
|  | 114 | +, run_by_build AS (   /* clipped run seconds summed per build × queue; intervals whose clipped end is not after the clipped start add nothing */ | 
|  | 115 | +  SELECT | 
|  | 116 | +    rs.build_id AS build_id, rs.queue_key AS queue_key, | 
|  | 117 | +    sumIf(dateDiff('second', rs.s_clip, rs.e_clip), rs.s_clip < rs.e_clip) AS total_run_s | 
|  | 118 | +  FROM runs_scored AS rs | 
|  | 119 | +  GROUP BY rs.build_id, rs.queue_key | 
|  | 120 | +) | 
|  | 121 | + | 
|  | 122 | +/* 5) WAIT attempts (runnable → started) → per build × queue (clip to window) */ | 
|  | 123 | +, waits_scored AS ( | 
|  | 124 | +  SELECT | 
|  | 125 | +    build_id, | 
|  | 126 | +    tupleElement(wt, 1) AS queue_key, | 
|  | 127 | +    greatest(tupleElement(wt, 2), w_start) AS ra_clip, | 
|  | 128 | +    least(tupleElement(wt, 3), w_end)      AS st_clip, | 
|  | 129 | +    greatest(0, dateDiff('second', greatest(tupleElement(wt, 2), w_start), least(tupleElement(wt, 3), w_end))) AS wait_s | 
|  | 130 | +  FROM jobs_by_build | 
|  | 131 | +  ARRAY JOIN wait_triplets AS wt | 
|  | 132 | +  WHERE tupleElement(wt, 1) IN QUEUES | 
|  | 133 | +) | 
|  | 133 | +, waits_p90_pivot AS (   /* one row per build that has at least one positive wait on a tracked queue */ | 
|  | 134 | +  SELECT | 
|  | 135 | +    build_id, | 
|  | 136 | +    /* P90 per queue (approximate quantile; broadly supported). quantileIf yields NaN when no row of the build matches its queue condition, and ifNull downstream does not catch NaN, so non-finite results are mapped to 0 here */ | 
|  | 137 | +    ifNotFinite(quantileIf(0.9)(toFloat64(wait_s), queue_key = 'gpu_1_queue'), 0) AS gpu1_p90_s, | 
|  | 138 | +    ifNotFinite(quantileIf(0.9)(toFloat64(wait_s), queue_key = 'gpu_4_queue'), 0) AS gpu4_p90_s, | 
|  | 139 | +    /* Combined P90 across both queues; every group has at least one row, so no NaN guard is needed */ | 
|  | 140 | +    quantile(0.9)(toFloat64(wait_s)) AS p90_combined_s | 
|  | 141 | +  FROM waits_scored | 
|  | 142 | +  WHERE wait_s > 0   /* zero-length waits are excluded from the percentile */ | 
|  | 143 | +  GROUP BY build_id | 
|  | 144 | +) | 
|  | 146 | + | 
|  | 147 | +/* 6) Pivot per-build totals to hour columns */ | 
|  | 148 | +, run_totals_by_build AS ( | 
|  | 149 | +  SELECT | 
|  | 150 | +    build_id, | 
|  | 151 | +    round(sumIf(total_run_s, queue_key = 'gpu_1_queue') / 3600.0, 2) AS gpu_1_queue_run_hours, | 
|  | 152 | +    round(sumIf(total_run_s, queue_key = 'gpu_4_queue') / 3600.0, 2) AS gpu_4_queue_run_hours | 
|  | 153 | +  FROM run_by_build | 
|  | 154 | +  GROUP BY build_id | 
|  | 155 | +) | 
|  | 156 | + | 
|  | 157 | +/* 7) Final table (UTC) — includes both PR and main builds */ | 
|  | 158 | +SELECT | 
|  | 159 | +  /* PR URL (NULL for non-PR builds) */ | 
|  | 160 | +  if((b.pr_number IS NULL) OR (b.repo_slug IS NULL), | 
|  | 161 | +     NULL, | 
|  | 162 | +     concat('https://github.com/', b.repo_slug, '/pull/', toString(b.pr_number)) | 
|  | 163 | +  ) AS pr_url, | 
|  | 164 | + | 
|  | 165 | +  b.build_number AS build_number, | 
|  | 166 | +  b.build_id AS build_id, | 
|  | 167 | +  b.build_url AS build_url, | 
|  | 168 | +  b.steps_table_url AS steps_table_url, | 
|  | 169 | +  b.commit_sha AS commit_sha, | 
|  | 170 | + | 
|  | 171 | +  b.robust_start  AS build_started_at, | 
|  | 172 | +  b.robust_finish AS build_finished_at, | 
|  | 173 | + | 
|  | 174 | +  /* duration (hours) = finish − start (UTC) */ | 
|  | 175 | +  multiIf( | 
|  | 176 | +    b.robust_start IS NULL OR b.robust_finish IS NULL, | 
|  | 177 | +    NULL, | 
|  | 178 | +    round(dateDiff('second', b.robust_start, b.robust_finish) / 3600.0, 2) | 
|  | 179 | +  ) AS duration_hours, | 
|  | 180 | + | 
|  | 181 | +  b.steps_count AS steps_count, | 
|  | 182 | +  b.latest_build_state AS latest_build_state, | 
|  | 183 | + | 
|  | 184 | +  /* Keep run hours for cost */ | 
|  | 185 | +  ifNull(rt.gpu_1_queue_run_hours,  0) AS gpu_1_queue_run_hours, | 
|  | 186 | +  ifNull(rt.gpu_4_queue_run_hours,  0) AS gpu_4_queue_run_hours, | 
|  | 187 | + | 
|  | 188 | +  /* NEW: P90 wait hours (by queue + combined) */ | 
|  | 189 | +  round(ifNull(wp.gpu1_p90_s, 0) / 3600.0, 2) AS gpu_1_queue_wait_p90_hours, | 
|  | 190 | +  round(ifNull(wp.gpu4_p90_s, 0) / 3600.0, 2) AS gpu_4_queue_wait_p90_hours, | 
|  | 191 | +  round(ifNull(wp.p90_combined_s, 0) / 3600.0, 2) AS wait_p90_hours, | 
|  | 192 | + | 
|  | 193 | +  /* Fixed-rate cost */ | 
|  | 194 | +  round( | 
|  | 195 | +    1.3232 * ifNull(rt.gpu_1_queue_run_hours, 0) + | 
|  | 196 | +    4.602  * ifNull(rt.gpu_4_queue_run_hours, 0), | 
|  | 197 | +    2 | 
|  | 198 | +  ) AS cost, | 
|  | 199 | + | 
|  | 200 | +  /* Mark if the build branch is literally 'main' */ | 
|  | 201 | +  toUInt8(b.branch_name = 'main') AS is_main_branch | 
|  | 202 | + | 
|  | 203 | +FROM builds_window AS b | 
|  | 204 | +LEFT JOIN run_totals_by_build AS rt ON rt.build_id = b.build_id | 
|  | 205 | +LEFT JOIN waits_p90_pivot    AS wp ON wp.build_id = b.build_id | 
|  | 206 | +ORDER BY b.build_created_at_utc ASC; | 
0 commit comments