33import logging
44import os
55import git
6+ from google .cloud import bigquery
67import requests
78
89GRAFANA_URL = (
1112GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql"
1213REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"
1314
# BigQuery dataset and tables to write metrics to.
OPERATIONAL_METRICS_DATASET = "operational_metrics"
LLVM_COMMITS_TABLE = "llvm_commits"

# How many commits to query the GitHub GraphQL API for at a time.
# Querying too many commits at once often leads to the call failing.
GITHUB_API_BATCH_SIZE = 50
2732commit_{commit_sha}:
2833 object(oid:"{commit_sha}") {{
2934 ... on Commit {{
35+ author {{
36+ user {{
37+ login
38+ }}
39+ }}
3040 associatedPullRequests(first: 1) {{
3141 totalCount
3242 pullRequest: nodes {{
3343 number
3444 reviewDecision
45+ reviews(first: 10) {{
46+ nodes {{
47+ reviewer: author {{
48+ login
49+ }}
50+ }}
51+ }}
3552 }}
3653 }}
3754 }}
@dataclasses.dataclass
class LLVMCommitInfo:
  """Metrics for a single llvm-project commit.

  Fields with defaults are unknown until the GitHub GraphQL API is queried
  and are filled in afterwards; the rest come straight from the local git log.
  """

  commit_sha: str
  commit_timestamp_seconds: int
  files_modified: set[str]
  # GitHub username of the author; unknown until the API call completes.
  commit_author: str = ""
  has_pull_request: bool = False
  pull_request_number: int = 0
  is_reviewed: bool = False
  is_approved: bool = False
  # GitHub usernames of everyone who left a review on the associated PR.
  reviewers: set[str] = dataclasses.field(default_factory=set)
5170
5271
5372def scrape_new_commits_by_date (
@@ -99,7 +118,9 @@ def query_for_reviews(
99118 # Create a map of commit sha to info
100119 new_commits = {
101120 commit .hexsha : LLVMCommitInfo (
102- commit .hexsha , commit .committed_datetime , commit .committed_date
121+ commit_sha = commit .hexsha ,
122+ commit_timestamp_seconds = commit .committed_date ,
123+ files_modified = set (commit .stats .files .keys ()),
103124 )
104125 for commit in new_commits
105126 }
@@ -140,29 +161,41 @@ def query_for_reviews(
140161 },
141162 json = {"query" : query },
142163 )
164+
165+ # Exit if API call fails
166+ # A failed API call means a large batch of data is missing and will not be
167+ # reflected in the dashboard. The dashboard will silently misrepresent
168+ # commit data if we continue execution, so it's better to fail loudly.
143169 if response .status_code < 200 or response .status_code >= 300 :
144170 logging .error ("Failed to query GitHub GraphQL API: %s" , response .text )
171+ exit (1 )
172+
145173 api_commit_data .update (response .json ()["data" ]["repository" ])
146174
175+ # Amend commit information with GitHub data
147176 for commit_sha , data in api_commit_data .items ():
148- # Verify that push commit has no pull requests
149177 commit_sha = commit_sha .removeprefix ("commit_" )
178+ commit_info = new_commits [commit_sha ]
179+ commit_info .commit_author = data ["author" ]["user" ]["login" ]
150180
151181 # If commit has no pull requests, skip it. No data to update.
152182 if data ["associatedPullRequests" ]["totalCount" ] == 0 :
153183 continue
154184
155185 pull_request = data ["associatedPullRequests" ]["pullRequest" ][0 ]
156- commit_info = new_commits [commit_sha ]
157186 commit_info .has_pull_request = True
158- commit_info .pr_number = pull_request ["number" ]
187+ commit_info .pull_request_number = pull_request ["number" ]
159188 commit_info .is_reviewed = pull_request ["reviewDecision" ] is not None
160189 commit_info .is_approved = pull_request ["reviewDecision" ] == "APPROVED"
190+ commit_info .reviewers = set ([
191+ review ["reviewer" ]["login" ]
192+ for review in pull_request ["reviews" ]["nodes" ]
193+ ])
161194
162195 return list (new_commits .values ())
163196
164197
165- def upload_daily_metrics (
198+ def upload_daily_metrics_to_grafana (
166199 grafana_api_key : str ,
167200 grafana_metrics_userid : str ,
168201 new_commits : list [LLVMCommitInfo ],
@@ -205,6 +238,22 @@ def upload_daily_metrics(
205238 logging .error ("Failed to submit data to Grafana: %s" , response .text )
206239
207240
def upload_daily_metrics_to_bigquery(new_commits: list[LLVMCommitInfo]) -> None:
  """Upload processed commit metrics to a BigQuery dataset.

  Args:
    new_commits: List of commits to process & upload to BigQuery.
  """
  bq_client = bigquery.Client()
  try:
    # Client.dataset() is deprecated (removed in google-cloud-bigquery v3);
    # a "dataset.table" string resolves against the client's default project.
    table = bq_client.get_table(
        f"{OPERATIONAL_METRICS_DATASET}.{LLVM_COMMITS_TABLE}"
    )
    # dataclasses.asdict() preserves `set` fields (files_modified, reviewers),
    # which are not JSON serializable; convert them to sorted lists so the
    # streaming insert payload is valid and deterministic.
    commit_records = [
        {
            key: sorted(value) if isinstance(value, set) else value
            for key, value in dataclasses.asdict(commit).items()
        }
        for commit in new_commits
    ]
    # insert_rows() returns a list of per-row errors rather than raising;
    # ignoring it would silently drop data from the dashboard.
    errors = bq_client.insert_rows(table, commit_records)
    if errors:
      logging.error("Failed to insert rows into BigQuery: %s", errors)
  finally:
    # Always release the client's underlying HTTP transport.
    bq_client.close()
255+
256+
208257def main () -> None :
209258 github_token = os .environ ["GITHUB_TOKEN" ]
210259 grafana_api_key = os .environ ["GRAFANA_API_KEY" ]
@@ -227,7 +276,12 @@ def main() -> None:
227276 new_commit_info = query_for_reviews (new_commits , github_token )
228277
229278 logging .info ("Uploading metrics to Grafana." )
230- upload_daily_metrics (grafana_api_key , grafana_metrics_userid , new_commit_info )
279+ upload_daily_metrics_to_grafana (
280+ grafana_api_key , grafana_metrics_userid , new_commit_info
281+ )
282+
283+ logging .info ("Uploading metrics to BigQuery." )
284+ upload_daily_metrics_to_bigquery (new_commit_info )
231285
232286
233287if __name__ == "__main__" :
0 commit comments