@@ -129,6 +129,32 @@ export const action: Action = async exec => {
129
129
, ttl_only_drop_parts = 1
130
130
` ) ;
131
131
132
+ // You might be wondering why we parse the data in such a weird way.
133
+ // This was the smartest I way I came up that did not require introducing additional span attribute validation logic on the gateway.
134
+ // We want to avoid inserts failing due to a column type-mismatch at any chance, since we are doing batch inserts and one fault record
135
+ // could prevent all other inserts within the same batch from happening.
136
+ //
137
+ // The idea here is to attempt verifying that the input is "array"-like and if so parse it as safe as possible.
138
+ // If the input is not "array"-like we just insert an empty array and move on.
139
+ //
140
+ // Later on, we could think about actually rejecting incorrect span values on the otel-collector business logic.
141
+ //
142
+ // ```
143
+ // if(
144
+ // JSONType(toString("SpanAttributes"['hive.graphql.error.codes'])) = 'Array',
145
+ // arrayFilter(
146
+ // -- Filter out empty values
147
+ // x -> notEquals(x, ''),
148
+ // arrayMap(
149
+ // -- If the user provided something non-stringy, this returns '', which is fine imho
150
+ // x -> JSONExtractString(x),
151
+ // JSONExtractArrayRaw(toString("SpanAttributes"['hive.graphql.error.codes']))
152
+ // )
153
+ // ),
154
+ // []
155
+ // )
156
+ // ```
157
+
132
158
await exec ( `
133
159
CREATE MATERIALIZED VIEW IF NOT EXISTS "otel_traces_normalized_mv" TO "otel_traces_normalized" (
134
160
"target_id" LowCardinality(String)
@@ -165,25 +191,37 @@ export const action: Action = async exec => {
165
191
, "SpanAttributes"['http.url'] AS "http_url"
166
192
, "SpanAttributes"['hive.client.name'] AS "client_name"
167
193
, "SpanAttributes"['hive.client.version'] AS "client_version"
168
- , "SpanAttributes"['hive. graphql.operation.name'] AS "graphql_operation_name"
169
- , toLowCardinality("SpanAttributes"['hive. graphql.operation.type']) AS "graphql_operation_type"
170
- , "SpanAttributes"['hive. graphql.operation .document'] AS "graphql_operation_document"
194
+ , "SpanAttributes"['graphql.operation.name'] AS "graphql_operation_name"
195
+ , toLowCardinality("SpanAttributes"['graphql.operation.type']) AS "graphql_operation_type"
196
+ , "SpanAttributes"['graphql.document'] AS "graphql_operation_document"
171
197
, "SpanAttributes"['hive.graphql.operation.hash'] AS "graphql_operation_hash"
172
198
, toInt64OrZero("SpanAttributes"['hive.graphql.error.count']) AS "graphql_error_count"
173
199
, if(
174
- "SpanAttributes"['hive.graphql.error.codes'] = '',
175
- [],
176
- arrayMap(x -> toLowCardinality(x), splitByChar(',', "SpanAttributes"['hive.graphql.error.codes']))
200
+ JSONType(toString("SpanAttributes"['hive.graphql.error.codes'])) = 'Array',
201
+ arrayFilter(
202
+ x -> notEquals(x, ''),
203
+ arrayMap(
204
+ x -> JSONExtractString(x),
205
+ JSONExtractArrayRaw(toString("SpanAttributes"['hive.graphql.error.codes']))
206
+ )
207
+ ),
208
+ []
177
209
) AS "graphql_error_codes"
178
210
, if(
179
- "SpanAttributes"['hive.subgraph.names'] = '',
180
- [],
181
- arrayMap(x -> toLowCardinality(x), splitByChar(',', "SpanAttributes"['hive.subgraph.names']))
211
+ JSONType(toString("SpanAttributes"['hive.gateway.operation.subgraph.names'])) = 'Array',
212
+ arrayFilter(
213
+ x -> notEquals(x, ''),
214
+ arrayMap(
215
+ x -> JSONExtractString(x),
216
+ JSONExtractArrayRaw(toString("SpanAttributes"['hive.gateway.operation.subgraph.names']))
217
+ )
218
+ ),
219
+ []
182
220
) AS "subgraph_names"
183
221
FROM
184
222
"otel_traces"
185
223
WHERE
186
- empty("ParentSpanId") AND NOT empty("SpanAttributes"['hive. graphql.operation.type'])
224
+ empty("ParentSpanId") AND NOT empty("SpanAttributes"['graphql.operation.type'])
187
225
)
188
226
` ) ;
189
227
@@ -253,14 +291,20 @@ export const action: Action = async exec => {
253
291
, "SpanAttributes"['http.url'] AS "http_url"
254
292
, "SpanAttributes"['hive.client.name'] AS "client_name"
255
293
, "SpanAttributes"['hive.client.version'] AS "client_version"
256
- , "SpanAttributes"['hive. graphql.operation.name'] AS "graphql_operation_name"
257
- , toLowCardinality("SpanAttributes"['hive. graphql.operation.type']) AS "graphql_operation_type"
258
- , "SpanAttributes"['hive. graphql.operation .document'] AS "graphql_operation_document"
294
+ , "SpanAttributes"['graphql.operation.name'] AS "graphql_operation_name"
295
+ , toLowCardinality("SpanAttributes"['graphql.operation.type']) AS "graphql_operation_type"
296
+ , "SpanAttributes"['graphql.document'] AS "graphql_operation_document"
259
297
, toInt64OrZero("SpanAttributes"['hive.graphql.error.count']) AS "graphql_error_count"
260
298
, if(
261
- "SpanAttributes"['hive.graphql.error.codes'] = '',
262
- [],
263
- arrayMap(x -> toLowCardinality(x), splitByChar(',', "SpanAttributes"['hive.graphql.error.codes']))
299
+ JSONType(toString("SpanAttributes"['hive.graphql.error.codes'])) = 'Array',
300
+ arrayFilter(
301
+ x -> notEquals(x, ''),
302
+ arrayMap(
303
+ x -> JSONExtractString(x),
304
+ JSONExtractArrayRaw(toString("SpanAttributes"['hive.graphql.error.codes']))
305
+ )
306
+ ),
307
+ []
264
308
) AS "graphql_error_codes"
265
309
FROM
266
310
"otel_traces"
0 commit comments