var sumoHttp = require('./sumoclient');
var { ContainerClient } = require("@azure/storage-blob");
var { DefaultAzureCredential } = require("@azure/identity");
+const { TableClient } = require("@azure/data-tables");
var { AbortController } = require("@azure/abort-controller");
var { ServiceBusClient } = require("@azure/service-bus");
var DEFAULT_CSV_SEPARATOR = ",";
var MAX_CHUNK_SIZE = 1024;
var JSON_BLOB_HEAD_BYTES = 12;
var JSON_BLOB_TAIL_BYTES = 2;
+const azureTableClient = TableClient.fromConnectionString(process.env.AzureWebJobsStorage, "FileOffsetMap");

function csvToArray(strData, strDelimiter) {
    strDelimiter = (strDelimiter || ",");
@@ -116,19 +118,131 @@ function csvHandler(context,msgtext, headers) {
    return messageArray;
}

-function nsgLogsHandler(context, msg) {
+/*
+    returns the index of the first match of the pattern in the string, at or after startpos
+*/
+function regexIndexOf(string, regex, startpos) {
+    var indexOf = string.substring(startpos || 0).search(regex);
+    return (indexOf >= 0) ? (indexOf + (startpos || 0)) : indexOf;
+}
+
+/*
+    returns the index of the last match of the pattern in the string, at or after startpos
+    (the regex needs the global flag; without it, match() returns only the first match)
+*/
+function regexLastIndexOf(string, regex, startpos) {
+    // https://stackoverflow.com/questions/19445994/javascript-string-search-for-regex-starting-at-the-end-of-the-string
+    var stringToWorkWith = string.substring(startpos, string.length);
+    var match = stringToWorkWith.match(regex);
+    return match ? string.lastIndexOf(match.slice(-1)) : -1;
+}
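+// Example (illustrative): regexLastIndexOf("a1b2c3", /\d/g, 0) returns 5 (the last digit),
+// while regexIndexOf("a1b2c3", /\d/g, 0) returns 1 (the first digit).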
+
+/*
+    returns an array of JSON objects after removing the unparseable prefix and suffix from data
+*/
+function getParseableJsonArray(data, context) {
+
+    let logRegex = /\{\s*"time":/g; // starting regex for nsg logs; the global flag is required by regexLastIndexOf
+    let defaultEncoding = "utf8";
+    let originalDatalength = data.length;
+    // If the byte sequence in the buffer data is not valid according to the provided encoding, then it is replaced by the default replacement character i.e. U+FFFD.
+    // regexIndexOf returns -1 if the pattern is not found
+    let firstIdx = regexIndexOf(data, logRegex);
+    let lastIndex = regexLastIndexOf(data, logRegex, firstIdx + 1);
+
+    // String.prototype.substring extracts the characters between "start" and "end", not including "end" itself.
+    let prefix = data.substring(0, firstIdx);
+    // in case there is only one "time" string
+    if (lastIndex === -1 && data.length > 0) {
+        lastIndex = data.length;
+    }
+    let suffix = data.substring(lastIndex, data.length);
+    if (suffix.length > 0) {
+        try {
+            JSON.parse(suffix.trim());
+            lastIndex = data.length;
+        } catch (error) {
+            context.log.error(`Failed to parse the last JSON chunk. Ignoring suffix: ${suffix.trim()}, error: ${error}`);
+        }
+    }
+
+    // ideally the ignored prefix length should always be 0; it will be nonzero for files which are updated
+    context.log.verbose(`Ignoring log prefix length: ${Buffer.byteLength(prefix, defaultEncoding)} suffix length: ${Buffer.byteLength(data.substring(lastIndex, data.length), defaultEncoding)}`);
+
+    // data with both prefix and suffix removed
+    data = data.substring(firstIdx, lastIndex);
+    let dataLenParsed = Buffer.byteLength(prefix + data, defaultEncoding);
+    data = data.trim().replace(/(^,)|(,$)/g, ""); // removing trailing spaces, newlines and leftover commas
+
+    try {
+        var jsonArray = JSON.parse("[" + data + "]");
+        context.log.verbose(`Successfully parsed JSON! datalength: ${data.length} originalDatalength: ${originalDatalength} dataLenParsed: ${dataLenParsed}`);
+        return [jsonArray, dataLenParsed, true];
+    } catch (error) {
+        context.log.error(`Failed to parse the JSON after removing prefix/suffix Error: ${error} firstIdx: ${firstIdx} lastIndex: ${lastIndex} prefix: ${prefix} datastart: ${data.substring(0, 10)} dataend: ${data.substring(data.length - 10, data.length)} originalDatalength: ${originalDatalength} dataLenParsed: ${dataLenParsed}`);
+        return [[data], dataLenParsed, false];
+    }
+}
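+/* Example (illustrative): for a chunk read from the middle of an append blob such as
+       '"xyz"},{"time":"T1","properties":{}},{"time":"T2","pro'
+   the unparseable prefix '"xyz"},' and the truncated suffix '{"time":"T2","pro' are
+   dropped, and the function returns [[{time: "T1", ...}], <bytes consumed including
+   the prefix>, true] so the caller can advance the blob offset past the parsed bytes. */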
+
+function getRowKey(metadata) {
+    var storageName = metadata.url.split("//").pop().split(".")[0];
+    var arr = metadata.url.split('/').slice(3);
+    var keyArr = [storageName];
+    Array.prototype.push.apply(keyArr, arr);
+    // the key cannot be greater than 1KB (1024 bytes)
+    var rowKey = keyArr.join("-");
+    return rowKey.substr(0, Math.min(1024, rowKey.length)).replace(/^-+|-+$/g, '');
+}
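+// Example (illustrative, using the sample task URL from the bottom of this file):
+// getRowKey({url: "https://testsa250624004409.blob.core.windows.net/insights-logs-networksecuritygroupflowevent/blob_fixtures.json"})
+// returns "testsa250624004409-insights-logs-networksecuritygroupflowevent-blob_fixtures.json".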
+
+async function setAppendBlobOffset(context, serviceBusTask, newOffset) {
+    // declared outside the try block so it is still in scope in the catch below
+    let rowKey = getRowKey(serviceBusTask);
+    try {
+        // Todo: this should be an atomic update; if another request decreases the offset it shouldn't be allowed
+        context.log.verbose("Attempting to update offset row: %s from: %d to: %d", rowKey, serviceBusTask.startByte, newOffset);
+        let entity = {
+            offset: { type: "Int64", value: String(newOffset) },
+            // The entity could have been deleted (archived) by the appendblob cleanup because of a large queueing time, so to avoid an error while merging the entity we include the rest of the fields like storageName, containerName etc.
+            partitionKey: serviceBusTask.containerName,
+            rowKey: rowKey,
+            blobName: serviceBusTask.blobName,
+            containerName: serviceBusTask.containerName,
+            storageName: serviceBusTask.storageName
+        };
+        var updateResult = await azureTableClient.updateEntity(entity, "Merge");
+        context.log.verbose("Updated offset result: %s row: %s from: %d to: %d", JSON.stringify(updateResult), rowKey, serviceBusTask.startByte, newOffset);
+    } catch (error) {
+        context.log.error(`Error - Failed to update OffsetMap table, error: ${JSON.stringify(error)}, rowKey: ${rowKey}, newOffset: ${newOffset}`);
+    }
+}
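+/* A possible shape for the atomic-update TODO above, using the ETag-based optimistic
+   concurrency that @azure/data-tables supports (getEntity/updateEntity are real SDK
+   calls; the surrounding logic is a sketch, not part of this change):
+
+   let existing = await azureTableClient.getEntity(serviceBusTask.containerName, rowKey);
+   if (Number(existing.offset) < newOffset) {
+       // throws 412 (Precondition Failed) if another writer updated the row in between
+       await azureTableClient.updateEntity(entity, "Merge", { etag: existing.etag });
+   }
+*/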
+
+async function nsgLogsHandler(context, msg, serviceBusTask) {

    var jsonArray = [];
    msg = msg.trim().replace(/(^,)|(,$)/g, ""); // removing trailing spaces, newlines and leftover commas
-    jsonArray = JSON.parse("[" + msg + "]");
+
+    try {
+        jsonArray = JSON.parse("[" + msg + "]");
+    } catch (err) {
+        let response = getParseableJsonArray(msg, context);
+        jsonArray = response[0];
+        let is_success = response[2];
+        // response[1] counts the bytes parsed within this chunk, so the new absolute offset is startByte + bytes parsed
+        let newOffset = response[1] + serviceBusTask.startByte;
+        if (is_success) {
+            await setAppendBlobOffset(context, serviceBusTask, newOffset);
+        } else {
+            return jsonArray;
+        }
+    }
+
    var eventsArr = [];
    jsonArray.forEach(function (record) {
-        version = record.properties.Version;
+        let version = record.properties.Version;
        record.properties.flows.forEach(function (rule) {
            rule.flows.forEach(function (flow) {
                flow.flowTuples.forEach(function (tuple) {
-                    col = tuple.split(",");
-                    event = {
+                    let col = tuple.split(",");
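+                    // NSG flow tuple layout (version 1 fields, per the Azure NSG flow-log schema):
+                    // epochTime,srcIP,destIP,srcPort,destPort,protocol(T/U),direction(I/O),decision(A/D)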
+                    let event = {
                        time: col[0], // this should be epoch time
                        sys_id: record.systemId,
                        category: record.category,
@@ -179,7 +293,7 @@ function jsonHandler(context,msg) {
function blobHandler(context, msg) {
    // it's assumed that .blob files contain JSON objects separated by \n
    // https://docs.microsoft.com/en-us/azure/application-insights/app-insights-export-telemetry
-
+
    var jsonArray = [];
    msg = msg.replace(/\0/g, '');
    msg = msg.replace(/(\r?\n|\r)/g, ",");
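    // e.g. '{"a": 1}\n{"b": 2}' becomes '{"a": 1},{"b": 2}', which can then be wrapped
    // in "[" + msg + "]" and parsed as a JSON array, as nsgLogsHandler does above.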
@@ -238,7 +352,7 @@ function messageHandler(serviceBusTask, context, sumoClient) {
        context.done();
        return;
    }
-    if (file_ext === "json" & serviceBusTask.containerName === "insights-logs-networksecuritygroupflowevent") {
+    if ((file_ext === "json") && (serviceBusTask.containerName === "insights-logs-networksecuritygroupflowevent")) {
        // because in json the first and last blocks remain as-is and the azure service adds each new block in the second-last position
        if (serviceBusTask.endByte < JSON_BLOB_HEAD_BYTES + JSON_BLOB_TAIL_BYTES) {
            context.done(); // rejecting the first commit when no data is present; data will always be at least HEAD_BYTES+DATA_BYTES+TAIL_BYTES
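            // (note) HEAD_BYTES = 12 and TAIL_BYTES = 2 appear to match the '{"records":[' wrapper
            // and the closing ']}' that Azure writes around NSG flow-log blobs.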
@@ -247,19 +361,17 @@ function messageHandler(serviceBusTask, context, sumoClient) {
        serviceBusTask.endByte -= JSON_BLOB_TAIL_BYTES;
        if (serviceBusTask.startByte <= JSON_BLOB_HEAD_BYTES) {
            serviceBusTask.startByte = JSON_BLOB_HEAD_BYTES;
-        } else {
-            serviceBusTask.startByte -= 1; // to remove the comma before the json object
        }
        file_ext = "nsg";
    }
    getBlockBlobService(context, serviceBusTask).then(function (blobService) {
-        return getData(serviceBusTask, blobService, context).then(function (msg) {
+        return getData(serviceBusTask, blobService, context).then(async function (msg) {
            context.log("Successfully downloaded blob %s %d %d", serviceBusTask.blobName, serviceBusTask.startByte, serviceBusTask.endByte);
            var messageArray;
            if (file_ext === "csv") {
                return getcsvHeader(serviceBusTask.containerName, serviceBusTask.blobName, blobService, context).then(function (headers) {
                    context.log("Received headers %d", headers.length);
-                    messageArray = msghandler[file_ext](context, msg, headers);
+                    messageArray = csvHandler(context, msg, headers);
                    // context.log("Transformed data %s", JSON.stringify(messageArray));
                    messageArray.forEach(function (msg) {
                        sumoClient.addData(msg);
@@ -270,7 +382,11 @@ function messageHandler(serviceBusTask, context, sumoClient) {
                    context.done(err);
                });
            } else {
-                messageArray = msghandler[file_ext](context, msg);
+                if (file_ext === "nsg") {
+                    messageArray = await nsgLogsHandler(context, msg, serviceBusTask);
+                } else {
+                    messageArray = msghandler[file_ext](context, msg);
+                }
                messageArray.forEach(function (msg) {
                    sumoClient.addData(msg);
                });
@@ -282,7 +398,7 @@ function messageHandler(serviceBusTask, context, sumoClient) {
            context.log.error("Error in messageHandler: blob file doesn't exist " + serviceBusTask.blobName + " " + serviceBusTask.startByte + " " + serviceBusTask.endByte);
            context.done();
        } else {
-            context.log.error("Error in messageHandler: Failed to send blob " + serviceBusTask.blobName + " " + serviceBusTask.startByte + " " + serviceBusTask.endByte);
+            context.log.error("Error in messageHandler: Failed to send blob " + serviceBusTask.blobName + " " + serviceBusTask.startByte + " " + serviceBusTask.endByte + " err: " + err);
            context.done(err);
        }
@@ -347,20 +463,18 @@ function servicebushandler(context, serviceBusTask) {
    };
    setSourceCategory(serviceBusTask, options);
    function failureHandler(msgArray, ctx) {
-        ctx.log("ServiceBus Task: ", serviceBusTask)
-        ctx.log.error("Failed to send to Sumo");
+        ctx.log.error(`Failed to send to Sumo`);
        if (sumoClient.messagesAttempted === sumoClient.messagesReceived) {
            ctx.done("TaskConsumer failedmessages: " + sumoClient.messagesFailed);
        }
    }
    function successHandler(ctx) {
        if (sumoClient.messagesAttempted === sumoClient.messagesReceived) {
-            ctx.log("ServiceBus Task: ", serviceBusTask)
            if (sumoClient.messagesFailed > 0) {
-                ctx.log.error(' Failed to send few messages to Sumo')
+                ctx.log.error(`Failed to send a few messages to Sumo`);
                ctx.done("TaskConsumer failedmessages: " + sumoClient.messagesFailed);
            } else {
-                ctx.log(' Successfully sent to Sumo, Exiting now.');
+                ctx.log(`Successfully sent to Sumo, exiting now.`);
                ctx.done();
            }
        }
@@ -457,6 +571,16 @@ async function timetriggerhandler(context, timetrigger) {
    }

module.exports = function (context, triggerData) {
+    // Sample triggerData for a ServiceBus-triggered invocation:
+    // triggerData = {
+    //     "blobName": "blob_fixtures.json",
+    //     "containerName": "insights-logs-networksecuritygroupflowevent",
+    //     "endByte": 2617,
+    //     "resourceGroupName": "testsumosa250624004409",
+    //     "startByte": 0,
+    //     "storageName": "testsa250624004409",
+    //     "subscriptionId": "c088dc46-d692-42ad-a4b6-9a542d28ad2a",
+    //     "url": "https://testsa250624004409.blob.core.windows.net/insights-logs-networksecuritygroupflowevent/blob_fixtures.json"
+    // };
    if (triggerData.isPastDue === undefined) {
        servicebushandler(context, triggerData);
    } else {