Skip to content

Commit 9259237

Browse files
fix: Padding now added to Arrow file marker and RecordBatches are being written with correct alignment (#95)
Files were being written with a non-padded file marker, and alignment padding was not being written when serializing record batches. Also, the metadata length was being set to zero in the block.

## What's Changed

The padded version of the file marker is now being written.

Padding is now written to record blocks and record block metadata, i.e.:

```
addPadForAlignment(&writer)
```

The metadata length was being written as zero. This was preventing PyArrow from reading files written by `ArrowWriter`. The metadata length is now calculated and set in the Block.

Closes #91.
1 parent f9ad7e9 commit 9259237

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

Arrow/Sources/Arrow/ArrowWriter.swift

Lines changed: 18 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -144,12 +144,25 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
144144
withUnsafeBytes(of: CONTINUATIONMARKER.littleEndian) {writer.append(Data($0))}
145145
withUnsafeBytes(of: rbResult.1.o.littleEndian) {writer.append(Data($0))}
146146
writer.append(rbResult.0)
147+
addPadForAlignment(&writer)
148+
let metadataLength = writer.count - startIndex
149+
let bodyStart = writer.count
147150
switch writeRecordBatchData(&writer, fields: batch.schema.fields, columns: batch.columns) {
148151
case .success:
152+
let bodyLength = writer.count - bodyStart
153+
let expectedSize = startIndex + metadataLength + bodyLength
154+
guard expectedSize == writer.count else {
155+
return .failure(.invalid(
156+
"Invalid Block. Expected \(expectedSize), got \(writer.count)"
157+
))
158+
}
149159
rbBlocks.append(
150-
org_apache_arrow_flatbuf_Block(offset: Int64(startIndex),
151-
metaDataLength: Int32(0),
152-
bodyLength: Int64(rbResult.1.o)))
160+
org_apache_arrow_flatbuf_Block(
161+
offset: Int64(startIndex),
162+
metaDataLength: Int32(metadataLength),
163+
bodyLength: Int64(bodyLength)
164+
)
165+
)
153166
case .failure(let error):
154167
return .failure(error)
155168
}
@@ -293,6 +306,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
293306
case .success(let schemaOffset):
294307
fbb.finish(offset: schemaOffset)
295308
writer.append(fbb.data)
309+
addPadForAlignment(&writer)
296310
case .failure(let error):
297311
return .failure(error)
298312
}
@@ -379,7 +393,7 @@ public class ArrowWriter { // swiftlint:disable:this type_body_length
379393
addPadForAlignment(&markerData)
380394

381395
var writer: any DataWriter = FileDataWriter(fileHandle)
382-
writer.append(FILEMARKER.data(using: .utf8)!)
396+
writer.append(markerData)
383397
switch writeFile(&writer, info: info) {
384398
case .success:
385399
writer.append(FILEMARKER.data(using: .utf8)!)

0 commit comments

Comments
 (0)