Skip to content
141 changes: 116 additions & 25 deletions Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
case .binary:
return try ArrowArrayHolderImpl(BinaryArray(with))
case .strct:
return try ArrowArrayHolderImpl(StructArray(with))
return try ArrowArrayHolderImpl(NestedArray(with))
case .list:
return try ArrowArrayHolderImpl(NestedArray(with))
default:
throw ArrowError.invalid("Array not found for type: \(arrowType)")
}
Expand Down Expand Up @@ -355,53 +357,142 @@ public class BinaryArray: ArrowArray<Data> {
}
}

/// An Arrow array whose elements are assembled from child arrays.
///
/// Two layouts are supported, selected by `arrowData.type.id`:
/// - `.strct`: one child array per field; element `i` is the `i`-th value of
///   every child.
/// - `.list`: a single child array of flattened values; element `i` is the
///   slice of that child delimited by the Int32 offsets stored in
///   `arrowData.buffers[1]`.
///
/// In both layouts an element is surfaced as `[Any?]`.
public class NestedArray: ArrowArray<[Any?]> {
    /// Child arrays: one per field for structs, exactly one for lists.
    private var children: [ArrowArrayHolder]?

    /// Loads the child arrays described by `arrowData`.
    ///
    /// - Parameter arrowData: Array data whose type id is `.list` or `.strct`.
    /// - Throws: `ArrowError.invalid` when a list does not have exactly one
    ///   child, when a `.list` id is not backed by `ArrowTypeList`, or when the
    ///   type id is neither `.list` nor `.strct`. Errors thrown by
    ///   `super.init` and `ArrowArrayHolderImpl.loadArray` are propagated.
    public required init(_ arrowData: ArrowData) throws {
        try super.init(arrowData)

        switch arrowData.type.id {
        case .list:
            guard arrowData.children.count == 1 else {
                throw ArrowError.invalid("List array must have exactly one child")
            }

            guard let listType = arrowData.type as? ArrowTypeList else {
                throw ArrowError.invalid("Expected ArrowTypeList for list type ID")
            }

            // The child is loaded with the element type declared on the list
            // type rather than the type carried by the child data itself.
            self.children = [try ArrowArrayHolderImpl.loadArray(
                listType.elementField.type,
                with: arrowData.children[0]
            )]

        case .strct:
            var fields = [ArrowArrayHolder]()
            for child in arrowData.children {
                fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
            }
            self.children = fields

        default:
            throw ArrowError.invalid("NestedArray only supports list and struct types, got: \(arrowData.type.id)")
        }
    }

    /// Returns the element at `index`, or `nil` when the entry is null.
    ///
    /// For lists the result holds the items of the list; for structs it holds
    /// one value per field. `nil` is also returned when the children are
    /// missing or when a list's offsets cannot be read safely.
    public override subscript(_ index: UInt) -> [Any?]? {
        if self.arrowData.isNull(index) {
            return nil
        }

        guard let children = self.children else {
            return nil
        }

        switch arrowData.type.id {
        case .list:
            // The offsets live in the second buffer; bail out instead of
            // trapping when it is absent.
            guard let values = children.first, self.arrowData.buffers.count > 1 else {
                return nil
            }

            let offsets = self.arrowData.buffers[1]
            let stride = MemoryLayout<Int32>.stride
            let offsetIndex = Int(index) * stride

            let startOffset = offsets.rawPointer.advanced(by: offsetIndex).load(as: Int32.self)
            let endOffset = offsets.rawPointer.advanced(by: offsetIndex + stride).load(as: Int32.self)

            // Negative or decreasing offsets would trap in the range
            // construction / UInt conversion below, so reject them up front.
            guard startOffset >= 0, startOffset <= endOffset else {
                return nil
            }

            var items = [Any?]()
            items.reserveCapacity(Int(endOffset - startOffset))
            for position in startOffset..<endOffset {
                items.append(values.array.asAny(UInt(position)))
            }
            return items

        case .strct:
            var result = [Any?]()
            result.reserveCapacity(children.count)
            for field in children {
                result.append(field.array.asAny(index))
            }
            return result

        default:
            return nil
        }
    }

    /// Renders the element at `index` as text.
    ///
    /// Lists are rendered as `[a,b,...]` with `null` for null items, and a
    /// null list is rendered as `null`. Structs are rendered as `{a,b,...}`
    /// using each child's own `asString`, and a null struct is rendered as an
    /// empty string. Any other type id yields an empty string.
    public override func asString(_ index: UInt) -> String {
        switch arrowData.type.id {
        case .list:
            // The subscript already returns nil for null entries, so a single
            // guard covers both the null and the unreadable case.
            guard let list = self[index] else {
                return "null"
            }

            let rendered = list.map { item -> String in
                guard let value = item else {
                    return "null"
                }
                if let formattable = value as? AsString {
                    return formattable.asString(0)
                }
                return "\(value)"
            }
            return "[" + rendered.joined(separator: ",") + "]"

        case .strct:
            if self.arrowData.isNull(index) {
                return ""
            }

            let rendered = (self.children ?? []).map { child -> String in
                // A child that cannot format itself contributes an empty
                // field instead of crashing on a force-unwrap.
                return (child.array as? AsString)?.asString(index) ?? ""
            }
            return "{" + rendered.joined(separator: ",") + "}"

        default:
            return ""
        }
    }

    /// `true` when this array uses the list layout.
    public var isListArray: Bool {
        return arrowData.type.id == .list
    }

    /// `true` when this array uses the struct layout.
    public var isStructArray: Bool {
        return arrowData.type.id == .strct
    }

    /// The per-field child arrays of a struct array; `nil` for any other layout.
    public var fields: [ArrowArrayHolder]? {
        return arrowData.type.id == .strct ? children : nil
    }

    /// The flattened values child of a list array; `nil` for any other layout.
    public var values: ArrowArrayHolder? {
        return arrowData.type.id == .list ? children?.first : nil
    }
}
53 changes: 48 additions & 5 deletions Arrow/Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,13 @@ public class TimestampArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Int64>,
}
}

public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, NestedArray> {
let builders: [any ArrowArrayHolderBuilder]
let fields: [ArrowField]
/// Creates a struct builder from explicit per-field child builders.
///
/// - Parameters:
///   - fields: The struct's fields; also passed to the buffer builder via
///     `initializeTypeInfo`.
///   - builders: The child builders stored alongside `fields`.
/// - Throws: Whatever the superclass initializer throws.
public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
    self.fields = fields
    self.builders = builders
    // Delegate exactly once, using the struct-specific type; the earlier
    // ArrowNestedType call was a superseded duplicate.
    try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
    self.bufferBuilder.initializeTypeInfo(fields)
}

Expand All @@ -143,7 +143,7 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
}

self.builders = builders
try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
try super.init(ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields))
}

public override func append(_ values: [Any?]?) {
Expand All @@ -159,7 +159,7 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
}
}

public override func finish() throws -> StructArray {
public override func finish() throws -> NestedArray {
let buffers = self.bufferBuilder.finish()
var childData = [ArrowData]()
for builder in self.builders {
Expand All @@ -169,11 +169,36 @@ public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructAr
let arrowData = try ArrowData(self.type, buffers: buffers,
children: childData, nullCount: self.nullCount,
length: self.length)
let structArray = try StructArray(arrowData)
let structArray = try NestedArray(arrowData)
return structArray
}
}

/// Builds list arrays: per-row offsets and validity are recorded by a
/// `ListBufferBuilder`, while every list item is forwarded to a child builder
/// created for the element type.
public class ListArrayBuilder: ArrowArrayBuilder<ListBufferBuilder, NestedArray> {
    /// Receives the items of every appended list, in append order.
    let valueBuilder: any ArrowArrayHolderBuilder

    /// Creates a builder for lists whose items have type `elementType`.
    ///
    /// The list type is declared with a nullable element field named "item".
    ///
    /// - Throws: Errors from `ArrowArrayBuilders.loadBuilder(arrowType:)` or
    ///   from the superclass initializer.
    public override init(_ elementType: ArrowType) throws {
        self.valueBuilder = try ArrowArrayBuilders.loadBuilder(arrowType: elementType)
        let itemField = ArrowField("item", type: elementType, isNullable: true)
        try super.init(ArrowTypeList(itemField))
    }

    /// Appends one list (or a null entry when `values` is `nil`).
    public override func append(_ values: [Any?]?) {
        self.bufferBuilder.append(values)
        guard let elements = values else {
            return
        }
        elements.forEach { self.valueBuilder.appendAny($0) }
    }

    /// Finalizes the buffers and the child values into a `NestedArray`.
    public override func finish() throws -> NestedArray {
        let listBuffers = self.bufferBuilder.finish()
        let elementData = try self.valueBuilder.toHolder().array.arrowData
        return try NestedArray(
            ArrowData(self.type, buffers: listBuffers, children: [elementData],
                      nullCount: self.nullCount, length: self.length))
    }
}

public class ArrowArrayBuilders {
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
_ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
Expand Down Expand Up @@ -290,6 +315,16 @@ public class ArrowArrayBuilders {
throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
}
return try TimestampArrayBuilder(timestampType.unit)
case .strct:
guard let structType = arrowType as? ArrowTypeStruct else {
throw ArrowError.invalid("Expected ArrowStructType for \(arrowType.id)")
}
return try StructArrayBuilder(structType.fields)
case .list:
guard let listType = arrowType as? ArrowTypeList else {
throw ArrowError.invalid("Expected ArrowTypeList for \(arrowType.id)")
}
return try ListArrayBuilder(listType.elementField.type)
default:
throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)")
}
Expand Down Expand Up @@ -353,4 +388,12 @@ public class ArrowArrayBuilders {
/// Creates a `TimestampArrayBuilder` for the given unit and optional timezone.
///
/// - Throws: Whatever `TimestampArrayBuilder`'s initializer throws.
public static func loadTimestampArrayBuilder(_ unit: ArrowTimestampUnit, timezone: String? = nil) throws -> TimestampArrayBuilder {
    let builder = try TimestampArrayBuilder(unit, timezone: timezone)
    return builder
}

/// Creates a `StructArrayBuilder` for the given fields.
///
/// - Throws: Whatever `StructArrayBuilder`'s initializer throws.
public static func loadStructArrayBuilder(_ fields: [ArrowField]) throws -> StructArrayBuilder {
    let builder = try StructArrayBuilder(fields)
    return builder
}

/// Creates a `ListArrayBuilder` whose items have type `elementType`.
///
/// - Throws: Whatever `ListArrayBuilder`'s initializer throws.
public static func loadListArrayBuilder(_ elementType: ArrowType) throws -> ListArrayBuilder {
    let builder = try ListArrayBuilder(elementType)
    return builder
}
}
81 changes: 78 additions & 3 deletions Arrow/Sources/Arrow/ArrowBufferBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -338,20 +338,20 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder<Date, Int64> {

public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
public typealias ItemType = [Any?]
var info: ArrowNestedType?
var info: ArrowTypeStruct?
/// Creates a builder whose validity buffer is requested with zero entries of
/// one byte each.
public init() throws {
    let byteSize = UInt(MemoryLayout<UInt8>.stride)
    super.init(ArrowBuffer.createBuffer(0, size: byteSize))
}

/// Records the struct type built from `fields` in `info`.
///
/// - Parameter fields: The fields of the struct being built.
public func initializeTypeInfo(_ fields: [ArrowField]) {
    // Assign once with the struct-specific type; the preceding
    // ArrowNestedType assignment was immediately overwritten.
    info = ArrowTypeStruct(ArrowType.ArrowStruct, fields: fields)
}

public func append(_ newValue: [Any?]?) {
let index = UInt(self.length)
self.length += 1
if length > self.nulls.length {
if self.length > self.nulls.length {
self.resize(length)
}

Expand Down Expand Up @@ -379,3 +379,78 @@ public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
return [nulls]
}
}

/// Accumulates the validity bitmap and the Int32 offsets of a list array.
///
/// Appending row `i` writes offset entry `i + 1`: the previous entry plus the
/// row's item count, or the previous entry unchanged for a null row.
public class ListBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
    public typealias ItemType = [Any?]

    /// Int32 offsets buffer; its first entry is written as zero by `init`.
    var offsets: ArrowBuffer

    /// Creates an empty builder with a single zero offset.
    public required init() throws {
        self.offsets = ArrowBuffer.createBuffer(1, size: UInt(MemoryLayout<Int32>.stride))
        let validity = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
        super.init(validity)
        self.offsets.rawPointer.storeBytes(of: Int32(0), as: Int32.self)
    }

    /// Claims the next row: bumps `length`, grows the buffers when the row's
    /// end offset would not fit, and reads the offset the row starts at.
    ///
    /// - Returns: The row index, the byte position of the row's end-offset
    ///   entry, and the value of the row's start offset.
    private func claimNextRow() -> (row: UInt, endSlot: Int, previousEnd: Int32) {
        let row = UInt(self.length)
        self.length += 1

        if self.length >= self.offsets.length {
            self.resize(self.length + 1)
        }

        let startSlot = Int(row) * MemoryLayout<Int32>.stride
        let previousEnd = self.offsets.rawPointer.advanced(by: startSlot).load(as: Int32.self)
        return (row, startSlot + MemoryLayout<Int32>.stride, previousEnd)
    }

    /// Writes `value` into the offsets buffer at byte position `endSlot`.
    private func writeEndOffset(_ value: Int32, at endSlot: Int) {
        self.offsets.rawPointer.advanced(by: endSlot).storeBytes(of: value, as: Int32.self)
    }

    /// Appends a non-null row containing `count` items.
    public func append(_ count: Int) {
        let slot = claimNextRow()
        BitUtility.setBit(slot.row + self.offset, buffer: self.nulls)
        writeEndOffset(slot.previousEnd + Int32(count), at: slot.endSlot)
    }

    /// Appends a row: a list of `newValue.count` items, or a null row when
    /// `newValue` is `nil` (which also increments `nullCount`).
    public func append(_ newValue: [Any?]?) {
        let slot = claimNextRow()

        switch newValue {
        case .some(let items):
            BitUtility.setBit(slot.row + self.offset, buffer: self.nulls)
            writeEndOffset(slot.previousEnd + Int32(items.count), at: slot.endSlot)
        case .none:
            self.nullCount += 1
            BitUtility.clearBit(slot.row + self.offset, buffer: self.nulls)
            // A null row spans no items, so its end equals its start.
            writeEndOffset(slot.previousEnd, at: slot.endSlot)
        }
    }

    /// Reports whether the validity bit for `index` is unset.
    public override func isNull(_ index: UInt) -> Bool {
        let isValid = BitUtility.isSet(index + self.offset, buffer: self.nulls)
        return !isValid
    }

    /// Replaces both buffers with larger copies when `length` exceeds the
    /// current offsets length; otherwise does nothing.
    public func resize(_ length: UInt) {
        guard length > self.offsets.length else {
            return
        }

        let grownLength = resizeLength(self.offsets)
        var grownOffsets = ArrowBuffer.createBuffer(grownLength, size: UInt(MemoryLayout<Int32>.size))
        var grownNulls = ArrowBuffer.createBuffer(grownLength/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        ArrowBuffer.copyCurrent(self.offsets, to: &grownOffsets, len: self.offsets.capacity)
        ArrowBuffer.copyCurrent(self.nulls, to: &grownNulls, len: self.nulls.capacity)
        self.offsets = grownOffsets
        self.nulls = grownNulls
    }

    /// Copies the builder's state into freshly created buffers sized from the
    /// current `length`.
    ///
    /// - Returns: `[nulls, offsets]`, in that order.
    public func finish() -> [ArrowBuffer] {
        let rowCount = self.length
        var finalNulls = ArrowBuffer.createBuffer(rowCount/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        var finalOffsets = ArrowBuffer.createBuffer(rowCount + 1, size: UInt(MemoryLayout<Int32>.size))
        ArrowBuffer.copyCurrent(self.nulls, to: &finalNulls, len: finalNulls.capacity)
        ArrowBuffer.copyCurrent(self.offsets, to: &finalOffsets, len: finalOffsets.capacity)
        return [finalNulls, finalOffsets]
    }
}
Loading