Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
261 changes: 249 additions & 12 deletions connectorx/src/destinations/arrowstream/arrow_assoc.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
use super::errors::{ArrowDestinationError, Result};
use super::{
errors::{ArrowDestinationError, Result},
typesystem::{DateTimeWrapperMicro, NaiveDateTimeWrapperMicro, NaiveTimeWrapperMicro},
};
use crate::constants::{DEFAULT_ARROW_DECIMAL, DEFAULT_ARROW_DECIMAL_SCALE, SECONDS_IN_DAY};
use crate::utils::decimal_to_i128;
use arrow::array::{
ArrayBuilder, BooleanBuilder, Date32Builder, Date64Builder, Decimal128Builder, Float32Builder,
Float64Builder, Int32Builder, Int64Builder, LargeBinaryBuilder, LargeListBuilder,
StringBuilder, Time64NanosecondBuilder, TimestampNanosecondBuilder, UInt32Builder,
UInt64Builder,
ArrayBuilder, BooleanBuilder, Date32Builder, Decimal128Builder, Float32Builder, Float64Builder,
Int16Builder, Int32Builder, Int64Builder, LargeBinaryBuilder, LargeListBuilder, StringBuilder,
Time64MicrosecondBuilder, Time64NanosecondBuilder, TimestampMicrosecondBuilder,
TimestampNanosecondBuilder, UInt16Builder, UInt32Builder, UInt64Builder,
};
use arrow::datatypes::Field;
use arrow::datatypes::{DataType as ArrowDataType, TimeUnit};
Expand Down Expand Up @@ -59,8 +62,10 @@ macro_rules! impl_arrow_assoc {
};
}

// Primitive numeric types. Each invocation hands `impl_arrow_assoc!` three things:
// the Rust value type, the Arrow data type it maps to, and the arrow builder type
// used to accumulate values of that type.
impl_arrow_assoc!(u16, ArrowDataType::UInt16, UInt16Builder);
impl_arrow_assoc!(u32, ArrowDataType::UInt32, UInt32Builder);
impl_arrow_assoc!(u64, ArrowDataType::UInt64, UInt64Builder);
impl_arrow_assoc!(i16, ArrowDataType::Int16, Int16Builder);
impl_arrow_assoc!(i32, ArrowDataType::Int32, Int32Builder);
impl_arrow_assoc!(i64, ArrowDataType::Int64, Int64Builder);
impl_arrow_assoc!(f32, ArrowDataType::Float32, Float32Builder);
Expand Down Expand Up @@ -230,6 +235,48 @@ impl ArrowAssoc for Option<DateTime<Utc>> {
}
}

/// Arrow mapping for a required (non-nullable) timezone-aware datetime that is
/// stored with microsecond precision and tagged with the `+00:00` offset.
impl ArrowAssoc for DateTimeWrapperMicro {
    type Builder = TimestampMicrosecondBuilder;

    /// Creates a microsecond timestamp builder with capacity for `nrows` values,
    /// tagged with the `+00:00` timezone.
    fn builder(nrows: usize) -> Self::Builder {
        let untagged = TimestampMicrosecondBuilder::with_capacity(nrows);
        untagged.with_timezone("+00:00")
    }

    /// Appends `value` to `builder` as its `timestamp_micros()` reading.
    #[throws(ArrowDestinationError)]
    fn append(builder: &mut Self::Builder, value: DateTimeWrapperMicro) {
        let micros = value.0.timestamp_micros();
        builder.append_value(micros);
    }

    /// Describes the column `header` as a non-nullable microsecond timestamp at `+00:00`.
    fn field(header: &str) -> Field {
        let data_type = ArrowDataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into()));
        Field::new(header, data_type, false)
    }
}

/// Arrow mapping for an optional (nullable) timezone-aware datetime that is
/// stored with microsecond precision and tagged with the `+00:00` offset.
impl ArrowAssoc for Option<DateTimeWrapperMicro> {
    type Builder = TimestampMicrosecondBuilder;

    /// Creates a microsecond timestamp builder with capacity for `nrows` values,
    /// tagged with the `+00:00` timezone.
    fn builder(nrows: usize) -> Self::Builder {
        let untagged = TimestampMicrosecondBuilder::with_capacity(nrows);
        untagged.with_timezone("+00:00")
    }

    /// Appends the `timestamp_micros()` reading of `value`, or a null slot when
    /// `value` is `None`.
    #[throws(ArrowDestinationError)]
    fn append(builder: &mut Self::Builder, value: Option<DateTimeWrapperMicro>) {
        let micros = value.map(|wrapped| wrapped.0.timestamp_micros());
        builder.append_option(micros);
    }

    /// Describes the column `header` as a nullable microsecond timestamp at `+00:00`.
    fn field(header: &str) -> Field {
        let data_type = ArrowDataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into()));
        Field::new(header, data_type, true)
    }
}

fn naive_date_to_arrow(nd: NaiveDate) -> i32 {
match nd.and_hms_opt(0, 0, 0) {
Some(dt) => (dt.and_utc().timestamp() / SECONDS_IN_DAY) as i32,
Expand All @@ -238,7 +285,9 @@ fn naive_date_to_arrow(nd: NaiveDate) -> i32 {
}

fn naive_datetime_to_arrow(nd: NaiveDateTime) -> i64 {
nd.and_utc().timestamp_millis()
nd.and_utc()
.timestamp_nanos_opt()
.unwrap_or_else(|| panic!("out of range DateTime"))
}

impl ArrowAssoc for Option<NaiveDate> {
Expand Down Expand Up @@ -276,10 +325,10 @@ impl ArrowAssoc for NaiveDate {
}

impl ArrowAssoc for Option<NaiveDateTime> {
type Builder = Date64Builder;
type Builder = TimestampNanosecondBuilder;

fn builder(nrows: usize) -> Self::Builder {
Date64Builder::with_capacity(nrows)
TimestampNanosecondBuilder::with_capacity(nrows)
}

fn append(builder: &mut Self::Builder, value: Option<NaiveDateTime>) -> Result<()> {
Expand All @@ -288,15 +337,19 @@ impl ArrowAssoc for Option<NaiveDateTime> {
}

fn field(header: &str) -> Field {
Field::new(header, ArrowDataType::Date64, true)
Field::new(
header,
ArrowDataType::Timestamp(TimeUnit::Nanosecond, None),
true,
)
}
}

impl ArrowAssoc for NaiveDateTime {
type Builder = Date64Builder;
type Builder = TimestampNanosecondBuilder;

fn builder(nrows: usize) -> Self::Builder {
Date64Builder::with_capacity(nrows)
TimestampNanosecondBuilder::with_capacity(nrows)
}

fn append(builder: &mut Self::Builder, value: NaiveDateTime) -> Result<()> {
Expand All @@ -305,7 +358,56 @@ impl ArrowAssoc for NaiveDateTime {
}

fn field(header: &str) -> Field {
Field::new(header, ArrowDataType::Date64, false)
Field::new(
header,
ArrowDataType::Timestamp(TimeUnit::Nanosecond, None),
false,
)
}
}

/// Arrow mapping for an optional (nullable) naive datetime stored with
/// microsecond precision and no timezone annotation.
impl ArrowAssoc for Option<NaiveDateTimeWrapperMicro> {
    type Builder = TimestampMicrosecondBuilder;

    /// Creates a microsecond timestamp builder with capacity for `nrows` values.
    fn builder(nrows: usize) -> Self::Builder {
        TimestampMicrosecondBuilder::with_capacity(nrows)
    }

    /// Appends the wrapped datetime, read as UTC via `and_utc().timestamp_micros()`,
    /// or a null slot when `value` is `None`. Always returns `Ok(())`.
    fn append(builder: &mut Self::Builder, value: Option<NaiveDateTimeWrapperMicro>) -> Result<()> {
        // `Option::map` replaces the hand-written `Some`/`None` match and mirrors
        // the timezone-aware sibling implementation.
        builder.append_option(value.map(|v| v.0.and_utc().timestamp_micros()));
        Ok(())
    }

    /// Describes the column `header` as a nullable, timezone-less microsecond timestamp.
    fn field(header: &str) -> Field {
        Field::new(
            header,
            ArrowDataType::Timestamp(TimeUnit::Microsecond, None),
            true,
        )
    }
}

/// Arrow mapping for a required (non-nullable) naive datetime stored with
/// microsecond precision and no timezone annotation.
impl ArrowAssoc for NaiveDateTimeWrapperMicro {
    type Builder = TimestampMicrosecondBuilder;

    /// Creates a microsecond timestamp builder with capacity for `nrows` values.
    fn builder(nrows: usize) -> Self::Builder {
        TimestampMicrosecondBuilder::with_capacity(nrows)
    }

    /// Appends the wrapped datetime, read as UTC via `and_utc().timestamp_micros()`.
    /// Always returns `Ok(())`.
    fn append(builder: &mut Self::Builder, value: NaiveDateTimeWrapperMicro) -> Result<()> {
        let as_utc = value.0.and_utc();
        let micros = as_utc.timestamp_micros();
        builder.append_value(micros);
        Ok(())
    }

    /// Describes the column `header` as a non-nullable, timezone-less microsecond timestamp.
    fn field(header: &str) -> Field {
        let data_type = ArrowDataType::Timestamp(TimeUnit::Microsecond, None);
        Field::new(header, data_type, false)
    }
}

Expand Down Expand Up @@ -349,6 +451,45 @@ impl ArrowAssoc for NaiveTime {
}
}

/// Arrow mapping for an optional (nullable) time-of-day stored as microseconds
/// since midnight.
impl ArrowAssoc for Option<NaiveTimeWrapperMicro> {
    type Builder = Time64MicrosecondBuilder;

    /// Creates a microsecond time builder with capacity for `nrows` values.
    fn builder(nrows: usize) -> Self::Builder {
        Time64MicrosecondBuilder::with_capacity(nrows)
    }

    /// Appends the time as microseconds since midnight, or a null slot when
    /// `value` is `None`. Always returns `Ok(())`.
    fn append(builder: &mut Self::Builder, value: Option<NaiveTimeWrapperMicro>) -> Result<()> {
        let micros = value.map(|wrapped| {
            let time = &wrapped.0;
            // Whole seconds scaled up to microseconds.
            let from_seconds = time.num_seconds_from_midnight() as i64 * 1_000_000;
            // Sub-second nanoseconds truncated down to microseconds.
            let from_nanos = (time.nanosecond() as i64) / 1000;
            from_seconds + from_nanos
        });
        builder.append_option(micros);
        Ok(())
    }

    /// Describes the column `header` as a nullable microsecond `Time64`.
    fn field(header: &str) -> Field {
        let data_type = ArrowDataType::Time64(TimeUnit::Microsecond);
        Field::new(header, data_type, true)
    }
}

/// Arrow mapping for a required (non-nullable) time-of-day stored as
/// microseconds since midnight.
impl ArrowAssoc for NaiveTimeWrapperMicro {
    type Builder = Time64MicrosecondBuilder;

    /// Creates a microsecond time builder with capacity for `nrows` values.
    fn builder(nrows: usize) -> Self::Builder {
        Time64MicrosecondBuilder::with_capacity(nrows)
    }

    /// Appends the time as microseconds since midnight. Always returns `Ok(())`.
    fn append(builder: &mut Self::Builder, value: NaiveTimeWrapperMicro) -> Result<()> {
        let time = &value.0;
        // Whole seconds scaled up to microseconds.
        let from_seconds = time.num_seconds_from_midnight() as i64 * 1_000_000;
        // Sub-second nanoseconds truncated down to microseconds.
        let from_nanos = (time.nanosecond() as i64) / 1000;
        builder.append_value(from_seconds + from_nanos);
        Ok(())
    }

    /// Describes the column `header` as a non-nullable microsecond `Time64`.
    fn field(header: &str) -> Field {
        let data_type = ArrowDataType::Time64(TimeUnit::Microsecond);
        Field::new(header, data_type, false)
    }
}

impl ArrowAssoc for Option<Vec<u8>> {
type Builder = LargeBinaryBuilder;

Expand Down Expand Up @@ -386,6 +527,93 @@ impl ArrowAssoc for Vec<u8> {
}
}

/// Arrow mapping for an optional (nullable) list of optional decimals, stored as
/// a `LargeList` whose items use the `DEFAULT_ARROW_DECIMAL` data type.
impl ArrowAssoc for Option<Vec<Option<Decimal>>> {
    type Builder = LargeListBuilder<Decimal128Builder>;

    /// Creates a list builder with capacity for `nrows` lists, backed by a decimal
    /// value builder that is also given capacity `nrows`.
    fn builder(nrows: usize) -> Self::Builder {
        let values =
            Decimal128Builder::with_capacity(nrows).with_data_type(DEFAULT_ARROW_DECIMAL);
        LargeListBuilder::with_capacity(values, nrows)
    }

    /// Appends one list entry, or a null entry when `value` is `None`.
    ///
    /// Every present decimal is converted with `decimal_to_i128` at
    /// `DEFAULT_ARROW_DECIMAL_SCALE`; the first conversion error is returned and
    /// nothing is appended in that case.
    fn append(builder: &mut Self::Builder, value: Self) -> Result<()> {
        if let Some(vals) = value {
            // Collecting into a `Result` stops at the first failed conversion.
            let scaled = vals
                .into_iter()
                .map(|item| {
                    item.map(|dec| decimal_to_i128(dec, DEFAULT_ARROW_DECIMAL_SCALE as u32))
                        .transpose()
                })
                .collect::<std::result::Result<Vec<_>, _>>()?;
            builder.append_value(scaled);
        } else {
            builder.append_null();
        }
        Ok(())
    }

    /// Describes the column `header` as a nullable `LargeList` of nullable decimals.
    fn field(header: &str) -> Field {
        let item = Field::new_list_field(DEFAULT_ARROW_DECIMAL, true);
        let list_type = ArrowDataType::LargeList(std::sync::Arc::new(item));
        Field::new(header, list_type, true)
    }
}

/// Arrow mapping for a required (non-nullable) list of optional decimals, stored
/// as a `LargeList` whose items use the `DEFAULT_ARROW_DECIMAL` data type.
impl ArrowAssoc for Vec<Option<Decimal>> {
    type Builder = LargeListBuilder<Decimal128Builder>;

    /// Creates a list builder with capacity for `nrows` lists, backed by a decimal
    /// value builder that is also given capacity `nrows`.
    fn builder(nrows: usize) -> Self::Builder {
        let values =
            Decimal128Builder::with_capacity(nrows).with_data_type(DEFAULT_ARROW_DECIMAL);
        LargeListBuilder::with_capacity(values, nrows)
    }

    /// Appends one list entry built from `vals`.
    ///
    /// Every present decimal is converted with `decimal_to_i128` at
    /// `DEFAULT_ARROW_DECIMAL_SCALE`; the first conversion error is returned and
    /// nothing is appended in that case.
    fn append(builder: &mut Self::Builder, vals: Self) -> Result<()> {
        // Collecting into a `Result` stops at the first failed conversion.
        let scaled = vals
            .into_iter()
            .map(|item| {
                item.map(|dec| decimal_to_i128(dec, DEFAULT_ARROW_DECIMAL_SCALE as u32))
                    .transpose()
            })
            .collect::<std::result::Result<Vec<_>, _>>()?;
        builder.append_value(scaled);
        Ok(())
    }

    /// Describes the column `header` as a non-nullable `LargeList` of decimals.
    fn field(header: &str) -> Field {
        // NOTE(review): the item field is declared non-nullable although `append`
        // can push `None` items — confirm this matches the intended schema.
        let item = Field::new_list_field(DEFAULT_ARROW_DECIMAL, false);
        let list_type = ArrowDataType::LargeList(std::sync::Arc::new(item));
        Field::new(header, list_type, false)
    }
}

macro_rules! impl_arrow_array_assoc {
($T:ty, $AT:expr, $B:ident) => {
impl ArrowAssoc for $T {
Expand Down Expand Up @@ -432,4 +660,13 @@ macro_rules! impl_arrow_array_assoc {
};
}

// List-valued columns. Each invocation hands `impl_arrow_array_assoc!` three things:
// the `Vec<Option<T>>` value type, the Arrow data type of a single element, and the
// arrow builder type used for the elements.
impl_arrow_array_assoc!(Vec<Option<bool>>, ArrowDataType::Boolean, BooleanBuilder);
impl_arrow_array_assoc!(Vec<Option<String>>, ArrowDataType::Utf8, StringBuilder);
impl_arrow_array_assoc!(Vec<Option<i16>>, ArrowDataType::Int16, Int16Builder);
impl_arrow_array_assoc!(Vec<Option<i32>>, ArrowDataType::Int32, Int32Builder);
impl_arrow_array_assoc!(Vec<Option<i64>>, ArrowDataType::Int64, Int64Builder);
impl_arrow_array_assoc!(Vec<Option<u16>>, ArrowDataType::UInt16, UInt16Builder);
impl_arrow_array_assoc!(Vec<Option<u32>>, ArrowDataType::UInt32, UInt32Builder);
impl_arrow_array_assoc!(Vec<Option<u64>>, ArrowDataType::UInt64, UInt64Builder);
impl_arrow_array_assoc!(Vec<Option<f32>>, ArrowDataType::Float32, Float32Builder);
impl_arrow_array_assoc!(Vec<Option<f64>>, ArrowDataType::Float64, Float64Builder);
Loading