From b5249dcf392eb984b748f329dcd75081fd040008 Mon Sep 17 00:00:00 2001 From: cetra3 Date: Thu, 17 Apr 2025 09:13:08 +0930 Subject: [PATCH] Add support for u128 datatype --- columnar/src/columnar/column_type.rs | 5 +- columnar/src/columnar/merge/mod.rs | 35 +++++++++++- .../src/columnar/writer/column_operation.rs | 12 ++++ columnar/src/columnar/writer/mod.rs | 56 +++++++++++++++++++ columnar/src/dynamic_column.rs | 16 ++++++ columnar/src/tests.rs | 17 ++++++ src/aggregation/metric/top_hits.rs | 7 +++ src/core/json_utils.rs | 3 + src/fastfield/writer.rs | 6 ++ src/indexer/segment_writer.rs | 14 +++++ src/postings/per_field_postings_writer.rs | 1 + src/query/query_parser/query_parser.rs | 9 +++ src/query/range_query/mod.rs | 2 +- .../range_query/range_query_fastfield.rs | 4 +- src/schema/document/default_document.rs | 14 +++++ src/schema/document/mod.rs | 1 + src/schema/document/owned_value.rs | 5 ++ src/schema/document/se.rs | 3 + src/schema/document/value.rs | 26 +++++++++ src/schema/field_entry.rs | 6 ++ src/schema/field_type.rs | 32 +++++++++++ src/schema/mod.rs | 1 + src/schema/schema.rs | 16 ++++++ src/schema/term.rs | 24 ++++++++ 24 files changed, 310 insertions(+), 5 deletions(-) diff --git a/columnar/src/columnar/column_type.rs b/columnar/src/columnar/column_type.rs index ac61a7253f..6474bf12c4 100644 --- a/columnar/src/columnar/column_type.rs +++ b/columnar/src/columnar/column_type.rs @@ -20,6 +20,7 @@ pub enum ColumnType { Bool = 5u8, IpAddr = 6u8, DateTime = 7u8, + U128 = 8u8, } impl fmt::Display for ColumnType { @@ -33,6 +34,7 @@ impl fmt::Display for ColumnType { ColumnType::Bool => "bool", ColumnType::IpAddr => "ip", ColumnType::DateTime => "datetime", + ColumnType::U128 => "u128", }; write!(f, "{short_str}") } @@ -83,7 +85,8 @@ impl ColumnType { | ColumnType::Str | ColumnType::Bool | ColumnType::IpAddr - | ColumnType::DateTime => None, + | ColumnType::DateTime + | ColumnType::U128 => None, } } } diff --git a/columnar/src/columnar/merge/mod.rs 
b/columnar/src/columnar/merge/mod.rs index b286698df1..5b81260c1c 100644 --- a/columnar/src/columnar/merge/mod.rs +++ b/columnar/src/columnar/merge/mod.rs @@ -38,6 +38,7 @@ pub(crate) enum ColumnTypeCategory { Bool, IpAddr, DateTime, + U128, } impl From for ColumnTypeCategory { @@ -51,6 +52,7 @@ impl From for ColumnTypeCategory { ColumnType::Bool => ColumnTypeCategory::Bool, ColumnType::IpAddr => ColumnTypeCategory::IpAddr, ColumnType::DateTime => ColumnTypeCategory::DateTime, + ColumnType::U128 => ColumnTypeCategory::U128, } } } @@ -123,7 +125,10 @@ fn dynamic_column_to_u64_monotonic(dynamic_column: DynamicColumn) -> Option Some(column.to_u64_monotonic()), DynamicColumn::F64(column) => Some(column.to_u64_monotonic()), DynamicColumn::DateTime(column) => Some(column.to_u64_monotonic()), - DynamicColumn::IpAddr(_) | DynamicColumn::Bytes(_) | DynamicColumn::Str(_) => None, + DynamicColumn::IpAddr(_) + | DynamicColumn::Bytes(_) + | DynamicColumn::Str(_) + | DynamicColumn::U128(_) => None, } } @@ -193,6 +198,33 @@ fn merge_column( serialize_column_mappable_to_u128(merged_column_index, &merge_column_values, wrt)?; } + ColumnType::U128 => { + let mut column_indexes: Vec = Vec::with_capacity(columns_to_merge.len()); + let mut column_values: Vec>>> = + Vec::with_capacity(columns_to_merge.len()); + for (i, dynamic_column_opt) in columns_to_merge.into_iter().enumerate() { + if let Some(DynamicColumn::U128(Column { index: idx, values })) = dynamic_column_opt + { + column_indexes.push(idx); + column_values.push(Some(values)); + } else { + column_indexes.push(ColumnIndex::Empty { + num_docs: num_docs_per_column[i], + }); + column_values.push(None); + } + } + + let merged_column_index = + crate::column_index::merge_column_index(&column_indexes[..], merge_row_order); + let merge_column_values = MergedColumnValues { + column_indexes: &column_indexes[..], + column_values: &column_values, + merge_row_order, + }; + + serialize_column_mappable_to_u128(merged_column_index, 
&merge_column_values, wrt)?; + } ColumnType::Bytes | ColumnType::Str => { let mut column_indexes: Vec = Vec::with_capacity(columns_to_merge.len()); let mut bytes_columns: Vec> = @@ -464,6 +496,7 @@ fn min_max_if_numerical(column: &DynamicColumn) -> Option<(NumericalValue, Numer DynamicColumn::F64(column) => Some((column.min_value().into(), column.max_value().into())), DynamicColumn::Bool(_) | DynamicColumn::IpAddr(_) + | DynamicColumn::U128(_) | DynamicColumn::DateTime(_) | DynamicColumn::Bytes(_) | DynamicColumn::Str(_) => None, diff --git a/columnar/src/columnar/writer/column_operation.rs b/columnar/src/columnar/writer/column_operation.rs index 1297a0be94..c091ca66ef 100644 --- a/columnar/src/columnar/writer/column_operation.rs +++ b/columnar/src/columnar/writer/column_operation.rs @@ -155,6 +155,18 @@ impl SymbolValue for Ipv6Addr { } } +impl SymbolValue for u128 { + fn serialize(self, buffer: &mut [u8]) -> u8 { + buffer[0..16].copy_from_slice(&self.to_be_bytes()); + 16 + } + + fn deserialize(bytes: &[u8]) -> Self { + let octets: [u8; 16] = bytes[0..16].try_into().unwrap(); + u128::from_be_bytes(octets) + } +} + #[derive(Default)] struct MiniBuffer { pub bytes: [u8; 17], diff --git a/columnar/src/columnar/writer/mod.rs b/columnar/src/columnar/writer/mod.rs index 7fb8993457..8b14b4e6aa 100644 --- a/columnar/src/columnar/writer/mod.rs +++ b/columnar/src/columnar/writer/mod.rs @@ -31,6 +31,7 @@ struct SpareBuffers { value_index_builders: PreallocatedIndexBuilders, u64_values: Vec, ip_addr_values: Vec, + u128_values: Vec, } /// Makes it possible to create a new columnar. 
@@ -52,6 +53,7 @@ pub struct ColumnarWriter { datetime_field_hash_map: ArenaHashMap, bool_field_hash_map: ArenaHashMap, ip_addr_field_hash_map: ArenaHashMap, + u128_field_hash_map: ArenaHashMap, bytes_field_hash_map: ArenaHashMap, str_field_hash_map: ArenaHashMap, arena: MemoryArena, @@ -145,6 +147,10 @@ impl ColumnarWriter { column_name.as_bytes(), |column_opt: Option| column_opt.unwrap_or_default(), ), + ColumnType::U128 => self.u128_field_hash_map.mutate_or_create( + column_name.as_bytes(), + |column_opt: Option| column_opt.unwrap_or_default(), + ), } } @@ -177,6 +183,18 @@ impl ColumnarWriter { ); } + pub fn record_u128(&mut self, doc: RowId, column_name: &str, u128: u128) { + let (hash_map, arena) = (&mut self.u128_field_hash_map, &mut self.arena); + hash_map.mutate_or_create( + column_name.as_bytes(), + |column_opt: Option| { + let mut column: ColumnWriter = column_opt.unwrap_or_default(); + column.record(doc, u128, arena); + column + }, + ); + } + pub fn record_bool(&mut self, doc: RowId, column_name: &str, val: bool) { let (hash_map, arena) = (&mut self.bool_field_hash_map, &mut self.arena); hash_map.mutate_or_create( @@ -323,6 +341,20 @@ impl ColumnarWriter { )?; column_serializer.finalize()?; } + ColumnType::U128 => { + let column_writer: ColumnWriter = self.u128_field_hash_map.read(addr); + let cardinality = column_writer.get_cardinality(num_docs); + let mut column_serializer = + serializer.start_serialize_column(column_name, ColumnType::U128); + serialize_u128_column( + cardinality, + num_docs, + column_writer.operation_iterator(arena, &mut symbol_byte_buffer), + buffers, + &mut column_serializer, + )?; + column_serializer.finalize()?; + } ColumnType::Bytes | ColumnType::Str => { let str_or_bytes_column_writer: StrOrBytesColumnWriter = if column_type == ColumnType::Bytes { @@ -536,6 +568,30 @@ fn serialize_ip_addr_column( Ok(()) } +fn serialize_u128_column( + cardinality: Cardinality, + num_docs: RowId, + column_operations_it: impl Iterator>, + buffers: 
&mut SpareBuffers, + wrt: &mut impl io::Write, +) -> io::Result<()> { + let SpareBuffers { + value_index_builders, + u128_values, + .. + } = buffers; + send_to_serialize_column_mappable_to_u128( + column_operations_it, + cardinality, + num_docs, + value_index_builders, + u128_values, + wrt, + )?; + Ok(()) +} + + fn send_to_serialize_column_mappable_to_u128< T: Copy + Ord + std::fmt::Debug + Send + Sync + MonotonicallyMappableToU128 + PartialOrd, >( diff --git a/columnar/src/dynamic_column.rs b/columnar/src/dynamic_column.rs index 2b9d69770c..74ca6c719e 100644 --- a/columnar/src/dynamic_column.rs +++ b/columnar/src/dynamic_column.rs @@ -16,6 +16,7 @@ pub enum DynamicColumn { I64(Column), U64(Column), F64(Column), + U128(Column), IpAddr(Column), DateTime(Column), Bytes(BytesColumn), @@ -30,6 +31,7 @@ impl fmt::Debug for DynamicColumn { DynamicColumn::I64(col) => write!(f, " {col:?}")?, DynamicColumn::U64(col) => write!(f, " {col:?}")?, DynamicColumn::F64(col) => write!(f, "{col:?}")?, + DynamicColumn::U128(col) => write!(f, "{col:?}")?, DynamicColumn::IpAddr(col) => write!(f, "{col:?}")?, DynamicColumn::DateTime(col) => write!(f, "{col:?}")?, DynamicColumn::Bytes(col) => write!(f, "{col:?}")?, @@ -46,6 +48,7 @@ impl DynamicColumn { DynamicColumn::I64(c) => &c.index, DynamicColumn::U64(c) => &c.index, DynamicColumn::F64(c) => &c.index, + DynamicColumn::U128(c) => &c.index, DynamicColumn::IpAddr(c) => &c.index, DynamicColumn::DateTime(c) => &c.index, DynamicColumn::Bytes(c) => &c.ords().index, @@ -63,6 +66,7 @@ impl DynamicColumn { DynamicColumn::I64(c) => c.values.num_vals(), DynamicColumn::U64(c) => c.values.num_vals(), DynamicColumn::F64(c) => c.values.num_vals(), + DynamicColumn::U128(c) => c.values.num_vals(), DynamicColumn::IpAddr(c) => c.values.num_vals(), DynamicColumn::DateTime(c) => c.values.num_vals(), DynamicColumn::Bytes(c) => c.ords().values.num_vals(), @@ -76,6 +80,7 @@ impl DynamicColumn { DynamicColumn::I64(_) => ColumnType::I64, DynamicColumn::U64(_) 
=> ColumnType::U64, DynamicColumn::F64(_) => ColumnType::F64, + DynamicColumn::U128(_) => ColumnType::U128, DynamicColumn::IpAddr(_) => ColumnType::IpAddr, DynamicColumn::DateTime(_) => ColumnType::DateTime, DynamicColumn::Bytes(_) => ColumnType::Bytes, @@ -227,6 +232,7 @@ static_dynamic_conversions!(Column, DateTime); static_dynamic_conversions!(StrColumn, Str); static_dynamic_conversions!(BytesColumn, Bytes); static_dynamic_conversions!(Column, IpAddr); +static_dynamic_conversions!(Column, U128); #[derive(Clone, Debug)] pub struct DynamicColumnHandle { @@ -272,6 +278,13 @@ impl DynamicColumnHandle { )?; Ok(Some(column)) } + ColumnType::U128 => { + let column = crate::column::open_column_u128_as_compact_u64( + column_bytes, + self.format_version, + )?; + Ok(Some(column)) + } ColumnType::Bool | ColumnType::I64 | ColumnType::U64 @@ -301,6 +314,9 @@ impl DynamicColumnHandle { ColumnType::F64 => { crate::column::open_column_u64::(column_bytes, self.format_version)?.into() } + ColumnType::U128 => { + crate::column::open_column_u128::(column_bytes, self.format_version)?.into() + } ColumnType::Bool => { crate::column::open_column_u64::(column_bytes, self.format_version)?.into() } diff --git a/columnar/src/tests.rs b/columnar/src/tests.rs index b7ce7f27fc..1e25b75c7f 100644 --- a/columnar/src/tests.rs +++ b/columnar/src/tests.rs @@ -252,6 +252,7 @@ enum ColumnValue { Bytes(&'static [u8]), Numerical(NumericalValue), IpAddr(Ipv6Addr), + U128(u128), Bool(bool), DateTime(DateTime), } @@ -269,6 +270,7 @@ impl ColumnValue { ColumnValue::Bytes(_) => ColumnTypeCategory::Bytes, ColumnValue::Numerical(_) => ColumnTypeCategory::Numerical, ColumnValue::IpAddr(_) => ColumnTypeCategory::IpAddr, + ColumnValue::U128(_) => ColumnTypeCategory::U128, ColumnValue::Bool(_) => ColumnTypeCategory::Bool, ColumnValue::DateTime(_) => ColumnTypeCategory::DateTime, } @@ -303,6 +305,7 @@ fn column_value_strategy() -> impl Strategy { 0, ip_addr_byte ))), + 1 => any::().prop_map(|val| 
ColumnValue::U128(val)), 1 => any::().prop_map(ColumnValue::Bool), 1 => (679_723_993i64..1_679_723_995i64) .prop_map(|val| { ColumnValue::DateTime(DateTime::from_timestamp_secs(val)) }) @@ -353,6 +356,9 @@ fn build_columnar_with_mapping(docs: &[Vec<(&'static str, ColumnValue)>]) -> Col ColumnValue::IpAddr(ip_addr) => { columnar_writer.record_ip_addr(doc_id as u32, column_name, ip_addr); } + ColumnValue::U128(u128) => { + columnar_writer.record_u128(doc_id as u32, column_name, u128); + } ColumnValue::Bool(bool_val) => { columnar_writer.record_bool(doc_id as u32, column_name, bool_val); } @@ -506,6 +512,15 @@ impl AssertEqualToColumnValue for Ipv6Addr { } } +impl AssertEqualToColumnValue for u128 { + fn assert_equal_to_column_value(&self, column_value: &ColumnValue) { + let ColumnValue::U128(val) = column_value else { + panic!() + }; + assert_eq!(self, val); + } +} + impl> AssertEqualToColumnValue for T { fn assert_equal_to_column_value(&self, column_value: &ColumnValue) { let ColumnValue::Numerical(num) = column_value else { @@ -617,6 +632,8 @@ proptest! 
{ assert_column_values(col, expected_col_values), DynamicColumn::IpAddr(col) => assert_column_values(col, expected_col_values), + DynamicColumn::U128(col) => + assert_column_values(col, expected_col_values), DynamicColumn::DateTime(col) => assert_column_values(col, expected_col_values), DynamicColumn::Bytes(col) => diff --git a/src/aggregation/metric/top_hits.rs b/src/aggregation/metric/top_hits.rs index 524f8d76f3..9f64fb594a 100644 --- a/src/aggregation/metric/top_hits.rs +++ b/src/aggregation/metric/top_hits.rs @@ -299,6 +299,10 @@ impl TopHitsAggregationReq { .values_for_doc(doc_id) .map(FastFieldValue::IpAddr) .collect::>(), + DynamicColumn::U128(accessor) => accessor + .values_for_doc(doc_id) + .map(FastFieldValue::U128) + .collect::>(), DynamicColumn::DateTime(accessor) => accessor .values_for_doc(doc_id) .map(FastFieldValue::Date) @@ -334,6 +338,8 @@ pub enum FastFieldValue { IpAddr(Ipv6Addr), /// A list of values. Array(Vec), + /// U128 + U128(u128), } impl From for OwnedValue { @@ -350,6 +356,7 @@ impl From for OwnedValue { FastFieldValue::Array(a) => { OwnedValue::Array(a.into_iter().map(OwnedValue::from).collect()) } + FastFieldValue::U128(u128) => OwnedValue::U128(u128), } } } diff --git a/src/core/json_utils.rs b/src/core/json_utils.rs index 774bf44403..ef98800504 100644 --- a/src/core/json_utils.rs +++ b/src/core/json_utils.rs @@ -207,6 +207,9 @@ pub(crate) fn index_json_value<'a, V: Value<'a>>( ReferenceValueLeaf::IpAddr(_) => { unimplemented!("IP address support in dynamic fields is not yet implemented") } + ReferenceValueLeaf::U128(_) => { + unimplemented!("U128 support in dynamic fields is not yet implemented") + } }, ReferenceValue::Array(elements) => { for val in elements { diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index a1288e0ad1..6cc636c1b0 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -189,6 +189,9 @@ impl FastFieldsWriter { .record_str(doc_id, field_name, &token.text); } } + 
ReferenceValueLeaf::U128(val) => { + self.columnar_writer.record_u128(doc_id, field_name, val) + } }, ReferenceValue::Array(val) => { // TODO: Check this is the correct behaviour we want. @@ -320,6 +323,9 @@ fn record_json_value_to_columnar_writer<'a, V: Value<'a>>( "Pre-tokenized string support in dynamic fields is not yet implemented" ) } + ReferenceValueLeaf::U128(_) => { + unimplemented!("U128 support in dynamic fields is not yet implemented") + } }, ReferenceValue::Array(elements) => { for el in elements { diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 5fe478249c..4d462d74bf 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -337,6 +337,20 @@ impl SegmentWriter { self.fieldnorms_writer.record(doc_id, field, num_vals); } } + FieldType::U128(_) => { + let mut num_vals = 0; + for value in values { + let value = value.as_value(); + + num_vals += 1; + let u128 = value.as_u128().ok_or_else(make_schema_error)?; + term_buffer.set_u128(u128); + postings_writer.subscribe(doc_id, 0u32, term_buffer, ctx); + } + if field_entry.has_fieldnorms() { + self.fieldnorms_writer.record(doc_id, field, num_vals); + } + } } } Ok(()) diff --git a/src/postings/per_field_postings_writer.rs b/src/postings/per_field_postings_writer.rs index f3d6d6534c..7819d99379 100644 --- a/src/postings/per_field_postings_writer.rs +++ b/src/postings/per_field_postings_writer.rs @@ -51,6 +51,7 @@ fn posting_writer_from_field_entry(field_entry: &FieldEntry) -> Box Box::>::default(), FieldType::JsonObject(ref json_object_options) => { if let Some(text_indexing_option) = json_object_options.get_text_indexing_options() { diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 3526209c3e..c5d4337164 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -522,6 +522,10 @@ impl QueryParser { let ip_v6 = IpAddr::from_str(phrase)?.into_ipv6_addr(); 
Ok(Term::from_field_ip_addr(field, ip_v6)) } + FieldType::U128(_) => { + let u128 = u128::from_str(phrase)?; + Ok(Term::from_field_u128(field, u128)) + } } } @@ -622,6 +626,11 @@ impl QueryParser { let term = Term::from_field_ip_addr(field, ip_v6); Ok(vec![LogicalLiteral::Term(term)]) } + FieldType::U128(_) => { + let u128 = u128::from_str(phrase)?; + let term = Term::from_field_u128(field, u128); + Ok(vec![LogicalLiteral::Term(term)]) + } } } diff --git a/src/query/range_query/mod.rs b/src/query/range_query/mod.rs index 7bb530b0ca..8fd4693383 100644 --- a/src/query/range_query/mod.rs +++ b/src/query/range_query/mod.rs @@ -15,7 +15,7 @@ pub(crate) fn is_type_valid_for_fastfield_range_query(typ: Type) -> bool { Type::Str | Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date | Type::Json => { true } - Type::IpAddr => true, + Type::IpAddr | Type::U128 => true, Type::Facet | Type::Bytes => false, } } diff --git a/src/query/range_query/range_query_fastfield.rs b/src/query/range_query/range_query_fastfield.rs index ef34915e89..8ef6a91c48 100644 --- a/src/query/range_query/range_query_fastfield.rs +++ b/src/query/range_query/range_query_fastfield.rs @@ -128,7 +128,7 @@ impl Weight for FastFieldRangeWeight { BoundsRange::new(bounds.lower_bound, bounds.upper_bound), ) } - Type::Bool | Type::Facet | Type::Bytes | Type::Json | Type::IpAddr => { + Type::Bool | Type::Facet | Type::Bytes | Type::Json | Type::IpAddr | Type::U128 => { Err(crate::TantivyError::InvalidArgument(format!( "unsupported value bytes type in json term value_bytes {:?}", term_value.typ() @@ -421,7 +421,7 @@ fn search_on_u64_ff( pub(crate) fn maps_to_u64_fastfield(typ: Type) -> bool { match typ { Type::U64 | Type::I64 | Type::F64 | Type::Bool | Type::Date => true, - Type::IpAddr => false, + Type::IpAddr | Type::U128 => false, Type::Str | Type::Facet | Type::Bytes | Type::Json => false, } } diff --git a/src/schema/document/default_document.rs b/src/schema/document/default_document.rs index 
915b685aa7..fce9ba1dbd 100644 --- a/src/schema/document/default_document.rs +++ b/src/schema/document/default_document.rs @@ -93,6 +93,11 @@ impl CompactDoc { self.add_leaf_field_value(field, value); } + /// Add a U128 field. + pub fn add_u128(&mut self, field: Field, value: u128) { + self.add_leaf_field_value(field, value); + } + /// Add a i64 field pub fn add_i64(&mut self, field: Field, value: i64) { self.add_leaf_field_value(field, value); @@ -254,6 +259,7 @@ impl CompactDoc { } ReferenceValueLeaf::IpAddr(num) => write_into(&mut self.node_data, num.to_u128()), ReferenceValueLeaf::PreTokStr(pre_tok) => write_into(&mut self.node_data, *pre_tok), + ReferenceValueLeaf::U128(u128) => write_into(&mut self.node_data, u128), }; ValueAddr { type_id, val_addr } } @@ -472,6 +478,11 @@ impl<'a> CompactDocValue<'a> { self.container, addr, )?)), + ValueType::U128 => self + .container + .read_from::(addr) + .map(|num| ReferenceValueLeaf::U128(num)) + .map(Into::into), } } } @@ -542,6 +553,8 @@ pub enum ValueType { Object = 11, /// Pre-tokenized str type, Array = 12, + /// U128 + U128 = 13, } impl BinarySerializable for ValueType { @@ -587,6 +600,7 @@ impl<'a> From<&ReferenceValueLeaf<'a>> for ValueType { ReferenceValueLeaf::PreTokStr(_) => ValueType::PreTokStr, ReferenceValueLeaf::Facet(_) => ValueType::Facet, ReferenceValueLeaf::Bytes(_) => ValueType::Bytes, + ReferenceValueLeaf::U128(_) => ValueType::U128, } } } diff --git a/src/schema/document/mod.rs b/src/schema/document/mod.rs index 91ce894c4d..51b5f45c54 100644 --- a/src/schema/document/mod.rs +++ b/src/schema/document/mod.rs @@ -268,6 +268,7 @@ pub(crate) mod type_codes { pub const NULL_CODE: u8 = 11; pub const ARRAY_CODE: u8 = 12; pub const OBJECT_CODE: u8 = 13; + pub const U128_CODE: u8 = 14; // Extended type codes pub const TOK_STR_EXT_CODE: u8 = 0; diff --git a/src/schema/document/owned_value.rs b/src/schema/document/owned_value.rs index 9fbf1f8c26..34064ea90d 100644 --- a/src/schema/document/owned_value.rs +++ 
b/src/schema/document/owned_value.rs @@ -49,6 +49,8 @@ pub enum OwnedValue { Object(Vec<(String, Self)>), /// IpV6 Address. Internally there is no IpV4, it needs to be converted to `Ipv6Addr`. IpAddr(Ipv6Addr), + /// U128 + U128(u128), } impl AsRef for OwnedValue { @@ -77,6 +79,7 @@ impl<'a> Value<'a> for &'a OwnedValue { OwnedValue::IpAddr(val) => ReferenceValueLeaf::IpAddr(*val).into(), OwnedValue::Array(array) => ReferenceValue::Array(array.iter()), OwnedValue::Object(object) => ReferenceValue::Object(ObjectMapIter(object.iter())), + OwnedValue::U128(val) => ReferenceValueLeaf::U128(*val).into(), } } } @@ -198,6 +201,7 @@ impl serde::Serialize for OwnedValue { } } OwnedValue::Array(ref array) => array.serialize(serializer), + OwnedValue::U128(ref u128) => u128.serialize(serializer), } } } @@ -285,6 +289,7 @@ impl<'a, V: Value<'a>> From> for OwnedValue { ReferenceValueLeaf::IpAddr(val) => OwnedValue::IpAddr(val), ReferenceValueLeaf::Bool(val) => OwnedValue::Bool(val), ReferenceValueLeaf::PreTokStr(val) => OwnedValue::PreTokStr(*val.clone()), + ReferenceValueLeaf::U128(u128) => OwnedValue::U128(u128), }, ReferenceValue::Array(val) => { OwnedValue::Array(val.map(|v| v.as_value().into()).collect()) diff --git a/src/schema/document/se.rs b/src/schema/document/se.rs index 9ad5003b70..40124d032c 100644 --- a/src/schema/document/se.rs +++ b/src/schema/document/se.rs @@ -136,6 +136,9 @@ where W: Write self.write_type_code(type_codes::EXT_CODE)?; self.serialize_with_type_code(type_codes::TOK_STR_EXT_CODE, &*val) } + ReferenceValueLeaf::U128(val) => { + self.serialize_with_type_code(type_codes::U128_CODE, &val) + } }, ReferenceValue::Array(elements) => { self.write_type_code(type_codes::ARRAY_CODE)?; diff --git a/src/schema/document/value.rs b/src/schema/document/value.rs index de1067ce67..fe966d0c2c 100644 --- a/src/schema/document/value.rs +++ b/src/schema/document/value.rs @@ -136,6 +136,8 @@ pub enum ReferenceValueLeaf<'a> { Bool(bool), /// Pre-tokenized str type, 
PreTokStr(Box), + /// U128 value + U128(u128), } impl From for ReferenceValueLeaf<'_> { @@ -194,6 +196,13 @@ impl From for ReferenceValueLeaf<'_> { } } +impl From for ReferenceValueLeaf<'_> { + #[inline] + fn from(value: u128) -> Self { + ReferenceValueLeaf::U128(value) + } +} + impl From for ReferenceValueLeaf<'_> { #[inline] fn from(val: PreTokenizedString) -> Self { @@ -220,6 +229,7 @@ impl<'a, T: Value<'a> + ?Sized> From> for ReferenceValue< ReferenceValueLeaf::PreTokStr(val) => { ReferenceValue::Leaf(ReferenceValueLeaf::PreTokStr(val)) } + ReferenceValueLeaf::U128(val) => ReferenceValue::Leaf(ReferenceValueLeaf::U128(val)), } } } @@ -291,6 +301,16 @@ impl<'a> ReferenceValueLeaf<'a> { } } + #[inline] + /// If the Value is a u128, returns the associated u128. Returns None otherwise. + pub fn as_u128(&self) -> Option { + if let Self::U128(val) = self { + Some(*val) + } else { + None + } + } + #[inline] /// If the Value is a bool, returns the associated bool. Returns None otherwise. pub fn as_bool(&self) -> Option { @@ -411,6 +431,12 @@ where V: Value<'a> self.as_leaf().and_then(|leaf| leaf.as_ip_addr()) } + #[inline] + /// If the Value is a u128, returns the associated u128. Returns None otherwise. + pub fn as_u128(&self) -> Option { + self.as_leaf().and_then(|leaf| leaf.as_u128()) + } + #[inline] /// If the Value is a bool, returns the associated bool. Returns None otherwise. pub fn as_bool(&self) -> Option { diff --git a/src/schema/field_entry.rs b/src/schema/field_entry.rs index 77e061bc61..a402b7da66 100644 --- a/src/schema/field_entry.rs +++ b/src/schema/field_entry.rs @@ -65,6 +65,11 @@ impl FieldEntry { Self::new(field_name, FieldType::IpAddr(ip_options)) } + /// Creates a new u128 field entry. + pub fn new_u128(field_name: String, u128_options: NumericOptions) -> FieldEntry { + Self::new(field_name, FieldType::U128(u128_options)) + } + /// Creates a field entry for a facet. 
pub fn new_facet(field_name: String, facet_options: FacetOptions) -> FieldEntry { Self::new(field_name, FieldType::Facet(facet_options)) @@ -129,6 +134,7 @@ impl FieldEntry { FieldType::Bytes(ref options) => options.is_stored(), FieldType::JsonObject(ref options) => options.is_stored(), FieldType::IpAddr(ref options) => options.is_stored(), + FieldType::U128(ref options) => options.is_stored(), } } } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 8b203f5b37..f314db76dc 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -71,6 +71,8 @@ pub enum Type { Json = b'j', /// IpAddr IpAddr = b'p', + /// U128 + U128 = b'8', } impl From for Type { @@ -84,6 +86,7 @@ impl From for Type { ColumnType::DateTime => Type::Date, ColumnType::Bytes => Type::Bytes, ColumnType::IpAddr => Type::IpAddr, + ColumnType::U128 => Type::U128, } } } @@ -139,6 +142,7 @@ impl Type { Type::Bytes => "Bytes", Type::Json => "Json", Type::IpAddr => "IpAddr", + Type::U128 => "U128", } } @@ -157,6 +161,7 @@ impl Type { b'b' => Some(Type::Bytes), b'j' => Some(Type::Json), b'p' => Some(Type::IpAddr), + b'8' => Some(Type::U128), _ => None, } } @@ -189,6 +194,8 @@ pub enum FieldType { JsonObject(JsonObjectOptions), /// IpAddr field IpAddr(IpAddrOptions), + /// U128 field + U128(NumericOptions), } impl FieldType { @@ -205,6 +212,7 @@ impl FieldType { FieldType::Bytes(_) => Type::Bytes, FieldType::JsonObject(_) => Type::Json, FieldType::IpAddr(_) => Type::IpAddr, + FieldType::U128(_) => Type::U128, } } @@ -241,6 +249,7 @@ impl FieldType { FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(), FieldType::JsonObject(ref json_object_options) => json_object_options.is_indexed(), FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_indexed(), + FieldType::U128(ref u128_options) => u128_options.is_indexed(), } } @@ -276,6 +285,7 @@ impl FieldType { | FieldType::Bool(ref int_options) => int_options.is_fast(), FieldType::Date(ref date_options) => 
date_options.is_fast(), FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.is_fast(), + FieldType::U128(ref u128_options) => u128_options.is_fast(), FieldType::Facet(_) => true, FieldType::JsonObject(ref json_object_options) => json_object_options.is_fast(), } @@ -297,6 +307,7 @@ impl FieldType { FieldType::Bytes(ref bytes_options) => bytes_options.fieldnorms(), FieldType::JsonObject(ref _json_object_options) => false, FieldType::IpAddr(ref ip_addr_options) => ip_addr_options.fieldnorms(), + FieldType::U128(ref u128_options) => u128_options.fieldnorms(), } } @@ -348,6 +359,13 @@ impl FieldType { None } } + FieldType::U128(ref u128_options) => { + if u128_options.is_indexed() { + Some(IndexRecordOption::Basic) + } else { + None + } + } } } @@ -449,6 +467,16 @@ impl FieldType { Ok(OwnedValue::IpAddr(ip_addr.into_ipv6_addr())) } + FieldType::U128(_) => { + let u128: u128 = u128::from_str(&field_text).map_err(|err| { + ValueParsingError::ParseError { + error: err.to_string(), + json: JsonValue::String(field_text), + } + })?; + + Ok(OwnedValue::U128(u128)) + } } } JsonValue::Number(field_val_num) => match self { @@ -508,6 +536,10 @@ impl FieldType { expected: "a string with an ip addr", json: JsonValue::Number(field_val_num), }), + FieldType::U128(_) => Err(ValueParsingError::TypeError { + expected: "a string with a u128", + json: JsonValue::Number(field_val_num), + }), }, JsonValue::Object(json_map) => match self { FieldType::Str(_) => { diff --git a/src/schema/mod.rs b/src/schema/mod.rs index 1cd4b72436..6df734f682 100644 --- a/src/schema/mod.rs +++ b/src/schema/mod.rs @@ -167,6 +167,7 @@ pub(crate) fn value_type_to_column_type(typ: Type) -> Option { Type::Facet => Some(ColumnType::Str), Type::Bytes => Some(ColumnType::Bytes), Type::IpAddr => Some(ColumnType::IpAddr), + Type::U128 => Some(ColumnType::U128), Type::Json => None, } } diff --git a/src/schema/schema.rs b/src/schema/schema.rs index c1d22c0baa..9337215a2b 100644 --- a/src/schema/schema.rs +++ 
b/src/schema/schema.rs @@ -139,6 +139,22 @@ impl SchemaBuilder { self.add_field(field_entry) } + /// Adds a u128 field. + /// Returns the associated field handle. + /// + /// # Panics + /// + /// Panics when field already exists. + pub fn add_u128_field>( + &mut self, + field_name_str: &str, + field_options: T, + ) -> Field { + let field_name = String::from(field_name_str); + let field_entry = FieldEntry::new_u128(field_name, field_options.into()); + self.add_field(field_entry) + } + /// Adds a new text field. /// Returns the associated field handle /// diff --git a/src/schema/term.rs b/src/schema/term.rs index 0d63056651..3281ec4894 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -121,6 +121,13 @@ impl Term { term } + /// Builds a term given a field, and a `u128`-value + pub fn from_field_u128(field: Field, u128: u128) -> Term { + let mut term = Self::with_type_and_field(Type::U128, field); + term.set_u128(u128); + term + } + /// Builds a term given a field, and a `u64`-value pub fn from_field_u64(field: Field, val: u64) -> Term { Term::from_fast_value(field, &val) @@ -244,6 +251,11 @@ impl Term { self.set_bytes(val.to_u128().to_be_bytes().as_ref()); } + /// Sets a `u128` value in the term. + pub fn set_u128(&mut self, val: u128) { + self.set_bytes(val.to_u128().to_be_bytes().as_ref()); + } + /// Sets the value of a `Bytes` field. pub fn set_bytes(&mut self, bytes: &[u8]) { self.truncate_value_bytes(0); @@ -459,6 +471,15 @@ where B: AsRef<[u8]> Some(Ipv6Addr::from_u128(ip_u128)) } + /// Returns a `u128` value from the term. + pub fn as_u128(&self) -> Option { + if self.typ() != Type::U128 { + return None; + } + let u128 = u128::from_be_bytes(self.raw_value_bytes_payload().try_into().ok()?); + Some(u128) + } + /// Returns the json path type. /// /// Returns `None` if the value is not JSON. @@ -561,6 +582,9 @@ where B: AsRef<[u8]> Type::IpAddr => { write_opt(f, self.as_ip_addr())?; } + Type::U128 => { + write_opt(f, self.as_u128())?; + } } Ok(()) }