Skip to content

Commit 84cec81

Browse files
committed
Added StripeColumnStatistics from Metadata section.
1 parent 4941f49 commit 84cec81

File tree

6 files changed

+35
-4
lines changed

6 files changed

+35
-4
lines changed

src/ApacheOrcDotNet.OptimizedReader/OrcReader.cs

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,11 @@ public ColumnStatistics GetFileColumnStatistics(int columnId)
5757
return _fileTail.Footer.Statistics[columnId];
5858
}
5959

60+
public ColumnStatistics GetStripeColumnStatistics(int columnId, int stripeId)
61+
{
62+
return _fileTail.Metadata.StripeStats[stripeId].ColStats[columnId];
63+
}
64+
6065
public IEnumerable<RowGroupDetail> ReadRowGroupIndex(int columnId, int stripeId)
6166
{
6267
if(!_stripeStreams.TryGetValue(stripeId, out var streamDetails))

src/ApacheOrcDotNet.OptimizedReader/SpanFileTail.cs

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@ public sealed class SpanFileTail
99
{
1010
public Protocol.PostScript PostScript { get; private init; }
1111
public Protocol.Footer Footer { get; private init; }
12+
public Protocol.Metadata Metadata { get; private init; }
1213

1314
public static bool TryRead(ReadOnlySpan<byte> buffer, out SpanFileTail fileTail, out int additionalBytesRequired)
1415
{
@@ -36,22 +37,30 @@ public static bool TryRead(ReadOnlySpan<byte> buffer, out SpanFileTail fileTail,
3637
throw new InvalidDataException("Postscript didn't contain magic bytes");
3738

3839
accumulatedLength += (int)postScript.FooterLength;
40+
int footerStart = buffer.Length - accumulatedLength;
41+
accumulatedLength += (int)postScript.MetadataLength;
42+
int metadataStart = buffer.Length - accumulatedLength;
43+
3944
if (buffer.Length < accumulatedLength)
4045
{
4146
additionalBytesRequired = accumulatedLength - buffer.Length;
4247
fileTail = null;
4348
return false;
4449
}
4550

46-
int footerStart = buffer.Length - accumulatedLength;
4751
var compressedFooter = buffer.Slice(footerStart, (int)postScript.FooterLength);
48-
using var decompressedMemorySequence = new DecompressingMemorySequence(compressedFooter, postScript.Compression, (int)postScript.CompressionBlockSize);
49-
var footer = Serializer.Deserialize<Protocol.Footer>(decompressedMemorySequence.Sequence);
52+
using var decompressedFooterSequence = new DecompressingMemorySequence(compressedFooter, postScript.Compression, (int)postScript.CompressionBlockSize);
53+
var footer = Serializer.Deserialize<Protocol.Footer>(decompressedFooterSequence.Sequence);
54+
55+
var compressedMetadata = buffer.Slice(metadataStart, (int)postScript.MetadataLength);
56+
using var decompressedMetadataSequence = new DecompressingMemorySequence(compressedMetadata, postScript.Compression, (int)postScript.CompressionBlockSize);
57+
var metadata = Serializer.Deserialize<Protocol.Metadata>(decompressedMetadataSequence.Sequence);
5058

5159
fileTail = new SpanFileTail
5260
{
5361
PostScript = postScript,
54-
Footer = footer
62+
Footer = footer,
63+
Metadata = metadata
5564
};
5665
additionalBytesRequired = 0;
5766
return true;
-1.9 KB
Binary file not shown.
19.7 KB
Binary file not shown.

test/ApacheOrcDotNet.OptimizedReader.Test/OrcReader_Test.cs

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,20 @@ public void FileColumnStatistics()
5353
Assert.Equal(72041.725554m, decimal.Parse(reader.GetFileColumnStatistics(5).DecimalStatistics.Maximum));
5454
}
5555

56+
[Fact]
57+
public void StripeColumnStatistics()
58+
{
59+
var reader = new OrcReader(new OrcReaderConfiguration(), _byteRangeProvider);
60+
Assert.Equal("BZX", reader.GetStripeColumnStatistics(1, 0).StringStatistics.Minimum);
61+
Assert.Equal("BZX", reader.GetStripeColumnStatistics(1, 0).StringStatistics.Maximum);
62+
Assert.Equal(1, reader.GetStripeColumnStatistics(2, 0).IntStatistics.Minimum);
63+
Assert.Equal(35, reader.GetStripeColumnStatistics(2, 0).IntStatistics.Maximum);
64+
Assert.Equal(311, reader.GetStripeColumnStatistics(3, 0).IntStatistics.Minimum);
65+
Assert.Equal(16690225, reader.GetStripeColumnStatistics(3, 0).IntStatistics.Maximum);
66+
Assert.Equal(25200.063318m, decimal.Parse(reader.GetStripeColumnStatistics(5, 0).DecimalStatistics.Minimum));
67+
Assert.Equal(71979.49409m, decimal.Parse(reader.GetStripeColumnStatistics(5, 0).DecimalStatistics.Maximum));
68+
}
69+
5670
[Fact]
5771
public void RowGroupStatistics()
5872
{

test/ApacheOrcDotNet.OptimizedReader.Test/TestByteRangeProvider.cs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,9 @@ public TestByteRangeProvider(bool writeRequestedRangesToFile, bool readRequested
1616
{
1717
_writeRequestedRangesToFile = writeRequestedRangesToFile;
1818
_readRequestedRangesFromFile = readRequestedRangesFromFile;
19+
20+
if (_writeRequestedRangesToFile == true && _readRequestedRangesFromFile == true)
21+
throw new InvalidOperationException("Cannot read and write to range files simultaneously");
1922
}
2023

2124
public void Dispose()

0 commit comments

Comments
 (0)