Skip to content

Commit ab93abd

Browse files
authored
Merge pull request #11 from akasarto/refsCleanup
Review sample file columns and apps.
2 parents 20aec07 + 10d0999 commit ab93abd

27 files changed

+20289
-20296
lines changed

src/ApacheOrcDotNet.OptimizedReaderTest.App/Configs.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@ namespace ApacheOrcDotNet.OptimizedReaderTest.App
44
{
55
public class Configs
66
{
7-
public string Source { get; set; }
8-
public string Symbol { get; set; }
7+
public string Vendor { get; set; }
8+
public string Product { get; set; }
99
public TimeSpan BeginTime { get; set; }
1010
public TimeSpan EndTime { get; set; }
1111
}

src/ApacheOrcDotNet.OptimizedReaderTest.App/Item.cs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,10 @@ namespace ApacheOrcDotNet.OptimizedReaderTest.App
44
{
55
public class Item
66
{
7-
public string Source { get; set; }
8-
public string Symbol { get; set; }
9-
public decimal? Time { get; set; }
10-
public long? Size { get; set; }
7+
public string StringDictionaryV2 { get; set; }
8+
public string StringDirectV2 { get; set; }
9+
public decimal? Decimal { get; set; }
10+
public long? Integer { get; set; }
1111
public DateTime? Date { get; set; }
1212
public double? Double { get; set; }
1313
public float? Float { get; set; }

src/ApacheOrcDotNet.OptimizedReaderTest.App/Program.cs

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -19,21 +19,21 @@ static async Task Main(string[] args)
1919
;
2020

2121
var uri = config.GetValue("uri", string.Empty);
22-
var source = config.GetValue("source", string.Empty);
23-
var symbol = config.GetValue("symbol", string.Empty);
22+
var vendor = config.GetValue("vendor", string.Empty);
23+
var product = config.GetValue("product", string.Empty);
2424
var beginTime = config.GetValue("beginTime", "00:00:00");
2525
var endTime = config.GetValue("endTime", "23:45:00");
2626

2727
var isValidBeginTime = TimeSpan.TryParse(beginTime, CultureInfo.InvariantCulture, out var parsedBeginTime);
2828
var isValidEndTime = TimeSpan.TryParse(endTime, CultureInfo.InvariantCulture, out var parsedEndTime);
2929

30-
if (uri.Length ==0 || source.Length == 0 || symbol.Length == 0 || !isValidBeginTime || !isValidEndTime || (parsedEndTime < parsedBeginTime))
30+
if (uri.Length ==0 || vendor.Length == 0 || product.Length == 0 || !isValidBeginTime || !isValidEndTime || (parsedEndTime < parsedBeginTime))
3131
{
32-
Console.WriteLine("Usage: --uri orcFileUri --source sourceName --symbol symbolName --beginTime hh:mm:ss --endTime hh:mm:ss");
32+
Console.WriteLine("Usage: --uri orcFileUri --vendor vendorName --product productName --beginTime hh:mm:ss --endTime hh:mm:ss");
3333
Console.WriteLine();
3434
Console.WriteLine("Examples:");
35-
Console.WriteLine(@" dotnet run --uri file://c:/path/to/testFile.orc --source CTSPillarNetworkB --symbol SPY --beginTime 09:43:20 --endTime 09:43:21");
36-
Console.WriteLine(@" dotnet run --uri https://s3.amazonaws.com/some/path/testFile.orc --source CTSPillarNetworkB --symbol SPY --beginTime 09:43:20 --endTime 09:43:21");
35+
Console.WriteLine(@" dotnet run --uri file://c:/path/to/testFile.orc --vendor xyz --product test --beginTime 09:43:20 --endTime 09:43:21");
36+
Console.WriteLine(@" dotnet run --uri https://s3.amazonaws.com/some/path/testFile.orc --vendor xyz --product test --beginTime 09:43:20 --endTime 09:43:21");
3737
Console.WriteLine();
3838
Console.WriteLine(@" You can use files under ApacheOrcDotNet.OptimizedReader.Test/Data to test the readers:");
3939
Console.WriteLine(@" - optimized_reader_test_file.orc");
@@ -45,16 +45,16 @@ static async Task Main(string[] args)
4545
Console.WriteLine("Running.. CTRL+C to exit.");
4646
Console.WriteLine();
4747
Console.WriteLine($"Pid: {Environment.ProcessId}");
48-
Console.WriteLine($"source: '{source}'");
49-
Console.WriteLine($"symbol: '{symbol}'");
48+
Console.WriteLine($"vendor: '{vendor}'");
49+
Console.WriteLine($"product: '{product}'");
5050
Console.WriteLine($"beginTime: '{beginTime}'");
5151
Console.WriteLine($"endTime: '{endTime}'");
5252
Console.WriteLine();
5353

5454
var configs = new Configs
5555
{
56-
Source = source,
57-
Symbol = symbol,
56+
Vendor = vendor,
57+
Product = product,
5858
BeginTime = parsedBeginTime,
5959
EndTime = parsedEndTime
6060
};
@@ -79,9 +79,8 @@ static async Task Main(string[] args)
7979
//await (new ReadFilteredApp(uri, configs, fileByteRangeProviderFactory)).Run();
8080

8181
//// Sample app 5
82-
//// This requires a test file with source, symbol, time, price and size fields.
83-
//// (Or the test class below can be updated to use different fields)
84-
//await Task.Run(() => (new TradeDataSourceApp(uri, configs, fileByteRangeProviderFactory)).Run());
82+
//// Simulates the usage of the buffers as a specialized data source, filtered by vendor, product and sale time.
83+
//await Task.Run(() => (new SampleDataSourceApp(uri, configs, fileByteRangeProviderFactory)).Run());
8584

8685
stopWatch.Stop();
8786

src/ApacheOrcDotNet.OptimizedReaderTest.App/ReadAllApp.cs

Lines changed: 21 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,10 @@ public async Task Run()
3232
watch.Start();
3333

3434
// Columns
35-
var sourceColumn = reader.GetColumn("source");
36-
var symbolColumn = reader.GetColumn("symbol");
37-
var timeColumn = reader.GetColumn("time");
38-
var sizeColumn = reader.GetColumn("size");
35+
var stringDictionaryV2Column = reader.GetColumn("stringDictionaryV2");
36+
var stringDirectV2Column = reader.GetColumn("stringDirectV2");
37+
var decimalColumn = reader.GetColumn("decimal");
38+
var integerColumn = reader.GetColumn("integer");
3939
var dateColumn = reader.GetColumn("date");
4040
var doubleColumn = reader.GetColumn("double");
4141
var floatColumn = reader.GetColumn("float");
@@ -45,10 +45,10 @@ public async Task Run()
4545
var booleanColumn = reader.GetColumn("boolean");
4646

4747
// Buffers
48-
var sourceColumnBuffer = reader.CreateStringColumnBuffer(sourceColumn);
49-
var symbolColumnBuffer = reader.CreateStringColumnBuffer(symbolColumn);
50-
var timeColumnBuffer = reader.CreateDecimalColumnBuffer(timeColumn);
51-
var sizeColumnBuffer = reader.CreateIntegerColumnBuffer(sizeColumn);
48+
var stringDictionaryV2ColumnBuffer = reader.CreateStringColumnBuffer(stringDictionaryV2Column);
49+
var stringDirectV2ColumnBuffer = reader.CreateStringColumnBuffer(stringDirectV2Column);
50+
var decimalColumnBuffer = reader.CreateDecimalColumnBuffer(decimalColumn);
51+
var integerColumnBuffer = reader.CreateIntegerColumnBuffer(integerColumn);
5252
var dateColumnBuffer = reader.CreateDateColumnBuffer(dateColumn);
5353
var doubleColumnBuffer = reader.CreateDoubleColumnBuffer(doubleColumn);
5454
var floatColumnBuffer = reader.CreateFloatColumnBuffer(floatColumn);
@@ -63,15 +63,15 @@ public async Task Run()
6363

6464
for (var stripeId = 0; stripeId < reader.GetNumberOfStripes(); stripeId++)
6565
{
66-
var numRowEntryIndexes = reader.GetNumberOfRowGroupEntries(stripeId, timeColumn.Id);
66+
var numRowEntryIndexes = reader.GetNumberOfRowGroupEntries(stripeId, decimalColumn.Id);
6767

6868
for (var rowEntryIndex = 0; rowEntryIndex < numRowEntryIndexes; rowEntryIndex++)
6969
{
7070
await Task.WhenAll(
71-
reader.LoadDataAsync(stripeId, rowEntryIndex, sourceColumnBuffer),
72-
reader.LoadDataAsync(stripeId, rowEntryIndex, symbolColumnBuffer),
73-
reader.LoadDataAsync(stripeId, rowEntryIndex, timeColumnBuffer),
74-
reader.LoadDataAsync(stripeId, rowEntryIndex, sizeColumnBuffer),
71+
reader.LoadDataAsync(stripeId, rowEntryIndex, stringDictionaryV2ColumnBuffer),
72+
reader.LoadDataAsync(stripeId, rowEntryIndex, stringDirectV2ColumnBuffer),
73+
reader.LoadDataAsync(stripeId, rowEntryIndex, decimalColumnBuffer),
74+
reader.LoadDataAsync(stripeId, rowEntryIndex, integerColumnBuffer),
7575
reader.LoadDataAsync(stripeId, rowEntryIndex, dateColumnBuffer),
7676
reader.LoadDataAsync(stripeId, rowEntryIndex, doubleColumnBuffer),
7777
reader.LoadDataAsync(stripeId, rowEntryIndex, floatColumnBuffer),
@@ -85,10 +85,10 @@ await Task.WhenAll(
8585
{
8686
totalCount++;
8787

88-
var source = sourceColumnBuffer.Values[idx];
89-
var symbol = symbolColumnBuffer.Values[idx];
90-
var time = timeColumnBuffer.Values[idx];
91-
var size = sizeColumnBuffer.Values[idx];
88+
var stringDictionaryV2 = stringDictionaryV2ColumnBuffer.Values[idx];
89+
var stringDirectV2 = stringDirectV2ColumnBuffer.Values[idx];
90+
var @decimal = decimalColumnBuffer.Values[idx];
91+
var integer = integerColumnBuffer.Values[idx];
9292
var date = dateColumnBuffer.Values[idx];
9393
var dobl = doubleColumnBuffer.Values[idx];
9494
var sing = floatColumnBuffer.Values[idx];
@@ -100,10 +100,10 @@ await Task.WhenAll(
100100
if (outputData)
101101
{
102102
Console.WriteLine($"" +
103-
$"{source}," +
104-
$"{symbol}," +
105-
$"{(time.HasValue ? time.Value.ToString(CultureInfo.InvariantCulture).PadRight(15, '0') : string.Empty)}," +
106-
$"{size}" +
103+
$"{stringDictionaryV2}," +
104+
$"{stringDirectV2}," +
105+
$"{(@decimal.HasValue ? @decimal.Value.ToString(CultureInfo.InvariantCulture).PadRight(15, '0') : string.Empty)}," +
106+
$"{integer}" +
107107
$" " +
108108
$"{(date.HasValue ? date.Value.ToString("MM/dd/yyyy", CultureInfo.InvariantCulture) : string.Empty)}," +
109109
$"{dobl}," +

src/ApacheOrcDotNet.OptimizedReaderTest.App/ReadAllOldApp.cs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -41,10 +41,10 @@ public void Run()
4141
if (outputData)
4242
{
4343
Console.WriteLine($"" +
44-
$"{item.Source}," +
45-
$"{item.Symbol}," +
46-
$"{(item.Time.HasValue ? item.Time.Value.ToString(CultureInfo.InvariantCulture).PadRight(15, '0') : string.Empty)}," +
47-
$"{item.Size}" +
44+
$"{item.StringDictionaryV2}," +
45+
$"{item.StringDirectV2}," +
46+
$"{(item.Decimal.HasValue ? item.Decimal.Value.ToString(CultureInfo.InvariantCulture).PadRight(15, '0') : string.Empty)}," +
47+
$"{item.Integer}" +
4848
$" " +
4949
$"{(item.Date.HasValue ? item.Date.Value.ToString("MM/dd/yyyy", CultureInfo.InvariantCulture) : string.Empty)}," +
5050
$"{item.Double}," +

src/ApacheOrcDotNet.OptimizedReaderTest.App/ReadCompareApp.cs

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -47,10 +47,10 @@ public async Task Run()
4747
watch.Start();
4848

4949
// Columns
50-
var sourceColumn = orcReaderNew.GetColumn("source");
51-
var symbolColumn = orcReaderNew.GetColumn("symbol");
52-
var timeColumn = orcReaderNew.GetColumn("time");
53-
var sizeColumn = orcReaderNew.GetColumn("size");
50+
var stringDictionaryV2Column = orcReaderNew.GetColumn("stringDictionaryV2");
51+
var stringDirectV2Column = orcReaderNew.GetColumn("stringDirectV2");
52+
var decimalColumn = orcReaderNew.GetColumn("decimal");
53+
var integerColumn = orcReaderNew.GetColumn("integer");
5454
var dateColumn = orcReaderNew.GetColumn("date");
5555
var doubleColumn = orcReaderNew.GetColumn("double");
5656
var floatColumn = orcReaderNew.GetColumn("float");
@@ -60,10 +60,10 @@ public async Task Run()
6060
var booleanColumn = orcReaderNew.GetColumn("boolean");
6161

6262
// Buffers
63-
var sourceColumnBuffer = orcReaderNew.CreateStringColumnBuffer(sourceColumn);
64-
var symbolColumnBuffer = orcReaderNew.CreateStringColumnBuffer(symbolColumn);
65-
var timeColumnBuffer = orcReaderNew.CreateDecimalColumnBuffer(timeColumn);
66-
var sizeColumnBuffer = orcReaderNew.CreateIntegerColumnBuffer(sizeColumn);
63+
var stringDictionaryV2ColumnBuffer = orcReaderNew.CreateStringColumnBuffer(stringDictionaryV2Column);
64+
var stringDirectV2ColumnBuffer = orcReaderNew.CreateStringColumnBuffer(stringDirectV2Column);
65+
var decimalColumnBuffer = orcReaderNew.CreateDecimalColumnBuffer(decimalColumn);
66+
var integerColumnBuffer = orcReaderNew.CreateIntegerColumnBuffer(integerColumn);
6767
var dateColumnBuffer = orcReaderNew.CreateDateColumnBuffer(dateColumn);
6868
var doubleColumnBuffer = orcReaderNew.CreateDoubleColumnBuffer(doubleColumn);
6969
var floatColumnBuffer = orcReaderNew.CreateFloatColumnBuffer(floatColumn);
@@ -80,15 +80,15 @@ public async Task Run()
8080

8181
for (var stripeId = 0; stripeId < orcReaderNew.GetNumberOfStripes(); stripeId++)
8282
{
83-
var numRowEntryIndexes = orcReaderNew.GetNumberOfRowGroupEntries(stripeId, timeColumn.Id);
83+
var numRowEntryIndexes = orcReaderNew.GetNumberOfRowGroupEntries(stripeId, decimalColumn.Id);
8484

8585
for (var rowEntryIndex = 0; rowEntryIndex < numRowEntryIndexes; rowEntryIndex++)
8686
{
8787
await Task.WhenAll(
88-
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, sourceColumnBuffer),
89-
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, symbolColumnBuffer),
90-
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, timeColumnBuffer),
91-
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, sizeColumnBuffer),
88+
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, stringDictionaryV2ColumnBuffer),
89+
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, stringDirectV2ColumnBuffer),
90+
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, decimalColumnBuffer),
91+
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, integerColumnBuffer),
9292
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, dateColumnBuffer),
9393
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, doubleColumnBuffer),
9494
orcReaderNew.LoadDataAsync(stripeId, rowEntryIndex, floatColumnBuffer),
@@ -102,10 +102,10 @@ await Task.WhenAll(
102102
{
103103
totalCount++;
104104

105-
var source = sourceColumnBuffer.Values[idx];
106-
var symbol = symbolColumnBuffer.Values[idx];
107-
var time = timeColumnBuffer.Values[idx];
108-
var size = sizeColumnBuffer.Values[idx];
105+
var stringDictionaryV2 = stringDictionaryV2ColumnBuffer.Values[idx];
106+
var stringDirectV2 = stringDirectV2ColumnBuffer.Values[idx];
107+
var @decimal = decimalColumnBuffer.Values[idx];
108+
var integer = integerColumnBuffer.Values[idx];
109109
var date = dateColumnBuffer.Values[idx];
110110
var dobl = doubleColumnBuffer.Values[idx];
111111
var sing = floatColumnBuffer.Values[idx];
@@ -122,8 +122,8 @@ await Task.WhenAll(
122122

123123
var item = (Item)oldReaderItemsEnumerator.Current;
124124

125-
if (source != item.Source || symbol != item.Symbol || time != item.Time || size != item.Size)
126-
throw new InvalidDataException($"{source},{symbol},{time},{size} != {item.Source},{item.Symbol},{item.Time},{item.Size}");
125+
if (stringDictionaryV2 != item.StringDictionaryV2 || stringDirectV2 != item.StringDirectV2 || @decimal != item.Decimal || integer != item.Integer)
126+
throw new InvalidDataException($"{stringDictionaryV2},{stringDirectV2},{@decimal},{integer} != {item.StringDictionaryV2},{item.StringDirectV2},{item.Decimal},{item.Integer}");
127127

128128
if (date != item.Date || dobl != item.Double || sing != item.Float || timeStamp != item.TimeStamp)
129129
throw new InvalidDataException($"{date},{dobl},{sing},{timeStamp} != {item.Date},{item.Double},{item.Float},{item.TimeStamp}");

0 commit comments

Comments
 (0)