Skip to content

Commit 05d0f01

Browse files
authored
Add methods to stream generated rows directly to file (#2468)
1 parent b244b28 commit 05d0f01

File tree

3 files changed

+170
-69
lines changed

3 files changed

+170
-69
lines changed

src/org/labkey/test/tests/assay/UploadLargeExcelAssayTest.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ protected void doCleanup(boolean afterTest)
3939
@BeforeClass
4040
public static void setupProject() throws Exception
4141
{
42-
UploadLargeExcelAssayTest init = (UploadLargeExcelAssayTest) getCurrentTest();
42+
UploadLargeExcelAssayTest init = getCurrentTest();
4343

4444
init.doSetup();
4545
}
@@ -85,9 +85,8 @@ public void testUpload200kRows() throws Exception
8585
String fileName = "200kXlsxFile.xlsx";
8686
var dgen = new TestDataGenerator("samples", "chaos_sample", getProjectName())
8787
.withColumns(ASSAY_FIELDS);
88-
dgen.generateRows(200_000);
8988
log("writing large .xlsx file");
90-
var largeExcelFile = dgen.writeData(fileName);
89+
var largeExcelFile = dgen.writeData(fileName, 200_000);
9190
log("finished writing large .xlsx file");
9291

9392
// import large generated excel to assay1

src/org/labkey/test/util/TestDataGenerator.java

Lines changed: 106 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,8 @@
1616
package org.labkey.test.util;
1717

1818
import org.apache.commons.csv.CSVFormat;
19-
import org.apache.commons.io.FileUtils;
2019
import org.apache.commons.lang3.StringUtils;
2120
import org.apache.commons.lang3.time.DateUtils;
22-
import org.apache.poi.xssf.streaming.SXSSFRow;
23-
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
2421
import org.jetbrains.annotations.NotNull;
2522
import org.jetbrains.annotations.Nullable;
2623
import org.junit.Assert;
@@ -37,15 +34,13 @@
3734
import org.labkey.remoteapi.query.SelectRowsResponse;
3835
import org.labkey.remoteapi.query.Sort;
3936
import org.labkey.serverapi.reader.TabLoader;
40-
import org.labkey.test.TestFileUtils;
4137
import org.labkey.test.WebTestHelper;
4238
import org.labkey.test.params.FieldDefinition;
4339
import org.labkey.test.util.data.ColumnNameMapper;
4440
import org.labkey.test.util.data.TestDataUtils;
4541
import org.labkey.test.util.query.QueryApiHelper;
4642

4743
import java.io.File;
48-
import java.io.FileOutputStream;
4944
import java.io.IOException;
5045
import java.text.SimpleDateFormat;
5146
import java.util.ArrayList;
@@ -54,8 +49,11 @@
5449
import java.util.Collections;
5550
import java.util.Date;
5651
import java.util.HashMap;
52+
import java.util.Iterator;
5753
import java.util.List;
5854
import java.util.Map;
55+
import java.util.NoSuchElementException;
56+
import java.util.Objects;
5957
import java.util.Set;
6058
import java.util.concurrent.ThreadLocalRandom;
6159
import java.util.function.Supplier;
@@ -313,7 +311,7 @@ public TestDataGenerator withGeneratedRows(int desiredRowCount)
313311

314312
public TestDataGenerator addDataSupplier(String columnName, Supplier<Object> supplier)
315313
{
316-
_dataSuppliers.put(columnName, ()-> supplier.get());
314+
_dataSuppliers.put(columnName, supplier);
317315
return this;
318316
}
319317

@@ -400,7 +398,7 @@ public List<PropertyDescriptor> getColumns()
400398

401399
public void generateRows(int numberOfRowsToGenerate)
402400
{
403-
if (_columns.keySet().size() == 0)
401+
if (_columns.isEmpty())
404402
throw new IllegalStateException("can't generate row data without column definitions");
405403

406404
for (int i= 0; i < numberOfRowsToGenerate; i++)
@@ -444,7 +442,7 @@ private Supplier<Object> getDefaultDataSupplier(String columnType)
444442
case "double":
445443
return ()-> randomDouble(0, 20);
446444
case "boolean":
447-
return ()-> randomBoolean();
445+
return this::randomBoolean;
448446
case "date":
449447
case "datetime":
450448
return ()-> randomDateString(DateUtils.addWeeks(new Date(), -39), new Date());
@@ -670,40 +668,6 @@ public String getDataAsTsv()
670668
return TestDataUtils.stringFromRowMaps(_rows, getFieldsForFile(), true, CSVFormat.TDF);
671669
}
672670

673-
public File writeGeneratedDataToExcel(String sheetName, String fileName) throws IOException
674-
{
675-
File file = new File(TestFileUtils.getTestTempDir(), fileName);
676-
FileUtils.forceMkdirParent(file);
677-
678-
try (SXSSFWorkbook workbook = new SXSSFWorkbook(1000); // only holds 1000 rows in memory
679-
FileOutputStream out = new FileOutputStream(file))
680-
{
681-
var sheet = workbook.createSheet(sheetName);
682-
683-
// write headers as row 0
684-
List<String> columnNames = getFieldsForFile();
685-
var headerRow = sheet.createRow(0);
686-
for (int i = 0; i < columnNames.size(); i++)
687-
{
688-
headerRow.createCell(i).setCellValue(columnNames.get(i));
689-
}
690-
691-
// write content
692-
for (int i = 0; i < _rows.size(); i++)
693-
{
694-
Map<String, Object> row = _rows.get(i);
695-
SXSSFRow currentRow = sheet.createRow(i + 1);
696-
for (int j = 0; j < columnNames.size(); j++)
697-
{
698-
currentRow.createCell(j).setCellValue(row.getOrDefault(columnNames.get(j), "").toString());
699-
}
700-
}
701-
workbook.write(out);
702-
}
703-
704-
return file;
705-
}
706-
707671
/**
708672
* Creates a file containing the contents of the current rows, formatted in TSV, CSV, or xlsx.
709673
* The file is written to the test temp dir
@@ -712,33 +676,43 @@ public File writeGeneratedDataToExcel(String sheetName, String fileName) throws
712676
*/
713677
public File writeData(String fileName)
714678
{
715-
String fileExtension = fileName.toLowerCase().substring(fileName.lastIndexOf('.') + 1);
716-
switch (fileExtension)
717-
{
718-
case "xlsx":
719-
case "xls":
720-
try
721-
{
722-
return writeGeneratedDataToExcel("sheet1", fileName);
723-
}
724-
catch (IOException e)
725-
{
726-
throw new RuntimeException(e);
727-
}
728-
case "csv":
729-
return writeData(fileName, CSVFormat.DEFAULT);
730-
case "tsv":
731-
return writeData(fileName, CSVFormat.TDF);
732-
default:
733-
throw new IllegalArgumentException("Unsupported file extension: " + fileExtension);
734-
}
679+
return writeData(fileName, new FileRowIterator(getFieldsForFile(), _rows));
735680
}
736681

737-
public File writeData(String fileName, CSVFormat format)
682+
/**
683+
* Generate rows and write to file without saving in data generator
684+
* @param fileName the name of the file, e.g. 'testDataFileForMyTest.tsv'
685+
* @param numRows the number of rows to generate
686+
* @return object pointing at created file
687+
*/
688+
public File writeData(String fileName, int numRows)
689+
{
690+
return writeData(fileName, new FileRowIterator(getFieldsForFile(), this::generateRow, numRows));
691+
}
692+
693+
/**
694+
* Creates a file containing the rows supplied by the given iterator, formatted in TSV, CSV, or xlsx.
695+
* The file is written to the test temp dir
696+
* @param fileName the name of the file, e.g. 'testDataFileForMyTest.tsv'
* @param rowIterator supplies the rows to write to the file
697+
* @return File object pointing at created file
698+
*/
699+
public File writeData(String fileName, Iterator<List<Object>> rowIterator)
738700
{
739701
try
740702
{
741-
return TestDataUtils.writeRowsToFile(fileName, TestDataUtils.rowListsFromMaps(_rows, getFieldsForFile()), format);
703+
String fileExtension = fileName.toLowerCase().substring(fileName.lastIndexOf('.') + 1);
704+
switch (fileExtension)
705+
{
706+
case "xlsx":
707+
case "xls":
708+
return TestDataUtils.writeRowsToExcel(fileName, rowIterator);
709+
case "csv":
710+
return TestDataUtils.writeRowsToFile(fileName, rowIterator, CSVFormat.DEFAULT);
711+
case "tsv":
712+
return TestDataUtils.writeRowsToFile(fileName, rowIterator, CSVFormat.TDF);
713+
default:
714+
throw new IllegalArgumentException("Unsupported file extension: " + fileExtension);
715+
}
742716
}
743717
catch (IOException e)
744718
{
@@ -915,3 +889,71 @@ public static boolean doesDomainExists(final String containerPath, final String
915889
}
916890

917891
}
892+
893+
class FileRowIterator implements Iterator<List<Object>>
894+
{
895+
private final List<String> headers;
896+
private final Iterator<Map<String, Object>> rows;
897+
898+
private boolean firstRow = true;
899+
900+
public FileRowIterator(@NotNull List<String> headers, @NotNull Iterator<Map<String, Object>> rows)
901+
{
902+
this.headers = Objects.requireNonNull(headers);
903+
this.rows = Objects.requireNonNull(rows);
904+
}
905+
906+
public FileRowIterator(@NotNull List<String> headers, @NotNull Supplier<Map<String, Object>> rowSupplier, final int rowCount)
907+
{
908+
this(headers, new Iterator<>()
909+
{
910+
int count = 0;
911+
912+
@Override
913+
public boolean hasNext()
914+
{
915+
return count < rowCount;
916+
}
917+
918+
@Override
919+
public Map<String, Object> next()
920+
{
921+
count++;
922+
return rowSupplier.get();
923+
}
924+
});
925+
}
926+
927+
public FileRowIterator(@NotNull List<String> headers, @NotNull List<Map<String, Object>> rows)
928+
{
929+
this(headers, rows.iterator());
930+
}
931+
932+
@Override
933+
public boolean hasNext()
934+
{
935+
return firstRow || rows.hasNext();
936+
}
937+
938+
@Override
939+
public List<Object> next()
940+
{
941+
if (!hasNext())
942+
throw new NoSuchElementException();
943+
944+
if (firstRow)
945+
{
946+
firstRow = false;
947+
return Collections.unmodifiableList(headers);
948+
}
949+
else
950+
{
951+
return rowMapToList(rows.next());
952+
}
953+
}
954+
955+
private List<Object> rowMapToList(Map<String, Object> row)
956+
{
957+
return headers.stream().map(h -> row.getOrDefault(h, "")).toList();
958+
}
959+
}

src/org/labkey/test/util/data/TestDataUtils.java

Lines changed: 62 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,18 @@
77
import org.apache.commons.io.FileUtils;
88
import org.apache.commons.io.IOUtils;
99
import org.apache.commons.lang3.StringUtils;
10+
import org.apache.poi.xssf.streaming.SXSSFRow;
11+
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
1012
import org.jetbrains.annotations.NotNull;
1113
import org.labkey.serverapi.reader.DataLoader;
1214
import org.labkey.serverapi.reader.TabLoader;
1315
import org.labkey.test.TestFileUtils;
1416
import org.labkey.test.params.FieldDefinition;
1517
import org.labkey.test.util.TestDataGenerator;
18+
import org.labkey.test.util.TestLogger;
1619

1720
import java.io.File;
21+
import java.io.FileOutputStream;
1822
import java.io.FileReader;
1923
import java.io.FileWriter;
2024
import java.io.IOException;
@@ -24,6 +28,7 @@
2428
import java.nio.charset.StandardCharsets;
2529
import java.util.ArrayList;
2630
import java.util.Collection;
31+
import java.util.Iterator;
2732
import java.util.LinkedHashMap;
2833
import java.util.LinkedHashSet;
2934
import java.util.List;
@@ -314,15 +319,70 @@ public static <T> File writeRowsToCsv(String fileName, List<List<T>> rows) throw
314319
}
315320

316321
public static @NotNull <T> File writeRowsToFile(String fileName, List<List<T>> rows, CSVFormat format) throws IOException
322+
{
323+
return writeRowsToFile(fileName, rows.iterator(), format);
324+
}
325+
326+
public static @NotNull <T> File writeRowsToFile(String fileName, Iterator<List<T>> rowIterator, CSVFormat format) throws IOException
317327
{
318328
File file = new File(TestFileUtils.getTestTempDir(), fileName);
319329
FileUtils.forceMkdirParent(file);
320330

331+
TestLogger.log("Writing data file " + file.getAbsolutePath());
332+
321333
try (CSVPrinter printer = new CSVPrinter(new FileWriter(file, StandardCharsets.UTF_8), format)) {
322-
for (List<T> row : rows)
334+
while (rowIterator.hasNext())
323335
{
324-
printer.printRecord(row);
336+
printer.printRecord(rowIterator.next());
337+
}
338+
}
339+
340+
return file;
341+
}
342+
343+
public static @NotNull <T> File writeRowsToExcel(String fileName, List<List<T>> rows) throws IOException
344+
{
345+
return writeRowsToExcel(fileName, rows.iterator());
346+
}
347+
348+
public static @NotNull <T> File writeRowsToExcel(String fileName, Iterator<List<T>> rowIterator) throws IOException
349+
{
350+
File file = new File(TestFileUtils.getTestTempDir(), fileName);
351+
FileUtils.forceMkdirParent(file);
352+
353+
TestLogger.log("Writing data file " + file.getAbsolutePath());
354+
355+
try (SXSSFWorkbook workbook = new SXSSFWorkbook(1000); // only holds 1000 rows in memory
356+
FileOutputStream out = new FileOutputStream(file))
357+
{
358+
var sheet = workbook.createSheet("sheet1");
359+
360+
// write headers as row 0
361+
List<T> columnNames = rowIterator.next();
362+
var headerRow = sheet.createRow(0);
363+
for (int col = 0; col < columnNames.size(); col++)
364+
{
365+
if (columnNames.get(col) instanceof String s)
366+
{
367+
headerRow.createCell(col).setCellValue(s);
368+
}
369+
else
370+
{
371+
throw new IllegalArgumentException("Expected column headers to be Strings but got " + columnNames.get(col).getClass().getSimpleName());
372+
}
373+
}
374+
375+
// write content
376+
for (int rowNum = 1; rowIterator.hasNext(); rowNum++)
377+
{
378+
List<T> row = rowIterator.next();
379+
SXSSFRow currentRow = sheet.createRow(rowNum + 1);
380+
for (int col = 0; col < columnNames.size(); col++)
381+
{
382+
currentRow.createCell(col).setCellValue(row.get(col).toString());
383+
}
325384
}
385+
workbook.write(out);
326386
}
327387

328388
return file;

0 commit comments

Comments
 (0)