Skip to content

Commit 1e413a6

Browse files
Merge pull request #25 from gemstone/csv-extensions
Add extension methods for parsing CSV data
2 parents d0a89fb + 39a772d commit 1e413a6

File tree

1 file changed

+105
-0
lines changed

1 file changed

+105
-0
lines changed

src/Gemstone/StringExtensions/StringExtensions.cs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2474,4 +2474,109 @@ public static string Interpolate(this string format, IEnumerable<KeyValuePair<st
24742474

24752475
return string.Format(indexed, parameterValues);
24762476
}
2477+
2478+
/// <summary>
/// Splits a complete CSV document held in a string into its rows and fields.
/// </summary>
/// <param name="csv">The CSV text to parse</param>
/// <returns>A jagged array with one entry per row, each entry holding the fields of that row.</returns>
/// <exception cref="ArgumentNullException"><paramref name="csv"/> is <c>null</c>.</exception>
public static string[][] ParseAsCSV(this string csv)
{
    using StringReader source = new(csv);
    List<string[]> parsedRows = [];

    // ReadCSVRow signals the end of the document by returning null
    while (ReadCSVRow(source) is { } fields)
        parsedRows.Add(fields);

    return parsedRows.ToArray();
}
2497+
2498+
/// <summary>
/// Reads characters from the text reader and returns a single row of CSV data.
/// </summary>
/// <param name="reader">The text reader providing the CSV data</param>
/// <returns>An array of fields in one row of CSV data or <c>null</c> if there is no more data available from the text reader.</returns>
/// <remarks>
/// <para>
/// Fields are separated by commas and rows are terminated by <c>\r</c>, <c>\n</c>, or <c>\r\n</c>.
/// A field that starts with a double quote may contain commas and line breaks; a pair of
/// double quotes inside such a field represents one literal double quote. A blank line
/// produces an empty array rather than <c>null</c>.
/// </para>
/// <para>
/// Only the characters belonging to the returned row (including its line terminator) are
/// consumed, so the method can be called repeatedly on the same reader to read consecutive rows.
/// Recognizing <c>\r\n</c> as a single terminator relies on <see cref="TextReader.Peek"/>; if the
/// reader cannot peek, the <c>\n</c> is left in the reader and shows up as an empty row on the next call.
/// </para>
/// </remarks>
public static string[]? ReadCSVRow(this TextReader reader)
{
    List<string> fields = [];

    // c always holds the most recent character already consumed from the reader
    int c = reader.Read();

    if (EOF())
        return null;

    while (!EOF() && !EOL())
    {
        if (Matches('"'))
            fields.Add(ReadQuoted());
        else
            fields.Add(ReadToComma());

        if (Matches(','))
        {
            Advance();

            // Edge case for when the last
            // field in a row is empty
            if (EOF() || EOL())
                fields.Add(string.Empty);
        }
    }

    // The terminator held in c has already been consumed from the reader, so the
    // only character that may still belong to this row is the '\n' of a "\r\n" pair.
    // Peek instead of Advance: Advance would pull the first character of the next
    // row into c, and that character would be lost when this method returns.
    if (Matches('\r') && reader.Peek() == '\n')
        reader.Read();

    return [.. fields];

    // Gets current character and advances to read the next character
    char Advance() => (char)(c, c = reader.Read()).c;

    bool Matches(char m) => c == m;
    bool EOL() => Matches('\r') || Matches('\n');
    bool EOF() => c == -1;

    // Collects characters up to (not including) the next comma, line break, or end of data
    string ReadToComma()
    {
        StringBuilder token = new();

        while (!EOF() && !EOL() && !Matches(','))
            token.Append(Advance());

        return token.ToString();
    }

    // Reads a field that starts with a double quote; the current character is the opening quote
    string ReadQuoted()
    {
        StringBuilder token = new();

        // Skip past the opening quote
        Advance();

        while (true)
        {
            // Line breaks and commas are ordinary characters inside quotes
            while (!EOF() && !Matches('"'))
                token.Append(Advance());

            // Skip past the end quote
            if (!EOF())
                Advance();

            // Check if it's actually an end quote vs an escaped quote
            if (Matches('"'))
                token.Append(Advance());
            else
                break;
        }

        // Excel treats everything after the
        // end quote as if it were not quoted
        if (!EOF() && !EOL() && !Matches(','))
            token.Append(ReadToComma());

        return token.ToString();
    }
}
24772582
}

0 commit comments

Comments
 (0)