Skip to content

Commit 1c86eea

Browse files
committed
New Tokenizer implementation
Most tests pass; the few exceptions were either not needed or were testing semantics that change with the new implementation (e.g., spaces in option values are now allowed).
1 parent 85cf9d3 commit 1c86eea

File tree

3 files changed

+174
-174
lines changed

3 files changed

+174
-174
lines changed

src/CommandLine/Core/Tokenizer.cs

Lines changed: 159 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -16,50 +16,184 @@ public static Result<IEnumerable<Token>, Error> Tokenize(
1616
IEnumerable<string> arguments,
1717
Func<string, NameLookupResult> nameLookup)
1818
{
19-
return Tokenizer.Tokenize(arguments, nameLookup, tokens => tokens);
19+
return Tokenizer.Tokenize(arguments, nameLookup, ignoreUnknownArguments:false, allowDashDash:true);
2020
}
2121

2222
public static Result<IEnumerable<Token>, Error> Tokenize(
2323
IEnumerable<string> arguments,
2424
Func<string, NameLookupResult> nameLookup,
25-
Func<IEnumerable<Token>, IEnumerable<Token>> normalize)
25+
bool ignoreUnknownArguments,
26+
bool allowDashDash)
2627
{
2728
var errors = new List<Error>();
2829
Action<Error> onError = errors.Add;
2930

30-
var tokens = (from arg in arguments
31-
from token in !arg.StartsWith("-", StringComparison.Ordinal)
32-
? new[] { Token.Value(arg) }
33-
: arg.StartsWith("--", StringComparison.Ordinal)
34-
? TokenizeLongName(arg, onError)
35-
: TokenizeShortName(arg, nameLookup)
36-
select token)
37-
.Memoize();
31+
int consumeNext = 0;
32+
var tokens = new List<Token>();
33+
Action<string> addValue = (s => tokens.Add(new Value(s)));
34+
Action<string> addName = (s => tokens.Add(new Name(s)));
3835

39-
var normalized = normalize(tokens).Memoize();
36+
var enumerator = arguments.GetEnumerator();
37+
while (enumerator.MoveNext())
38+
{
39+
string arg = enumerator.Current;
40+
// TODO: Turn this into a switch statement with pattern matching
41+
if (arg == null)
42+
{
43+
continue;
44+
}
4045

41-
var unkTokens = (from t in normalized where t.IsName() && nameLookup(t.Text) == NameLookupResult.NoOptionFound select t).Memoize();
46+
if (consumeNext > 0)
47+
{
48+
addValue(arg);
49+
consumeNext = consumeNext - 1;
50+
continue;
51+
}
4252

43-
return Result.Succeed(normalized.Where(x => !unkTokens.Contains(x)), errors.Concat(from t in unkTokens select new UnknownOptionError(t.Text)));
44-
}
53+
if (arg == "--")
54+
{
55+
if (allowDashDash)
56+
{
57+
consumeNext = System.Int32.MaxValue;
58+
continue;
59+
}
60+
else
61+
{
62+
addValue(arg);
63+
continue;
64+
}
65+
}
4566

46-
public static Result<IEnumerable<Token>, Error> PreprocessDashDash(
47-
IEnumerable<string> arguments,
48-
Func<IEnumerable<string>, Result<IEnumerable<Token>, Error>> tokenizer)
49-
{
50-
if (arguments.Any(arg => arg.EqualsOrdinal("--")))
51-
{
52-
var tokenizerResult = tokenizer(arguments.TakeWhile(arg => !arg.EqualsOrdinal("--")));
53-
var values = arguments.SkipWhile(arg => !arg.EqualsOrdinal("--")).Skip(1).Select(Token.Value);
54-
return tokenizerResult.Map(tokens => tokens.Concat(values));
67+
if (arg.StartsWith("--"))
68+
{
69+
if (arg.Contains("="))
70+
{
71+
string[] parts = arg.Substring(2).Split(new char[] { '=' }, 2);
72+
if (String.IsNullOrWhiteSpace(parts[0]) || parts[0].Contains(" "))
73+
{
74+
onError(new BadFormatTokenError(arg));
75+
continue;
76+
}
77+
else
78+
{
79+
var name = parts[0];
80+
var tokenType = nameLookup(name);
81+
if (tokenType == NameLookupResult.NoOptionFound)
82+
{
83+
if (ignoreUnknownArguments)
84+
{
85+
continue;
86+
}
87+
else
88+
{
89+
onError(new UnknownOptionError(name));
90+
continue;
91+
}
92+
}
93+
addName(parts[0]);
94+
addValue(parts[1]);
95+
continue;
96+
}
97+
}
98+
else
99+
{
100+
var name = arg.Substring(2);
101+
var tokenType = nameLookup(name);
102+
if (tokenType == NameLookupResult.OtherOptionFound)
103+
{
104+
addName(name);
105+
consumeNext = 1;
106+
continue;
107+
}
108+
else if (tokenType == NameLookupResult.NoOptionFound)
109+
{
110+
if (ignoreUnknownArguments)
111+
{
112+
// When ignoreUnknownArguments is true and AutoHelp is true, calling code is responsible for
113+
// setting up nameLookup so that it will return a known name for --help, so that we don't skip it here
114+
continue;
115+
}
116+
else
117+
{
118+
onError(new UnknownOptionError(name));
119+
continue;
120+
}
121+
}
122+
else
123+
{
124+
addName(name);
125+
continue;
126+
}
127+
}
128+
}
129+
130+
if (arg == "-")
131+
{
132+
// A single hyphen is always a value (it usually means "read from stdin" or "write to stdout")
133+
addValue(arg);
134+
continue;
135+
}
136+
137+
if (arg.StartsWith("-"))
138+
{
139+
// First option char that requires a value means we swallow the rest of the string as the value
140+
// But if there is no rest of the string, then instead we swallow the next argument
141+
string chars = arg.Substring(1);
142+
int len = chars.Length;
143+
if (len > 0 && Char.IsDigit(chars[0]))
144+
{
145+
// Assume it's a negative number
146+
addValue(arg);
147+
continue;
148+
}
149+
for (int i = 0; i < len; i++)
150+
{
151+
var s = new String(chars[i], 1);
152+
var tokenType = nameLookup(s);
153+
if (tokenType == NameLookupResult.OtherOptionFound)
154+
{
155+
addName(s);
156+
if (i+1 < len)
157+
{
158+
addValue(chars.Substring(i+1));
159+
break;
160+
}
161+
else
162+
{
163+
consumeNext = 1;
164+
}
165+
}
166+
else if (tokenType == NameLookupResult.NoOptionFound)
167+
{
168+
if (ignoreUnknownArguments)
169+
{
170+
continue;
171+
}
172+
else
173+
{
174+
onError(new UnknownOptionError(s));
175+
}
176+
}
177+
else
178+
{
179+
addName(s);
180+
}
181+
}
182+
continue;
183+
}
184+
185+
// If we get this far, it's a plain value
186+
addValue(arg);
55187
}
56-
return tokenizer(arguments);
188+
189+
return Result.Succeed<IEnumerable<Token>, Error>(tokens.AsEnumerable(), errors.AsEnumerable());
57190
}
58191

59192
public static Result<IEnumerable<Token>, Error> ExplodeOptionList(
60193
Result<IEnumerable<Token>, Error> tokenizerResult,
61194
Func<string, Maybe<char>> optionSequenceWithSeparatorLookup)
62195
{
196+
// TODO: I don't like how this works. I don't want "-s foo;bar baz" to put three values into -s. Let's instead have a third token type, List, besides Name and Value.
63197
var tokens = tokenizerResult.SucceededWith().Memoize();
64198

65199
var replaces = tokens.Select((t, i) =>
@@ -77,33 +211,6 @@ public static Result<IEnumerable<Token>, Error> ExplodeOptionList(
77211
return Result.Succeed(flattened, tokenizerResult.SuccessMessages());
78212
}
79213

80-
public static IEnumerable<Token> Normalize(
81-
IEnumerable<Token> tokens, Func<string, bool> nameLookup)
82-
{
83-
var indexes =
84-
from i in
85-
tokens.Select(
86-
(t, i) =>
87-
{
88-
var prev = tokens.ElementAtOrDefault(i - 1).ToMaybe();
89-
return t.IsValue() && ((Value)t).ExplicitlyAssigned
90-
&& prev.MapValueOrDefault(p => p.IsName() && !nameLookup(p.Text), false)
91-
? Maybe.Just(i)
92-
: Maybe.Nothing<int>();
93-
}).Where(i => i.IsJust())
94-
select i.FromJustOrFail();
95-
96-
var toExclude =
97-
from t in
98-
tokens.Select((t, i) => indexes.Contains(i) ? Maybe.Just(t) : Maybe.Nothing<Token>())
99-
.Where(t => t.IsJust())
100-
select t.FromJustOrFail();
101-
102-
var normalized = tokens.Where(t => toExclude.Contains(t) == false);
103-
104-
return normalized;
105-
}
106-
107214
public static Func<
108215
IEnumerable<string>,
109216
IEnumerable<OptionSpecification>,
@@ -115,94 +222,10 @@ public static Func<
115222
{
116223
return (arguments, optionSpecs) =>
117224
{
118-
var normalize = ignoreUnknownArguments
119-
? toks => Tokenizer.Normalize(toks,
120-
name => NameLookup.Contains(name, optionSpecs, nameComparer) != NameLookupResult.NoOptionFound)
121-
: new Func<IEnumerable<Token>, IEnumerable<Token>>(toks => toks);
122-
123-
var tokens = enableDashDash
124-
? Tokenizer.PreprocessDashDash(
125-
arguments,
126-
args =>
127-
Tokenizer.Tokenize(args, name => NameLookup.Contains(name, optionSpecs, nameComparer), normalize))
128-
: Tokenizer.Tokenize(arguments, name => NameLookup.Contains(name, optionSpecs, nameComparer), normalize);
225+
var tokens = Tokenizer.Tokenize(arguments, name => NameLookup.Contains(name, optionSpecs, nameComparer), ignoreUnknownArguments, enableDashDash);
129226
var explodedTokens = Tokenizer.ExplodeOptionList(tokens, name => NameLookup.HavingSeparator(name, optionSpecs, nameComparer));
130227
return explodedTokens;
131228
};
132229
}
133-
134-
private static IEnumerable<Token> TokenizeShortName(
135-
string value,
136-
Func<string, NameLookupResult> nameLookup)
137-
{
138-
if (value.Length > 1 && value[0] == '-' && value[1] != '-')
139-
{
140-
var text = value.Substring(1);
141-
142-
if (char.IsDigit(text[0]))
143-
{
144-
yield return Token.Value(value);
145-
yield break;
146-
}
147-
148-
if (value.Length == 2)
149-
{
150-
yield return Token.Name(text);
151-
yield break;
152-
}
153-
154-
var i = 0;
155-
foreach (var c in text)
156-
{
157-
var n = new string(c, 1);
158-
var r = nameLookup(n);
159-
// Assume first char is an option
160-
if (i > 0 && r == NameLookupResult.NoOptionFound) break;
161-
i++;
162-
yield return Token.Name(n);
163-
// If option expects a value (other than a boolean), assume following chars are that value
164-
if (r == NameLookupResult.OtherOptionFound) break;
165-
}
166-
167-
if (i < text.Length)
168-
{
169-
yield return Token.Value(text.Substring(i));
170-
}
171-
}
172-
}
173-
174-
private static IEnumerable<Token> TokenizeLongName(
175-
string value,
176-
Action<Error> onError)
177-
{
178-
if (value.Length > 2 && value.StartsWith("--", StringComparison.Ordinal))
179-
{
180-
var text = value.Substring(2);
181-
var equalIndex = text.IndexOf('=');
182-
if (equalIndex <= 0)
183-
{
184-
yield return Token.Name(text);
185-
yield break;
186-
}
187-
if (equalIndex == 1) // "--="
188-
{
189-
onError(new BadFormatTokenError(value));
190-
yield break;
191-
}
192-
193-
var tokenMatch = Regex.Match(text, "^([^=]+)=([^ ].*)$");
194-
195-
if (tokenMatch.Success)
196-
{
197-
yield return Token.Name(tokenMatch.Groups[1].Value);
198-
yield return Token.Value(tokenMatch.Groups[2].Value, true);
199-
}
200-
else
201-
{
202-
onError(new BadFormatTokenError(value));
203-
yield break;
204-
}
205-
}
206-
}
207230
}
208231
}

tests/CommandLine.Tests/Unit/Core/InstanceBuilderTests.cs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -408,12 +408,10 @@ public void Double_dash_force_subsequent_arguments_as_values()
408408
};
409409
var arguments = new[] { "--stringvalue", "str1", "--", "10", "-a", "--bee", "-c", "20" };
410410

411-
// Exercize system
411+
// Exercize system
412412
var result = InstanceBuilder.Build(
413413
Maybe.Just<Func<Simple_Options_With_Values>>(() => new Simple_Options_With_Values()),
414-
(a, optionSpecs) =>
415-
Tokenizer.PreprocessDashDash(a,
416-
args => Tokenizer.Tokenize(args, name => NameLookup.Contains(name, optionSpecs, StringComparer.Ordinal))),
414+
(args, optionSpecs) => Tokenizer.ConfigureTokenizer(StringComparer.Ordinal, false, true)(args, optionSpecs),
417415
arguments,
418416
StringComparer.Ordinal,
419417
false,

0 commit comments

Comments
 (0)