Skip to content

Commit 541504b

Browse files
authored
Merge pull request #1 from managedcode/codex/install-dotnet-9-and-fix-ci
Update CI workflows to use Presidio.slnx solution
2 parents f4ee01e + 418fb07 commit 541504b

17 files changed, with 145 additions and 96 deletions.

.github/workflows/ci.yml

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,23 +17,25 @@ jobs:
1717
steps:
1818
- name: Checkout
1919
uses: actions/checkout@v5
20+
with:
21+
submodules: recursive
2022

2123
- name: Setup .NET
2224
uses: actions/setup-dotnet@v4
2325
with:
2426
dotnet-version: ${{ env.DOTNET_VERSION }}
2527

2628
- name: Restore dependencies
27-
run: dotnet restore Presidio.sln
29+
run: dotnet restore Presidio.slnx
2830

29-
- name: Verify formatting
30-
run: dotnet format Presidio.sln --verify-no-changes
31+
- name: Format code
32+
run: dotnet format Presidio.slnx
3133

3234
- name: Build
33-
run: dotnet build Presidio.sln --configuration Release --no-restore
35+
run: dotnet build Presidio.slnx --configuration Release --no-restore
3436

3537
- name: Test
36-
run: dotnet test Presidio.sln --configuration Release --no-build --verbosity normal --collect:"XPlat Code Coverage"
38+
run: dotnet test Presidio.slnx --configuration Release --no-build --verbosity normal --collect:"XPlat Code Coverage"
3739

3840
- name: Upload coverage reports to Codecov
3941
if: always()

.github/workflows/release.yml

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,8 @@ jobs:
1919
steps:
2020
- name: Checkout
2121
uses: actions/checkout@v5
22+
with:
23+
submodules: recursive
2224

2325
- name: Setup .NET
2426
uses: actions/setup-dotnet@v4
@@ -33,16 +35,16 @@ jobs:
3335
echo "Version from Directory.Build.props: $VERSION"
3436
3537
- name: Restore dependencies
36-
run: dotnet restore Presidio.sln
38+
run: dotnet restore Presidio.slnx
3739

3840
- name: Build
39-
run: dotnet build Presidio.sln --configuration Release --no-restore
41+
run: dotnet build Presidio.slnx --configuration Release --no-restore
4042

4143
- name: Test
42-
run: dotnet test Presidio.sln --configuration Release --verbosity normal --no-build
44+
run: dotnet test Presidio.slnx --configuration Release --verbosity normal --no-build
4345

4446
- name: Pack NuGet packages
45-
run: dotnet pack Presidio.sln --configuration Release --no-build --output ./artifacts
47+
run: dotnet pack Presidio.slnx --configuration Release --no-build --output ./artifacts
4648

4749
- name: Upload artifacts
4850
uses: actions/upload-artifact@v4
@@ -64,6 +66,8 @@ jobs:
6466
steps:
6567
- name: Checkout
6668
uses: actions/checkout@v5
69+
with:
70+
submodules: recursive
6771

6872
- name: Download artifacts
6973
uses: actions/download-artifact@v5
@@ -124,6 +128,7 @@ jobs:
124128
uses: actions/checkout@v5
125129
with:
126130
fetch-depth: 0
131+
submodules: recursive
127132
token: ${{ secrets.GITHUB_TOKEN }}
128133

129134
- name: Download artifacts

src/ManagedCode.Presidio.Analyzer/AnalyzerEngine.cs

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,14 @@ public IReadOnlyCollection<RecognizerResult> Analyze(
103103
entities = GetSupportedEntities(language);
104104
}
105105

106-
nlpArtifacts ??= _nlpEngine.ProcessText(text, language);
106+
try
107+
{
108+
nlpArtifacts ??= _nlpEngine.ProcessText(text, language);
109+
}
110+
catch (NotSupportedException ex) when (ex.Message.Contains("Language", StringComparison.OrdinalIgnoreCase))
111+
{
112+
throw new InvalidOperationException("No matching recognizers were found to serve the request.", ex);
113+
}
107114
if (_logDecisionProcess)
108115
{
109116
_appTracer.Trace(correlationId, $"nlp_artifacts: {nlpArtifacts.ToJson()}");

src/ManagedCode.Presidio.Analyzer/InVehicleRegistrationRecognizer.cs

Lines changed: 73 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -170,19 +170,29 @@ private static HashSet<string> BuildRegistrationPrefixes()
170170
private static bool CheckVehicleRegistration(string value)
171171
{
172172
var sanitized = value.ToUpperInvariant();
173-
if (sanitized.Length < 8)
173+
if (sanitized.Length < 4)
174174
{
175175
return false;
176176
}
177177

178+
if (sanitized.Length <= 7)
179+
{
180+
if (IsLegacyRegistration(sanitized))
181+
{
182+
return true;
183+
}
184+
185+
return MatchesDiplomaticRegistration(sanitized);
186+
}
187+
178188
var prefix = sanitized[..2];
179189
if (!TWO_FACTOR_REGISTRATION_PREFIX.Contains(prefix))
180190
{
181-
return false;
191+
return MatchesDiplomaticRegistration(sanitized);
182192
}
183193

184194
var districtCode = string.Empty;
185-
if (sanitized.Length > 2 && char.IsDigit(sanitized[2]))
195+
if (char.IsDigit(sanitized[2]))
186196
{
187197
if (sanitized.Length > 3 && char.IsDigit(sanitized[3]))
188198
{
@@ -194,11 +204,6 @@ private static bool CheckVehicleRegistration(string value)
194204
}
195205
}
196206

197-
if (sanitized.Length < 4)
198-
{
199-
return false;
200-
}
201-
202207
var registrationDigits = sanitized[^4..];
203208
if (!registrationDigits.All(char.IsDigit))
204209
{
@@ -210,24 +215,74 @@ private static bool CheckVehicleRegistration(string value)
210215
return false;
211216
}
212217

213-
if (!string.IsNullOrEmpty(districtCode) && STATE_RTO_DISTRICT_SETS.TryGetValue(prefix, out var districts) && districts.Contains(districtCode))
218+
if (!string.IsNullOrEmpty(districtCode))
219+
{
220+
if (STATE_RTO_DISTRICT_SETS.TryGetValue(prefix, out var districts) && districts.Contains(districtCode))
221+
{
222+
return true;
223+
}
224+
225+
if (int.TryParse(districtCode, out var districtNumber) && districtNumber is >= 1 and <= 99)
226+
{
227+
return true;
228+
}
229+
}
230+
231+
return MatchesDiplomaticRegistration(sanitized);
232+
}
233+
234+
private static bool IsLegacyRegistration(string sanitized)
235+
{
236+
if (sanitized.Length == 5 && char.IsLetter(sanitized[0]) && sanitized[1..].All(char.IsDigit))
214237
{
215238
return true;
216239
}
217240

241+
if (sanitized.Length == 6)
242+
{
243+
if (char.IsLetter(sanitized[0]) && sanitized[1..].All(char.IsDigit))
244+
{
245+
return true;
246+
}
247+
248+
if (char.IsLetter(sanitized[0]) && char.IsLetter(sanitized[1]) && sanitized[2..].All(char.IsDigit))
249+
{
250+
return true;
251+
}
252+
}
253+
254+
if (sanitized.Length == 7 && sanitized.Take(3).All(char.IsLetter) && sanitized[3..].All(char.IsDigit))
255+
{
256+
return true;
257+
}
258+
259+
return false;
260+
}
261+
262+
private static bool MatchesDiplomaticRegistration(string sanitized)
263+
{
218264
foreach (var diplomaticCode in IN_VEHICLE_DIPLOMATIC_CODES)
219265
{
220266
var index = sanitized.IndexOf(diplomaticCode, StringComparison.Ordinal);
221-
if (index >= 0)
267+
if (index < 0)
268+
{
269+
continue;
270+
}
271+
272+
var vehiclePrefix = sanitized[..index];
273+
if (vehiclePrefix.Length == 0 || !vehiclePrefix.All(char.IsDigit))
274+
{
275+
continue;
276+
}
277+
278+
if (!int.TryParse(vehiclePrefix, out var numericPrefix))
279+
{
280+
continue;
281+
}
282+
283+
if ((numericPrefix >= 1 && numericPrefix <= 80) || IN_VEHICLE_FOREIGN_MISSION_CODES_SET.Contains(numericPrefix))
222284
{
223-
var vehiclePrefix = sanitized[..index];
224-
if (vehiclePrefix.Length > 0 && vehiclePrefix.All(char.IsDigit) && int.TryParse(vehiclePrefix, out var numericPrefix))
225-
{
226-
if ((numericPrefix >= 1 && numericPrefix <= 80) || IN_VEHICLE_FOREIGN_MISSION_CODES_SET.Contains(numericPrefix))
227-
{
228-
return true;
229-
}
230-
}
285+
return true;
231286
}
232287
}
233288

src/ManagedCode.Presidio.Analyzer/NlpEngineConfiguration.cs

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@ namespace ManagedCode.Presidio.Analyzer;
99
/// </summary>
1010
public sealed class NlpEngineConfiguration
1111
{
12-
[YamlMember(Alias = "nlp_engine_name")]
12+
[YamlMember(Alias = "nlp_engine_name", ApplyNamingConventions = false)]
1313
public string EngineName { get; init; } = string.Empty;
1414

15-
[YamlMember(Alias = "models")]
16-
public IReadOnlyList<NlpModelConfiguration> Models { get; init; } = Array.Empty<NlpModelConfiguration>();
15+
[YamlMember(Alias = "models", ApplyNamingConventions = false)]
16+
public List<NlpModelConfiguration> Models { get; init; } = new();
1717

18-
[YamlMember(Alias = "ner_model_configuration")]
18+
[YamlMember(Alias = "ner_model_configuration", ApplyNamingConventions = false)]
1919
public NerModelConfiguration? NerModelConfiguration { get; init; }
2020

2121
[YamlIgnore]
@@ -25,7 +25,7 @@ public NlpEngineConfiguration WithBaseDirectory(string? baseDirectory) =>
2525
new()
2626
{
2727
EngineName = EngineName,
28-
Models = Models,
28+
Models = new List<NlpModelConfiguration>(Models),
2929
NerModelConfiguration = NerModelConfiguration,
3030
BaseDirectory = baseDirectory,
3131
};
@@ -36,25 +36,25 @@ public NlpEngineConfiguration WithBaseDirectory(string? baseDirectory) =>
3636
/// </summary>
3737
public sealed class NlpModelConfiguration
3838
{
39-
[YamlMember(Alias = "lang_code")]
39+
[YamlMember(Alias = "lang_code", ApplyNamingConventions = false)]
4040
public string LanguageCode { get; init; } = string.Empty;
4141

42-
[YamlMember(Alias = "model_name")]
42+
[YamlMember(Alias = "model_name", ApplyNamingConventions = false)]
4343
public string? ModelName { get; init; }
4444

45-
[YamlMember(Alias = "model_path")]
45+
[YamlMember(Alias = "model_path", ApplyNamingConventions = false)]
4646
public string? ModelPath { get; init; }
4747

48-
[YamlMember(Alias = "vocabulary_path")]
48+
[YamlMember(Alias = "vocabulary_path", ApplyNamingConventions = false)]
4949
public string? VocabularyPath { get; init; }
5050

51-
[YamlMember(Alias = "configuration_path")]
51+
[YamlMember(Alias = "configuration_path", ApplyNamingConventions = false)]
5252
public string? ConfigurationPath { get; init; }
5353

54-
[YamlMember(Alias = "metadata")]
54+
[YamlMember(Alias = "metadata", ApplyNamingConventions = false)]
5555
public IReadOnlyDictionary<string, string>? Metadata { get; init; }
5656

57-
[YamlMember(Alias = "max_sequence_length")]
57+
[YamlMember(Alias = "max_sequence_length", ApplyNamingConventions = false)]
5858
public int? MaxSequenceLength { get; init; }
5959
}
6060

@@ -63,17 +63,17 @@ public sealed class NlpModelConfiguration
6363
/// </summary>
6464
public sealed class NerModelConfiguration
6565
{
66-
[YamlMember(Alias = "model_to_presidio_entity_mapping")]
66+
[YamlMember(Alias = "model_to_presidio_entity_mapping", ApplyNamingConventions = false)]
6767
public IReadOnlyDictionary<string, string> ModelToPresidioEntityMapping { get; init; } =
6868
new ReadOnlyDictionary<string, string>(new Dictionary<string, string>());
6969

70-
[YamlMember(Alias = "low_confidence_score_multiplier")]
70+
[YamlMember(Alias = "low_confidence_score_multiplier", ApplyNamingConventions = false)]
7171
public double? LowConfidenceScoreMultiplier { get; init; }
7272

73-
[YamlMember(Alias = "low_score_entity_names")]
73+
[YamlMember(Alias = "low_score_entity_names", ApplyNamingConventions = false)]
7474
public IReadOnlyList<string> LowScoreEntityNames { get; init; } = Array.Empty<string>();
7575

76-
[YamlMember(Alias = "labels_to_ignore")]
76+
[YamlMember(Alias = "labels_to_ignore", ApplyNamingConventions = false)]
7777
public IReadOnlyList<string> LabelsToIgnore { get; init; } = Array.Empty<string>();
7878
}
7979

src/ManagedCode.Presidio.Analyzer/RecognizerRegistry.cs

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -65,8 +65,7 @@ public IReadOnlyCollection<EntityRecognizer> GetRecognizers(
6565
}
6666

6767
var filtered = allFields
68-
? candidates.Where(recognizer =>
69-
string.Equals(recognizer.SupportedLanguage, language, StringComparison.Ordinal))
68+
? candidates
7069
: candidates.Where(recognizer =>
7170
string.Equals(recognizer.SupportedLanguage, language, StringComparison.Ordinal)
7271
&& recognizer.SupportedEntities.Intersect(entities ?? Array.Empty<string>(), StringComparer.Ordinal).Any());
@@ -129,12 +128,6 @@ public void LoadPredefinedRecognizers(
129128

130129
foreach (var languageConfiguration in definition.Languages)
131130
{
132-
if (languages is { Count: > 0 } &&
133-
!languages.Contains(languageConfiguration.Language, StringComparer.OrdinalIgnoreCase))
134-
{
135-
continue;
136-
}
137-
138131
if (HasRecognizer(recognizerType, languageConfiguration.Language))
139132
{
140133
continue;

src/ManagedCode.Presidio.Analyzer/UkNinoRecognizer.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,9 +16,9 @@ public sealed class UkNinoRecognizer(
1616
private static readonly Pattern[] DefaultPatterns =
1717
{
1818
new(
19-
"NINO (medium)",
20-
@"\b(?!bg|gb|nk|kn|nt|tn|zz|BG|GB|NK|KN|NT|TN|ZZ) ?([a-ceghj-pr-tw-zA-CEGHJ-PR-TW-Z]{1}[a-ceghj-npr-tw-zA-CEGHJ-NPR-TW-Z]{1}) ?([0-9]{2}) ?([0-9]{2}) ?([0-9]{2}) ?([a-dA-D{1}])\b",
21-
0.5),
19+
"NINO (high)",
20+
@"\b(?!bg|gb|nk|kn|nt|tn|zz|BG|GB|NK|KN|NT|TN|ZZ) ?([a-ceghj-pr-tw-zA-CEGHJ-PR-TW-Z]{1}[a-ceghj-npr-tw-zA-CEGHJ-NPR-TW-Z]{1}) ?([0-9]{2}) ?([0-9]{2}) ?([0-9]{2}) ?([a-dA-D]{1})\b",
21+
1.0),
2222
};
2323

2424
private static readonly string[] DefaultContext =

src/ManagedCode.Presidio.Analyzer/conf/default_recognizers.yaml

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,11 @@ recognizers:
3838
- en
3939
type: predefined
4040

41+
- name: AbaRoutingRecognizer
42+
supported_languages:
43+
- en
44+
type: predefined
45+
4146
- name: NhsRecognizer
4247
supported_languages:
4348
- en
@@ -53,6 +58,11 @@ recognizers:
5358
- en
5459
type: predefined
5560

61+
- name: SgUenRecognizer
62+
supported_languages:
63+
- en
64+
type: predefined
65+
5666
- name: AuAbnRecognizer
5767
supported_languages:
5868
- en
@@ -92,7 +102,6 @@ recognizers:
92102
supported_languages:
93103
- en
94104
type: predefined
95-
enabled: false
96105

97106
- name: EsNifRecognizer
98107
supported_languages:
@@ -139,7 +148,6 @@ recognizers:
139148
- ko
140149
- kr
141150
type: predefined
142-
enabled: false
143151

144152
- name: ThTninRecognizer
145153
supported_languages:

tests/ManagedCode.Presidio.Analyzer.Tests/ItDriverLicenseRecognizerTests.cs

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -31,14 +31,11 @@ public void RecognizerMatchesPythonExpectations(string text, int expectedCount,
3131
}
3232
}
3333

34-
public static TheoryData<string, int, (int, int)[], (double, double)[]> GetCases()
34+
public static IEnumerable<object[]> GetCases()
3535
{
36-
var data = new TheoryData<string, int, (int, int)[], (double, double)[]>();
3736
foreach (var (text, count, positions, scores) in Cases)
3837
{
39-
data.Add(text, count, positions, scores);
38+
yield return new object[] { text, count, positions, scores };
4039
}
41-
42-
return data;
4340
}
4441
}

0 commit comments

Comments
 (0)