Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .claude/settings.local.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
"Bash(dotnet --version)",
"Bash(dotnet build:*)",
"Bash(dotnet test:*)",
"Bash(gh pr list:*)"
"Bash(gh pr list:*)",
"Bash(\"/mnt/c/Program Files/dotnet/dotnet.exe\" test Tests/Tests.csproj --filter \"FullyQualifiedName~OcrJapaneseImage\" --logger \"console;verbosity=detailed\")"
],
"deny": []
}
Expand Down
Binary file added Tests/Images/ja-黒くろごまのちから.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
39 changes: 38 additions & 1 deletion Tests/OcrTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using System.Drawing;
using System.Drawing;
using System.IO;
using System.Text;
using System.Text.Json;
Expand Down Expand Up @@ -90,6 +90,21 @@ BANK CHARGES 344 670 326 49%
TOTAL EXPENDITURES 1,016,684 2,065,620 1,048,936 51%
REVENUES OVERY(UNDER) EXPENDITURES $9,749 $0 $9,749 N/A";

// Relative path to the JSON fixture that OcrJapaneseImage reads and deserializes into a HistoryInfo.
private const string jaWordBorders = @".\TextFiles\ja-word-borders.json";
// Exact text OcrJapaneseImage expects PostOcrUtilities.GetTextFromWordBorderInfo to produce
// from the word borders stored in that fixture.
// NOTE(review): the short kana-only lines look like furigana readings that sit above the
// main text in the sample image — confirm against the image before relying on this.
private const string jaExpectedResult = """
くろ からだ しつ
黒ごまは体にいいです。タンパク質やカルシウムが
かみ くろ こうか
たくさんあります。髪を黒くする効果もあります。
くろ あぶら はだ かみ りょうり
黒ごま油は肌や髪に使います。料理にも使います。
かゆ た からだ
お粥やデザートに入れます。でも、食べすぎると体 た
によくないです。少しずつ食べましょう。
""";

// Relative path to the Japanese sample image.
// It is not read by any executable code in the visible portion of this file.
private const string jaTestImagePath = @".\Images\ja-黒くろごまのちから.png";

[WpfFact]
public async Task OcrFontSampleImage()
{
Expand Down Expand Up @@ -362,4 +377,26 @@ public async Task GetTesseractGitHubLanguage()

File.Delete(tempFilePath);
}

/// <summary>
/// Verifies the text that <c>PostOcrUtilities.GetTextFromWordBorderInfo</c> builds for the
/// stored Japanese word-border fixture, using a <c>ja-JP</c> language.
/// </summary>
/// <remarks>
/// The test works from the saved JSON fixture only; it does not run OCR on an image.
/// </remarks>
[WpfFact]
public async Task OcrJapaneseImage()
{
    // Given
    // ReadAllTextAsync never yields null, so the text goes to the serializer as-is.
    string rawOutputFromOCR = await File.ReadAllTextAsync(jaWordBorders);

    HistoryInfo jaHistoryInfo = JsonSerializer.Deserialize<HistoryInfo>(rawOutputFromOCR)
        ?? throw new InvalidOperationException("Failed to deserialize HistoryInfo");

    // The word borders are stored as a nested JSON string; a missing value means "no words".
    List<WordBorderInfo> wordBorders = JsonSerializer.Deserialize<List<WordBorderInfo>>(jaHistoryInfo.WordBorderInfoJson ?? "[]")
        ?? throw new InvalidOperationException("Failed to deserialize WordBorderInfo list");

    // When
    GlobalLang japaneseLanguage = new("ja-JP");
    string ocrTextResult = PostOcrUtilities.GetTextFromWordBorderInfo(wordBorders, japaneseLanguage);

    // Then
    Assert.Equal(jaExpectedResult, ocrTextResult);
}
}
6 changes: 3 additions & 3 deletions Tests/StringMethodTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@
Assert.Equal(expectedWord, singleWordAtSix);
}

private static string multiLineInput = @"Hello this is lots
private static string multiLineInput = @"Hello this is lots
of text which has several lines
and some spaces at the ends of line
and some spaces at the ends of line
to throw off any easy check";

[Theory]
[InlineData("Hello", "", " this ...")]
[InlineData("lots", "Hello this is ", " ...")]
[InlineData("lots", "Hello this is ", "...")]
[InlineData("of", "...", " text ...")]
[InlineData("several", "...h has ", " lines...")]
public void ReturnPreviewsFromWord(string firstWord, string expectedLeftPreview, string expectedRightPreview)
Expand Down Expand Up @@ -429,7 +429,7 @@
Assert.Equal(expected, inputString.LimitCharactersPerLine(charLimit, spotInLine));
}

private string actualGuids = """

Check warning on line 432 in Tests/StringMethodTests.cs

View workflow job for this annotation

GitHub Actions / build

The field 'StringMethodTests.actualGuids' is assigned but its value is never used
97a56312-d8e8-4ca5-87fa-18e35266d31e
bdc5a5f2-d6ff-403d-a632-f9006387e149
aeef14aa-9aff-4f0d-8ca5-e5df1b399c20
Expand Down
6 changes: 6 additions & 0 deletions Tests/Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,18 @@
</ItemGroup>

<ItemGroup>
<Content Update="Images\ja-黒くろごまのちから.png">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Update="Images\Table-Complex.png">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>

<ItemGroup>
<None Update="TextFiles\ja-word-borders.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="TextFiles\Table-Complex-WordBorders.json">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
15 changes: 15 additions & 0 deletions Tests/TextFiles/ja-word-borders.json

Large diffs are not rendered by default.

99 changes: 96 additions & 3 deletions Text-Grab/Utilities/OcrUtilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,18 @@
}
else
{
// For CJK languages, we need to filter and merge words intelligently
List<IOcrWord> words = [.. ocrLine.Words];

// Filter out likely furigana (very small text fragments that are single characters)
List<IOcrWord> filteredWords = FilterFurigana(words);

bool isFirstWord = true;
bool isPrevWordSpaceJoining = false;

Regex regexSpaceJoiningWord = SpaceJoiningWordRegex();

foreach (IOcrWord ocrWord in ocrLine.Words)
foreach (IOcrWord ocrWord in filteredWords)
{
string wordString = ocrWord.Text;

Expand All @@ -70,12 +76,79 @@
isFirstWord = false;
isPrevWordSpaceJoining = isThisWordSpaceJoining;
}

// Always add newline after processing each OCR line to preserve line boundaries
text.AppendLine();
}

if (DefaultSettings.CorrectToLatin)
text.ReplaceGreekOrCyrillicWithLatin();
}

/// <summary>
/// Drops words from one OCR line that look like furigana (small reading aids placed above the main text).
/// </summary>
/// <remarks>
/// A word is removed only when all of the following hold:
/// its bounding box is shorter than 60% of the median word height,
/// another word taller than that same cutoff starts below its bottom edge and overlaps it horizontally,
/// and its text is at most two characters long.
/// </remarks>
/// <param name="words">The words of the line; this list is never modified.</param>
/// <returns>
/// The surviving words in their original order. The original list is returned unchanged
/// when it is empty or when every word would have been removed.
/// </returns>
private static List<IOcrWord> FilterFurigana(List<IOcrWord> words)
{
    if (words.Count == 0)
        return words;

    // The element at Count / 2 of the sorted heights serves as the median.
    List<double> sortedHeights = [.. words.Select(w => w.BoundingBox.Height).OrderBy(h => h)];
    double medianHeight = sortedHeights[sortedHeights.Count / 2];
    double furiganaThreshold = medianHeight * 0.6;

    List<IOcrWord> keptWords = [];

    for (int index = 0; index < words.Count; index++)
    {
        IOcrWord candidate = words[index];

        // Evaluated left to right: the neighbour scan and the text length are only
        // looked at once the word is already known to be small.
        bool isProbablyFurigana = candidate.BoundingBox.Height < furiganaThreshold
            && HasTallerWordBelow(index)
            && candidate.Text.Length <= 2;

        if (isProbablyFurigana)
            continue;

        keptWords.Add(candidate);
    }

    // Never hand back an empty result for a non-empty line.
    return keptWords.Count == 0 ? words : keptWords;

    // True when some other word starts below the small word, overlaps it horizontally,
    // and is taller than the furigana cutoff.
    bool HasTallerWordBelow(int smallIndex)
    {
        IOcrWord small = words[smallIndex];

        for (int otherIndex = 0; otherIndex < words.Count; otherIndex++)
        {
            if (otherIndex == smallIndex)
                continue;

            IOcrWord neighbor = words[otherIndex];

            bool startsBelow = neighbor.BoundingBox.Top > small.BoundingBox.Bottom;
            bool isHorizontallyDisjoint = neighbor.BoundingBox.Right < small.BoundingBox.Left
                || neighbor.BoundingBox.Left > small.BoundingBox.Right;
            bool isTall = neighbor.BoundingBox.Height > furiganaThreshold;

            if (startsBelow && !isHorizontallyDisjoint && isTall)
                return true;
        }

        return false;
    }
}

public static async Task<string> GetTextFromAbsoluteRectAsync(Rect rect, ILanguage language)
{
Rectangle selectedRegion = rect.AsRectangle();
Expand Down Expand Up @@ -155,7 +228,7 @@
{
if (language is WindowsAiLang winAiLang)
{
return new WinAiOcrLinesWords(await WindowsAiUtilities.GetOcrResultAsync(scaledBitmap));

Check warning on line 231 in Text-Grab/Utilities/OcrUtilities.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'recognizedText' in 'WinAiOcrLinesWords.WinAiOcrLinesWords(RecognizedText recognizedText)'.

Check warning on line 231 in Text-Grab/Utilities/OcrUtilities.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'recognizedText' in 'WinAiOcrLinesWords.WinAiOcrLinesWords(RecognizedText recognizedText)'.

Check warning on line 231 in Text-Grab/Utilities/OcrUtilities.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'recognizedText' in 'WinAiOcrLinesWords.WinAiOcrLinesWords(RecognizedText recognizedText)'.

Check warning on line 231 in Text-Grab/Utilities/OcrUtilities.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'recognizedText' in 'WinAiOcrLinesWords.WinAiOcrLinesWords(RecognizedText recognizedText)'.

Check warning on line 231 in Text-Grab/Utilities/OcrUtilities.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'recognizedText' in 'WinAiOcrLinesWords.WinAiOcrLinesWords(RecognizedText recognizedText)'.

Check warning on line 231 in Text-Grab/Utilities/OcrUtilities.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'recognizedText' in 'WinAiOcrLinesWords.WinAiOcrLinesWords(RecognizedText recognizedText)'.

Check warning on line 231 in Text-Grab/Utilities/OcrUtilities.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'recognizedText' in 'WinAiOcrLinesWords.WinAiOcrLinesWords(RecognizedText recognizedText)'.
}

if (language is not GlobalLang globalLang)
Expand Down Expand Up @@ -323,8 +396,11 @@

bool isSpaceJoiningOCRLang = language.IsSpaceJoining();

foreach (IOcrLine ocrLine in ocrResult.Lines)
ocrLine.GetTextFromOcrLine(isSpaceJoiningOCRLang, text);
if (!isSpaceJoiningOCRLang)
text.Append(PostOcrUtilities.GetTextFromOcrResult(ocrResult, language));
else
foreach (IOcrLine ocrLine in ocrResult.Lines)
ocrLine.GetTextFromOcrLine(isSpaceJoiningOCRLang, text);

if (language.IsRightToLeft())
text.ReverseWordsForRightToLeft();
Expand Down Expand Up @@ -373,6 +449,23 @@
return GetStringFromOcrOutputs(await GetTextFromImageAsync(bmp, language));
}

/// <summary>
/// Loads the image at <paramref name="absolutePath"/>, applies the rotation reported by
/// <c>ImageMethods.GetRotateFlipType</c>, and returns the OCR text for the given language.
/// </summary>
/// <param name="absolutePath">Path of the image file; it must parse as an absolute URI.</param>
/// <param name="language">Language that is wrapped in a <c>GlobalLang</c> and passed to the OCR call.</param>
/// <returns>The string built by <c>GetStringFromOcrOutputs</c> from the OCR results.</returns>
/// <exception cref="UriFormatException"><paramref name="absolutePath"/> is not a valid absolute URI.</exception>
public static async Task<string> OcrAbsoluteFilePathWithLanguageAsync(string absolutePath, Language language)
{
    Uri fileURI = new(absolutePath, UriKind.Absolute);
    RotateFlipType rotateFlipType = ImageMethods.GetRotateFlipType(absolutePath);

    BitmapImage droppedImage = new();
    droppedImage.BeginInit();
    droppedImage.UriSource = fileURI;
    ImageMethods.RotateImage(droppedImage, rotateFlipType);
    droppedImage.CacheOption = BitmapCacheOption.None;
    droppedImage.EndInit();
    droppedImage.Freeze();

    // The bitmap is only needed for the OCR call below; the using declaration releases it
    // once the result string has been produced.
    using Bitmap bmp = ImageMethods.BitmapImageToBitmap(droppedImage);
    GlobalLang globalLang = new(language);
    return GetStringFromOcrOutputs(await GetTextFromImageAsync(bmp, globalLang));
}

public static async Task<string> GetClickedWordAsync(Window passedWindow, Point clickedPoint, ILanguage OcrLang)
{
using Bitmap bmp = ImageMethods.GetWindowsBoundsBitmap(passedWindow);
Expand Down
Loading
Loading