diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 5939d37d..3bc442d9 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -10,7 +10,8 @@ "Bash(dotnet --version)", "Bash(dotnet build:*)", "Bash(dotnet test:*)", - "Bash(gh pr list:*)" + "Bash(gh pr list:*)", + "Bash(\"/mnt/c/Program Files/dotnet/dotnet.exe\" test Tests/Tests.csproj --filter \"FullyQualifiedName~OcrJapaneseImage\" --logger \"console;verbosity=detailed\")" ], "deny": [] } diff --git "a/Tests/Images/ja-\351\273\222\343\201\217\343\202\215\343\201\224\343\201\276\343\201\256\343\201\241\343\201\213\343\202\211.png" "b/Tests/Images/ja-\351\273\222\343\201\217\343\202\215\343\201\224\343\201\276\343\201\256\343\201\241\343\201\213\343\202\211.png" new file mode 100644 index 00000000..3d1c6ae9 Binary files /dev/null and "b/Tests/Images/ja-\351\273\222\343\201\217\343\202\215\343\201\224\343\201\276\343\201\256\343\201\241\343\201\213\343\202\211.png" differ diff --git a/Tests/OcrTests.cs b/Tests/OcrTests.cs index d0f94949..22ea1a45 100644 --- a/Tests/OcrTests.cs +++ b/Tests/OcrTests.cs @@ -1,4 +1,4 @@ -using System.Drawing; +using System.Drawing; using System.IO; using System.Text; using System.Text.Json; @@ -90,6 +90,21 @@ BANK CHARGES 344 670 326 49% TOTAL EXPENDITURES 1,016,684 2,065,620 1,048,936 51% REVENUES OVERY(UNDER) EXPENDITURES $9,749 $0 $9,749 N/A"; + private const string jaWordBorders = @".\TextFiles\ja-word-borders.json"; + private const string jaExpectedResult = """ + くろ からだ しつ + 黒ごまは体にいいです。タンパク質やカルシウムが + かみ くろ こうか + たくさんあります。髪を黒くする効果もあります。 + くろ あぶら はだ かみ りょうり + 黒ごま油は肌や髪に使います。料理にも使います。 + かゆ た からだ + お粥やデザートに入れます。でも、食べすぎると体 た + によくないです。少しずつ食べましょう。 + """; + + private const string jaTestImagePath = @".\Images\ja-黒くろごまのちから.png"; + [WpfFact] public async Task OcrFontSampleImage() { @@ -362,4 +377,26 @@ public async Task GetTesseractGitHubLanguage() File.Delete(tempFilePath); } + + [WpfFact] + public async Task OcrJapaneseImage() + { + // Given + 
//string testImagePath = jaTestImagePath; + string rawOutputFromOCR = await File.ReadAllTextAsync(jaWordBorders); + + HistoryInfo jaHistoryInfo = JsonSerializer.Deserialize(rawOutputFromOCR ?? "[]") + ?? throw new Exception("Failed to deserialize HistoryInfo"); + string expectedResult = jaExpectedResult; + + List wordBorders = JsonSerializer.Deserialize>(jaHistoryInfo.WordBorderInfoJson ?? "[]") + ?? throw new Exception("Failed to deserialize WordBorderInfo list"); + + // When + GlobalLang japaneseLanguage = new("ja-JP"); + string ocrTextResult = PostOcrUtilities.GetTextFromWordBorderInfo(wordBorders, japaneseLanguage); + + // Then + Assert.Equal(expectedResult, ocrTextResult); + } } diff --git a/Tests/StringMethodTests.cs b/Tests/StringMethodTests.cs index a0412b95..e27868bb 100644 --- a/Tests/StringMethodTests.cs +++ b/Tests/StringMethodTests.cs @@ -35,14 +35,14 @@ public void ReturnWordAtCursorPositionSix(string expectedWord, string fullLine) Assert.Equal(expectedWord, singleWordAtSix); } - private static string multiLineInput = @"Hello this is lots + private static string multiLineInput = @"Hello this is lots of text which has several lines -and some spaces at the ends of line +and some spaces at the ends of line to throw off any easy check"; [Theory] [InlineData("Hello", "", " this ...")] - [InlineData("lots", "Hello this is ", " ...")] + [InlineData("lots", "Hello this is ", "...")] [InlineData("of", "...", " text ...")] [InlineData("several", "...h has ", " lines...")] public void ReturnPreviewsFromWord(string firstWord, string expectedLeftPreview, string expectedRightPreview) diff --git a/Tests/Tests.csproj b/Tests/Tests.csproj index 3c6d82da..62113fa4 100644 --- a/Tests/Tests.csproj +++ b/Tests/Tests.csproj @@ -62,12 +62,18 @@ + + PreserveNewest + PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/Tests/TextFiles/ja-word-borders.json b/Tests/TextFiles/ja-word-borders.json new file mode 100644 index 00000000..3aeca8bf --- /dev/null +++ 
b/Tests/TextFiles/ja-word-borders.json @@ -0,0 +1,15 @@ +{ + "CaptureDateTime": "2025-11-10T04:19:00.6164022+00:00", + "ID": "5a9f20f1-ac30-4042-b6ba-daf72d32aa17", + "ImagePath": "a7b7a4cf-303d-45ef-b15d-dac96996503a.bmp", + "IsTable": false, + "DpiScaleFactor": 1, + "LanguageTag": "WinAI", + "LanguageKind": 2, + "HasCalcPaneOpen": false, + "CalcPaneWidth": 0, + "SourceMode": 1, + "TextContent": "\u304F\u308D\r\n\u304B\u3089\u3060\r\n\u9ED2\u3054\u307E\u306F \u4F53\u306B\u3044\u3044\u3067\u3059\u3002\u30BF\u30F3\u30D1\u30AF\u8CEA\u3084\u30AB\u30EB\u30B7\u30A6\u30E0\u304C\r\n\u3057\u3064\r\n\u305F\u304F\u3055\u3093 \u3042\u308A\u307E\u3059\u3002\u9AEA\u3092\u9ED2\u304F \u3059\u308B\u52B9\u679C\u3082 \u3042\u308A\u307E\u3059\u3002\r\n\u304B\u307F\r\n\u304F\u308D\r\n\u3053\u3046\u304B\r\n\u9ED2\u3054\u307E\u6CB9\u306F\u808C\u3084\u9AEA\u306B\u4F7F\u3044\u307E\u3059\u3002\u6599\u7406\u306B\u3082\u4F7F\u3044\u307E\u3059\u3002\r\n\u304F\u308D\r\n\u3042\u3076\u3089\r\n\u306F\u3060\r\n\u304B\u307F\r\n\u308A\u3087\u3046\u308A\r\n\u304B\u3086\r\n\u305F\r\n\u304B\u3089\u3060\r\n\u304A\u7CA5\u3084 \u30C7\u30B6\u30FC\u30C8\u306B\u5165\u308C\u307E\u3059\u3002\u3067\u3082\u3001\u98DF\u3079\u3059\u304E\u308B\u3068 \u4F53\r\n\u306B\u3088\u304F\u306A\u3044\u3067\u3059\u3002\u5C11\u3057\u305A\u3064\u98DF\u3079\u307E\u3057\u3087\u3046\u3002\r\n\u305F", + "WordBorderInfoJson": 
"[{\u0022Word\u0022:\u0022\\u304F\\u308D\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:28.417473608152424,\u0022Y\u0022:24.166040548665052},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:30.207550685831315,\u0022Height\u0022:16.78197260323962},\u0022X\u0022:28.417473608152424,\u0022Y\u0022:24.166040548665052,\u0022Width\u0022:30.207550685831315,\u0022Height\u0022:16.78197260323962,\u0022Left\u0022:28.417473608152424,\u0022Top\u0022:24.166040548665052,\u0022Right\u0022:58.62502429398374,\u0022Bottom\u0022:40.948013151904675,\u0022TopLeft\u0022:{\u0022X\u0022:28.417473608152424,\u0022Y\u0022:24.166040548665052},\u0022TopRight\u0022:{\u0022X\u0022:58.62502429398374,\u0022Y\u0022:24.166040548665052},\u0022BottomLeft\u0022:{\u0022X\u0022:28.417473608152424,\u0022Y\u0022:40.948013151904675},\u0022BottomRight\u0022:{\u0022X\u0022:58.62502429398374,\u0022Y\u0022:40.948013151904675}},\u0022LineNumber\u0022:0,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304B\\u3089\\u3060\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:128.43803032346057,\u0022Y\u0022:24.613559818084774},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:46.98952328907094,\u0022Height\u0022:15.663174429690311},\u0022X\u0022:128.43803032346057,\u0022Y\u0022:24.613559818084774,\u0022Width\u0022:46.98952328907094,\u0022Height\u0022:15.663174429690311,\u0022Left\u0022:128.43803032346057,\u0022Top\u0022:24.613559818084774,\u0022Right\u0022:175.4275536125315,\u0022Bottom\u0022:40.27673424777508,\u0022TopLeft\u0022:{\u0022X\u0022:128.43803032346057,\u0022Y\u0022:24.613559818084774},\u0022TopRight\u0022:{\u0022X\u0022:175.4275536125315,\u0022Y\u0022:24.613559818084774},\u0022BottomLeft\u0022:{\u0022X\u0022:128.43803032346057,\u0022Y\u0022:40.27673424777508},\u0022BottomRight\u0022:{\u00
22X\u0022:175.4275536125315,\u0022Y\u0022:40.27673424777508}},\u0022LineNumber\u0022:1,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u9ED2\\u3054\\u307E\\u306F \\u4F53\\u306B\\u3044\\u3044\\u3067\\u3059\\u3002\\u30BF\\u30F3\\u30D1\\u30AF\\u8CEA\\u3084\\u30AB\\u30EB\\u30B7\\u30A6\\u30E0\\u304C\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:26.40363689576367,\u0022Y\u0022:39.15793607422578},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:598.7807824835896,\u0022Height\u0022:25.732357991634082},\u0022X\u0022:26.40363689576367,\u0022Y\u0022:39.15793607422578,\u0022Width\u0022:598.7807824835896,\u0022Height\u0022:25.732357991634082,\u0022Left\u0022:26.40363689576367,\u0022Top\u0022:39.15793607422578,\u0022Right\u0022:625.1844193793534,\u0022Bottom\u0022:64.89029406585986,\u0022TopLeft\u0022:{\u0022X\u0022:26.40363689576367,\u0022Y\u0022:39.15793607422578},\u0022TopRight\u0022:{\u0022X\u0022:625.1844193793534,\u0022Y\u0022:39.15793607422578},\u0022BottomLeft\u0022:{\u0022X\u0022:26.40363689576367,\u0022Y\u0022:64.89029406585986},\u0022BottomRight\u0022:{\u0022X\u0022:625.1844193793534,\u0022Y\u0022:64.89029406585986}},\u0022LineNumber\u0022:2,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u3057\\u3064\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:413.5078049438242,\u0022Y\u0022:23.94228091395519},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:33.11642593705952,\u0022Height\u0022:17.22949187265934},\u0022X\u0022:413.5078049438242,\u0022Y\u0022:23.94228091395519,\u0022Width\u0022:33.11642593705952,\u0022Height\u0022:17.22949187265934,\u0022Left\u0022:413.5078049438242,\u0022Top\u0022:23.94228091395519,\u0022Right\u0022:446.6242
308808837,\u0022Bottom\u0022:41.17177278661453,\u0022TopLeft\u0022:{\u0022X\u0022:413.5078049438242,\u0022Y\u0022:23.94228091395519},\u0022TopRight\u0022:{\u0022X\u0022:446.6242308808837,\u0022Y\u0022:23.94228091395519},\u0022BottomLeft\u0022:{\u0022X\u0022:413.5078049438242,\u0022Y\u0022:41.17177278661453},\u0022BottomRight\u0022:{\u0022X\u0022:446.6242308808837,\u0022Y\u0022:41.17177278661453}},\u0022LineNumber\u0022:3,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u305F\\u304F\\u3055\\u3093 \\u3042\\u308A\\u307E\\u3059\\u3002\\u9AEA\\u3092\\u9ED2\\u304F \\u3059\\u308B\\u52B9\\u679C\\u3082 \\u3042\\u308A\\u307E\\u3059\\u3002\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:27.746194704022837,\u0022Y\u0022:109.41846137312233},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:571.0345877795668,\u0022Height\u0022:27.074915799893255},\u0022X\u0022:27.746194704022837,\u0022Y\u0022:109.41846137312233,\u0022Width\u0022:571.0345877795668,\u0022Height\u0022:27.074915799893255,\u0022Left\u0022:27.746194704022837,\u0022Top\u0022:109.41846137312233,\u0022Right\u0022:598.7807824835896,\u0022Bottom\u0022:136.49337717301557,\u0022TopLeft\u0022:{\u0022X\u0022:27.746194704022837,\u0022Y\u0022:109.41846137312233},\u0022TopRight\u0022:{\u0022X\u0022:598.7807824835896,\u0022Y\u0022:109.41846137312233},\u0022BottomLeft\u0022:{\u0022X\u0022:27.746194704022837,\u0022Y\u0022:136.49337717301557},\u0022BottomRight\u0022:{\u0022X\u0022:598.7807824835896,\u0022Y\u0022:136.49337717301557}},\u0022LineNumber\u0022:4,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304B\\u307F\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:246.35935781555762,\u0022Y\u0022:94.874085116981
32},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:32.22138739822007,\u0022Height\u0022:15.215655160270588},\u0022X\u0022:246.35935781555762,\u0022Y\u0022:94.87408511698132,\u0022Width\u0022:32.22138739822007,\u0022Height\u0022:15.215655160270588,\u0022Left\u0022:246.35935781555762,\u0022Top\u0022:94.87408511698132,\u0022Right\u0022:278.5807452137777,\u0022Bottom\u0022:110.0897402772519,\u0022TopLeft\u0022:{\u0022X\u0022:246.35935781555762,\u0022Y\u0022:94.87408511698132},\u0022TopRight\u0022:{\u0022X\u0022:278.5807452137777,\u0022Y\u0022:94.87408511698132},\u0022BottomLeft\u0022:{\u0022X\u0022:246.35935781555762,\u0022Y\u0022:110.0897402772519},\u0022BottomRight\u0022:{\u0022X\u0022:278.5807452137777,\u0022Y\u0022:110.0897402772519}},\u0022LineNumber\u0022:5,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304F\\u308D\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:302.52302612773286,\u0022Y\u0022:93.08400803930242},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:28.417473608152424,\u0022Height\u0022:19.243328585048097},\u0022X\u0022:302.52302612773286,\u0022Y\u0022:93.08400803930242,\u0022Width\u0022:28.417473608152424,\u0022Height\u0022:19.243328585048097,\u0022Left\u0022:302.52302612773286,\u0022Top\u0022:93.08400803930242,\u0022Right\u0022:330.9404997358853,\u0022Bottom\u0022:112.32733662435052,\u0022TopLeft\u0022:{\u0022X\u0022:302.52302612773286,\u0022Y\u0022:93.08400803930242},\u0022TopRight\u0022:{\u0022X\u0022:330.9404997358853,\u0022Y\u0022:93.08400803930242},\u0022BottomLeft\u0022:{\u0022X\u0022:302.52302612773286,\u0022Y\u0022:112.32733662435052},\u0022BottomRight\u0022:{\u0022X\u0022:330.9404997358853,\u0022Y\u0022:112.32733662435052}},\u0022LineNumber\u0022:6,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\
u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u3053\\u3046\\u304B\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:415.29788202150314,\u0022Y\u0022:94.65032548227146},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:46.98952328907094,\u0022Height\u0022:16.334453333819898},\u0022X\u0022:415.29788202150314,\u0022Y\u0022:94.65032548227146,\u0022Width\u0022:46.98952328907094,\u0022Height\u0022:16.334453333819898,\u0022Left\u0022:415.29788202150314,\u0022Top\u0022:94.65032548227146,\u0022Right\u0022:462.28740531057406,\u0022Bottom\u0022:110.98477881609135,\u0022TopLeft\u0022:{\u0022X\u0022:415.29788202150314,\u0022Y\u0022:94.65032548227146},\u0022TopRight\u0022:{\u0022X\u0022:462.28740531057406,\u0022Y\u0022:94.65032548227146},\u0022BottomLeft\u0022:{\u0022X\u0022:415.29788202150314,\u0022Y\u0022:110.98477881609135},\u0022BottomRight\u0022:{\u0022X\u0022:462.28740531057406,\u0022Y\u0022:110.98477881609135}},\u0022LineNumber\u0022:7,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u9ED2\\u3054\\u307E\\u6CB9\\u306F\\u808C\\u3084\\u9AEA\\u306B\\u4F7F\\u3044\\u307E\\u3059\\u3002\\u6599\\u7406\\u306B\\u3082\\u4F7F\\u3044\\u307E\\u3059\\u3002\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:26.85115616518339,\u0022Y\u0022:179.90274630672872},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:577.9711364555725,\u0022Height\u0022:26.85115616518339},\u0022X\u0022:26.85115616518339,\u0022Y\u0022:179.90274630672872,\u0022Width\u0022:577.9711364555725,\u0022Height\u0022:26.85115616518339,\u0022Left\u0022:26.85115616518339,\u0022Top\u0022:179.90274630672872,\u0022Right\u0022:604.8222926207559,\u0022Bottom\u0022:206.7539024719121,\u0022TopLeft\u0022:{\u0022X\u0022:26.85115616518339,\u0022Y\u0022:179.90274630672872},\u0022TopRight\u0022:{\u0022X\u0022:604.82
22926207559,\u0022Y\u0022:179.90274630672872},\u0022BottomLeft\u0022:{\u0022X\u0022:26.85115616518339,\u0022Y\u0022:206.7539024719121},\u0022BottomRight\u0022:{\u0022X\u0022:604.8222926207559,\u0022Y\u0022:206.7539024719121}},\u0022LineNumber\u0022:8,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304F\\u308D\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:29.312512146991867,\u0022Y\u0022:164.2395718770384},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:29.088752512282007,\u0022Height\u0022:17.453251507369206},\u0022X\u0022:29.312512146991867,\u0022Y\u0022:164.2395718770384,\u0022Width\u0022:29.088752512282007,\u0022Height\u0022:17.453251507369206,\u0022Left\u0022:29.312512146991867,\u0022Top\u0022:164.2395718770384,\u0022Right\u0022:58.40126465927388,\u0022Bottom\u0022:181.6928233844076,\u0022TopLeft\u0022:{\u0022X\u0022:29.312512146991867,\u0022Y\u0022:164.2395718770384},\u0022TopRight\u0022:{\u0022X\u0022:58.40126465927388,\u0022Y\u0022:164.2395718770384},\u0022BottomLeft\u0022:{\u0022X\u0022:29.312512146991867,\u0022Y\u0022:181.6928233844076},\u0022BottomRight\u0022:{\u0022X\u0022:58.40126465927388,\u0022Y\u0022:181.6928233844076}},\u0022LineNumber\u0022:9,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u3042\\u3076\\u3089\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:97.11168146407994,\u0022Y\u0022:165.35837005058772},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:42.514330594873705,\u0022Height\u0022:15.43941479498045},\u0022X\u0022:97.11168146407994,\u0022Y\u0022:165.35837005058772,\u0022Width\u0022:42.514330594873705,\u0022Height\u0022:15.43941479498045,\u0022Left\u0022:97.11168146407994,\u0022Top\u0022:165.358370
05058772,\u0022Right\u0022:139.62601205895365,\u0022Bottom\u0022:180.79778484556817,\u0022TopLeft\u0022:{\u0022X\u0022:97.11168146407994,\u0022Y\u0022:165.35837005058772},\u0022TopRight\u0022:{\u0022X\u0022:139.62601205895365,\u0022Y\u0022:165.35837005058772},\u0022BottomLeft\u0022:{\u0022X\u0022:97.11168146407994,\u0022Y\u0022:180.79778484556817},\u0022BottomRight\u0022:{\u0022X\u0022:139.62601205895365,\u0022Y\u0022:180.79778484556817}},\u0022LineNumber\u0022:10,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u306F\\u3060\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:160.2118984522609,\u0022Y\u0022:165.80588932000745},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:31.102589224670762,\u0022Height\u0022:14.544376256141003},\u0022X\u0022:160.2118984522609,\u0022Y\u0022:165.80588932000745,\u0022Width\u0022:31.102589224670762,\u0022Height\u0022:14.544376256141003,\u0022Left\u0022:160.2118984522609,\u0022Top\u0022:165.80588932000745,\u0022Right\u0022:191.31448767693166,\u0022Bottom\u0022:180.35026557614844,\u0022TopLeft\u0022:{\u0022X\u0022:160.2118984522609,\u0022Y\u0022:165.80588932000745},\u0022TopRight\u0022:{\u0022X\u0022:191.31448767693166,\u0022Y\u0022:165.80588932000745},\u0022BottomLeft\u0022:{\u0022X\u0022:160.2118984522609,\u0022Y\u0022:180.35026557614844},\u0022BottomRight\u0022:{\u0022X\u0022:191.31448767693166,\u0022Y\u0022:180.35026557614844}},\u0022LineNumber\u0022:11,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304B\\u307F\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:214.36173005204742,\u0022Y\u0022:166.02964895471732},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:31.99762776351021,\u0022Height\u0022:1
4.544376256141003},\u0022X\u0022:214.36173005204742,\u0022Y\u0022:166.02964895471732,\u0022Width\u0022:31.99762776351021,\u0022Height\u0022:14.544376256141003,\u0022Left\u0022:214.36173005204742,\u0022Top\u0022:166.02964895471732,\u0022Right\u0022:246.35935781555764,\u0022Bottom\u0022:180.5740252108583,\u0022TopLeft\u0022:{\u0022X\u0022:214.36173005204742,\u0022Y\u0022:166.02964895471732},\u0022TopRight\u0022:{\u0022X\u0022:246.35935781555764,\u0022Y\u0022:166.02964895471732},\u0022BottomLeft\u0022:{\u0022X\u0022:214.36173005204742,\u0022Y\u0022:180.5740252108583},\u0022BottomRight\u0022:{\u0022X\u0022:246.35935781555764,\u0022Y\u0022:180.5740252108583}},\u0022LineNumber\u0022:12,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u308A\\u3087\\u3046\\u308A\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:388.6704854910296,\u0022Y\u0022:164.68709114645813},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:57.953745389854156,\u0022Height\u0022:17.453251507369206},\u0022X\u0022:388.6704854910296,\u0022Y\u0022:164.68709114645813,\u0022Width\u0022:57.953745389854156,\u0022Height\u0022:17.453251507369206,\u0022Left\u0022:388.6704854910296,\u0022Top\u0022:164.68709114645813,\u0022Right\u0022:446.6242308808837,\u0022Bottom\u0022:182.14034265382733,\u0022TopLeft\u0022:{\u0022X\u0022:388.6704854910296,\u0022Y\u0022:164.68709114645813},\u0022TopRight\u0022:{\u0022X\u0022:446.6242308808837,\u0022Y\u0022:164.68709114645813},\u0022BottomLeft\u0022:{\u0022X\u0022:388.6704854910296,\u0022Y\u0022:182.14034265382733},\u0022BottomRight\u0022:{\u0022X\u0022:446.6242308808837,\u0022Y\u0022:182.14034265382733}},\u0022LineNumber\u0022:13,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304B\\u3086\u0022,\u0022Bo
rderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:49.674638905589276,\u0022Y\u0022:235.61889534948426},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:29.760031416411593,\u0022Height\u0022:15.215655160270588},\u0022X\u0022:49.674638905589276,\u0022Y\u0022:235.61889534948426,\u0022Width\u0022:29.760031416411593,\u0022Height\u0022:15.215655160270588,\u0022Left\u0022:49.674638905589276,\u0022Top\u0022:235.61889534948426,\u0022Right\u0022:79.43467032200087,\u0022Bottom\u0022:250.83455050975485,\u0022TopLeft\u0022:{\u0022X\u0022:49.674638905589276,\u0022Y\u0022:235.61889534948426},\u0022TopRight\u0022:{\u0022X\u0022:79.43467032200087,\u0022Y\u0022:235.61889534948426},\u0022BottomLeft\u0022:{\u0022X\u0022:49.674638905589276,\u0022Y\u0022:250.83455050975485},\u0022BottomRight\u0022:{\u0022X\u0022:79.43467032200087,\u0022Y\u0022:250.83455050975485}},\u0022LineNumber\u0022:14,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u305F\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:428.72346010409484,\u0022Y\u0022:236.73769352303356},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:13.873097352011419,\u0022Height\u0022:13.425578082591695},\u0022X\u0022:428.72346010409484,\u0022Y\u0022:236.73769352303356,\u0022Width\u0022:13.873097352011419,\u0022Height\u0022:13.425578082591695,\u0022Left\u0022:428.72346010409484,\u0022Top\u0022:236.73769352303356,\u0022Right\u0022:442.59655745610627,\u0022Bottom\u0022:250.16327160562525,\u0022TopLeft\u0022:{\u0022X\u0022:428.72346010409484,\u0022Y\u0022:236.73769352303356},\u0022TopRight\u0022:{\u0022X\u0022:442.59655745610627,\u0022Y\u0022:236.73769352303356},\u0022BottomLeft\u0022:{\u0022X\u0022:428.72346010409484,\u0022Y\u0022:250.16327160562525},\u0022BottomRight\u0022:{\u0022X\u0022:442.59655745610627,\u0022Y\u0022:250.1632716056252
5}},\u0022LineNumber\u0022:15,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304B\\u3089\\u3060\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:566.7831547200794,\u0022Y\u0022:235.61889534948426},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:46.54200401965121,\u0022Height\u0022:15.663174429690311},\u0022X\u0022:566.7831547200794,\u0022Y\u0022:235.61889534948426,\u0022Width\u0022:46.54200401965121,\u0022Height\u0022:15.663174429690311,\u0022Left\u0022:566.7831547200794,\u0022Top\u0022:235.61889534948426,\u0022Right\u0022:613.3251587397307,\u0022Bottom\u0022:251.28206977917458,\u0022TopLeft\u0022:{\u0022X\u0022:566.7831547200794,\u0022Y\u0022:235.61889534948426},\u0022TopRight\u0022:{\u0022X\u0022:613.3251587397307,\u0022Y\u0022:235.61889534948426},\u0022BottomLeft\u0022:{\u0022X\u0022:566.7831547200794,\u0022Y\u0022:251.28206977917458},\u0022BottomRight\u0022:{\u0022X\u0022:613.3251587397307,\u0022Y\u0022:251.28206977917458}},\u0022LineNumber\u0022:16,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u304A\\u7CA5\\u3084 \\u30C7\\u30B6\\u30FC\\u30C8\\u306B\\u5165\\u308C\\u307E\\u3059\\u3002\\u3067\\u3082\\u3001\\u98DF\\u3079\\u3059\\u304E\\u308B\\u3068 
\\u4F53\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:27.9699543387327,\u0022Y\u0022:250.61079087504498},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:584.6839254968684,\u0022Height\u0022:26.85115616518339},\u0022X\u0022:27.9699543387327,\u0022Y\u0022:250.61079087504498,\u0022Width\u0022:584.6839254968684,\u0022Height\u0022:26.85115616518339,\u0022Left\u0022:27.9699543387327,\u0022Top\u0022:250.61079087504498,\u0022Right\u0022:612.6538798356011,\u0022Bottom\u0022:277.4619470402284,\u0022TopLeft\u0022:{\u0022X\u0022:27.9699543387327,\u0022Y\u0022:250.61079087504498},\u0022TopRight\u0022:{\u0022X\u0022:612.6538798356011,\u0022Y\u0022:250.61079087504498},\u0022BottomLeft\u0022:{\u0022X\u0022:27.9699543387327,\u0022Y\u0022:277.4619470402284},\u0022BottomRight\u0022:{\u0022X\u0022:612.6538798356011,\u0022Y\u0022:277.4619470402284}},\u0022LineNumber\u0022:17,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u306B\\u3088\\u304F\\u306A\\u3044\\u3067\\u3059\\u3002\\u5C11\\u3057\\u305A\\u3064\\u98DF\\u3079\\u307E\\u3057\\u3087\\u3046\\u3002\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:28.641233242862285,\u0022Y\u0022:319.52875836568234},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:452.44198138334013,\u0022Height\u0022:29.088752512282007},\u0022X\u0022:28.641233242862285,\u0022Y\u0022:319.52875836568234,\u0022Width\u0022:452.44198138334013,\u0022Height\u0022:29.088752512282007,\u0022Left\u0022:28.641233242862285,\u0022Top\u0022:319.52875836568234,\u0022Right\u0022:481.08321462620245,\u0022Bottom\u0022:348.6175108779643,\u0022TopLeft\u0022:{\u0022X\u0022:28.641233242862285,\u0022Y\u0022:319.52875836568234},\u0022TopRight\u0022:{\u0022X\u0022:481.08321462620245,\u0022Y\u0022:319.52875836568234},\u0022BottomLeft\u0022:{\u0022X\u0022:28.641233242
862285,\u0022Y\u0022:348.6175108779643},\u0022BottomRight\u0022:{\u0022X\u0022:481.08321462620245,\u0022Y\u0022:348.6175108779643}},\u0022LineNumber\u0022:18,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false},{\u0022Word\u0022:\u0022\\u305F\u0022,\u0022BorderRect\u0022:{\u0022IsEmpty\u0022:false,\u0022Location\u0022:{\u0022X\u0022:330.2692208317557,\u0022Y\u0022:306.77445918722026},\u0022Size\u0022:{\u0022IsEmpty\u0022:false,\u0022Width\u0022:16.55821296852976,\u0022Height\u0022:14.991895525560727},\u0022X\u0022:330.2692208317557,\u0022Y\u0022:306.77445918722026,\u0022Width\u0022:16.55821296852976,\u0022Height\u0022:14.991895525560727,\u0022Left\u0022:330.2692208317557,\u0022Top\u0022:306.77445918722026,\u0022Right\u0022:346.82743380028546,\u0022Bottom\u0022:321.766354712781,\u0022TopLeft\u0022:{\u0022X\u0022:330.2692208317557,\u0022Y\u0022:306.77445918722026},\u0022TopRight\u0022:{\u0022X\u0022:346.82743380028546,\u0022Y\u0022:306.77445918722026},\u0022BottomLeft\u0022:{\u0022X\u0022:330.2692208317557,\u0022Y\u0022:321.766354712781},\u0022BottomRight\u0022:{\u0022X\u0022:346.82743380028546,\u0022Y\u0022:321.766354712781}},\u0022LineNumber\u0022:19,\u0022ResultColumnID\u0022:0,\u0022ResultRowID\u0022:0,\u0022MatchingBackground\u0022:\u0022#FFFFFFFF\u0022,\u0022IsBarcode\u0022:false}]", + "RectAsString": "107,120.5,836,700" +} diff --git a/Text-Grab/Utilities/OcrUtilities.cs b/Text-Grab/Utilities/OcrUtilities.cs index b6741a26..ac0b3cde 100644 --- a/Text-Grab/Utilities/OcrUtilities.cs +++ b/Text-Grab/Utilities/OcrUtilities.cs @@ -48,12 +48,18 @@ public static void GetTextFromOcrLine(this IOcrLine ocrLine, bool isSpaceJoining } else { + // For CJK languages, we need to filter and merge words intelligently + List words = [.. 
ocrLine.Words];
+
+            // Filter out likely furigana (small fragments of at most two characters above taller text)
+            List<IOcrWord> filteredWords = FilterFurigana(words);
+
             bool isFirstWord = true;
             bool isPrevWordSpaceJoining = false;
 
             Regex regexSpaceJoiningWord = SpaceJoiningWordRegex();
 
-            foreach (IOcrWord ocrWord in ocrLine.Words)
+            foreach (IOcrWord ocrWord in filteredWords)
             {
                 string wordString = ocrWord.Text;
 
@@ -70,12 +76,68 @@ public static void GetTextFromOcrLine(this IOcrLine ocrLine, bool isSpaceJoining
                 isFirstWord = false;
                 isPrevWordSpaceJoining = isThisWordSpaceJoining;
             }
+
+            // Always add newline after processing each OCR line to preserve line boundaries
+            text.AppendLine();
         }
 
         if (DefaultSettings.CorrectToLatin)
             text.ReplaceGreekOrCyrillicWithLatin();
     }
 
+    /// <summary>
+    /// Removes words that look like furigana (small phonetic readings printed above kanji)
+    /// from the words of a single OCR line.
+    /// </summary>
+    /// <remarks>
+    /// A word is dropped only when all of the following hold:
+    /// <list type="number">
+    /// <item>its bounding box is shorter than 60% of the median word height;</item>
+    /// <item>its text is at most two characters long;</item>
+    /// <item>a taller word lies entirely below it and overlaps it horizontally.</item>
+    /// </list>
+    /// </remarks>
+    /// <param name="words">The words of one OCR line.</param>
+    /// <returns>
+    /// The words that were kept, in their original order. The original list is returned
+    /// as-is when it is empty or when every word would have been removed.
+    /// </returns>
+    private static List<IOcrWord> FilterFurigana(List<IOcrWord> words)
+    {
+        // Furigana is typically 30-50% the height of main text
+        const double furiganaHeightRatio = 0.6;
+        const int maxFuriganaTextLength = 2;
+
+        if (words.Count == 0)
+            return words;
+
+        // Median (upper middle for even counts) of all word heights
+        List<double> sortedHeights = [.. words.Select(w => w.BoundingBox.Height).OrderBy(h => h)];
+        double furiganaThreshold = sortedHeights[sortedHeights.Count / 2] * furiganaHeightRatio;
+
+        List<IOcrWord> keptWords = [.. words.Where(word => !IsProbablyFurigana(word))];
+
+        // If we filtered everything, return original to avoid losing data
+        return keptWords.Count > 0 ? keptWords : words;
+
+        bool IsProbablyFurigana(IOcrWord word)
+        {
+            // Cheap checks first: the word must be both small and short
+            bool isSmall = word.BoundingBox.Height < furiganaThreshold;
+            if (!isSmall || word.Text.Length > maxFuriganaTextLength)
+                return false;
+
+            // It only counts as furigana when taller (main) text sits underneath it.
+            // The candidate itself can never match: it is shorter than the threshold,
+            // while a match has to be taller than the threshold.
+            return words.Any(other =>
+                other.BoundingBox.Height > furiganaThreshold
+                && other.BoundingBox.Top > word.BoundingBox.Bottom
+                && other.BoundingBox.Right >= word.BoundingBox.Left
+                && other.BoundingBox.Left <= word.BoundingBox.Right);
+        }
+    }
+
     public static async Task<string> GetTextFromAbsoluteRectAsync(Rect rect, ILanguage language)
     {
         Rectangle selectedRegion = rect.AsRectangle();
@@ -323,8 +385,11 @@ private static OcrOutput GetTextFromOcrResult(ILanguage language, Bitmap? 
scaled
 
         bool isSpaceJoiningOCRLang = language.IsSpaceJoining();
 
-        foreach (IOcrLine ocrLine in ocrResult.Lines)
-            ocrLine.GetTextFromOcrLine(isSpaceJoiningOCRLang, text);
+        if (!isSpaceJoiningOCRLang)
+            text.Append(PostOcrUtilities.GetTextFromOcrResult(ocrResult, language));
+        else
+            foreach (IOcrLine ocrLine in ocrResult.Lines)
+                ocrLine.GetTextFromOcrLine(isSpaceJoiningOCRLang, text);
 
         if (language.IsRightToLeft())
             text.ReverseWordsForRightToLeft();
@@ -373,6 +449,31 @@ public static async Task<string> OcrAbsoluteFilePathAsync(string absolutePath, I
         return GetStringFromOcrOutputs(await GetTextFromImageAsync(bmp, language));
     }
 
+    /// <summary>
+    /// Runs OCR on the image file at <paramref name="absolutePath"/> with an explicitly
+    /// chosen <see cref="Language"/> and returns the recognized text.
+    /// </summary>
+    /// <param name="absolutePath">Path of the image; it must parse as an absolute URI.</param>
+    /// <param name="language">Language to recognize; it is wrapped in a <see cref="GlobalLang"/>.</param>
+    /// <returns>The string built by <c>GetStringFromOcrOutputs</c> from the OCR outputs.</returns>
+    public static async Task<string> OcrAbsoluteFilePathWithLanguageAsync(string absolutePath, Language language)
+    {
+        Uri fileURI = new(absolutePath, UriKind.Absolute);
+        RotateFlipType rotateFlipType = ImageMethods.GetRotateFlipType(absolutePath);
+        BitmapImage droppedImage = new();
+        droppedImage.BeginInit();
+        droppedImage.UriSource = fileURI;
+        ImageMethods.RotateImage(droppedImage, rotateFlipType);
+        droppedImage.CacheOption = BitmapCacheOption.None;
+        droppedImage.EndInit();
+        droppedImage.Freeze();
+
+        // Dispose the bitmap once the text has been extracted, mirroring GetClickedWordAsync below.
+        using Bitmap bmp = ImageMethods.BitmapImageToBitmap(droppedImage);
+        GlobalLang globalLang = new(language);
+        return GetStringFromOcrOutputs(await GetTextFromImageAsync(bmp, globalLang));
+    }
+
     public static async Task<string> GetClickedWordAsync(Window passedWindow, Point clickedPoint, ILanguage OcrLang)
     {
         using Bitmap bmp = ImageMethods.GetWindowsBoundsBitmap(passedWindow);
diff --git a/Text-Grab/Utilities/PostOcrUtilities.cs b/Text-Grab/Utilities/PostOcrUtilities.cs
new file mode 100644
index 00000000..b4cea89c
--- /dev/null
+++ b/Text-Grab/Utilities/PostOcrUtilities.cs
@@ -0,0 +1,202 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using Text_Grab.Interfaces;
+using Text_Grab.Models;
+
+namespace Text_Grab.Utilities;
+
+/// <summary>
+/// Helpers that rebuild plain text from positioned OCR words after recognition.
+/// </summary>
+public static class PostOcrUtilities
+{
+    /// <summary>
+    /// Flattens every word of every line in <paramref name="ocrResult"/> into a
+    /// <see cref="WordBorderInfo"/> and builds the text with
+    /// <see cref="GetTextFromWordBorderInfo"/>.
+    /// </summary>
+    /// <param name="ocrResult">OCR result whose lines and words are read.</param>
+    /// <param name="language">Language that selects the text-building strategy.</param>
+    /// <returns>The assembled text.</returns>
+    public static string GetTextFromOcrResult(IOcrLinesWords ocrResult, ILanguage language)
+    {
+        // Convert OCR result to WordBorderInfo objects for each individual word
+        List<WordBorderInfo> wordBorderInfos = [];
+
+        foreach (IOcrLine ocrLine in ocrResult.Lines)
+        {
+            foreach (IOcrWord ocrWord in ocrLine.Words)
+            {
+                WordBorderInfo wordInfo = new()
+                {
+                    BorderRect = new System.Windows.Rect(
+                        ocrWord.BoundingBox.X,
+                        ocrWord.BoundingBox.Y,
+                        ocrWord.BoundingBox.Width,
+                        ocrWord.BoundingBox.Height),
+                    Word = ocrWord.Text,
+                    ResultRowID = 0,
+                    ResultColumnID = 0
+                };
+
+                wordBorderInfos.Add(wordInfo);
+            }
+        }
+
+        // Use the existing word border processing logic
+        return GetTextFromWordBorderInfo(wordBorderInfos, language);
+    }
+
+    /// <summary>
+    /// Builds plain text from a sequence of word borders.
+    /// </summary>
+    /// <remarks>
+    /// Japanese (language tag <c>ja</c> or <c>ja-*</c>) is laid out line by line from the
+    /// words' positions. Every other language gets the words in their given order,
+    /// separated by single spaces.
+    /// </remarks>
+    /// <param name="wordBorderInfos">Words with their bounding rectangles.</param>
+    /// <param name="language">Language whose tag selects the strategy.</param>
+    /// <returns>The assembled text.</returns>
+    public static string GetTextFromWordBorderInfo(IEnumerable<WordBorderInfo> wordBorderInfos, ILanguage language)
+    {
+        if (IsJapanese(language.LanguageTag))
+            return GetTextFromJaWordBorders(wordBorderInfos);
+
+        // NOTE(review): OcrUtilities routes every non-space-joining language here, so
+        // e.g. Chinese would also be joined with spaces - confirm that this is intended.
+        return string.Join(' ', wordBorderInfos.Select(wordBorderInfo => wordBorderInfo.Word)).Trim();
+    }
+
+    /// <summary>
+    /// Returns whether <paramref name="languageTag"/> is the primary subtag <c>ja</c>,
+    /// optionally followed by further subtags (for example <c>ja-JP</c>).
+    /// </summary>
+    private static bool IsJapanese(string languageTag) =>
+        languageTag.Equals("ja", StringComparison.OrdinalIgnoreCase)
+        || languageTag.StartsWith("ja-", StringComparison.OrdinalIgnoreCase);
+
+    /// <summary>
+    /// Rebuilds Japanese text from word borders by grouping the words into visual lines.
+    /// </summary>
+    /// <remarks>
+    /// Words are sorted top to bottom, then left to right. A word joins the current line
+    /// while its top edge is within <c>lineYThreshold</c> of the average top edge of the
+    /// words already on that line. Afterwards every line (other than the first) that is at
+    /// most two characters long and contains no ideographic space is appended to the line
+    /// before it.
+    /// </remarks>
+    /// <param name="wordBorderInfos">Words with their bounding rectangles.</param>
+    /// <returns>The assembled text, or an empty string when there are no words.</returns>
+    private static string GetTextFromJaWordBorders(IEnumerable<WordBorderInfo> wordBorderInfos)
+    {
+        // Sort words by Y position (top to bottom), then by X position (left to right)
+        List<WordBorderInfo> sortedWords = [.. wordBorderInfos.OrderBy(w => w.BorderRect.Top).ThenBy(w => w.BorderRect.Left)];
+
+        if (sortedWords.Count == 0)
+            return string.Empty;
+
+        List<string> lines = [];
+        List<WordBorderInfo> currentLine = [];
+        const double lineYThreshold = 5.0; // Pixels - words within this Y distance are on the same line
+
+        foreach (WordBorderInfo word in sortedWords)
+        {
+            if (currentLine.Count == 0)
+            {
+                // Start a new line
+                currentLine.Add(word);
+            }
+            else
+            {
+                // Check if this word is on the same line as the current line
+                double currentLineY = currentLine.Average(w => w.BorderRect.Top);
+                double wordY = word.BorderRect.Top;
+
+                if (Math.Abs(wordY - currentLineY) <= lineYThreshold)
+                {
+                    // Same line - add to current line
+                    currentLine.Add(word);
+                }
+                else
+                {
+                    // Different line - process current line and start a new one
+                    string processedLine = ProcessLineToString(currentLine);
+                    lines.Add(processedLine);
+                    currentLine.Clear();
+                    currentLine.Add(word);
+                }
+            }
+        }
+
+        // Process the last line
+        if (currentLine.Count > 0)
+        {
+            string processedLine = ProcessLineToString(currentLine);
+            lines.Add(processedLine);
+        }
+
+        // Post-process: merge very short lines (at most two characters) with previous line
+        List<string> mergedLines = [];
+        for (int i = 0; i < lines.Count; i++)
+        {
+            string line = lines[i];
+            string cleanLine = line.Trim();
+
+            // If this line is very short and not the first line, append to previous
+            if (i > 0 && cleanLine.Length <= 2 && !cleanLine.Contains('\u3000'))
+            {
+                // Append to previous line with a space and restore newline
+                mergedLines[^1] = mergedLines[^1].TrimEnd() + " " + cleanLine + Environment.NewLine;
+            }
+            else
+            {
+                mergedLines.Add(line);
+            }
+        }
+
+        return string.Join("", mergedLines).TrimEnd();
+    }
+
+    private static string ProcessLineToString(List<WordBorderInfo> lineWords)
+    {
+        if (lineWords.Count == 0)
+            return string.Empty;
+
+        // Sort words by X position (left to right)
+        lineWords.Sort((a, b) => a.BorderRect.Left.CompareTo(b.BorderRect.Left));
+
+        // Determine if this is a furigana line
(multiple short words) or main text + // Furigana typically consists of multiple short words that should be joined with spaces + bool isFurigana = lineWords.Count > 1 && lineWords.All(w => w.Word.Length <= 4); + + StringBuilder result = new(); + + if (isFurigana) + { + // Join with ideographic space (U+3000) + for (int i = 0; i < lineWords.Count; i++) + { + string cleanWord = lineWords[i].Word.Replace(" ", ""); + result.Append(cleanWord); + if (i < lineWords.Count - 1) + { + result.Append('\u3000'); // Ideographic space + } + } + } + else + { + // Main text - concatenate with spaces removed + foreach (WordBorderInfo word in lineWords) + { + string cleanWord = word.Word.Replace(" ", ""); + result.Append(cleanWord); + } + } + + result.AppendLine(); + return result.ToString(); + } +}