Skip to content

Commit d98e9f9

Browse files
guustysebie and iText-CI
authored and committed
Implement overlap ratio to deal with overlapping ascent and descent lines
DEVSIX-8365 Autoported commit. Original commit hash: [ad7efa0]
1 parent 3f482a6 commit d98e9f9

15 files changed

+303
-37
lines changed
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
using System;
2+
using iText.PdfCleanup.Exceptions;
3+
using iText.Test;
4+
5+
namespace iText.PdfCleanup {
6+
/// <summary>Unit tests for the overlap ratio accessors of <c>CleanUpProperties</c>.</summary>
/// <remarks>
/// The test method names refer to an "aspect ratio", but every test exercises
/// <c>GetOverlapRatio</c> and <c>SetOverlapRatio</c>.
/// </remarks>
[NUnit.Framework.Category("UnitTest")]
public class CleanUpPropertiesUnitTest : ExtendedITextTest {
    /// <summary>A freshly constructed instance reports no overlap ratio.</summary>
    [NUnit.Framework.Test]
    public virtual void NePropsAspectRatioReturnsNull() {
        CleanUpProperties props = new CleanUpProperties();
        double? ratio = props.GetOverlapRatio();
        NUnit.Framework.Assert.IsNull(ratio);
    }

    /// <summary>Zero is rejected with an <c>ArgumentException</c> (despite the "IsOk" in the name).</summary>
    [NUnit.Framework.Test]
    public virtual void SetAspectRatioWithValue0IsOk() {
        AssertRatioIsRejected(0d);
    }

    /// <summary>The upper bound 1 is accepted and read back unchanged.</summary>
    [NUnit.Framework.Test]
    public virtual void SetAspectRatioWithValue1IsOk() {
        AssertRatioIsStored(1.0);
    }

    /// <summary>A value above 1 is rejected with an <c>ArgumentException</c>.</summary>
    [NUnit.Framework.Test]
    public virtual void SetAspectRatioWithValueGreaterThan1ThrowsException() {
        AssertRatioIsRejected(1.1);
    }

    /// <summary>A negative value is rejected with an <c>ArgumentException</c>.</summary>
    [NUnit.Framework.Test]
    public virtual void SetAspectRatioWithValueLessThan0ThrowsException() {
        AssertRatioIsRejected(-0.1);
    }

    /// <summary>A value inside the range is accepted and read back unchanged.</summary>
    [NUnit.Framework.Test]
    public virtual void SetAspectRatioWithValue0_5IsOk() {
        AssertRatioIsStored(0.5);
    }

    /// <summary>Passing <see langword="null"/> clears a previously set ratio.</summary>
    [NUnit.Framework.Test]
    public virtual void SettingAspectRatioToNullIsOk() {
        CleanUpProperties props = new CleanUpProperties();
        props.SetOverlapRatio(0.5);
        props.SetOverlapRatio(null);
        double? ratio = props.GetOverlapRatio();
        NUnit.Framework.Assert.IsNull(ratio);
    }

    /// <summary>Sets <paramref name="ratio"/> on a new instance and checks that the getter returns it.</summary>
    private static void AssertRatioIsStored(double ratio) {
        CleanUpProperties props = new CleanUpProperties();
        props.SetOverlapRatio(ratio);
        NUnit.Framework.Assert.AreEqual(ratio, props.GetOverlapRatio());
    }

    /// <summary>
    /// Checks that setting <paramref name="ratio"/> on a new instance throws an
    /// <c>ArgumentException</c> carrying the out-of-range message.
    /// </summary>
    private static void AssertRatioIsRejected(double ratio) {
        CleanUpProperties props = new CleanUpProperties();
        Exception thrown = NUnit.Framework.Assert.Catch(typeof(ArgumentException), () => props.SetOverlapRatio(ratio));
        String expectedMessage = CleanupExceptionMessageConstant.OVERLAP_RATIO_SHOULD_BE_IN_RANGE;
        NUnit.Framework.Assert.AreEqual(expectedMessage, thrown.Message);
    }
}
63+
}
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using iText.Kernel.Colors;
4+
using iText.Kernel.Geom;
5+
using iText.Kernel.Pdf;
6+
using iText.Kernel.Utils;
7+
using iText.Test;
8+
9+
namespace iText.PdfCleanup {
10+
/// <summary>
/// Integration tests that run <c>PdfCleaner.CleanUp</c> on the same input document with different
/// overlap ratio settings and compare each result against a reference PDF.
/// </summary>
[NUnit.Framework.Category("IntegrationTest")]
public class OverlapRatioTest : ExtendedITextTest {
    private static readonly String inputPath = iText.Test.TestUtil.GetParentProjectDirectory(NUnit.Framework.TestContext
        .CurrentContext.TestDirectory) + "/resources/itext/pdfcleanup/OverlapRatioTest/";

    private static readonly String outputPath = NUnit.Framework.TestContext.CurrentContext.TestDirectory + "/test/itext/pdfcleanup/OverlapRatioTest/";

    // Each entry is { page number, x, y, width, height } of one redaction area.
    private static readonly double[][] coordinates = new double[][] {
        // Areas with small line spacing
        new double[] { 1, 149, 700, 63.75, 10.75 },
        new double[] { 1, 149, 640, 63.75, 10.75 },
        new double[] { 1, 149, 520, 163.75, 50.75 },
        // Areas with big line spacing
        new double[] { 1, 149, 374, 63.75, 10.75 },
        new double[] { 1, 149, 310, 63.75, 10.75 },
        new double[] { 1, 149, 120, 163.75, 50.75 }
    };

    [NUnit.Framework.OneTimeSetUp]
    public static void Before() {
        CreateOrClearDestinationFolder(outputPath);
    }

    /// <summary>Cleans up with default properties, i.e. without any overlap ratio set.</summary>
    [NUnit.Framework.Test]
    public virtual void ExtractionWithoutSettingOverlapRatio() {
        CleanUpProperties properties = new CleanUpProperties();
        CleanUpAndCompare("wo_redact_aspect_ratio_simple_redact.pdf", "cmp_wo_redact_aspect_ratio_simple.pdf",
            ConvertCleanupLocations(), properties);
    }

    /// <summary>Cleans up with an overlap ratio of 0.35.</summary>
    [NUnit.Framework.Test]
    public virtual void ExtractionWithSettingOverlapRatio() {
        CleanUpProperties properties = new CleanUpProperties();
        properties.SetOverlapRatio(0.35);
        CleanUpAndCompare("redact_aspect_ratio_simple_redact.pdf", "cmp_redact_aspect_ratio_simple.pdf",
            ConvertCleanupLocations(), properties);
    }

    /// <summary>Cleans up with an overlap ratio just above 0.</summary>
    [NUnit.Framework.Test]
    public virtual void ExtractionWithSettingOverlapRatioCloseTo0() {
        // With a ratio this close to 0 we expect the normal behaviour: everything that is touched
        // by a redaction area should be redacted.
        CleanUpProperties properties = new CleanUpProperties();
        properties.SetOverlapRatio(0.0001);
        CleanUpAndCompare("redact_aspect_ratio_0_simple_redact.pdf", "cmp_redact_aspect_ratio_0_simple.pdf",
            ConvertCleanupLocations(), properties);
    }

    /// <summary>Cleans up a single yellow area with an overlap ratio of 1.</summary>
    [NUnit.Framework.Test]
    public virtual void ExtractionWithSettingOverlapRatio1() {
        // With a ratio of 1 we expect nothing to be redacted, because no item is overlapped
        // completely by the redaction area.
        CleanUpProperties properties = new CleanUpProperties();
        properties.SetOverlapRatio(1d);
        IList<PdfCleanUpLocation> cleanUpLocations = new List<PdfCleanUpLocation>();
        cleanUpLocations.Add(new PdfCleanUpLocation(1, new Rectangle(20, 690, 263.75f, 40), ColorConstants.YELLOW));
        CleanUpAndCompare("redact_aspect_ratio_1_simple_redact.pdf", "cmp_redact_aspect_ratio_1_simple.pdf",
            cleanUpLocations, properties);
    }

    /// <summary>
    /// Runs the clean-up on the shared input document, writes the result to the output folder and
    /// asserts that it matches the reference document by content.
    /// </summary>
    /// <param name="targetName">file name of the produced document, relative to the output folder</param>
    /// <param name="cmpName">file name of the reference document, relative to the input folder</param>
    /// <param name="cleanUpLocations">the redaction areas to apply</param>
    /// <param name="properties">the clean-up properties under test</param>
    private static void CleanUpAndCompare(String targetName, String cmpName, IList<PdfCleanUpLocation> cleanUpLocations,
        CleanUpProperties properties) {
        String inputFile = inputPath + "redact_aspect_ratio_simple.pdf";
        String targetFile = outputPath + targetName;
        String cmpFile = inputPath + cmpName;
        PdfDocument pdfDoc = new PdfDocument(new PdfReader(inputFile), new PdfWriter(targetFile));
        PdfCleaner.CleanUp(pdfDoc, cleanUpLocations, properties);
        pdfDoc.Close();
        CompareTool cmpTool = new CompareTool();
        String errorMessage = cmpTool.CompareByContent(targetFile, cmpFile, outputPath, "diff_");
        NUnit.Framework.Assert.IsNull(errorMessage);
    }

    /// <summary>Converts every entry of <c>coordinates</c> into a black clean-up location.</summary>
    /// <returns>one location per coordinate entry, in the same order</returns>
    private static IList<PdfCleanUpLocation> ConvertCleanupLocations() {
        IList<PdfCleanUpLocation> cleanUpLocations = new List<PdfCleanUpLocation>();
        foreach (double[] coord in coordinates) {
            int pageNumber = (int)coord[0];
            Rectangle area = new Rectangle((float)coord[1], (float)coord[2], (float)coord[3], (float)coord[4]);
            cleanUpLocations.Add(new PdfCleanUpLocation(pageNumber, area, ColorConstants.BLACK));
        }
        return cleanUpLocations;
    }
}
113+
}

itext.tests/itext.cleanup.tests/itext/pdfcleanup/PdfCleanUpFilterUnitTest.cs

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2020
You should have received a copy of the GNU Affero General Public License
2121
along with this program. If not, see <https://www.gnu.org/licenses/>.
2222
*/
23+
using System.Collections.Generic;
2324
using iText.Kernel.Geom;
2425
using iText.Test;
2526

@@ -32,7 +33,8 @@ public virtual void CheckIfRectanglesIntersect_completelyCoveredBasic() {
3233
(70, 80) };
3334
Point[] intersecting = new Point[] { new Point(50, 50), new Point(100, 50), new Point(100, 100), new Point
3435
(50, 100) };
35-
NUnit.Framework.Assert.IsTrue(PdfCleanUpFilter.CheckIfRectanglesIntersect(intersectSubject, intersecting));
36+
PdfCleanUpFilter filter = new PdfCleanUpFilter(new List<Rectangle>(), new CleanUpProperties());
37+
NUnit.Framework.Assert.IsTrue(filter.CheckIfRectanglesIntersect(intersectSubject, intersecting));
3638
}
3739

3840
[NUnit.Framework.Test]
@@ -41,7 +43,8 @@ public virtual void CheckIfRectanglesIntersect_completelyCoveredDegenerateWidth(
4143
(70, 80) };
4244
Point[] intersecting = new Point[] { new Point(50, 50), new Point(100, 50), new Point(100, 100), new Point
4345
(50, 100) };
44-
NUnit.Framework.Assert.IsTrue(PdfCleanUpFilter.CheckIfRectanglesIntersect(intersectSubject, intersecting));
46+
NUnit.Framework.Assert.IsTrue(new PdfCleanUpFilter(new List<Rectangle>(), new CleanUpProperties()).CheckIfRectanglesIntersect
47+
(intersectSubject, intersecting));
4548
}
4649

4750
[NUnit.Framework.Test]
@@ -50,7 +53,8 @@ public virtual void CheckIfRectanglesIntersect_completelyCoveredDegenerateHeight
5053
(70, 70) };
5154
Point[] intersecting = new Point[] { new Point(50, 50), new Point(100, 50), new Point(100, 100), new Point
5255
(50, 100) };
53-
NUnit.Framework.Assert.IsTrue(PdfCleanUpFilter.CheckIfRectanglesIntersect(intersectSubject, intersecting));
56+
NUnit.Framework.Assert.IsTrue(new PdfCleanUpFilter(new List<Rectangle>(), new CleanUpProperties()).CheckIfRectanglesIntersect
57+
(intersectSubject, intersecting));
5458
}
5559

5660
[NUnit.Framework.Test]
@@ -59,7 +63,8 @@ public virtual void CheckIfRectanglesIntersect_completelyCoveredDegeneratePoint(
5963
(70, 70) };
6064
Point[] intersecting = new Point[] { new Point(50, 50), new Point(100, 50), new Point(100, 100), new Point
6165
(50, 100) };
62-
NUnit.Framework.Assert.IsTrue(PdfCleanUpFilter.CheckIfRectanglesIntersect(intersectSubject, intersecting));
66+
NUnit.Framework.Assert.IsTrue(new PdfCleanUpFilter(new List<Rectangle>(), new CleanUpProperties()).CheckIfRectanglesIntersect
67+
(intersectSubject, intersecting));
6368
}
6469
}
6570
}

itext/itext.cleanup/itext/pdfcleanup/CleanUpProperties.cs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,9 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2020
You should have received a copy of the GNU Affero General Public License
2121
along with this program. If not, see <https://www.gnu.org/licenses/>.
2222
*/
23+
using System;
2324
using iText.Commons.Actions.Contexts;
25+
using iText.PdfCleanup.Exceptions;
2426

2527
namespace iText.PdfCleanup {
2628
/// <summary>
@@ -33,6 +35,8 @@ public class CleanUpProperties {
3335

3436
private bool processAnnotations;
3537

38+
private double? overlapRatio;
39+
3640
/// <summary>Creates default CleanUpProperties instance.</summary>
3741
public CleanUpProperties() {
3842
processAnnotations = true;
@@ -77,5 +81,41 @@ public virtual bool IsProcessAnnotations() {
7781
public virtual void SetProcessAnnotations(bool processAnnotations) {
7882
this.processAnnotations = processAnnotations;
7983
}
84+
85+
/// <summary>Returns the currently configured overlap ratio.</summary>
/// <remarks>
/// The overlap ratio indicates how much the content region should overlap with the redaction
/// area to be removed. Values accepted by <c>SetOverlapRatio</c> are greater than 0 and at most 1.
/// </remarks>
/// <returns>
/// the overlap ratio, or
/// <see langword="null"/>
/// when no ratio is set.
/// </returns>
public virtual double? GetOverlapRatio() {
    return this.overlapRatio;
}
99+
100+
/// <summary>Sets the overlap ratio.</summary>
/// <remarks>
/// Sets the overlap ratio.
/// This is a value greater than 0 and at most 1 that indicates how much the content region should
/// overlap with the redaction area to be removed.
/// <para />
/// Example: if the overlap ratio is set to 0.3, the content region will be removed if it overlaps with
/// the redaction area by at least 30%.
/// <para />
/// Passing
/// <see langword="null"/>
/// clears a previously set ratio.
/// </remarks>
/// <param name="overlapRatio">
/// The overlap ratio to set, in the range (0, 1], or
/// <see langword="null"/>
/// to unset it.
/// </param>
/// <exception cref="System.ArgumentException">
/// if the ratio is NaN, not greater than 0, or greater than 1.
/// </exception>
public virtual void SetOverlapRatio(double? overlapRatio) {
    if (overlapRatio == null) {
        this.overlapRatio = null;
        return;
    }
    double ratio = overlapRatio.Value;
    // Negated in-range check: every comparison with NaN is false, so testing the
    // out-of-range conditions directly would let NaN slip through.
    if (!(ratio > 0 && ratio <= 1)) {
        throw new ArgumentException(CleanupExceptionMessageConstant.OVERLAP_RATIO_SHOULD_BE_IN_RANGE);
    }
    this.overlapRatio = overlapRatio;
}
80120
}
81121
}

0 commit comments

Comments
 (0)