namespace UglyToad.PdfPig.Tests.Integration
{
    using System.Linq;

    /// <summary>
    /// Integration tests comparing the letters and words extracted from the same document
    /// when it is opened with <c>ParsingOptions.ClipPaths</c> enabled and disabled.
    /// </summary>
    public class LetterFilterTests
    {
        /// <summary>
        /// Opening the document with clip paths enabled must yield fewer letters on page 5
        /// than opening it with clip paths disabled.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters()
        {
            var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test1.pdf");

            using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
            using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
            {
                var allLetters = unclippedDocument.GetPage(5).Letters.Count;
                var filteredLetters = clippedDocument.GetPage(5).Letters.Count;

                // When the test was written: filtered 3158 letters, non-filtered 3184 letters.
                Assert.True(filteredLetters < allLetters,
                    "Expected filtered letter count to be lower than non-filtered");
            }
        }

        /// <summary>
        /// Checks the word that directly follows a known anchor word, with and without clip paths.
        /// </summary>
        [Fact]
        public void CanFilterClippedLetters_CheckBleedInSpecificWord()
        {
            var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test2.pdf");

            using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
            using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
            {
                var allWords = unclippedDocument.GetPage(1).GetWords().ToList();
                var filteredWords = clippedDocument.GetPage(1).GetWords().ToList();

                // The table has hidden columns at the left end. Letters from these columns get merged
                // into words, which is incorrect. Filtering letters based on clip path should fix that.
                const string wordToSearchAfterWhichTheActualTableStarts = "ARISER";

                // FindIndex returns -1 when nothing matches; without these guards the "+ 1" below
                // would silently inspect the first word of the page instead of failing clearly.
                var anchorIndexInAllWords = allWords.FindIndex(x => x.Text.Equals(wordToSearchAfterWhichTheActualTableStarts));
                Assert.True(anchorIndexInAllWords >= 0,
                    "Anchor word was not found in the non-filtered words");
                Assert.True(anchorIndexInAllWords + 1 < allWords.Count,
                    "No word follows the anchor word in the non-filtered words");
                Assert.Equal("MLIA0U01CP00O0I3N6G2", allWords[anchorIndexInAllWords + 1].Text);

                var anchorIndexInFilteredWords = filteredWords.FindIndex(x => x.Text.Equals(wordToSearchAfterWhichTheActualTableStarts));
                Assert.True(anchorIndexInFilteredWords >= 0,
                    "Anchor word was not found in the filtered words");
                Assert.True(anchorIndexInFilteredWords + 1 < filteredWords.Count,
                    "No word follows the anchor word in the filtered words");
                Assert.Equal("ACOGUT", filteredWords[anchorIndexInFilteredWords + 1].Text);
            }
        }
    }
}
/// <summary>
/// Whether at least one of the rectangle's corners is located inside the path.
/// </summary>
/// <remarks>
/// Only the points returned by the rectangle's clipper polygon are tested, so an overlap in which
/// none of those points lies inside the path is reported as <c>false</c>.
/// NB, for later dev: might not work for a concave outer path, as it can contain all the points
/// of the inner rectangle but still have overlapping edges.
/// </remarks>
/// <param name="path">The path that should contain the rectangle corner.</param>
/// <param name="rectangle">The rectangle whose corners are tested against the path.</param>
/// <param name="includeBorder">If set to <c>false</c>, will return <c>false</c> if the rectangle is on the path's border.</param>
/// <returns><c>true</c> if any tested corner is reported as inside the path; otherwise <c>false</c>.</returns>
public static bool IntersectsWith(this PdfPath path, PdfRectangle rectangle, bool includeBorder = false)
{
    // Convert every subpath up front so each corner is tested against the same polygon set.
    var subpathPolygons = path.Select(subpath => subpath.ToClipperPolygon().ToList()).ToList();

    // Use the fill rule that matches the path's own filling rule.
    ClipperPolyFillType fillRule;
    if (path.FillingRule == FillingRule.NonZeroWinding)
    {
        fillRule = ClipperPolyFillType.NonZero;
    }
    else
    {
        fillRule = ClipperPolyFillType.EvenOdd;
    }

    // Any() stops at the first corner found inside, like the early return of an explicit loop.
    return rectangle.ToClipperPolygon()
        .Any(corner => PointInPaths(corner, subpathPolygons, fillRule, includeBorder));
}