diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/ClipPathLetterFilter-Test1.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/ClipPathLetterFilter-Test1.pdf
new file mode 100644
index 000000000..4a24a1ae7
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/ClipPathLetterFilter-Test1.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/ClipPathLetterFilter-Test2.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/ClipPathLetterFilter-Test2.pdf
new file mode 100644
index 000000000..a965262dc
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/ClipPathLetterFilter-Test2.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs b/src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs
new file mode 100644
index 000000000..b9089ebf4
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs
@@ -0,0 +1,56 @@
+namespace UglyToad.PdfPig.Tests.Integration
+{
+    using System.Linq;
+
+    /// <summary>
+    /// Integration tests checking that opening a document with <c>ClipPaths = true</c> yields
+    /// fewer letters (and cleaner words) than opening it with <c>ClipPaths = false</c>.
+    /// </summary>
+    public class LetterFilterTests
+    {
+        [Fact]
+        public void CanFilterClippedLetters()
+        {
+            var path = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test1.pdf");
+
+            using (var clipped = PdfDocument.Open(path, new ParsingOptions { ClipPaths = true }))
+            using (var unclipped = PdfDocument.Open(path, new ParsingOptions { ClipPaths = false }))
+            {
+                var allLetters = unclipped.GetPage(5).Letters.Count;
+                var filteredLetters = clipped.GetPage(5).Letters.Count;
+
+                // When this test was written: filtered 3158 letters, non-filtered 3184 letters.
+                Assert.True(filteredLetters < allLetters,
+                    "Expected filtered letter count to be lower than non-filtered");
+            }
+        }
+
+        [Fact]
+        public void CanFilterClippedLetters_CheckBleedInSpecificWord()
+        {
+            var path = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test2.pdf");
+
+            using (var clipped = PdfDocument.Open(path, new ParsingOptions { ClipPaths = true }))
+            using (var unclipped = PdfDocument.Open(path, new ParsingOptions { ClipPaths = false }))
+            {
+                var allWords = unclipped.GetPage(1).GetWords().ToList();
+                var filteredWords = clipped.GetPage(1).GetWords().ToList();
+
+                // The table has hidden columns at its left end. Without filtering, letters from these
+                // columns get merged into words, which is incorrect. Filtering letters based on the
+                // clip path should fix that.
+                const string wordBeforeTableStart = "ARISER";
+
+                // FindIndex returns -1 when nothing matches; assert the anchor was found so that the
+                // check below cannot silently fall back to the first word of the page.
+                var anchorInAllWords = allWords.FindIndex(x => x.Text == wordBeforeTableStart);
+                Assert.True(anchorInAllWords >= 0, "Anchor word not found in non-filtered words");
+                Assert.Equal("MLIA0U01CP00O0I3N6G2", allWords[anchorInAllWords + 1].Text);
+
+                var anchorInFilteredWords = filteredWords.FindIndex(x => x.Text == wordBeforeTableStart);
+                Assert.True(anchorInFilteredWords >= 0, "Anchor word not found in filtered words");
+                Assert.Equal("ACOGUT", filteredWords[anchorInFilteredWords + 1].Text);
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig.Tests/Integration/StreamProcessorTests.cs b/src/UglyToad.PdfPig.Tests/Integration/StreamProcessorTests.cs
index 9998eea13..67e3e5c1a 100644
--- a/src/UglyToad.PdfPig.Tests/Integration/StreamProcessorTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/StreamProcessorTests.cs
@@ -140,9 +140,7 @@ public override TextOnlyPageContent Process(int pageNumberCurrent,
}
public override void RenderGlyph(IFont font,
- IColor strokingColor,
- IColor nonStrokingColor,
- TextRenderingMode textRenderingMode,
+ CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
diff --git a/src/UglyToad.PdfPig.Tests/Integration/VisualVerification/SkiaHelpers/SkiaGlyphStreamProcessor.cs b/src/UglyToad.PdfPig.Tests/Integration/VisualVerification/SkiaHelpers/SkiaGlyphStreamProcessor.cs
index d0d82dc52..e593a813d 100644
--- a/src/UglyToad.PdfPig.Tests/Integration/VisualVerification/SkiaHelpers/SkiaGlyphStreamProcessor.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/VisualVerification/SkiaHelpers/SkiaGlyphStreamProcessor.cs
@@ -61,9 +61,7 @@ public override void PushState()
}
public override void RenderGlyph(IFont font,
- IColor strokingColor,
- IColor nonStrokingColor,
- TextRenderingMode textRenderingMode,
+ CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
@@ -74,6 +72,7 @@ public override void RenderGlyph(IFont font,
in TransformationMatrix transformationMatrix,
CharacterBoundingBox characterBoundingBox)
{
+ var textRenderingMode = currentState.FontState.TextRenderingMode;
if (textRenderingMode == TextRenderingMode.Neither)
{
return;
@@ -85,7 +84,7 @@ public override void RenderGlyph(IFont font,
if (font.TryGetNormalisedPath(code, out var path))
{
var skPath = path.ToSKPath();
- ShowVectorFontGlyph(skPath, strokingColor, nonStrokingColor, textRenderingMode, renderingMatrix,
+ ShowVectorFontGlyph(skPath, currentState.CurrentStrokingColor!, currentState.CurrentNonStrokingColor!, textRenderingMode, renderingMatrix,
textMatrix, transformationMatrix);
}
else
diff --git a/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs b/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs
index 59827c680..c24bdbdaa 100644
--- a/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs
+++ b/src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs
@@ -517,6 +517,26 @@ public static bool IntersectsWith(this PdfRectangle rectangle, PdfRectangle othe
}
}
+        /// <summary>
+        /// Whether at least one of the rectangle's corners is located inside the path.
+        /// </summary>
+        /// <param name="path">The path that should contain the rectangle corner.</param>
+        /// <param name="rectangle">The rectangle whose corners are tested against the path.</param>
+        /// <param name="includeBorder">If set to false, will return false if the rectangle is on the path's border.</param>
+        /// <returns><c>true</c> if at least one corner of the rectangle is inside the path.</returns>
+        public static bool IntersectsWith(this PdfPath path, PdfRectangle rectangle, bool includeBorder = false)
+        {
+            // NB, for later dev: might not work for a concave outer path, as it can contain all the points
+            // of the inner rectangle but still have overlapping edges.
+            var clipperPaths = path.Select(sp => sp.ToClipperPolygon().ToList()).ToList();
+            var fillType = path.FillingRule == FillingRule.NonZeroWinding
+                ? ClipperPolyFillType.NonZero
+                : ClipperPolyFillType.EvenOdd;
+
+            // Only the rectangle's corner points are tested, not its edges or interior.
+            return rectangle.ToClipperPolygon().Any(point => PointInPaths(point, clipperPaths, fillType, includeBorder));
+        }
+
///
/// Gets the that is the intersection of two rectangles.
/// Only works for axis-aligned rectangles.
diff --git a/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs
index 45c66218f..7159008df 100644
--- a/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs
+++ b/src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs
@@ -282,9 +282,7 @@ public void ShowText(IInputBytes bytes)
var boundingBox = font.GetBoundingBox(code);
RenderGlyph(font,
- currentState.CurrentStrokingColor!,
- currentState.CurrentNonStrokingColor!,
- currentState.FontState.TextRenderingMode,
+ currentState,
fontSize,
pointSize,
code,
@@ -317,9 +315,7 @@ public void ShowText(IInputBytes bytes)
/// Render glyph implement.
///
public abstract void RenderGlyph(IFont font,
- IColor strokingColor,
- IColor nonStrokingColor,
- TextRenderingMode textRenderingMode,
+ CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
index db4b05ac1..d0d634c9f 100644
--- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
+++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
@@ -87,9 +87,7 @@ public override PageContent Process(int pageNumberCurrent, IReadOnlyList 0 && letters.Count > 0)
{
@@ -147,9 +152,9 @@ public override void RenderGlyph(IFont font,
transformedPdfBounds.Width,
fontSize,
font.Details,
- textRenderingMode,
- strokingColor,
- nonStrokingColor,
+ currentState.FontState.TextRenderingMode,
+ currentState.CurrentStrokingColor!,
+ currentState.CurrentNonStrokingColor!,
pointSize,
TextSequence);
}