Skip to content

Commit

Permalink
Enabled ClipPaths option to filter letters. (#905)
Browse files Browse the repository at this point in the history
* Enabled ClipPaths option to filter letters.

* parsingOptions is no longer passed as an argument.
  • Loading branch information
yjagota authored Sep 8, 2024
1 parent cd2a85e commit 09bddba
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 19 deletions.
Binary file not shown.
Binary file not shown.
46 changes: 46 additions & 0 deletions src/UglyToad.PdfPig.Tests/Integration/LetterFilterTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System.Linq;

/// <summary>
/// Integration tests checking that opening a document with
/// <c>ParsingOptions.ClipPaths = true</c> filters out letters compared with
/// opening the same document with <c>ClipPaths = false</c>.
/// </summary>
public class LetterFilterTests
{
    /// <summary>
    /// Page 5 of the first test document must yield fewer letters when
    /// clip paths are enabled than when they are disabled.
    /// </summary>
    [Fact]
    public void CanFilterClippedLetters()
    {
        var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test1.pdf");

        using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
        using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
        {
            var allLetters = unclippedDocument.GetPage(5).Letters.Count;
            var filteredLetters = clippedDocument.GetPage(5).Letters.Count;

            // Counts recorded by the original author: filtered 3158 letters, non-filtered 3184 letters.
            // The actual counts are included in the message so a failure is diagnosable.
            Assert.True(filteredLetters < allLetters,
                "Expected filtered letter count to be lower than non-filtered"
                + $" (filtered: {filteredLetters}, non-filtered: {allLetters})");
        }
    }

    /// <summary>
    /// Checks the word that follows a known marker word on page 1 of the
    /// second test document, with and without clip-path filtering.
    /// </summary>
    [Fact]
    public void CanFilterClippedLetters_CheckBleedInSpecificWord()
    {
        var documentPath = IntegrationHelpers.GetDocumentPath("ClipPathLetterFilter-Test2.pdf");

        using (var clippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = true }))
        using (var unclippedDocument = PdfDocument.Open(documentPath, new ParsingOptions { ClipPaths = false }))
        {
            var allWords = unclippedDocument.GetPage(1).GetWords().Select(w => w.Text).ToArray();
            var filteredWords = clippedDocument.GetPage(1).GetWords().Select(w => w.Text).ToArray();

            // The table has hidden columns at the left end. Letters from these columns get merged into words,
            // which is incorrect. Filtering letters based on the clip path should fix that.
            const string wordToSearchAfterWhichTheActualTableStarts = "ARISER";

            // Without filtering, hidden-column letters bleed into the first table word.
            Assert.Equal("MLIA0U01CP00O0I3N6G2", WordFollowing(allWords, wordToSearchAfterWhichTheActualTableStarts));

            // With filtering, only the visible word remains.
            Assert.Equal("ACOGUT", WordFollowing(filteredWords, wordToSearchAfterWhichTheActualTableStarts));
        }
    }

    /// <summary>
    /// Returns the word immediately after the first occurrence of <paramref name="marker"/>.
    /// Fails the test with an explicit message when the marker is missing or is the last word,
    /// rather than silently reading an unrelated element.
    /// </summary>
    /// <param name="words">Word texts in page order.</param>
    /// <param name="marker">The word to look for.</param>
    /// <returns>The text of the word that follows the marker.</returns>
    private static string WordFollowing(string[] words, string marker)
    {
        var markerIndex = System.Array.IndexOf(words, marker);

        Assert.True(markerIndex >= 0, $"Marker word '{marker}' was not found on the page.");
        Assert.True(markerIndex + 1 < words.Length, $"No word follows marker word '{marker}'.");

        return words[markerIndex + 1];
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,7 @@ public override TextOnlyPageContent Process(int pageNumberCurrent,
}

public override void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,7 @@ public override void PushState()
}

public override void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
Expand All @@ -74,6 +72,7 @@ public override void RenderGlyph(IFont font,
in TransformationMatrix transformationMatrix,
CharacterBoundingBox characterBoundingBox)
{
var textRenderingMode = currentState.FontState.TextRenderingMode;
if (textRenderingMode == TextRenderingMode.Neither)
{
return;
Expand All @@ -85,7 +84,7 @@ public override void RenderGlyph(IFont font,
if (font.TryGetNormalisedPath(code, out var path))
{
var skPath = path.ToSKPath();
ShowVectorFontGlyph(skPath, strokingColor, nonStrokingColor, textRenderingMode, renderingMatrix,
ShowVectorFontGlyph(skPath, currentState.CurrentStrokingColor!, currentState.CurrentNonStrokingColor!, textRenderingMode, renderingMatrix,
textMatrix, transformationMatrix);
}
else
Expand Down
20 changes: 20 additions & 0 deletions src/UglyToad.PdfPig/Geometry/GeometryExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,26 @@ public static bool IntersectsWith(this PdfRectangle rectangle, PdfRectangle othe
}
}

/// <summary>
/// Whether at least one corner of the rectangle is located inside the path.
/// </summary>
/// <param name="path">The path that should contain a corner of the rectangle.</param>
/// <param name="rectangle">The rectangle whose corners are tested against the path.</param>
/// <param name="includeBorder">Passed through to the point-in-path test. If set to false, a corner lying on the path's border is not treated as inside.</param>
/// <returns><c>true</c> if any corner of <paramref name="rectangle"/> is found inside <paramref name="path"/>; otherwise <c>false</c>.</returns>
public static bool IntersectsWith(this PdfPath path, PdfRectangle rectangle, bool includeBorder = false)
{
    // NB, for later dev: only the rectangle's corners are tested. This might not work for a concave
    // outer path, as it can contain all the points of the inner rectangle but still have overlapping edges.
    var subpathPolygons = path.Select(subpath => subpath.ToClipperPolygon().ToList()).ToList();

    // Translate the path's filling rule into the matching Clipper fill type.
    ClipperPolyFillType clipperFillType;
    if (path.FillingRule == FillingRule.NonZeroWinding)
    {
        clipperFillType = ClipperPolyFillType.NonZero;
    }
    else
    {
        clipperFillType = ClipperPolyFillType.EvenOdd;
    }

    // Stops at the first corner found inside the path, like the explicit loop it replaces.
    return rectangle.ToClipperPolygon()
        .Any(corner => PointInPaths(corner, subpathPolygons, clipperFillType, includeBorder));
}

/// <summary>
/// Gets the <see cref="PdfRectangle"/> that is the intersection of two rectangles.
/// <para>Only works for axis-aligned rectangles.</para>
Expand Down
8 changes: 2 additions & 6 deletions src/UglyToad.PdfPig/Graphics/BaseStreamProcessor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,7 @@ public void ShowText(IInputBytes bytes)
var boundingBox = font.GetBoundingBox(code);

RenderGlyph(font,
currentState.CurrentStrokingColor!,
currentState.CurrentNonStrokingColor!,
currentState.FontState.TextRenderingMode,
currentState,
fontSize,
pointSize,
code,
Expand Down Expand Up @@ -317,9 +315,7 @@ public void ShowText(IInputBytes bytes)
/// Render glyph implement.
/// </summary>
public abstract void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
Expand Down
17 changes: 11 additions & 6 deletions src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,7 @@ public override PageContent Process(int pageNumberCurrent, IReadOnlyList<IGraphi
}

public override void RenderGlyph(IFont font,
IColor strokingColor,
IColor nonStrokingColor,
TextRenderingMode textRenderingMode,
CurrentGraphicsState currentState,
double fontSize,
double pointSize,
int code,
Expand All @@ -109,6 +107,13 @@ public override void RenderGlyph(IFont font,
transformationMatrix,
new PdfRectangle(0, 0, characterBoundingBox.Width, 0));

if (ParsingOptions.ClipPaths)
{
var currentClipping = currentState.CurrentClippingPath;
if (currentClipping?.IntersectsWith(transformedGlyphBounds) == false)
return;
}

Letter letter = null;
if (Diacritics.IsInCombiningDiacriticRange(unicode) && currentOffset > 0 && letters.Count > 0)
{
Expand Down Expand Up @@ -147,9 +152,9 @@ public override void RenderGlyph(IFont font,
transformedPdfBounds.Width,
fontSize,
font.Details,
textRenderingMode,
strokingColor,
nonStrokingColor,
currentState.FontState.TextRenderingMode,
currentState.CurrentStrokingColor!,
currentState.CurrentNonStrokingColor!,
pointSize,
TextSequence);
}
Expand Down

0 comments on commit 09bddba

Please sign in to comment.