Skip to content

Commit

Permalink
Merge pull request #408 from nahsra/fix-paragraph-regex
Browse files Browse the repository at this point in the history
Fix paragraph regex
  • Loading branch information
spassarop authored Dec 21, 2023
2 parents 2170312 + ddbc6fb commit 6fc7a4b
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-anythinggoes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

<regexp name="anything" value=".*" />
<regexp name="numberOrPercent" value="(\d)+(%{0,1})" />
<regexp name="paragraph" value="([\p{L}\p{N},'\.\s\-_\(\)]|&amp;[0-9]{2};)*" />
<regexp name="paragraph" value="[\p{L}\p{N},'.\s\-_\(\)&amp;;]*" />
<regexp name="htmlId" value="[a-zA-Z0-9\:\-_\.]+" />
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*" /> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+" />
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-ebay.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

<regexp name="anything" value=".*" />
<regexp name="numberOrPercent" value="(\d)+(%{0,1})" />
<regexp name="paragraph" value="([\p{L}\p{N},'\.\s\-_\(\)]|&amp;[0-9]{2};)*" />
<regexp name="paragraph" value="[\p{L}\p{N},'.\s\-_\(\)&amp;;]*" />
<regexp name="htmlId" value="[a-zA-Z0-9\:\-_\.]+" />
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*" /> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+" />
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy-myspace.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

<regexp name="anything" value=".*" />
<regexp name="numberOrPercent" value="(\d)+(%{0,1})" />
<regexp name="paragraph" value="([\p{L}\p{N},'\.\s\-_\(\)]|&amp;[0-9]{2};)*" />
<regexp name="paragraph" value="[\p{L}\p{N},'.\s\-_\(\)&amp;;]*" />
<regexp name="htmlId" value="[a-zA-Z0-9\:\-_\.]+" />
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*" /> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+" />
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/antisamy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@

<regexp name="anything" value=".*" />
<regexp name="numberOrPercent" value="(\d)+(%{0,1})" />
<regexp name="paragraph" value="([\p{L}\p{N},'\.\s\-_\(\)]|&amp;[0-9]{2};)*" />
<regexp name="paragraph" value="[\p{L}\p{N},'.\s\-_\(\)&amp;;]*" />
<regexp name="htmlId" value="[a-zA-Z0-9\:\-_\.]+" />
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*" /> <!-- force non-empty with a '+' at the end instead of '*' -->
<regexp name="htmlClass" value="[a-zA-Z0-9\s,\-_]+" />
Expand Down
16 changes: 16 additions & 0 deletions src/test/java/org/owasp/validator/html/test/AntiSamyTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2621,4 +2621,20 @@ public void testRawTextProcessingWhenPreservingComments() throws ScanException,
as.scan(payload, revised, AntiSamy.SAX).getCleanHTML(), not(containsString("mxss")));
}
}

/**
 * Regression test: scanning an {@code <img>} tag whose {@code alt} attribute holds a very long
 * run of text must not end in a {@link StackOverflowError}, with either the DOM or the SAX
 * scanner.
 *
 * @throws ScanException if one of the scans fails
 * @throws PolicyException if the policy cannot be applied
 */
@Test
public void testRegexStackOverflow() throws ScanException, PolicyException {
  // Assemble the payload in a StringBuilder: appending to a String inside the loop would
  // re-copy the whole accumulated value on each of the 2500 iterations.
  StringBuilder input =
      new StringBuilder(
          "<img border=\"0\" width=\"320\" height=\"200\" style=\"width:3.368in;height:2.0486in\" id=\"id_123\" src=\"/url/uri\" alt=\"");
  for (int i = 0; i < 2500; i++) {
    input.append("SampleText ");
  }
  input.append("!\\\">");
  String longAltMarkup = input.toString();

  try {
    // Only the absence of a StackOverflowError matters here; the cleaned HTML is discarded.
    as.scan(longAltMarkup, policy, AntiSamy.DOM).getCleanHTML();
    as.scan(longAltMarkup, policy, AntiSamy.SAX).getCleanHTML();
  } catch (StackOverflowError e) {
    fail("Parser should not throw a stack overflow error");
  }
}
}

0 comments on commit 6fc7a4b

Please sign in to comment.