Skip to content

Commit

Permalink
Properly support document prefixes (#365)
Browse files Browse the repository at this point in the history
* Revert "Allow parsing off-spec PDF files with prefixes before the header"

This reverts commit dfb170a.

* Properly support documents with prefixing bytes
  • Loading branch information
gmalette authored Dec 19, 2024
1 parent 4f74343 commit 34e2d20
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 27 deletions.
29 changes: 2 additions & 27 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::collections::HashSet;
use std::str::{self, FromStr};

use nom::branch::alt;
use nom::bytes::complete::{tag, take, take_until, take_while, take_while1, take_while_m_n};
use nom::bytes::complete::{tag, take, take_while, take_while1, take_while_m_n};
use nom::character::complete::multispace1;
use nom::character::complete::{digit0, digit1, one_of};
use nom::character::complete::{space0, space1};
Expand Down Expand Up @@ -416,16 +416,7 @@ fn _indirect_object<'a>(
pub fn header(input: ParserInput) -> Option<String> {
strip_nom(map_res(
delimited(
tuple((
map_res(take_until("%PDF-"), |v: ParserInput| {
if v.len() > 1024 {
return Err("Header prefix too long");
}

Ok(v)
}),
tag(b"%PDF-"),
)),
tag(b"%PDF-"),
take_while(|c: u8| !b"\r\n".contains(&c)),
pair(eol, many0_count(comment)),
),
Expand Down Expand Up @@ -641,22 +632,6 @@ mod tests {
assert_eq!(real(test_span(b"10.")), Some(10.0));
}

#[test]
fn parses_malformed_header_with_prefix() {
    // A short run of spaces and newlines ahead of the marker should still
    // yield the version string.
    let prefixed = b"\x20\x20\x20\x20\x0A\x0A\x20\x20%PDF-1.4\n";
    assert_eq!(header(test_span(prefixed)), Some("1.4".to_string()));

    // 1025 bytes of padding before the marker: the header is expected to be
    // rejected.
    let mut oversized = vec![b' '; 1025];
    oversized.extend_from_slice(b"%PDF-1.4");
    assert_eq!(header(test_span(&oversized)), None)
}

#[test]
fn parse_string() {
let literal_string = |i| tstrip(literal_string(i));
Expand Down
12 changes: 12 additions & 0 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,9 @@ pub const MAX_BRACKET: usize = 100;
impl Reader<'_> {
/// Read whole document.
pub fn read(mut self, filter_func: Option<FilterFunc>) -> Result<Document> {
let offset = self.buffer.windows(5).position(|w| w == b"%PDF-").unwrap_or(0);
self.buffer = &self.buffer[offset..];

// The document structure can be expressed in PEG as:
// document <- header indirect_object* xref trailer xref_start
let version =
Expand Down Expand Up @@ -494,6 +497,15 @@ fn load_short_document() {
let _doc = Document::load_mem(b"%PDF-1.5\n%%EOF\n").unwrap();
}

/// Loading must succeed when arbitrary bytes precede the `%PDF-` marker.
#[test]
fn load_document_with_preceding_bytes() {
    // Junk bytes followed by the unmodified example document.
    let bytes: Vec<u8> = [
        &b"garbage"[..],
        &include_bytes!("../assets/example.pdf")[..],
    ]
    .concat();

    let loaded = Document::load_mem(&bytes).unwrap();
    assert_eq!(loaded.version, "1.5");
}

#[test]
fn load_many_shallow_brackets() {
let content: String = std::iter::repeat("()")
Expand Down

0 comments on commit 34e2d20

Please sign in to comment.