From 0902b3e370028b17b4e724e2ca535a24e86e2dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=A4rtl?= Date: Mon, 11 Mar 2019 18:10:22 +0100 Subject: [PATCH 1/5] Issue #307 Refactor check for temp file creation --- src/Pdf.php | 67 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/src/Pdf.php b/src/Pdf.php index b810dc3..7246ad9 100644 --- a/src/Pdf.php +++ b/src/Pdf.php @@ -17,12 +17,12 @@ class Pdf const TYPE_HTML = 'html'; const TYPE_XML = 'xml'; - // Regular expression to detect HTML strings - const REGEX_HTML = '/<(?:!doctype )?html/i'; - // Regular expression to detect XML strings const REGEX_XML = '/<\??xml/i'; + // Regular expression to detect URL strings + const REGEX_URL = '/^(https?:)?\/\//i'; + // Regular expression to detect options that expect an URL or a file name, // so we need to create a tmp file for the content. const REGEX_OPTS_TMPFILE = '/^((header|footer)-html|(xsl|user)-style-sheet)$/i'; @@ -122,8 +122,8 @@ public function __construct($options = null) */ public function addPage($input, $options = array(), $type = null) { - $options['inputArg'] = $this->processInput($input, $type); - $this->_objects[] = $this->processOptions($options); + $options['inputArg'] = $this->ensureUrlOrFile($input, $type); + $this->_objects[] = $this->ensureUrlOrFileOptions($options); return $this; } @@ -140,8 +140,8 @@ public function addPage($input, $options = array(), $type = null) public function addCover($input, $options = array(), $type = null) { $options['input'] = ($this->version9 ? 
'--' : '').'cover'; - $options['inputArg'] = $this->processInput($input, $type); - $this->_objects[] = $this->processOptions($options); + $options['inputArg'] = $this->ensureUrlOrFile($input, $type); + $this->_objects[] = $this->ensureUrlOrFileOptions($options); return $this; } @@ -154,7 +154,7 @@ public function addCover($input, $options = array(), $type = null) public function addToc($options = array()) { $options['input'] = ($this->version9 ? '--' : '')."toc"; - $this->_objects[] = $this->processOptions($options); + $this->_objects[] = $this->ensureUrlOrFileOptions($options); return $this; } @@ -215,12 +215,12 @@ public function toString() */ public function setOptions($options = array()) { - // #264 tmpDir must be set before calling processOptions + // #264 tmpDir must be set before calling ensureUrlOrFileOptions if (isset($options['tmpDir'])) { $this->tmpDir = $options['tmpDir']; unset($options['tmpDir']); } - $options = $this->processOptions($options); + $options = $this->ensureUrlOrFileOptions($options); foreach ($options as $key => $val) { if (is_int($key)) { $this->_options[] = $val; @@ -296,35 +296,50 @@ protected function createPdf() } /** - * @param string $input - * @param string|null $type a type hint if the input is a string of known type. This can either be - * `TYPE_HTML` or `TYPE_XML`. If `null` (default) the type is auto detected from the string content. - * @return \mikehaertl\tmp\File|string a File object if the input is a HTML or XML string. The unchanged input otherwhise. + * This method creates a temporary file if the string is neither a URL nor + * contains XML or HTML and is also not a valid file name. + * + * @param string $input the string to check + * @param string|null $type a type hint if the input is a string of known + * type. This can either be `TYPE_HTML` or `TYPE_XML`. If `null` (default) + * the type is auto detected from the string content. 
+ * @return \mikehaertl\tmp\File|string a File object if the input is a HTML + * or XML string. The unchanged input otherwhise. */ - protected function processInput($input, $type = null) + protected function ensureUrlOrFile($input, $type = null) { - if ($type === self::TYPE_HTML || $type === null && preg_match(self::REGEX_HTML, $input)) { - return $this->_tmpFiles[] = new File($input, '.html', self::TMP_PREFIX, $this->tmpDir); - } elseif ($type === self::TYPE_XML || preg_match(self::REGEX_XML, $input)) { - return $this->_tmpFiles[] = new File($input, '.xml', self::TMP_PREFIX, $this->tmpDir); - } else { + if (preg_match(self::REGEX_URL, $input)) { return $input; + } elseif ($type === self::TYPE_XML || $type === null && preg_match(self::REGEX_XML, $input)) { + $ext = '.xml'; + } else { + $isHtml = $input !== strip_tags($input); + if (!$isHtml) { + defined('PHP_MAXPATHLEN') || define('PHP_MAXPATHLEN', 255); + if ((strlen($input) <= PHP_MAXPATHLEN) && is_file($input)) { + return $input; + } + } + $ext = '.html'; } + $file = new File($input, $ext, self::TMP_PREFIX, $this->tmpDir); + $this->_tmpFiles[] = $file; + return $file; } /** * @param array $options list of options as name/value pairs - * @return array options with raw content converted to tmp files where neccessary + * @return array options with raw HTML/XML/String content converted to tmp + * files where neccessary */ - protected function processOptions($options = array()) + protected function ensureUrlOrFileOptions($options = array()) { foreach ($options as $key => $val) { // Some options expect a URL or a file name, so check if we need a temp file if (is_string($val) && preg_match(self::REGEX_OPTS_TMPFILE, $key) ) { - defined('PHP_MAXPATHLEN') || define('PHP_MAXPATHLEN', 255); - $isFile = (strlen($val) <= PHP_MAXPATHLEN) ? 
is_file($val) : false; - if (!($isFile || preg_match('/^(https?:)?\/\//i',$val) || $val === strip_tags($val))) { - $options[$key] = new File($val, '.html', self::TMP_PREFIX, $this->tmpDir); + $file = $this->ensureUrlOrFile($val); + if ($file instanceof File) { + $options[$key] = $file; } } } From 4318898e06c8aacd0fa22201ae567377bc5ebe65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=A4rtl?= Date: Mon, 11 Mar 2019 19:52:16 +0100 Subject: [PATCH 2/5] Issue #307 Revert HTML detection change --- src/Pdf.php | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/Pdf.php b/src/Pdf.php index 7246ad9..2df6d55 100644 --- a/src/Pdf.php +++ b/src/Pdf.php @@ -17,6 +17,9 @@ class Pdf const TYPE_HTML = 'html'; const TYPE_XML = 'xml'; + // Regular expression to detect HTML strings + const REGEX_HTML = '/<(?:!doctype )?html/i'; + // Regular expression to detect XML strings const REGEX_XML = '/<\??xml/i'; @@ -27,9 +30,12 @@ class Pdf // so we need to create a tmp file for the content. const REGEX_OPTS_TMPFILE = '/^((header|footer)-html|(xsl|user)-style-sheet)$/i'; - // prefix for tmp files + // Prefix for tmp files const TMP_PREFIX = 'tmp_wkhtmlto_pdf_'; + // Maximum length of a file path if PHP_MAXPATHLEN is not defined + const MAX_PATHLEN = 255; + /** * @var string the name of the `wkhtmltopdf` binary. Default is * `wkhtmltopdf`. You can also configure a full path here. @@ -139,7 +145,7 @@ public function addPage($input, $options = array(), $type = null) */ public function addCover($input, $options = array(), $type = null) { - $options['input'] = ($this->version9 ? '--' : '').'cover'; + $options['input'] = ($this->version9 ? '--' : '') . 
'cover'; $options['inputArg'] = $this->ensureUrlOrFile($input, $type); $this->_objects[] = $this->ensureUrlOrFileOptions($options); return $this; @@ -153,7 +159,7 @@ public function addCover($input, $options = array(), $type = null) */ public function addToc($options = array()) { - $options['input'] = ($this->version9 ? '--' : '')."toc"; + $options['input'] = ($this->version9 ? '--' : '') . 'toc'; $this->_objects[] = $this->ensureUrlOrFileOptions($options); return $this; } @@ -224,7 +230,7 @@ public function setOptions($options = array()) foreach ($options as $key => $val) { if (is_int($key)) { $this->_options[] = $val; - } elseif ($key[0]!=='_' && property_exists($this, $key)) { + } elseif ($key[0] !== '_' && property_exists($this, $key)) { $this->$key = $val; } else { $this->_options[$key] = $val; @@ -287,7 +293,7 @@ protected function createPdf() $command->addArg($fileName, null, true); // Always escape filename if (!$command->execute()) { $this->_error = $command->getError(); - if (!(file_exists($fileName) && filesize($fileName)!==0 && $this->ignoreWarnings)) { + if (!(file_exists($fileName) && filesize($fileName) !== 0 && $this->ignoreWarnings)) { return false; } } @@ -313,10 +319,14 @@ protected function ensureUrlOrFile($input, $type = null) } elseif ($type === self::TYPE_XML || $type === null && preg_match(self::REGEX_XML, $input)) { $ext = '.xml'; } else { - $isHtml = $input !== strip_tags($input); + // First check for obvious HTML content to avoid is_file() as much + // as possible as it can trigger open_basedir restriction warnings + // with long strings. + $isHtml = $type === self::TYPE_HTML || preg_match(self::REGEX_HTML, $input); if (!$isHtml) { - defined('PHP_MAXPATHLEN') || define('PHP_MAXPATHLEN', 255); - if ((strlen($input) <= PHP_MAXPATHLEN) && is_file($input)) { + $maxPathLen = defined('PHP_MAXPATHLEN') ? 
+ constant('PHP_MAXPATHLEN') : self::MAX_PATHLEN; + if (strlen($input) <= $maxPathLen && is_file($input)) { return $input; } } From f35cc7c2fed4ddb83f1c914878f1d5928c682ac6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=A4rtl?= Date: Mon, 11 Mar 2019 20:22:29 +0100 Subject: [PATCH 3/5] Issue #307 Allow to pass File instance as option --- README.md | 2 ++ src/Pdf.php | 9 +++++---- tests/PdfTest.php | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index dc18852..da139b9 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,8 @@ composer require mikehaertl/phpwkhtmltopdf Make sure, that you include the composer [autoloader](https://getcomposer.org/doc/01-basic-usage.md#autoloading) somewhere in your codebase. +## Examples + ### Single page PDF ```php diff --git a/src/Pdf.php b/src/Pdf.php index 2df6d55..156da60 100644 --- a/src/Pdf.php +++ b/src/Pdf.php @@ -302,10 +302,11 @@ protected function createPdf() } /** - * This method creates a temporary file if the string is neither a URL nor - * contains XML or HTML and is also not a valid file name. + * This method creates a temporary file if the passed argument is neither a + * File instance or URL nor contains XML or HTML and is also not a valid + * file name. * - * @param string $input the string to check + * @param string|File $input the input argument File to check * @param string|null $type a type hint if the input is a string of known * type. This can either be `TYPE_HTML` or `TYPE_XML`. If `null` (default) * the type is auto detected from the string content. 
@@ -314,7 +315,7 @@ protected function createPdf() */ protected function ensureUrlOrFile($input, $type = null) { - if (preg_match(self::REGEX_URL, $input)) { + if ($input instanceof File || preg_match(self::REGEX_URL, $input)) { return $input; } elseif ($type === self::TYPE_XML || $type === null && preg_match(self::REGEX_XML, $input)) { $ext = '.xml'; diff --git a/tests/PdfTest.php b/tests/PdfTest.php index e32ae18..114a2bf 100644 --- a/tests/PdfTest.php +++ b/tests/PdfTest.php @@ -1,5 +1,6 @@ assertRegexp('/tmp_wkhtmlto_pdf_.*?\.html/', $pdf->getCommand()->getExecCommand()); unlink($outFile); } + public function testCanAddPageFromFileInstance() + { + $outFile = $this->getOutFile(); + $binary = $this->getBinary(); + + $pdf = new Pdf; + $pdf->binary = $binary; + $pdf->addPage(new File('Some content', '.html')); + $pdf->saveAs($outFile); + $this->assertRegexp('/php_tmpfile_.*?\.html/', $pdf->getCommand()->getExecCommand()); + unlink($outFile); + } public function testCanAddPageFromXmlString() { $outFile = $this->getOutFile(); @@ -353,6 +366,25 @@ public function testCanAddHeaderAndFooterAsHtml() $this->assertRegExp("#$binary --header-html '/tmp/[^ ]+' --footer-html '/tmp/[^ ]+' '$inFile' '$tmpFile'#", (string) $pdf->getCommand()); unlink($outFile); } + public function testCanAddHeaderAndFooterAsFile() + { + $inFile = $this->getHtmlAsset(); + $outFile = $this->getOutFile(); + $binary = $this->getBinary(); + + $pdf = new Pdf(array( + 'binary' => $binary, + 'header-html' => new File('Some header content', '.html'), + 'footer-html' => new File('Some footer content', '.html'), + )); + $this->assertInstanceOf('mikehaertl\wkhtmlto\Pdf', $pdf->addPage($inFile)); + $this->assertTrue($pdf->saveAs($outFile)); + $this->assertFileExists($outFile); + + $tmpFile = $pdf->getPdfFilename(); + $this->assertRegExp("#$binary --header-html '/tmp/[^ ]+' --footer-html '/tmp/[^ ]+' '$inFile' '$tmpFile'#", (string) $pdf->getCommand()); + unlink($outFile); + } public function 
testCanAddHeaderAndFooterAsHtmlToPagesAndCoverAndToc() { $inFile = $this->getHtmlAsset(); From c09a20d9d09db91bad1a536b4a1db52a9cd1d582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=A4rtl?= Date: Mon, 11 Mar 2019 20:30:46 +0100 Subject: [PATCH 4/5] Issue #307 Save reference to passed File instances --- src/Pdf.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Pdf.php b/src/Pdf.php index 156da60..8d87548 100644 --- a/src/Pdf.php +++ b/src/Pdf.php @@ -315,7 +315,10 @@ protected function createPdf() */ protected function ensureUrlOrFile($input, $type = null) { - if ($input instanceof File || preg_match(self::REGEX_URL, $input)) { + if ($input instanceof File) { + $this->_tmpFiles[] = $input; + return $input; + } elseif (preg_match(self::REGEX_URL, $input)) { return $input; } elseif ($type === self::TYPE_XML || $type === null && preg_match(self::REGEX_XML, $input)) { $ext = '.xml'; From 96c78ad436216e2fb97b14a61463113214db75f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20H=C3=A4rtl?= Date: Tue, 12 Mar 2019 08:03:18 +0100 Subject: [PATCH 5/5] Issue #307 Update README --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index da139b9..6a2d9e5 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,23 @@ $pdf = new Pdf(array( )); ``` +### Passing strings + +Some options like `header-html` usually expect a URL or a filename. With our +library you can also pass a string. The class will try to detect if the +argument is a URL, a filename or some HTML or XML content. To make detection +easier you can surround your content in an `<html>` tag. + +If this doesn't work correctly you can also pass an instance of our `File` +helper as a last resort: + +```php +use mikehaertl\tmp\File; +$options = [ + 'header-html' => new File('Complex content', '.html'), +]; +``` + ## Error handling `send()`, `saveAs()` and `toString()` will return `false` on error. In this case the detailed error message is