diff --git a/src/Core/XssSanitiser.php b/src/Core/XssSanitiser.php
new file mode 100644
index 00000000000..5ffdb224b95
--- /dev/null
+++ b/src/Core/XssSanitiser.php
@@ -0,0 +1,213 @@
+sanitiseHtmlValue($htmlValue);
+ return $htmlValue->getContent();
+ }
+
+ /**
+ * Remove XSS attack vectors from HTMLValue content
+ */
+ public function sanitiseHtmlValue(HTMLValue $html): void
+ {
+ foreach ($html->query('//*') as $element) {
+ if (!is_a($element, DOMElement::class)) {
+ continue;
+ }
+ $this->sanitiseElement($element);
+ }
+ }
+
+ /**
+ * Remove XSS attack vectors from a DOMElement
+ */
+ public function sanitiseElement(DOMElement $element): void
+ {
+ // Remove elements first - if we remove the element, we don't have any attributes to check so exit early
+ $removed = $this->stripElement($element);
+ if ($removed) {
+ return;
+ }
+ $this->stripAttributes($element);
+ $this->stripAttributeContents($element);
+ }
+
+ /**
+ * Get the names of elements which will be removed.
+ */
+ public function getElementsToRemove(): array
+ {
+ return $this->elementsToRemove;
+ }
+
+ /**
+ * Set the names of elements which will be removed.
+ * Note that allowing the elements which are included in the default list could result in XSS vulnerabilities.
+ */
+ public function setElementsToRemove(array $elements): static
+ {
+ $this->elementsToRemove = $elements;
+ return $this;
+ }
+
+ /**
+ * Get the names of attributes which will be removed from any elements that have them.
+ */
+ public function getAttributesToRemove(): array
+ {
+ return $this->attributesToRemove;
+ }
+
+ /**
+ * Set the names of attributes which will be removed from any elements that have them.
+ * Note that allowing the attributes which are included in the default list could result in XSS vulnerabilities.
+ */
+ public function setAttributesToRemove(array $attributes): static
+ {
+ $this->attributesToRemove = $attributes;
+ return $this;
+ }
+
+ /**
+ * Get whether the inner contents of an element will be kept for elements that get removed.
+ */
+ public function getKeepInnerHtmlOnRemoveElement(): bool
+ {
+ return $this->keepInnerHtmlOnRemoveElement;
+ }
+
+ /**
+ * Set whether to keep the inner contents of an element if it gets removed.
+ */
+ public function setKeepInnerHtmlOnRemoveElement(bool $keep): static
+ {
+ $this->keepInnerHtmlOnRemoveElement = $keep;
+ return $this;
+ }
+
+ /**
+ * If $element is one of the elements in $elementsToRemove, replace it
+ * with a text node.
+ */
+ private function stripElement(DOMElement $element): bool
+ {
+ if (!in_array($element->tagName, $this->getElementsToRemove())) {
+ return false;
+ }
+ // Make sure we don't remove any child nodes
+ $parentNode = $element->parentNode;
+ if ($this->getKeepInnerHtmlOnRemoveElement() && $parentNode && $element->hasChildNodes()) {
+ // We can't just iterate through $node->childNodes because that seems to skip some children
+ while ($element->hasChildNodes()) {
+ $parentNode->insertBefore($element->firstChild, $element);
+ }
+ }
+ $element->remove();
+ return true;
+ }
+
+ /**
+ * Remove all attributes in $attributesToRemove from the element.
+ */
+ private function stripAttributes(DOMElement $element): void
+ {
+ $attributesToRemove = $this->getAttributesToRemove();
+ if (empty($attributesToRemove)) {
+ return;
+ }
+ $attributes = $element->attributes;
+ for ($i = count($attributes) - 1; $i >= 0; $i--) {
+ /** @var DOMAttr $attr */
+ $attr = $attributes->item($i);
+ foreach ($attributesToRemove as $toRemove) {
+ if (str_starts_with($toRemove, '*') && str_ends_with($attr->name, str_replace('*', '', $toRemove))) {
+ $element->removeAttributeNode($attr);
+ } elseif (str_ends_with($toRemove, '*') && str_starts_with($attr->name, str_replace('*', '', $toRemove))) {
+ $element->removeAttributeNode($attr);
+ } elseif (!str_contains($toRemove, '*') && $attr->name === $toRemove) {
+ $element->removeAttributeNode($attr);
+ }
+ }
+ }
+ }
+
+ /**
+ * Strip out attributes which have dangerous content which might otherwise execute javascript.
+ * This is content that we will always remove regardless of whether the attributes and elements in question
+ * are otherwise allowed, e.g. via WYSIWYG configuration.
+ */
+ private function stripAttributeContents(DOMElement $element): void
+ {
+ $regex = $this->getStripAttributeContentsRegex();
+ foreach (['lowsrc', 'src', 'href', 'data'] as $dangerAttribute) {
+ if ($element->hasAttribute($dangerAttribute)) {
+ $attrContent = $element->getAttribute($dangerAttribute);
+ if (preg_match($regex, $attrContent)) {
+ $element->removeAttribute($dangerAttribute);
+ }
+ }
+ }
+ }
+
+ private function getStripAttributeContentsRegex(): string
+ {
+ $regexes = [
+ $this->splitWithWhitespaceRegex('javascript:'),
+ $this->splitWithWhitespaceRegex('data:text/html'),
+ $this->splitWithWhitespaceRegex('vbscript:'),
+ ];
+ // Regex is "starts with any of these, with optional whitespace at the start, case insensitive"
+ return '#^\s*(' . implode('|', $regexes) . ')#iu';
+ }
+
+ private function splitWithWhitespaceRegex(string $string): string
+ {
+ // Note that `\s` explicitly includes ALL invisible characters when used with the `u` modifier.
+ // That includes unicode characters like the non-breaking space.
+ return implode('\s*', str_split($string));
+ }
+}
diff --git a/src/Dev/Debug.php b/src/Dev/Debug.php
index 5701577f08f..2bfba6e6b5f 100644
--- a/src/Dev/Debug.php
+++ b/src/Dev/Debug.php
@@ -164,16 +164,20 @@ protected static function supportsHTML(HTTPRequest $request = null)
if (Director::is_cli()) {
return false;
}
+ $accepted = [];
// Get current request if registered
if (!$request && Injector::inst()->has(HTTPRequest::class)) {
$request = Injector::inst()->get(HTTPRequest::class);
}
- if (!$request) {
- return false;
+ if ($request) {
+ $accepted = $request->getAcceptMimetypes(false);
+ } elseif (isset($_SERVER['HTTP_ACCEPT'])) {
+ // If there's no request object available, fallback to global $_SERVER
+ // This can happen in some circumstances when a PHP error is triggered
+ // during a regular HTTP request
+ $accepted = preg_split('#\s*,\s*#', $_SERVER['HTTP_ACCEPT']);
}
- // Request must include text/html
- $accepted = $request->getAcceptMimetypes(false);
// Explicit opt in
if (in_array('text/html', $accepted ?? [])) {
diff --git a/src/Forms/FormMessage.php b/src/Forms/FormMessage.php
index ce18ee153a6..8a6d85b74a3 100644
--- a/src/Forms/FormMessage.php
+++ b/src/Forms/FormMessage.php
@@ -3,6 +3,7 @@
namespace SilverStripe\Forms;
use InvalidArgumentException;
+use SilverStripe\Core\XssSanitiser;
use SilverStripe\ORM\ValidationResult;
use SilverStripe\View\ViewableData;
@@ -33,6 +34,7 @@ trait FormMessage
/**
* Returns the field message, used by form validation.
+ * If the current cast is ValidationResult::CAST_HTML, the message will be sanitised.
*
* Use {@link setError()} to set this property.
*
@@ -40,7 +42,11 @@ trait FormMessage
*/
public function getMessage()
{
- return $this->message;
+ $message = $this->message;
+ if ($this->getMessageCast() === ValidationResult::CAST_HTML) {
+ $message = XssSanitiser::create()->sanitiseString($message);
+ }
+ return $message;
}
/**
diff --git a/src/Forms/GridField/GridFieldDetailForm_ItemRequest.php b/src/Forms/GridField/GridFieldDetailForm_ItemRequest.php
index f4294dd949d..5421edc8b1a 100644
--- a/src/Forms/GridField/GridFieldDetailForm_ItemRequest.php
+++ b/src/Forms/GridField/GridFieldDetailForm_ItemRequest.php
@@ -567,13 +567,13 @@ public function doSave($data, $form)
$this->saveFormIntoRecord($data, $form);
$link = '"'
- . htmlspecialchars($this->record->Title ?? '', ENT_QUOTES)
+ . Convert::raw2xml($this->record->Title ?? '', ENT_QUOTES)
. '"';
$message = _t(
'SilverStripe\\Forms\\GridField\\GridFieldDetailForm.Saved',
'Saved {name} {link}',
[
- 'name' => $this->getModelName(),
+ 'name' => Convert::raw2xml($this->getModelName()),
'link' => $link
]
);
@@ -834,8 +834,8 @@ public function doDelete($data, $form)
'SilverStripe\\Forms\\GridField\\GridFieldDetailForm.Deleted',
'Deleted {type} "{name}"',
[
- 'type' => $this->getModelName(),
- 'name' => $this->record->Title
+ 'type' => Convert::raw2xml($this->getModelName()),
+ 'name' => Convert::raw2xml($this->record->Title)
]
);
diff --git a/src/Forms/HTMLEditor/HTMLEditorSanitiser.php b/src/Forms/HTMLEditor/HTMLEditorSanitiser.php
index c056bca10f3..49eda400afa 100644
--- a/src/Forms/HTMLEditor/HTMLEditorSanitiser.php
+++ b/src/Forms/HTMLEditor/HTMLEditorSanitiser.php
@@ -6,6 +6,7 @@
use DOMElement;
use SilverStripe\Core\Config\Configurable;
use SilverStripe\Core\Injector\Injectable;
+use SilverStripe\Core\XssSanitiser;
use SilverStripe\View\Parsers\HTMLValue;
use stdClass;
@@ -289,6 +290,10 @@ public function sanitise(HTMLValue $html)
{
$linkRelValue = $this->config()->get('link_rel_value');
$doc = $html->getDocument();
+ // Get a sanitiser but don't deny any specific attributes or elements, since that's
+ // handled as part of the element rules.
+ $xssSanitiser = XssSanitiser::create();
+ $xssSanitiser->setElementsToRemove([])->setAttributesToRemove([]);
/** @var DOMElement $el */
foreach ($html->query('//body//*') as $el) {
@@ -342,16 +347,8 @@ public function sanitise(HTMLValue $html)
$el->setAttribute($attr, $forced);
}
- // Matches "javascript:" with any arbitrary linebreaks inbetween the characters.
- $regex = '#^\s*(' . implode('\s*', str_split('javascript:')) . '|' . implode('\s*', str_split('data:text/html;')) . ')#i';
- // Strip out javascript execution in href or src attributes.
- foreach (['src', 'href', 'data'] as $dangerAttribute) {
- if ($el->hasAttribute($dangerAttribute)) {
- if (preg_match($regex, $el->getAttribute($dangerAttribute))) {
- $el->removeAttribute($dangerAttribute);
- }
- }
- }
+ // Explicit XSS sanitisation for anything that there's really no sensible use case for in a WYSIWYG
+ $xssSanitiser->sanitiseElement($el);
}
if ($el->tagName === 'a' && $linkRelValue !== null) {
diff --git a/src/View/Shortcodes/EmbedShortcodeProvider.php b/src/View/Shortcodes/EmbedShortcodeProvider.php
index 89806d6bc26..eb959d2b5ef 100644
--- a/src/View/Shortcodes/EmbedShortcodeProvider.php
+++ b/src/View/Shortcodes/EmbedShortcodeProvider.php
@@ -6,6 +6,7 @@
use Embed\Http\RequestException;
use Psr\SimpleCache\CacheInterface;
use Psr\SimpleCache\InvalidArgumentException;
+use RuntimeException;
use SilverStripe\Core\Convert;
use SilverStripe\Core\Injector\Injector;
use SilverStripe\ORM\ArrayList;
@@ -28,6 +29,31 @@ class EmbedShortcodeProvider implements ShortcodeHandler
{
use Configurable;
+ /**
+ * Domains to exclude from sandboxing content in an iframe
+ * This will also exclude any subdomains
+ * e.g. if 'example.com' is excluded then 'www.example.com' will also be excluded
+ * Do not include the protocol in the domain i.e. exclude the leading https://
+ */
+ private static array $domains_excluded_from_sandboxing = [];
+
+ /**
+ * Attributes to add to the iframe when sandboxing
+ * Note that the 'src' attribute cannot be set via config
+ * If a style attribute is set via config, width and height values will be overriden by
+ * any shortcode width and height arguments
+ */
+ private static array $sandboxed_iframe_attributes = [];
+
+ /**
+ * The url of the last extractor used
+ * This is used instead of adding a new param to an existing method
+ * which would be backwards incompatible
+ *
+ * @internal
+ */
+ private static string $extractorUrl = '';
+
/**
* Gets the list of shortcodes provided by this handler
*
@@ -140,6 +166,7 @@ public static function embeddableToHtml(Embeddable $embeddable, array $arguments
return '';
}
$extractor = $embeddable->getExtractor();
+ EmbedShortcodeProvider::$extractorUrl = (string) $extractor->url;
$type = $embeddable->getType();
if ($type === 'video' || $type === 'rich') {
// Attempt to inherit width (but leave height auto)
@@ -194,6 +221,7 @@ protected static function videoEmbed($arguments, $content)
]));
}
+ $content = EmbedShortcodeProvider::sandboxHtml($content, $arguments);
$data = [
'Arguments' => $arguments,
'Attributes' => $attributes,
@@ -342,4 +370,72 @@ private static function cleanKeySegment(string $str): string
{
return preg_replace('/[^a-zA-Z0-9\-]/', '', $str ?? '');
}
+
+ /**
+ * Wrap potentially dangerous html embeds in an iframe to sandbox them
+ * Potentially dangerous html embeds would could be those that contain
+ // and
+ // If there's more than 2 HTML tags then sandbox it
+ if (substr_count($html, '<') <= 2) {
+ return $html;
+ }
+ }
+ // Sandbox the html in an iframe
+ $style = '';
+ if (!empty($arguments['width'])) {
+ $style .= 'width:' . intval($arguments['width']) . 'px;';
+ }
+ if (!empty($arguments['height'])) {
+ $style .= 'height:' . intval($arguments['height']) . 'px;';
+ }
+ $attrs = array_merge([
+ 'frameborder' => '0',
+ ], static::config()->get('sandboxed_iframe_attributes'));
+ $attrs['src'] = 'data:text/html;charset=utf-8,' . rawurlencode($html);
+ if (array_key_exists('style', $attrs)) {
+ $attrs['style'] .= ";$style";
+ $attrs['style'] = ltrim($attrs['style'], ';');
+ } else {
+ $attrs['style'] = $style;
+ }
+ $html = HTML::createTag('iframe', $attrs);
+ return $html;
+ }
+
+ /**
+ * Check if the domain is excluded from sandboxing based on config
+ */
+ private static function domainIsExcludedFromSandboxing(): bool
+ {
+ $domain = (string) parse_url(EmbedShortcodeProvider::$extractorUrl, PHP_URL_HOST);
+ $config = static::config()->get('domains_excluded_from_sandboxing');
+ foreach ($config as $excluded) {
+ if (!is_string($excluded)) {
+ throw new RuntimeException('domains_excluded_from_sandboxing must be an array of strings');
+ }
+ $excludedDomain = parse_url($excluded, PHP_URL_HOST);
+ if (!$excludedDomain) {
+ // Try adding a protocol so that parse_url can parse it
+ $excludedDomain = parse_url('http://' . $excluded, PHP_URL_HOST);
+ }
+ if (!$excludedDomain) {
+ throw new RuntimeException('Not a valid domain: ' . $excluded);
+ }
+ if (str_ends_with($domain, $excludedDomain)) {
+ return true;
+ }
+ }
+ return false;
+ }
}
diff --git a/tests/php/Core/XssSanitiserTest.php b/tests/php/Core/XssSanitiserTest.php
new file mode 100644
index 00000000000..0d1ef959447
--- /dev/null
+++ b/tests/php/Core/XssSanitiserTest.php
@@ -0,0 +1,463 @@
+ '',
+ 'expected' => '',
+ ],
+ [
+ 'input' => 'hello world',
+ 'expected' => 'hello world',
+ ],
+ [
+ 'input' => '<hello world>',
+ 'expected' => '<hello world>',
+ ],
+ [
+ 'input' => '< Hello',
+ 'expected' => ' Hello',
+ ],
+ [
+ 'input' => 'Lorem & Ipsum',
+ 'expected' => 'Lorem & Ipsum',
+ ],
+ // Unknown tag
+ [
+ 'input' => 'Lorem ipsum',
+ 'expected' => 'Lorem ipsum',
+ ],
+ // Scripts
+ [
+ 'input' => '',
+ 'expected' => 'alert(\'ok\');',
+ ],
+ [
+ 'input' => 'javascript:/*-->