Improve html validation
This commit is contained in:
parent
6eaa0ccc41
commit
97ae948618
|
|
@ -79,16 +79,141 @@ class PdfBuilder
|
|||
/**
 * Sanitise the document and return it serialised as an HTML string.
 *
 * The DOM held in $this->document is cleaned in place by cleanHtml() before
 * it is serialised, so the returned markup reflects the sanitised tree.
 *
 * @param bool $final Accepted for callers that still pass it; this method does not read it
 * @return string
 */
public function getCompiledHTML($final = false)
{
    // Sanitise first: cleanHtml() mutates the DOM that is serialised below.
    $this->cleanHtml();

    // Serialisation may leave "$" encoded as "%24" (presumably inside URL
    // attributes - confirm); turn every such sequence back into a literal "$".
    return str_replace('%24', '$', $this->document->saveHTML());
}
|
||||
|
||||
/**
 * Strip potentially dangerous markup from $this->document, in place.
 *
 * Walks every node below the document element and:
 *  - removes elements whose tag name is block-listed (iframe, form, object,
 *    embed, applet, audio, video, frame, frameset, base, svg);
 *  - removes every attribute whose name starts with "on" (event handlers);
 *  - removes data/href/meta/link attributes whose value references a blocked
 *    URL scheme or a loopback host;
 *  - removes src attributes under the same rules, except that data:image/*
 *    URLs are allowed;
 *  - removes style attributes that contain script-capable CSS constructs;
 *  - removes any other attribute whose value contains "expression",
 *    "javascript:", "vbscript:" or "livescript:".
 *
 * Returns immediately when there is no document or no document element.
 *
 * @return self
 *
 * @throws \RuntimeException when the walk fails; the document is replaced by
 *                           an empty DOMDocument before the exception is thrown
 */
private function cleanHtml(): self
{
    if (!$this->document || !$this->document->documentElement) {
        return $this;
    }

    $dangerous_elements = [
        'iframe', 'form', 'object', 'embed',
        'applet', 'audio', 'video',
        'frame', 'frameset', 'base', 'svg',
    ];

    // Attribute names whose value is checked against the strict URL rules.
    $url_attributes = ['data', 'href', 'meta', 'link'];

    // Decides whether one attribute must be dropped.
    // Both arguments are expected to be lower-cased already.
    $is_unsafe_attribute = static function (string $name, string $value) use ($url_attributes): bool {
        // Event handlers. The prefix test covers onclick, onload, onerror, ...
        // so no explicit list of handler names is needed.
        if (strpos($name, 'on') === 0) {
            return true;
        }

        if (in_array($name, $url_attributes, true)) {
            if (preg_match('/(javascript|data|file|ftp|jar|dict|gopher|ldap|smb|php|alert|prompt|confirm):|\/\/\/\/+|127\.0\.0\.1|localhost/i', $value)) {
                return true;
            }
        } elseif ($name === 'src') {
            // Same scheme rules as above, but "data:" is handled separately
            // so that inline images keep working.
            if (preg_match('/(javascript|file|ftp|jar|dict|gopher|ldap|smb|php):|\/\/\/\/+|127\.0\.0\.1|localhost/i', $value)) {
                return true;
            }

            // data: URLs are only acceptable for image payloads.
            if (strpos($value, 'data:') === 0 && !preg_match('/^data:image\//i', $value)) {
                return true;
            }

            // Loopback hosts in any notation.
            if (preg_match('/localhost|127\.|0\.0\.0\.0|::1|0:0:0:0:0:0:0:1/i', $value)) {
                return true;
            }
        } elseif ($name === 'style') {
            // "\bimport\b" still catches "@import" but no longer matches the
            // ubiquitous "!important", which used to wipe legitimate styles.
            // "behavior" / "expression" are matched with or without a colon.
            if (preg_match('/(javascript|vbscript):|url\s*\(|\bimport\b|eval\s*\(|-moz-binding|behavior|expression/i', $value)) {
                return true;
            }
        }

        // Catch-all for script expressions in any remaining attribute value.
        return (bool) preg_match('/expression|javascript:|vbscript:|livescript:/i', $value);
    };

    // Recursive walker: cleans the subtree first, then the node itself.
    $removeNodes = function ($node) use (&$removeNodes, $dangerous_elements, $is_unsafe_attribute) {
        if (!$node) {
            return;
        }

        // Snapshot the children so removals do not disturb the iteration.
        $children = $node->hasChildNodes() ? iterator_to_array($node->childNodes, false) : [];

        foreach ($children as $child) {
            $removeNodes($child);
        }

        // Text, comment and other non-element nodes carry nothing to clean.
        if (!$node instanceof \DOMElement) {
            return;
        }

        // Block-listed elements are dropped together with their subtree.
        if (in_array(strtolower($node->tagName), $dangerous_elements, true)) {
            if ($node->parentNode) {
                $node->parentNode->removeChild($node);
            }

            return;
        }

        // Collect first, remove afterwards, so the attribute map is not
        // modified while it is being iterated.
        $attributes_to_remove = [];

        foreach ($node->attributes as $attr) {
            // trim(): leading whitespace must not let " data:text/html" slip
            // past the anchored "data:" prefix check on src.
            if ($is_unsafe_attribute(strtolower($attr->name), strtolower(trim($attr->value)))) {
                $attributes_to_remove[] = $attr->name;
            }
        }

        foreach ($attributes_to_remove as $attr_name) {
            $node->removeAttribute($attr_name);
        }
    };

    try {
        $removeNodes($this->document->documentElement);
    } catch (\Throwable $e) {
        // \Throwable rather than \Exception: a TypeError or other \Error
        // raised mid-walk must also take the fail-closed path below.
        info('Error cleaning HTML: ' . $e->getMessage());

        // Clear the document so partially sanitised content is never emitted.
        $this->document = new \DOMDocument();

        // Generic message on purpose: the original error text is only passed to info().
        throw new \RuntimeException('HTML sanitization failed');
    }

    return $this;
}
|
||||
|
||||
/**
|
||||
* Generate the template
|
||||
*
|
||||
|
|
|
|||
|
|
@ -129,8 +129,140 @@ class PdfMaker
|
|||
*/
|
||||
public function getCompiledHTML($final = false)
{
    // Sanitise first: cleanHtml() mutates the DOM that is serialised below.
    // $final is accepted but not read here.
    $this->cleanHtml();

    // Serialisation may leave "$" encoded as "%24" (presumably inside URL
    // attributes - confirm); turn every such sequence back into a literal "$".
    return str_replace('%24', '$', $this->document->saveHTML());
}
|
||||
|
||||
|
||||
/**
 * Strip potentially dangerous markup from $this->document, in place.
 *
 * Walks every node below the document element and:
 *  - removes elements whose tag name is block-listed (iframe, form, object,
 *    embed, applet, audio, video, frame, frameset, base, svg);
 *  - removes every attribute whose name starts with "on" (event handlers);
 *  - removes data/href/meta/link attributes whose value references a blocked
 *    URL scheme or a loopback host;
 *  - removes src attributes under the same rules, except that data:image/*
 *    URLs are allowed;
 *  - removes style attributes that contain script-capable CSS constructs;
 *  - removes any other attribute whose value contains "expression",
 *    "javascript:", "vbscript:" or "livescript:".
 *
 * Returns immediately when there is no document or no document element.
 *
 * @return self
 *
 * @throws \RuntimeException when the walk fails; the document is replaced by
 *                           an empty DOMDocument before the exception is thrown
 */
private function cleanHtml(): self
{
    if (!$this->document || !$this->document->documentElement) {
        return $this;
    }

    $dangerous_elements = [
        'iframe', 'form', 'object', 'embed',
        'applet', 'audio', 'video',
        'frame', 'frameset', 'base', 'svg',
    ];

    // Attribute names whose value is checked against the strict URL rules.
    $url_attributes = ['data', 'href', 'meta', 'link'];

    // Decides whether one attribute must be dropped.
    // Both arguments are expected to be lower-cased already.
    $is_unsafe_attribute = static function (string $name, string $value) use ($url_attributes): bool {
        // Event handlers. The prefix test covers onclick, onload, onerror, ...
        // so no explicit list of handler names is needed.
        if (strpos($name, 'on') === 0) {
            return true;
        }

        if (in_array($name, $url_attributes, true)) {
            if (preg_match('/(javascript|data|file|ftp|jar|dict|gopher|ldap|smb|php|alert|prompt|confirm):|\/\/\/\/+|127\.0\.0\.1|localhost/i', $value)) {
                return true;
            }
        } elseif ($name === 'src') {
            // Same scheme rules as above, but "data:" is handled separately
            // so that inline images keep working.
            if (preg_match('/(javascript|file|ftp|jar|dict|gopher|ldap|smb|php):|\/\/\/\/+|127\.0\.0\.1|localhost/i', $value)) {
                return true;
            }

            // data: URLs are only acceptable for image payloads.
            if (strpos($value, 'data:') === 0 && !preg_match('/^data:image\//i', $value)) {
                return true;
            }

            // Loopback hosts in any notation.
            if (preg_match('/localhost|127\.|0\.0\.0\.0|::1|0:0:0:0:0:0:0:1/i', $value)) {
                return true;
            }
        } elseif ($name === 'style') {
            // "\bimport\b" still catches "@import" but no longer matches the
            // ubiquitous "!important", which used to wipe legitimate styles.
            // "behavior" / "expression" are matched with or without a colon.
            if (preg_match('/(javascript|vbscript):|url\s*\(|\bimport\b|eval\s*\(|-moz-binding|behavior|expression/i', $value)) {
                return true;
            }
        }

        // Catch-all for script expressions in any remaining attribute value.
        return (bool) preg_match('/expression|javascript:|vbscript:|livescript:/i', $value);
    };

    // Recursive walker: cleans the subtree first, then the node itself.
    $removeNodes = function ($node) use (&$removeNodes, $dangerous_elements, $is_unsafe_attribute) {
        if (!$node) {
            return;
        }

        // Snapshot the children so removals do not disturb the iteration.
        $children = $node->hasChildNodes() ? iterator_to_array($node->childNodes, false) : [];

        foreach ($children as $child) {
            $removeNodes($child);
        }

        // Text, comment and other non-element nodes carry nothing to clean.
        if (!$node instanceof \DOMElement) {
            return;
        }

        // Block-listed elements are dropped together with their subtree.
        if (in_array(strtolower($node->tagName), $dangerous_elements, true)) {
            if ($node->parentNode) {
                $node->parentNode->removeChild($node);
            }

            return;
        }

        // Collect first, remove afterwards, so the attribute map is not
        // modified while it is being iterated.
        $attributes_to_remove = [];

        foreach ($node->attributes as $attr) {
            // trim(): leading whitespace must not let " data:text/html" slip
            // past the anchored "data:" prefix check on src.
            if ($is_unsafe_attribute(strtolower($attr->name), strtolower(trim($attr->value)))) {
                $attributes_to_remove[] = $attr->name;
            }
        }

        foreach ($attributes_to_remove as $attr_name) {
            $node->removeAttribute($attr_name);
        }
    };

    try {
        $removeNodes($this->document->documentElement);
    } catch (\Throwable $e) {
        // \Throwable rather than \Exception: a TypeError or other \Error
        // raised mid-walk must also take the fail-closed path below.
        info('Error cleaning HTML: ' . $e->getMessage());

        // Clear the document so partially sanitised content is never emitted.
        $this->document = new \DOMDocument();

        // Generic message on purpose: the original error text is only passed to info().
        throw new \RuntimeException('HTML sanitization failed');
    }

    return $this;
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@
|
|||
"aws/aws-sdk-php": "^3.319",
|
||||
"babenkoivan/elastic-scout-driver": "^4.0",
|
||||
"bacon/bacon-qr-code": "^2.0",
|
||||
"beganovich/snappdf": "dev-master",
|
||||
"beganovich/snappdf": "^5.0",
|
||||
"braintree/braintree_php": "^6.0",
|
||||
"btcpayserver/btcpayserver-greenfield-php": "^2.6",
|
||||
"checkout/checkout-sdk-php": "^3.0",
|
||||
|
|
@ -218,10 +218,6 @@
|
|||
{
|
||||
"type": "vcs",
|
||||
"url": "https://github.com/beganovich/php-ansible"
|
||||
},
|
||||
{
|
||||
"type": "vcs",
|
||||
"url": "https://github.com/turbo124/snappdf"
|
||||
}
|
||||
],
|
||||
"minimum-stability": "dev",
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue