Skip to content

Restore html validation and enable it on Mostly Static Pages (CFM-62) #297

Merged
merged 1 commit into from Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
28 changes: 6 additions & 22 deletions app/src/Lib/Traits/ValidationTrait.php
Expand Up @@ -33,8 +33,6 @@
use Cake\Database\Schema\TableSchemaInterface;
use Cake\ORM\TableRegistry;
use Cake\Validation\Validator;
use Symfony\Component\HtmlSanitizer\HtmlSanitizer;
use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;

trait ValidationTrait {
/**
Expand Down Expand Up @@ -240,28 +238,14 @@ public function validateInput($value, array $context) {
if(!empty($context['type'])) {
switch($context['type']) {
case 'html':
// We are accepting HTML input. Pass it through the Symfony HTML Sanitizer to
// disallow DOM elements like <script> and <style>.
$htmlSanitizer = new HtmlSanitizer(
// Allow all elements from the W3C Sanitizer API. This is more permissive than "allowSafeElements()".
// See: https://github.com/symfony/symfony/blob/7.2/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php
(new HtmlSanitizerConfig())->allowStaticElements()
);
$sanitizedValue = $htmlSanitizer->sanitize($value);

// Compare $value and $sanitizedValue to see if anything changed. Because white space and closing slashes
// can be significantly altered during sanitization, normalize the strings prior to comparison.
// (Unfortunately, the HtmlSanitizer does not generate a report on what it changed, which would be better.)
$valueNormalized = preg_replace(['/\s+/','/\//'], '', $value);
$sanitizedValueNormalized = preg_replace(['/\s+/','/\//'], '', $sanitizedValue);
// XXX Note: stripping forward slashes allows us to ignore the differences between <br> and <br/>
// (for example), but it also allows malformed tags such as <br////> or <div/></div> to get through.

if($valueNormalized !== $sanitizedValueNormalized) {
// Disallowed HTML is in the input, so throw an error.
// We are accepting HTML input. We mostly pass it through as-is and ensure that the
// output is properly sanitized. However, we still do a very rudimentary check for script tags.
// (An informational note should be placed below these fields as well.)
$lowercaseVal = strtolower($value);
if(str_contains($lowercaseVal, '<script')) {
// Disallowed HTML is in the input, so warn the user.
return __d('error', 'input.invalid.html');
}

return true;
default:
// We use h() (htmlspecialchars) for consistency with the views.
Expand Down
2 changes: 1 addition & 1 deletion app/src/Model/Table/MostlyStaticPagesTable.php
Expand Up @@ -336,7 +336,7 @@ public function validationDefault(Validator $validator): Validator {
$validator->notEmptyString('context');

$validator->add('body', [
'filter' => ['rule' => ['validateInput'],
'filter' => ['rule' => ['validateInput',['type' => 'html']],
'provider' => 'table']
]);
$validator->allowEmptyString('body');
Expand Down