import CleanCSS from 'clean-css'; import { decodeHTMLStrict, decodeHTML } from 'entities'; import RelateURL from 'relateurl'; import { minify as terser } from 'terser'; import { HTMLParser, endTag } from './htmlparser.js'; import TokenChain from './tokenchain.js'; import { replaceAsync } from './utils.js'; function trimWhitespace(str) { return str && str.replace(/^[ \n\r\t\f]+/, '').replace(/[ \n\r\t\f]+$/, ''); } function collapseWhitespaceAll(str) { // Non-breaking space is specifically handled inside the replacer function here: return str && str.replace(/[ \n\r\t\f\xA0]+/g, function (spaces) { return spaces === '\t' ? '\t' : spaces.replace(/(^|\xA0+)[^\xA0]+/g, '$1 '); }); } function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) { let lineBreakBefore = ''; let lineBreakAfter = ''; if (options.preserveLineBreaks) { str = str.replace(/^[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*/, function () { lineBreakBefore = '\n'; return ''; }).replace(/[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*$/, function () { lineBreakAfter = '\n'; return ''; }); } if (trimLeft) { // Non-breaking space is specifically handled inside the replacer function here: str = str.replace(/^[ \n\r\t\f\xA0]+/, function (spaces) { const conservative = !lineBreakBefore && options.conservativeCollapse; if (conservative && spaces === '\t') { return '\t'; } return spaces.replace(/^[^\xA0]+/, '').replace(/(\xA0+)[^\xA0]+/g, '$1 ') || (conservative ? ' ' : ''); }); } if (trimRight) { // Non-breaking space is specifically handled inside the replacer function here: str = str.replace(/[ \n\r\t\f\xA0]+$/, function (spaces) { const conservative = !lineBreakAfter && options.conservativeCollapse; if (conservative && spaces === '\t') { return '\t'; } return spaces.replace(/[^\xA0]+(\xA0+)/g, ' $1').replace(/[^\xA0]+$/, '') || (conservative ? ' ' : ''); }); } if (collapseAll) { // strip non space whitespace then compress spaces to one str = collapseWhitespaceAll(str); } return lineBreakBefore + str + lineBreakAfter; } // non-empty tags that will maintain whitespace around them const inlineTags = new Set(['a', 'abbr', 'acronym', 'b', 'bdi', 'bdo', 'big', 'button', 'cite', 'code', 'del', 'dfn', 'em', 'font', 'i', 'ins', 'kbd', 'label', 'mark', 'math', 'nobr', 'object', 'q', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'select', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time', 'tt', 'u', 'var']); // non-empty tags that will maintain whitespace within them const inlineTextTags = new Set(['a', 'abbr', 'acronym', 'b', 'big', 'del', 'em', 'font', 'i', 'ins', 'kbd', 'mark', 'nobr', 'rp', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', 'time', 'tt', 'u', 'var']); // self-closing tags that will maintain whitespace around them const selfClosingInlineTags = new Set(['comment', 'img', 'input', 'wbr']); function collapseWhitespaceSmart(str, prevTag, nextTag, options) { let trimLeft = prevTag && !selfClosingInlineTags.has(prevTag); if (trimLeft && !options.collapseInlineTagWhitespace) { trimLeft = prevTag.charAt(0) === '/' ? !inlineTags.has(prevTag.slice(1)) : !inlineTextTags.has(prevTag); } let trimRight = nextTag && !selfClosingInlineTags.has(nextTag); if (trimRight && !options.collapseInlineTagWhitespace) { trimRight = nextTag.charAt(0) === '/' ? !inlineTextTags.has(nextTag.slice(1)) : !inlineTags.has(nextTag); } return collapseWhitespace(str, options, trimLeft, trimRight, prevTag && nextTag); } function isConditionalComment(text) { return /^\[if\s[^\]]+]|\[endif]$/.test(text); } function isIgnoredComment(text, options) { for (let i = 0, len = options.ignoreCustomComments.length; i < len; i++) { if (options.ignoreCustomComments[i].test(text)) { return true; } } return false; } function isEventAttribute(attrName, options) { const patterns = options.customEventAttributes; if (patterns) { for (let i = patterns.length; i--;) { if (patterns[i].test(attrName)) { return true; } } return false; } return /^on[a-z]{3,}$/.test(attrName); } function canRemoveAttributeQuotes(value) { // https://mathiasbynens.be/notes/unquoted-attribute-values return /^[^ \t\n\f\r"'`=<>]+$/.test(value); } function attributesInclude(attributes, attribute) { for (let i = attributes.length; i--;) { if (attributes[i].name.toLowerCase() === attribute) { return true; } } return false; } function isAttributeRedundant(tag, attrName, attrValue, attrs) { attrValue = attrValue ? trimWhitespace(attrValue.toLowerCase()) : ''; return ( (tag === 'script' && attrName === 'language' && attrValue === 'javascript') || (tag === 'form' && attrName === 'method' && attrValue === 'get') || (tag === 'input' && attrName === 'type' && attrValue === 'text') || (tag === 'script' && attrName === 'charset' && !attributesInclude(attrs, 'src')) || (tag === 'a' && attrName === 'name' && attributesInclude(attrs, 'id')) || (tag === 'area' && attrName === 'shape' && attrValue === 'rect') ); } // https://mathiasbynens.be/demo/javascript-mime-type // https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type const executableScriptsMimetypes = new Set([ 'text/javascript', 'text/ecmascript', 'text/jscript', 'application/javascript', 'application/x-javascript', 'application/ecmascript', 'module' ]); const keepScriptsMimetypes = new Set([ 'module' ]); function isScriptTypeAttribute(attrValue = '') { attrValue = trimWhitespace(attrValue.split(/;/, 2)[0]).toLowerCase(); return attrValue === '' || executableScriptsMimetypes.has(attrValue); } function keepScriptTypeAttribute(attrValue = '') { attrValue = trimWhitespace(attrValue.split(/;/, 2)[0]).toLowerCase(); return keepScriptsMimetypes.has(attrValue); } function isExecutableScript(tag, attrs) { if (tag !== 'script') { return false; } for (let i = 0, len = attrs.length; i < len; i++) { const attrName = attrs[i].name.toLowerCase(); if (attrName === 'type') { return isScriptTypeAttribute(attrs[i].value); } } return true; } function isStyleLinkTypeAttribute(attrValue = '') { attrValue = trimWhitespace(attrValue).toLowerCase(); return attrValue === '' || attrValue === 'text/css'; } function isStyleSheet(tag, attrs) { if (tag !== 'style') { return false; } for (let i = 0, len = attrs.length; i < len; i++) { const attrName = attrs[i].name.toLowerCase(); if (attrName === 'type') { return isStyleLinkTypeAttribute(attrs[i].value); } } return true; } const isSimpleBoolean = new Set(['allowfullscreen', 'async', 'autofocus', 'autoplay', 'checked', 'compact', 'controls', 'declare', 'default', 'defaultchecked', 'defaultmuted', 'defaultselected', 'defer', 'disabled', 'enabled', 'formnovalidate', 'hidden', 'indeterminate', 'inert', 'ismap', 'itemscope', 'loop', 'multiple', 'muted', 'nohref', 'noresize', 'noshade', 'novalidate', 'nowrap', 'open', 'pauseonexit', 'readonly', 'required', 'reversed', 'scoped', 'seamless', 'selected', 'sortable', 'truespeed', 'typemustmatch', 'visible']); const isBooleanValue = new Set(['true', 'false']); function isBooleanAttribute(attrName, attrValue) { return isSimpleBoolean.has(attrName) || (attrName === 'draggable' && !isBooleanValue.has(attrValue)); } function isUriTypeAttribute(attrName, tag) { return ( (/^(?:a|area|link|base)$/.test(tag) && attrName === 'href') || (tag === 'img' && /^(?:src|longdesc|usemap)$/.test(attrName)) || (tag === 'object' && /^(?:classid|codebase|data|usemap)$/.test(attrName)) || (tag === 'q' && attrName === 'cite') || (tag === 'blockquote' && attrName === 'cite') || ((tag === 'ins' || tag === 'del') && attrName === 'cite') || (tag === 'form' && attrName === 'action') || (tag === 'input' && (attrName === 'src' || attrName === 'usemap')) || (tag === 'head' && attrName === 'profile') || (tag === 'script' && (attrName === 'src' || attrName === 'for')) ); } function isNumberTypeAttribute(attrName, tag) { return ( (/^(?:a|area|object|button)$/.test(tag) && attrName === 'tabindex') || (tag === 'input' && (attrName === 'maxlength' || attrName === 'tabindex')) || (tag === 'select' && (attrName === 'size' || attrName === 'tabindex')) || (tag === 'textarea' && /^(?:rows|cols|tabindex)$/.test(attrName)) || (tag === 'colgroup' && attrName === 'span') || (tag === 'col' && attrName === 'span') || ((tag === 'th' || tag === 'td') && (attrName === 'rowspan' || attrName === 'colspan')) ); } function isLinkType(tag, attrs, value) { if (tag !== 'link') { return false; } for (let i = 0, len = attrs.length; i < len; i++) { if (attrs[i].name === 'rel' && attrs[i].value === value) { return true; } } } function isMediaQuery(tag, attrs, attrName) { return attrName === 'media' && (isLinkType(tag, attrs, 'stylesheet') || isStyleSheet(tag, attrs)); } const srcsetTags = new Set(['img', 'source']); function isSrcset(attrName, tag) { return attrName === 'srcset' && srcsetTags.has(tag); } async function cleanAttributeValue(tag, attrName, attrValue, options, attrs) { if (isEventAttribute(attrName, options)) { attrValue = trimWhitespace(attrValue).replace(/^javascript:\s*/i, ''); return options.minifyJS(attrValue, true); } else if (attrName === 'class') { attrValue = trimWhitespace(attrValue); if (options.sortClassName) { attrValue = options.sortClassName(attrValue); } else { attrValue = collapseWhitespaceAll(attrValue); } return attrValue; } else if (isUriTypeAttribute(attrName, tag)) { attrValue = trimWhitespace(attrValue); return isLinkType(tag, attrs, 'canonical') ? attrValue : options.minifyURLs(attrValue); } else if (isNumberTypeAttribute(attrName, tag)) { return trimWhitespace(attrValue); } else if (attrName === 'style') { attrValue = trimWhitespace(attrValue); if (attrValue) { if (/;$/.test(attrValue) && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) { attrValue = attrValue.replace(/\s*;$/, ';'); } attrValue = await options.minifyCSS(attrValue, 'inline'); } return attrValue; } else if (isSrcset(attrName, tag)) { // https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset attrValue = trimWhitespace(attrValue).split(/\s+,\s*|\s*,\s+/).map(function (candidate) { let url = candidate; let descriptor = ''; const match = candidate.match(/\s+([1-9][0-9]*w|[0-9]+(?:\.[0-9]+)?x)$/); if (match) { url = url.slice(0, -match[0].length); const num = +match[1].slice(0, -1); const suffix = match[1].slice(-1); if (num !== 1 || suffix !== 'x') { descriptor = ' ' + num + suffix; } } return options.minifyURLs(url) + descriptor; }).join(', '); } else if (isMetaViewport(tag, attrs) && attrName === 'content') { attrValue = attrValue.replace(/\s+/g, '').replace(/[0-9]+\.[0-9]+/g, function (numString) { // "0.90000" -> "0.9" // "1.0" -> "1" // "1.0001" -> "1.0001" (unchanged) return (+numString).toString(); }); } else if (isContentSecurityPolicy(tag, attrs) && attrName.toLowerCase() === 'content') { return collapseWhitespaceAll(attrValue); } else if (options.customAttrCollapse && options.customAttrCollapse.test(attrName)) { attrValue = trimWhitespace(attrValue.replace(/ ?[\n\r]+ ?/g, '').replace(/\s{2,}/g, options.conservativeCollapse ? ' ' : '')); } else if (tag === 'script' && attrName === 'type') { attrValue = trimWhitespace(attrValue.replace(/\s*;\s*/g, ';')); } else if (isMediaQuery(tag, attrs, attrName)) { attrValue = trimWhitespace(attrValue); return options.minifyCSS(attrValue, 'media'); } return attrValue; } function isMetaViewport(tag, attrs) { if (tag !== 'meta') { return false; } for (let i = 0, len = attrs.length; i < len; i++) { if (attrs[i].name === 'name' && attrs[i].value === 'viewport') { return true; } } } function isContentSecurityPolicy(tag, attrs) { if (tag !== 'meta') { return false; } for (let i = 0, len = attrs.length; i < len; i++) { if (attrs[i].name.toLowerCase() === 'http-equiv' && attrs[i].value.toLowerCase() === 'content-security-policy') { return true; } } } function ignoreCSS(id) { return '/* clean-css ignore:start */' + id + '/* clean-css ignore:end */'; } // Wrap CSS declarations for CleanCSS > 3.x // See https://github.com/jakubpawlowicz/clean-css/issues/418 function wrapCSS(text, type) { switch (type) { case 'inline': return '*{' + text + '}'; case 'media': return '@media ' + text + '{a{top:0}}'; default: return text; } } function unwrapCSS(text, type) { let matches; switch (type) { case 'inline': matches = text.match(/^\*\{([\s\S]*)\}$/); break; case 'media': matches = text.match(/^@media ([\s\S]*?)\s*{[\s\S]*}$/); break; } return matches ? matches[1] : text; } async function cleanConditionalComment(comment, options) { return options.processConditionalComments ? await replaceAsync(comment, /^(\[if\s[^\]]+]>)([\s\S]*?)( -1) { return await minifyHTML(text, options); } } return text; } // Tag omission rules from https://html.spec.whatwg.org/multipage/syntax.html#optional-tags // with the following deviations: // - retain if followed by