import CleanCSS from 'clean-css';
import { decodeHTMLStrict, decodeHTML } from 'entities';
import RelateURL from 'relateurl';
import { minify as terser } from 'terser';
import { HTMLParser, endTag } from './htmlparser.js';
import TokenChain from './tokenchain.js';
import { replaceAsync } from './utils.js';
/**
 * Remove leading and trailing runs of space, LF, CR, tab and form feed.
 * Non-breaking spaces (U+00A0) are left alone.
 *
 * @param {string} str - Text to trim.
 * @returns {string} The trimmed text; a falsy `str` is returned unchanged.
 */
function trimWhitespace(str) {
  if (!str) {
    return str;
  }
  const withoutLeading = str.replace(/^[ \n\r\t\f]+/, '');
  return withoutLeading.replace(/[ \n\r\t\f]+$/, '');
}
/**
 * Collapse every run of whitespace in `str` to a single space.
 *
 * A run consisting of exactly one tab is kept as a tab. Non-breaking spaces
 * inside a run are preserved; only the ordinary whitespace around them is
 * reduced to one space.
 *
 * @param {string} str - Text to collapse.
 * @returns {string} The collapsed text; a falsy `str` is returned unchanged.
 */
function collapseWhitespaceAll(str) {
  if (!str) {
    return str;
  }
  // U+00A0 is part of the matched run so it can be handled by the replacer.
  return str.replace(/[ \n\r\t\f\xA0]+/g, (run) => {
    if (run === '\t') {
      return '\t';
    }
    return run.replace(/(^|\xA0+)[^\xA0]+/g, '$1 ');
  });
}
/**
 * Trim and/or collapse whitespace in a text chunk.
 *
 * @param {string} str - Text to process.
 * @param {object} options - Reads `preserveLineBreaks` and `conservativeCollapse`.
 * @param {*} trimLeft - Truthy to strip leading whitespace.
 * @param {*} trimRight - Truthy to strip trailing whitespace.
 * @param {*} collapseAll - Truthy to also collapse interior whitespace runs.
 * @returns {string} The processed text.
 */
function collapseWhitespace(str, options, trimLeft, trimRight, collapseAll) {
  let leadingBreak = '';
  let trailingBreak = '';

  if (options.preserveLineBreaks) {
    // Peel off edge whitespace that contains a line break and remember that a
    // single '\n' has to be put back on that side at the end.
    str = str.replace(/^[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*/, () => {
      leadingBreak = '\n';
      return '';
    });
    str = str.replace(/[ \n\r\t\f]*?[\n\r][ \n\r\t\f]*$/, () => {
      trailingBreak = '\n';
      return '';
    });
  }

  if (trimLeft) {
    // U+00A0 is part of the matched run so it can be kept by the replacer.
    str = str.replace(/^[ \n\r\t\f\xA0]+/, (run) => {
      // A preserved line break already separates the text, so no extra space is kept.
      const keepOne = !leadingBreak && options.conservativeCollapse;
      if (keepOne && run === '\t') {
        return '\t';
      }
      const reduced = run.replace(/^[^\xA0]+/, '').replace(/(\xA0+)[^\xA0]+/g, '$1 ');
      if (reduced) {
        return reduced;
      }
      return keepOne ? ' ' : '';
    });
  }

  if (trimRight) {
    // Mirror image of the leading case.
    str = str.replace(/[ \n\r\t\f\xA0]+$/, (run) => {
      const keepOne = !trailingBreak && options.conservativeCollapse;
      if (keepOne && run === '\t') {
        return '\t';
      }
      const reduced = run.replace(/[^\xA0]+(\xA0+)/g, ' $1').replace(/[^\xA0]+$/, '');
      if (reduced) {
        return reduced;
      }
      return keepOne ? ' ' : '';
    });
  }

  if (collapseAll) {
    // Reduce every remaining interior whitespace run.
    str = collapseWhitespaceAll(str);
  }

  return leadingBreak + str + trailingBreak;
}
// Non-empty tags around which whitespace is kept.
const inlineTags = new Set([
  'a', 'abbr', 'acronym', 'b', 'bdi', 'bdo', 'big', 'button', 'cite', 'code',
  'del', 'dfn', 'em', 'font', 'i', 'ins', 'kbd', 'label', 'mark', 'math',
  'nobr', 'object', 'q', 'rp', 'rt', 'rtc', 'ruby', 's', 'samp', 'select',
  'small', 'span', 'strike', 'strong', 'sub', 'sup', 'svg', 'textarea', 'time',
  'tt', 'u', 'var'
]);

// Non-empty tags inside which whitespace is kept.
const inlineTextTags = new Set([
  'a', 'abbr', 'acronym', 'b', 'big', 'del', 'em', 'font', 'i', 'ins', 'kbd',
  'mark', 'nobr', 'rp', 's', 'samp', 'small', 'span', 'strike', 'strong',
  'sub', 'sup', 'time', 'tt', 'u', 'var'
]);

// Self-closing tags around which whitespace is kept.
const selfClosingInlineTags = new Set([
  'comment',
  'img',
  'input',
  'wbr'
]);
/**
 * Collapse whitespace in a text node, choosing which sides to trim from the
 * neighbouring tags.
 *
 * A neighbour written as '/name' is treated as a closing tag. No trimming
 * happens on a side with no neighbour or next to a self-closing inline tag.
 * Unless `options.collapseInlineTagWhitespace` is set, whitespace next to
 * inline tags is kept as well.
 *
 * @param {string} str - Text to process.
 * @param {string} [prevTag] - Tag before the text.
 * @param {string} [nextTag] - Tag after the text.
 * @param {object} options - Minifier options, also forwarded to collapseWhitespace.
 * @returns {string} The processed text.
 */
function collapseWhitespaceSmart(str, prevTag, nextTag, options) {
  let trimLeft = prevTag && !selfClosingInlineTags.has(prevTag);
  if (trimLeft && !options.collapseInlineTagWhitespace) {
    if (prevTag.charAt(0) === '/') {
      // Text follows a closing tag.
      trimLeft = !inlineTags.has(prevTag.slice(1));
    } else {
      // Text is the first content of an opening tag.
      trimLeft = !inlineTextTags.has(prevTag);
    }
  }

  let trimRight = nextTag && !selfClosingInlineTags.has(nextTag);
  if (trimRight && !options.collapseInlineTagWhitespace) {
    if (nextTag.charAt(0) === '/') {
      // Text is the last content before a closing tag.
      trimRight = !inlineTextTags.has(nextTag.slice(1));
    } else {
      // Text precedes an opening tag.
      trimRight = !inlineTags.has(nextTag);
    }
  }

  // Interior whitespace is collapsed only when tags exist on both sides.
  const collapseAll = prevTag && nextTag;
  return collapseWhitespace(str, options, trimLeft, trimRight, collapseAll);
}
/**
 * Tell whether comment text looks like an IE conditional comment: it either
 * starts with "[if ...]" or ends with "[endif]".
 *
 * @param {string} text - Comment content.
 * @returns {boolean}
 */
function isConditionalComment(text) {
  const conditionalMarker = /^\[if\s[^\]]+]|\[endif]$/;
  return conditionalMarker.test(text);
}
/**
 * Tell whether comment text matches any pattern in
 * `options.ignoreCustomComments`.
 *
 * @param {string} text - Comment content.
 * @param {object} options - Must carry an `ignoreCustomComments` array of RegExp.
 * @returns {boolean}
 */
function isIgnoredComment(text, options) {
  return options.ignoreCustomComments.some((pattern) => pattern.test(text));
}
/**
 * Tell whether an attribute name denotes an inline event handler.
 *
 * When `options.customEventAttributes` is set, only those patterns decide,
 * and they are tested from last to first. Otherwise the name must be "on"
 * followed by at least three lowercase letters.
 *
 * @param {string} attrName - Attribute name.
 * @param {object} options - May carry a `customEventAttributes` array of RegExp.
 * @returns {boolean}
 */
function isEventAttribute(attrName, options) {
  const customPatterns = options.customEventAttributes;
  if (!customPatterns) {
    return /^on[a-z]{3,}$/.test(attrName);
  }
  let index = customPatterns.length;
  while (index-- > 0) {
    if (customPatterns[index].test(attrName)) {
      return true;
    }
  }
  return false;
}
/**
 * Tell whether an attribute value can be written without quotes: it must be
 * non-empty and contain no whitespace, quotes, backtick, "=", "<" or ">".
 * See https://mathiasbynens.be/notes/unquoted-attribute-values
 *
 * @param {string} value - Attribute value.
 * @returns {boolean}
 */
function canRemoveAttributeQuotes(value) {
  const unquotedSafe = /^[^ \t\n\f\r"'`=<>]+$/;
  return unquotedSafe.test(value);
}
/**
 * Tell whether a list of attributes contains one with the given name.
 * Names in the list are lowercased before comparison; `attribute` is used as given.
 *
 * @param {Array<{name: string}>} attributes - Attribute records.
 * @param {string} attribute - Name to look for (expected in lowercase).
 * @returns {boolean}
 */
function attributesInclude(attributes, attribute) {
  return attributes.some((candidate) => candidate.name.toLowerCase() === attribute);
}
/**
 * Tell whether an attribute only restates a default and can be dropped.
 *
 * Covers: script language=javascript, form method=get, input type=text,
 * script charset without src, a name when an id is present, and
 * area shape=rect. Values are compared trimmed and lowercased.
 *
 * @param {string} tag - Element name.
 * @param {string} attrName - Attribute name.
 * @param {string} [attrValue] - Attribute value.
 * @param {Array<{name: string}>} attrs - All attributes of the element.
 * @returns {boolean}
 */
function isAttributeRedundant(tag, attrName, attrValue, attrs) {
  const normalized = attrValue ? trimWhitespace(attrValue.toLowerCase()) : '';

  switch (tag) {
    case 'script':
      if (attrName === 'language') {
        return normalized === 'javascript';
      }
      if (attrName === 'charset') {
        // charset only matters for external scripts.
        return !attributesInclude(attrs, 'src');
      }
      return false;
    case 'form':
      return attrName === 'method' && normalized === 'get';
    case 'input':
      return attrName === 'type' && normalized === 'text';
    case 'a':
      return attrName === 'name' && attributesInclude(attrs, 'id');
    case 'area':
      return attrName === 'shape' && normalized === 'rect';
    default:
      return false;
  }
}
// `type` values of a <script> whose content is treated as executable JavaScript.
// https://mathiasbynens.be/demo/javascript-mime-type
// https://developer.mozilla.org/en/docs/Web/HTML/Element/script#attr-type
const executableScriptsMimetypes = new Set([
  'text/javascript', 'text/ecmascript', 'text/jscript',
  'application/javascript', 'application/x-javascript', 'application/ecmascript',
  'module'
]);

// Subset of the above accepted by keepScriptTypeAttribute.
const keepScriptsMimetypes = new Set(['module']);
/**
 * Tell whether a script `type` value denotes executable JavaScript.
 * Only the part before the first ";" is considered, trimmed and lowercased.
 * An empty type counts as executable.
 *
 * @param {string} [attrValue=''] - Raw `type` attribute value.
 * @returns {boolean}
 */
function isScriptTypeAttribute(attrValue = '') {
  const [essence] = attrValue.split(/;/, 2);
  const mimeType = trimWhitespace(essence).toLowerCase();
  if (mimeType === '') {
    return true;
  }
  return executableScriptsMimetypes.has(mimeType);
}
/**
 * Tell whether a script `type` value is listed in keepScriptsMimetypes.
 * Only the part before the first ";" is considered, trimmed and lowercased.
 *
 * @param {string} [attrValue=''] - Raw `type` attribute value.
 * @returns {boolean}
 */
function keepScriptTypeAttribute(attrValue = '') {
  const [essence] = attrValue.split(/;/, 2);
  const mimeType = trimWhitespace(essence).toLowerCase();
  return keepScriptsMimetypes.has(mimeType);
}
/**
 * Tell whether an element is a <script> holding executable JavaScript.
 * The first `type` attribute (name matched case-insensitively) decides;
 * a script without a `type` attribute is executable.
 *
 * @param {string} tag - Element name.
 * @param {Array<{name: string, value?: string}>} attrs - Element attributes.
 * @returns {boolean}
 */
function isExecutableScript(tag, attrs) {
  if (tag !== 'script') {
    return false;
  }
  const typeAttr = attrs.find((attr) => attr.name.toLowerCase() === 'type');
  if (!typeAttr) {
    return true;
  }
  return isScriptTypeAttribute(typeAttr.value);
}
/**
 * Tell whether a `type` value denotes CSS: empty, or "text/css" after
 * trimming and lowercasing.
 *
 * @param {string} [attrValue=''] - Raw `type` attribute value.
 * @returns {boolean}
 */
function isStyleLinkTypeAttribute(attrValue = '') {
  const normalized = trimWhitespace(attrValue).toLowerCase();
  if (normalized === '') {
    return true;
  }
  return normalized === 'text/css';
}
/**
 * Tell whether an element is a <style> holding CSS.
 * The first `type` attribute (name matched case-insensitively) decides;
 * a style element without a `type` attribute counts as CSS.
 *
 * @param {string} tag - Element name.
 * @param {Array<{name: string, value?: string}>} attrs - Element attributes.
 * @returns {boolean}
 */
function isStyleSheet(tag, attrs) {
  if (tag !== 'style') {
    return false;
  }
  const typeAttr = attrs.find((attr) => attr.name.toLowerCase() === 'type');
  if (!typeAttr) {
    return true;
  }
  return isStyleLinkTypeAttribute(typeAttr.value);
}
// Attribute names that are always treated as boolean.
const isSimpleBoolean = new Set([
  'allowfullscreen', 'async', 'autofocus', 'autoplay', 'checked', 'compact',
  'controls', 'declare', 'default', 'defaultchecked', 'defaultmuted',
  'defaultselected', 'defer', 'disabled', 'enabled', 'formnovalidate',
  'hidden', 'indeterminate', 'inert', 'ismap', 'itemscope', 'loop',
  'multiple', 'muted', 'nohref', 'noresize', 'noshade', 'novalidate',
  'nowrap', 'open', 'pauseonexit', 'readonly', 'required', 'reversed',
  'scoped', 'seamless', 'selected', 'sortable', 'truespeed', 'typemustmatch',
  'visible'
]);

// Explicit values that stop `draggable` from being treated as boolean.
const isBooleanValue = new Set(['true', 'false']);

/**
 * Tell whether an attribute is treated as boolean: either its name is in
 * isSimpleBoolean, or it is `draggable` with a value other than the exact
 * strings "true" / "false".
 *
 * @param {string} attrName - Attribute name.
 * @param {string} [attrValue] - Attribute value.
 * @returns {boolean}
 */
function isBooleanAttribute(attrName, attrValue) {
  if (isSimpleBoolean.has(attrName)) {
    return true;
  }
  if (attrName !== 'draggable') {
    return false;
  }
  return !isBooleanValue.has(attrValue);
}
/**
 * Tell whether the given attribute of the given element holds a URI.
 *
 * @param {string} attrName - Attribute name.
 * @param {string} tag - Element name.
 * @returns {boolean}
 */
function isUriTypeAttribute(attrName, tag) {
  switch (tag) {
    case 'img':
      return /^(?:src|longdesc|usemap)$/.test(attrName);
    case 'object':
      return /^(?:classid|codebase|data|usemap)$/.test(attrName);
    case 'q':
    case 'blockquote':
    case 'ins':
    case 'del':
      return attrName === 'cite';
    case 'form':
      return attrName === 'action';
    case 'input':
      return attrName === 'src' || attrName === 'usemap';
    case 'head':
      return attrName === 'profile';
    case 'script':
      return attrName === 'src' || attrName === 'for';
    default:
      // Remaining case: href on a, area, link and base.
      return /^(?:a|area|link|base)$/.test(tag) && attrName === 'href';
  }
}
/**
 * Tell whether the given attribute of the given element holds a number.
 *
 * @param {string} attrName - Attribute name.
 * @param {string} tag - Element name.
 * @returns {boolean}
 */
function isNumberTypeAttribute(attrName, tag) {
  switch (tag) {
    case 'input':
      return attrName === 'maxlength' || attrName === 'tabindex';
    case 'select':
      return attrName === 'size' || attrName === 'tabindex';
    case 'textarea':
      return /^(?:rows|cols|tabindex)$/.test(attrName);
    case 'colgroup':
    case 'col':
      return attrName === 'span';
    case 'th':
    case 'td':
      return attrName === 'rowspan' || attrName === 'colspan';
    default:
      // Remaining case: tabindex on a, area, object and button.
      return /^(?:a|area|object|button)$/.test(tag) && attrName === 'tabindex';
  }
}
/**
 * Tell whether an element is a <link> whose `rel` attribute equals `value`.
 *
 * Always returns a boolean (previously the no-match path fell off the end and
 * yielded `undefined`). The attribute name is matched case-insensitively,
 * like the other attribute lookups in this file; the value is compared exactly.
 *
 * @param {string} tag - Element name.
 * @param {Array<{name: string, value?: string}>} attrs - Element attributes.
 * @param {string} value - Expected `rel` value, e.g. 'stylesheet'.
 * @returns {boolean}
 */
function isLinkType(tag, attrs, value) {
  if (tag !== 'link') {
    return false;
  }
  return attrs.some((attr) => attr.name.toLowerCase() === 'rel' && attr.value === value);
}
/**
 * Tell whether an attribute is the `media` attribute of a stylesheet <link>
 * or of a <style> element holding CSS.
 *
 * @param {string} tag - Element name.
 * @param {Array<{name: string, value?: string}>} attrs - Element attributes.
 * @param {string} attrName - Attribute name being inspected.
 * @returns {boolean} Falsy when the attribute is not such a media query.
 */
function isMediaQuery(tag, attrs, attrName) {
  if (attrName !== 'media') {
    return false;
  }
  return isLinkType(tag, attrs, 'stylesheet') || isStyleSheet(tag, attrs);
}
// Elements whose `srcset` attribute is processed.
const srcsetTags = new Set([
  'img',
  'source'
]);

/**
 * Tell whether an attribute is `srcset` on one of the elements in srcsetTags.
 *
 * @param {string} attrName - Attribute name.
 * @param {string} tag - Element name.
 * @returns {boolean}
 */
function isSrcset(attrName, tag) {
  if (attrName !== 'srcset') {
    return false;
  }
  return srcsetTags.has(tag);
}
/**
 * Normalise or minify one attribute value according to what kind of attribute it is.
 *
 * The first matching rule wins, in this order: event handler, `class`, URI
 * attribute, numeric attribute, `style`, `srcset`, viewport `content`,
 * Content-Security-Policy `content`, `options.customAttrCollapse`,
 * script `type`, media query. Anything else is returned untouched.
 *
 * @param {string} tag - Element name.
 * @param {string} attrName - Attribute name.
 * @param {string} attrValue - Raw attribute value.
 * @param {object} options - Minifier options; uses `minifyJS`, `minifyCSS`,
 *   `minifyURLs`, `sortClassName`, `customAttrCollapse`, `conservativeCollapse`
 *   and whatever isEventAttribute reads.
 * @param {Array<{name: string, value?: string}>} attrs - All attributes of the element.
 * @returns {Promise<string>} The cleaned value.
 */
async function cleanAttributeValue(tag, attrName, attrValue, options, attrs) {
  if (isEventAttribute(attrName, options)) {
    // Inline handlers: drop a leading "javascript:" label before minifying.
    attrValue = trimWhitespace(attrValue).replace(/^javascript:\s*/i, '');
    return options.minifyJS(attrValue, true);
  } else if (attrName === 'class') {
    attrValue = trimWhitespace(attrValue);
    if (options.sortClassName) {
      attrValue = options.sortClassName(attrValue);
    } else {
      attrValue = collapseWhitespaceAll(attrValue);
    }
    return attrValue;
  } else if (isUriTypeAttribute(attrName, tag)) {
    attrValue = trimWhitespace(attrValue);
    // Canonical links are only trimmed, never passed to the URL minifier.
    return isLinkType(tag, attrs, 'canonical') ? attrValue : options.minifyURLs(attrValue);
  } else if (isNumberTypeAttribute(attrName, tag)) {
    return trimWhitespace(attrValue);
  } else if (attrName === 'style') {
    attrValue = trimWhitespace(attrValue);
    if (attrValue) {
      // Tidy whitespace before a trailing ";" unless that ";" terminates a
      // character reference such as "&amp;" or "&#38;".
      if (/;$/.test(attrValue) && !/&#?[0-9a-zA-Z]+;$/.test(attrValue)) {
        attrValue = attrValue.replace(/\s*;$/, ';');
      }
      attrValue = await options.minifyCSS(attrValue, 'inline');
    }
    return attrValue;
  } else if (isSrcset(attrName, tag)) {
    // https://html.spec.whatwg.org/multipage/embedded-content.html#attr-img-srcset
    attrValue = trimWhitespace(attrValue).split(/\s+,\s*|\s*,\s+/).map(function (candidate) {
      let url = candidate;
      let descriptor = '';
      const match = candidate.match(/\s+([1-9][0-9]*w|[0-9]+(?:\.[0-9]+)?x)$/);
      if (match) {
        url = url.slice(0, -match[0].length);
        // Re-serialise the number and drop the descriptor entirely when it is "1x".
        const num = +match[1].slice(0, -1);
        const suffix = match[1].slice(-1);
        if (num !== 1 || suffix !== 'x') {
          descriptor = ' ' + num + suffix;
        }
      }
      return options.minifyURLs(url) + descriptor;
    }).join(', ');
  } else if (isMetaViewport(tag, attrs) && attrName === 'content') {
    attrValue = attrValue.replace(/\s+/g, '').replace(/[0-9]+\.[0-9]+/g, function (numString) {
      // "0.90000" -> "0.9"
      // "1.0" -> "1"
      // "1.0001" -> "1.0001" (unchanged)
      return (+numString).toString();
    });
  } else if (isContentSecurityPolicy(tag, attrs) && attrName.toLowerCase() === 'content') {
    return collapseWhitespaceAll(attrValue);
  } else if (options.customAttrCollapse && options.customAttrCollapse.test(attrName)) {
    attrValue = trimWhitespace(attrValue.replace(/ ?[\n\r]+ ?/g, '').replace(/\s{2,}/g, options.conservativeCollapse ? ' ' : ''));
  } else if (tag === 'script' && attrName === 'type') {
    attrValue = trimWhitespace(attrValue.replace(/\s*;\s*/g, ';'));
  } else if (isMediaQuery(tag, attrs, attrName)) {
    attrValue = trimWhitespace(attrValue);
    return options.minifyCSS(attrValue, 'media');
  }
  return attrValue;
}
/**
 * Tell whether an element is <meta name="viewport">.
 *
 * Always returns a boolean (previously the no-match path fell off the end and
 * yielded `undefined`). The attribute name is matched case-insensitively,
 * like the other attribute lookups in this file; the value is compared exactly.
 *
 * @param {string} tag - Element name.
 * @param {Array<{name: string, value?: string}>} attrs - Element attributes.
 * @returns {boolean}
 */
function isMetaViewport(tag, attrs) {
  if (tag !== 'meta') {
    return false;
  }
  return attrs.some((attr) => attr.name.toLowerCase() === 'name' && attr.value === 'viewport');
}
/**
 * Tell whether an element is <meta http-equiv="Content-Security-Policy">.
 * Both the attribute name and its value are matched case-insensitively.
 *
 * Always returns a boolean, and tolerates an `http-equiv` attribute that has
 * no value (previously that threw a TypeError on `value.toLowerCase()`).
 *
 * @param {string} tag - Element name.
 * @param {Array<{name: string, value?: string}>} attrs - Element attributes.
 * @returns {boolean}
 */
function isContentSecurityPolicy(tag, attrs) {
  if (tag !== 'meta') {
    return false;
  }
  return attrs.some((attr) =>
    attr.name.toLowerCase() === 'http-equiv' &&
    typeof attr.value === 'string' &&
    attr.value.toLowerCase() === 'content-security-policy'
  );
}
// Surround `id` with the clean-css "ignore:start" / "ignore:end" marker
// comments and return the resulting string.
function ignoreCSS(id) {
  const opening = '/* clean-css ignore:start */';
  const closing = '/* clean-css ignore:end */';
  return opening + id + closing;
}
// Wrap a CSS fragment so that CleanCSS > 3.x sees a complete stylesheet.
// See https://github.com/jakubpawlowicz/clean-css/issues/418
//   'inline' - declarations are placed inside a universal-selector rule
//   'media'  - a media query list is given a dummy rule body
//   other    - the text is returned as is
function wrapCSS(text, type) {
  if (type === 'inline') {
    return '*{' + text + '}';
  }
  if (type === 'media') {
    return '@media ' + text + '{a{top:0}}';
  }
  return text;
}
// Undo wrapCSS-style wrapping: for 'inline' return what sits between the
// outer "*{" and "}", for 'media' return the query list between "@media " and
// the first "{". If the text does not have the expected shape, or for any
// other type, the text is returned unchanged.
function unwrapCSS(text, type) {
  let pattern;
  if (type === 'inline') {
    pattern = /^\*\{([\s\S]*)\}$/;
  } else if (type === 'media') {
    pattern = /^@media ([\s\S]*?)\s*{[\s\S]*}$/;
  } else {
    return text;
  }
  const found = text.match(pattern);
  return found ? found[1] : text;
}
// NOTE(review): this span was unparseable as received. Everything between the
// third capture group of the conditional-comment pattern and the "> -1" test
// of the following function had been stripped, apparently because it looked
// like markup. The two functions below are rebuilt from the surviving
// fragments; confirm against version control before relying on the details
// marked "reconstructed".

// Minify the markup wrapped by a downlevel-hidden conditional comment
// ("[if ...]> ... <![endif]") when options.processConditionalComments is set;
// otherwise return the comment unchanged.
async function cleanConditionalComment(comment, options) {
  if (!options.processConditionalComments) {
    return comment;
  }
  // Reconstructed: the closing "(<!\[endif])$" group, the callback signature
  // and the third `true` argument to minifyHTML — TODO confirm.
  return await replaceAsync(comment, /^(\[if\s[^\]]+]>)([\s\S]*?)(<!\[endif])$/, async function (match, prefix, text, suffix) {
    return prefix + await minifyHTML(text, options, true) + suffix;
  });
}

// Minify the content of a <script> as HTML when one of its `type` attributes
// has a value listed in options.processScripts; otherwise return the text
// unchanged.
// Reconstructed: the function name, its parameter list and the attribute-name
// check — TODO confirm. Presumably callers make sure options.processScripts
// is set before calling; verify against the call site.
async function processScript(text, options, currentAttrs) {
  for (let i = 0, len = currentAttrs.length; i < len; i++) {
    if (currentAttrs[i].name.toLowerCase() === 'type' &&
      options.processScripts.indexOf(currentAttrs[i].value) > -1) {
      return await minifyHTML(text, options);
    }
  }
  return text;
}
// Tag omission rules from https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
// with the following deviations:
// - retain <body> if followed by <noscript>