This commit is contained in:
2024-03-22 03:47:51 +05:30
parent 8bcf3d211e
commit 89819f6fe2
28440 changed files with 3211033 additions and 2 deletions

View File

@@ -0,0 +1,15 @@
export {gfmAutolinkLiteral} from './lib/syntax.js'
export {gfmAutolinkLiteralHtml} from './lib/html.js'
declare module 'micromark-util-types' {
interface Token {
_gfmAutolinkLiteralWalkedInto?: boolean
}
interface TokenTypeMap {
literalAutolink: 'literalAutolink'
literalAutolinkEmail: 'literalAutolinkEmail'
literalAutolinkHttp: 'literalAutolinkHttp'
literalAutolinkWww: 'literalAutolinkWww'
}
}

View File

@@ -0,0 +1,2 @@
export {gfmAutolinkLiteral} from './lib/syntax.js'
export {gfmAutolinkLiteralHtml} from './lib/html.js'

View File

@@ -0,0 +1,13 @@
/**
* Create an HTML extension for `micromark` to support GitHub autolink literal
* when serializing to HTML.
*
* @returns {HtmlExtension}
* Extension for `micromark` that can be passed in `htmlExtensions` to
* support GitHub autolink literal when serializing to HTML.
*/
export function gfmAutolinkLiteralHtml(): HtmlExtension
export type CompileContext = import('micromark-util-types').CompileContext
export type Handle = import('micromark-util-types').Handle
export type HtmlExtension = import('micromark-util-types').HtmlExtension
export type Token = import('micromark-util-types').Token

View File

@@ -0,0 +1,59 @@
/**
* @typedef {import('micromark-util-types').CompileContext} CompileContext
* @typedef {import('micromark-util-types').Handle} Handle
* @typedef {import('micromark-util-types').HtmlExtension} HtmlExtension
* @typedef {import('micromark-util-types').Token} Token
*/
import {sanitizeUri} from 'micromark-util-sanitize-uri'
/**
* Create an HTML extension for `micromark` to support GitHub autolink literal
* when serializing to HTML.
*
* @returns {HtmlExtension}
* Extension for `micromark` that can be passed in `htmlExtensions` to
* support GitHub autolink literal when serializing to HTML.
*/
export function gfmAutolinkLiteralHtml() {
return {
exit: {literalAutolinkEmail, literalAutolinkHttp, literalAutolinkWww}
}
}
/**
* @this {CompileContext}
* @type {Handle}
*/
function literalAutolinkWww(token) {
anchorFromToken.call(this, token, 'http://')
}
/**
* @this {CompileContext}
* @type {Handle}
*/
function literalAutolinkEmail(token) {
anchorFromToken.call(this, token, 'mailto:')
}
/**
* @this {CompileContext}
* @type {Handle}
*/
function literalAutolinkHttp(token) {
anchorFromToken.call(this, token)
}
/**
* @this CompileContext
* @param {Token} token
* @param {string | null | undefined} [protocol]
* @returns {undefined}
*/
function anchorFromToken(token, protocol) {
const url = this.sliceSerialize(token)
this.tag('<a href="' + sanitizeUri((protocol || '') + url) + '">')
this.raw(this.encode(url))
this.tag('</a>')
}

View File

@@ -0,0 +1,17 @@
/**
* Create an extension for `micromark` to support GitHub autolink literal
* syntax.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable GFM
* autolink literal syntax.
*/
export function gfmAutolinkLiteral(): Extension
export type Code = import('micromark-util-types').Code
export type ConstructRecord = import('micromark-util-types').ConstructRecord
export type Event = import('micromark-util-types').Event
export type Extension = import('micromark-util-types').Extension
export type Previous = import('micromark-util-types').Previous
export type State = import('micromark-util-types').State
export type TokenizeContext = import('micromark-util-types').TokenizeContext
export type Tokenizer = import('micromark-util-types').Tokenizer

View File

@@ -0,0 +1,979 @@
/**
* @typedef {import('micromark-util-types').Code} Code
* @typedef {import('micromark-util-types').ConstructRecord} ConstructRecord
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').Previous} Previous
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
import {
asciiAlpha,
asciiAlphanumeric,
asciiControl,
markdownLineEndingOrSpace,
unicodePunctuation,
unicodeWhitespace
} from 'micromark-util-character'
import {codes} from 'micromark-util-symbol'
const wwwPrefix = {tokenize: tokenizeWwwPrefix, partial: true}
const domain = {tokenize: tokenizeDomain, partial: true}
const path = {tokenize: tokenizePath, partial: true}
const trail = {tokenize: tokenizeTrail, partial: true}
const emailDomainDotTrail = {
tokenize: tokenizeEmailDomainDotTrail,
partial: true
}
const wwwAutolink = {tokenize: tokenizeWwwAutolink, previous: previousWww}
const protocolAutolink = {
tokenize: tokenizeProtocolAutolink,
previous: previousProtocol
}
const emailAutolink = {tokenize: tokenizeEmailAutolink, previous: previousEmail}
/** @type {ConstructRecord} */
const text = {}
/**
* Create an extension for `micromark` to support GitHub autolink literal
* syntax.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable GFM
* autolink literal syntax.
*/
export function gfmAutolinkLiteral() {
return {text}
}
/** @type {Code} */
let code = codes.digit0
// Add alphanumerics.
while (code < codes.leftCurlyBrace) {
text[code] = emailAutolink
code++
if (code === codes.colon) code = codes.uppercaseA
else if (code === codes.leftSquareBracket) code = codes.lowercaseA
}
text[codes.plusSign] = emailAutolink
text[codes.dash] = emailAutolink
text[codes.dot] = emailAutolink
text[codes.underscore] = emailAutolink
text[codes.uppercaseH] = [emailAutolink, protocolAutolink]
text[codes.lowercaseH] = [emailAutolink, protocolAutolink]
text[codes.uppercaseW] = [emailAutolink, wwwAutolink]
text[codes.lowercaseW] = [emailAutolink, wwwAutolink]
// To do: perform email autolink literals on events, afterwards.
// Thats where `markdown-rs` and `cmark-gfm` perform it.
// It should look for `@`, then for atext backwards, and then for a label
// forwards.
// To do: `mailto:`, `xmpp:` protocol as prefix.
/**
* Email autolink literal.
*
* ```markdown
* > | a contact@example.org b
* ^^^^^^^^^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeEmailAutolink(effects, ok, nok) {
const self = this
/** @type {boolean | undefined} */
let dot
/** @type {boolean} */
let data
return start
/**
* Start of email autolink literal.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function start(code) {
if (
!gfmAtext(code) ||
!previousEmail.call(self, self.previous) ||
previousUnbalanced(self.events)
) {
return nok(code)
}
effects.enter('literalAutolink')
effects.enter('literalAutolinkEmail')
return atext(code)
}
/**
* In email atext.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function atext(code) {
if (gfmAtext(code)) {
effects.consume(code)
return atext
}
if (code === codes.atSign) {
effects.consume(code)
return emailDomain
}
return nok(code)
}
/**
* In email domain.
*
* The reference code is a bit overly complex as it handles the `@`, of which
* there may be just one.
* Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L318>
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function emailDomain(code) {
// Dot followed by alphanumerical (not `-` or `_`).
if (code === codes.dot) {
return effects.check(
emailDomainDotTrail,
emailDomainAfter,
emailDomainDot
)(code)
}
// Alphanumerical, `-`, and `_`.
if (
code === codes.dash ||
code === codes.underscore ||
asciiAlphanumeric(code)
) {
data = true
effects.consume(code)
return emailDomain
}
// To do: `/` if xmpp.
// Note: normally wed truncate trailing punctuation from the link.
// However, email autolink literals cannot contain any of those markers,
// except for `.`, but that can only occur if it isnt trailing.
// So we can ignore truncating!
return emailDomainAfter(code)
}
/**
* In email domain, on dot that is not a trail.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function emailDomainDot(code) {
effects.consume(code)
dot = true
return emailDomain
}
/**
* After email domain.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function emailDomainAfter(code) {
// Domain must not be empty, must include a dot, and must end in alphabetical.
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L332>.
if (data && dot && asciiAlpha(self.previous)) {
effects.exit('literalAutolinkEmail')
effects.exit('literalAutolink')
return ok(code)
}
return nok(code)
}
}
/**
* `www` autolink literal.
*
* ```markdown
* > | a www.example.org b
* ^^^^^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeWwwAutolink(effects, ok, nok) {
const self = this
return wwwStart
/**
* Start of www autolink literal.
*
* ```markdown
* > | www.example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function wwwStart(code) {
if (
(code !== codes.uppercaseW && code !== codes.lowercaseW) ||
!previousWww.call(self, self.previous) ||
previousUnbalanced(self.events)
) {
return nok(code)
}
effects.enter('literalAutolink')
effects.enter('literalAutolinkWww')
// Note: we *check*, so we can discard the `www.` we parsed.
// If it worked, we consider it as a part of the domain.
return effects.check(
wwwPrefix,
effects.attempt(domain, effects.attempt(path, wwwAfter), nok),
nok
)(code)
}
/**
* After a www autolink literal.
*
* ```markdown
* > | www.example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function wwwAfter(code) {
effects.exit('literalAutolinkWww')
effects.exit('literalAutolink')
return ok(code)
}
}
/**
* Protocol autolink literal.
*
* ```markdown
* > | a https://example.org b
* ^^^^^^^^^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeProtocolAutolink(effects, ok, nok) {
const self = this
let buffer = ''
let seen = false
return protocolStart
/**
* Start of protocol autolink literal.
*
* ```markdown
* > | https://example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function protocolStart(code) {
if (
(code === codes.uppercaseH || code === codes.lowercaseH) &&
previousProtocol.call(self, self.previous) &&
!previousUnbalanced(self.events)
) {
effects.enter('literalAutolink')
effects.enter('literalAutolinkHttp')
buffer += String.fromCodePoint(code)
effects.consume(code)
return protocolPrefixInside
}
return nok(code)
}
/**
* In protocol.
*
* ```markdown
* > | https://example.com/a?b#c
* ^^^^^
* ```
*
* @type {State}
*/
function protocolPrefixInside(code) {
// `5` is size of `https`
if (asciiAlpha(code) && buffer.length < 5) {
// @ts-expect-error: definitely number.
buffer += String.fromCodePoint(code)
effects.consume(code)
return protocolPrefixInside
}
if (code === codes.colon) {
const protocol = buffer.toLowerCase()
if (protocol === 'http' || protocol === 'https') {
effects.consume(code)
return protocolSlashesInside
}
}
return nok(code)
}
/**
* In slashes.
*
* ```markdown
* > | https://example.com/a?b#c
* ^^
* ```
*
* @type {State}
*/
function protocolSlashesInside(code) {
if (code === codes.slash) {
effects.consume(code)
if (seen) {
return afterProtocol
}
seen = true
return protocolSlashesInside
}
return nok(code)
}
/**
* After protocol, before domain.
*
* ```markdown
* > | https://example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function afterProtocol(code) {
// To do: this is different from `markdown-rs`:
// https://github.com/wooorm/markdown-rs/blob/b3a921c761309ae00a51fe348d8a43adbc54b518/src/construct/gfm_autolink_literal.rs#L172-L182
return code === codes.eof ||
asciiControl(code) ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code) ||
unicodePunctuation(code)
? nok(code)
: effects.attempt(domain, effects.attempt(path, protocolAfter), nok)(code)
}
/**
* After a protocol autolink literal.
*
* ```markdown
* > | https://example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function protocolAfter(code) {
effects.exit('literalAutolinkHttp')
effects.exit('literalAutolink')
return ok(code)
}
}
/**
* `www` prefix.
*
* ```markdown
* > | a www.example.org b
* ^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeWwwPrefix(effects, ok, nok) {
let size = 0
return wwwPrefixInside
/**
* In www prefix.
*
* ```markdown
* > | www.example.com
* ^^^^
* ```
*
* @type {State}
*/
function wwwPrefixInside(code) {
if ((code === codes.uppercaseW || code === codes.lowercaseW) && size < 3) {
size++
effects.consume(code)
return wwwPrefixInside
}
if (code === codes.dot && size === 3) {
effects.consume(code)
return wwwPrefixAfter
}
return nok(code)
}
/**
* After www prefix.
*
* ```markdown
* > | www.example.com
* ^
* ```
*
* @type {State}
*/
function wwwPrefixAfter(code) {
// If there is *anything*, we can link.
return code === codes.eof ? nok(code) : ok(code)
}
}
/**
* Domain.
*
* ```markdown
* > | a https://example.org b
* ^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeDomain(effects, ok, nok) {
/** @type {boolean | undefined} */
let underscoreInLastSegment
/** @type {boolean | undefined} */
let underscoreInLastLastSegment
/** @type {boolean | undefined} */
let seen
return domainInside
/**
* In domain.
*
* ```markdown
* > | https://example.com/a
* ^^^^^^^^^^^
* ```
*
* @type {State}
*/
function domainInside(code) {
// Check whether this marker, which is a trailing punctuation
// marker, optionally followed by more trailing markers, and then
// followed by an end.
if (code === codes.dot || code === codes.underscore) {
return effects.check(trail, domainAfter, domainAtPunctuation)(code)
}
// GH documents that only alphanumerics (other than `-`, `.`, and `_`) can
// occur, which sounds like ASCII only, but they also support `www.點看.com`,
// so thats Unicode.
// Instead of some new production for Unicode alphanumerics, markdown
// already has that for Unicode punctuation and whitespace, so use those.
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
if (
code === codes.eof ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code) ||
(code !== codes.dash && unicodePunctuation(code))
) {
return domainAfter(code)
}
seen = true
effects.consume(code)
return domainInside
}
/**
* In domain, at potential trailing punctuation, that was not trailing.
*
* ```markdown
* > | https://example.com
* ^
* ```
*
* @type {State}
*/
function domainAtPunctuation(code) {
// There is an underscore in the last segment of the domain
if (code === codes.underscore) {
underscoreInLastSegment = true
}
// Otherwise, its a `.`: save the last segment underscore in the
// penultimate segment slot.
else {
underscoreInLastLastSegment = underscoreInLastSegment
underscoreInLastSegment = undefined
}
effects.consume(code)
return domainInside
}
/**
* After domain.
*
* ```markdown
* > | https://example.com/a
* ^
* ```
*
* @type {State} */
function domainAfter(code) {
// Note: thats GH says a dot is needed, but its not true:
// <https://github.com/github/cmark-gfm/issues/279>
if (underscoreInLastLastSegment || underscoreInLastSegment || !seen) {
return nok(code)
}
return ok(code)
}
}
/**
* Path.
*
* ```markdown
* > | a https://example.org/stuff b
* ^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizePath(effects, ok) {
let sizeOpen = 0
let sizeClose = 0
return pathInside
/**
* In path.
*
* ```markdown
* > | https://example.com/a
* ^^
* ```
*
* @type {State}
*/
function pathInside(code) {
if (code === codes.leftParenthesis) {
sizeOpen++
effects.consume(code)
return pathInside
}
// To do: `markdown-rs` also needs this.
// If this is a paren, and there are less closings than openings,
// we dont check for a trail.
if (code === codes.rightParenthesis && sizeClose < sizeOpen) {
return pathAtPunctuation(code)
}
// Check whether this trailing punctuation marker is optionally
// followed by more trailing markers, and then followed
// by an end.
if (
code === codes.exclamationMark ||
code === codes.quotationMark ||
code === codes.ampersand ||
code === codes.apostrophe ||
code === codes.rightParenthesis ||
code === codes.asterisk ||
code === codes.comma ||
code === codes.dot ||
code === codes.colon ||
code === codes.semicolon ||
code === codes.lessThan ||
code === codes.questionMark ||
code === codes.rightSquareBracket ||
code === codes.underscore ||
code === codes.tilde
) {
return effects.check(trail, ok, pathAtPunctuation)(code)
}
if (
code === codes.eof ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code)
) {
return ok(code)
}
effects.consume(code)
return pathInside
}
/**
* In path, at potential trailing punctuation, that was not trailing.
*
* ```markdown
* > | https://example.com/a"b
* ^
* ```
*
* @type {State}
*/
function pathAtPunctuation(code) {
// Count closing parens.
if (code === codes.rightParenthesis) {
sizeClose++
}
effects.consume(code)
return pathInside
}
}
/**
* Trail.
*
* This calls `ok` if this *is* the trail, followed by an end, which means
* the entire trail is not part of the link.
* It calls `nok` if this *is* part of the link.
*
* ```markdown
* > | https://example.com").
* ^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeTrail(effects, ok, nok) {
return trail
/**
* In trail of domain or path.
*
* ```markdown
* > | https://example.com").
* ^
* ```
*
* @type {State}
*/
function trail(code) {
// Regular trailing punctuation.
if (
code === codes.exclamationMark ||
code === codes.quotationMark ||
code === codes.apostrophe ||
code === codes.rightParenthesis ||
code === codes.asterisk ||
code === codes.comma ||
code === codes.dot ||
code === codes.colon ||
code === codes.semicolon ||
code === codes.questionMark ||
code === codes.underscore ||
code === codes.tilde
) {
effects.consume(code)
return trail
}
// `&` followed by one or more alphabeticals and then a `;`, is
// as a whole considered as trailing punctuation.
// In all other cases, it is considered as continuation of the URL.
if (code === codes.ampersand) {
effects.consume(code)
return trailCharRefStart
}
// Needed because we allow literals after `[`, as we fix:
// <https://github.com/github/cmark-gfm/issues/278>.
// Check that it is not followed by `(` or `[`.
if (code === codes.rightSquareBracket) {
effects.consume(code)
return trailBracketAfter
}
if (
// `<` is an end.
code === codes.lessThan ||
// So is whitespace.
code === codes.eof ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code)
) {
return ok(code)
}
return nok(code)
}
/**
* In trail, after `]`.
*
* > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug.
* > See end of <https://github.com/github/cmark-gfm/issues/278> for more.
*
* ```markdown
* > | https://example.com](
* ^
* ```
*
* @type {State}
*/
function trailBracketAfter(code) {
// Whitespace or something that could start a resource or reference is the end.
// Switch back to trail otherwise.
if (
code === codes.eof ||
code === codes.leftParenthesis ||
code === codes.leftSquareBracket ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code)
) {
return ok(code)
}
return trail(code)
}
/**
* In character-reference like trail, after `&`.
*
* ```markdown
* > | https://example.com&amp;).
* ^
* ```
*
* @type {State}
*/
function trailCharRefStart(code) {
// When non-alpha, its not a trail.
return asciiAlpha(code) ? trailCharRefInside(code) : nok(code)
}
/**
* In character-reference like trail.
*
* ```markdown
* > | https://example.com&amp;).
* ^
* ```
*
* @type {State}
*/
function trailCharRefInside(code) {
// Switch back to trail if this is well-formed.
if (code === codes.semicolon) {
effects.consume(code)
return trail
}
if (asciiAlpha(code)) {
effects.consume(code)
return trailCharRefInside
}
// Its not a trail.
return nok(code)
}
}
/**
* Dot in email domain trail.
*
* This calls `ok` if this *is* the trail, followed by an end, which means
* the trail is not part of the link.
* It calls `nok` if this *is* part of the link.
*
* ```markdown
* > | contact@example.org.
* ^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeEmailDomainDotTrail(effects, ok, nok) {
return start
/**
* Dot.
*
* ```markdown
* > | contact@example.org.
* ^ ^
* ```
*
* @type {State}
*/
function start(code) {
// Must be dot.
effects.consume(code)
return after
}
/**
* After dot.
*
* ```markdown
* > | contact@example.org.
* ^ ^
* ```
*
* @type {State}
*/
function after(code) {
// Not a trail if alphanumeric.
return asciiAlphanumeric(code) ? nok(code) : ok(code)
}
}
/**
* See:
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
*
* @type {Previous}
*/
function previousWww(code) {
return (
code === codes.eof ||
code === codes.leftParenthesis ||
code === codes.asterisk ||
code === codes.underscore ||
code === codes.leftSquareBracket ||
code === codes.rightSquareBracket ||
code === codes.tilde ||
markdownLineEndingOrSpace(code)
)
}
/**
* See:
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>.
*
* @type {Previous}
*/
function previousProtocol(code) {
return !asciiAlpha(code)
}
/**
* @this {TokenizeContext}
* @type {Previous}
*/
function previousEmail(code) {
// Do not allow a slash “inside” atext.
// The reference code is a bit weird, but thats what it results in.
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>.
// Other than slash, every preceding character is allowed.
return !(code === codes.slash || gfmAtext(code))
}
/**
* @param {Code} code
* @returns {boolean}
*/
function gfmAtext(code) {
return (
code === codes.plusSign ||
code === codes.dash ||
code === codes.dot ||
code === codes.underscore ||
asciiAlphanumeric(code)
)
}
/**
* @param {Array<Event>} events
* @returns {boolean}
*/
function previousUnbalanced(events) {
let index = events.length
let result = false
while (index--) {
const token = events[index][1]
if (
(token.type === 'labelLink' || token.type === 'labelImage') &&
!token._balanced
) {
result = true
break
}
// If weve seen this token, and it was marked as not having any unbalanced
// bracket before it, we can exit.
if (token._gfmAutolinkLiteralWalkedInto) {
result = false
break
}
}
if (events.length > 0 && !result) {
// Mark the last token as “walked into” w/o finding
// anything.
events[events.length - 1][1]._gfmAutolinkLiteralWalkedInto = true
}
return result
}

View File

@@ -0,0 +1,15 @@
export {gfmAutolinkLiteral} from './lib/syntax.js'
export {gfmAutolinkLiteralHtml} from './lib/html.js'
declare module 'micromark-util-types' {
interface Token {
_gfmAutolinkLiteralWalkedInto?: boolean
}
interface TokenTypeMap {
literalAutolink: 'literalAutolink'
literalAutolinkEmail: 'literalAutolinkEmail'
literalAutolinkHttp: 'literalAutolinkHttp'
literalAutolinkWww: 'literalAutolinkWww'
}
}

View File

@@ -0,0 +1,2 @@
export {gfmAutolinkLiteral} from './lib/syntax.js'
export {gfmAutolinkLiteralHtml} from './lib/html.js'

View File

@@ -0,0 +1,13 @@
/**
* Create an HTML extension for `micromark` to support GitHub autolink literal
* when serializing to HTML.
*
* @returns {HtmlExtension}
* Extension for `micromark` that can be passed in `htmlExtensions` to
* support GitHub autolink literal when serializing to HTML.
*/
export function gfmAutolinkLiteralHtml(): HtmlExtension
export type CompileContext = import('micromark-util-types').CompileContext
export type Handle = import('micromark-util-types').Handle
export type HtmlExtension = import('micromark-util-types').HtmlExtension
export type Token = import('micromark-util-types').Token

View File

@@ -0,0 +1,63 @@
/**
* @typedef {import('micromark-util-types').CompileContext} CompileContext
* @typedef {import('micromark-util-types').Handle} Handle
* @typedef {import('micromark-util-types').HtmlExtension} HtmlExtension
* @typedef {import('micromark-util-types').Token} Token
*/
import {sanitizeUri} from 'micromark-util-sanitize-uri'
/**
* Create an HTML extension for `micromark` to support GitHub autolink literal
* when serializing to HTML.
*
* @returns {HtmlExtension}
* Extension for `micromark` that can be passed in `htmlExtensions` to
* support GitHub autolink literal when serializing to HTML.
*/
export function gfmAutolinkLiteralHtml() {
return {
exit: {
literalAutolinkEmail,
literalAutolinkHttp,
literalAutolinkWww
}
}
}
/**
* @this {CompileContext}
* @type {Handle}
*/
function literalAutolinkWww(token) {
anchorFromToken.call(this, token, 'http://')
}
/**
* @this {CompileContext}
* @type {Handle}
*/
function literalAutolinkEmail(token) {
anchorFromToken.call(this, token, 'mailto:')
}
/**
* @this {CompileContext}
* @type {Handle}
*/
function literalAutolinkHttp(token) {
anchorFromToken.call(this, token)
}
/**
* @this CompileContext
* @param {Token} token
* @param {string | null | undefined} [protocol]
* @returns {undefined}
*/
function anchorFromToken(token, protocol) {
const url = this.sliceSerialize(token)
this.tag('<a href="' + sanitizeUri((protocol || '') + url) + '">')
this.raw(this.encode(url))
this.tag('</a>')
}

View File

@@ -0,0 +1,17 @@
/**
* Create an extension for `micromark` to support GitHub autolink literal
* syntax.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable GFM
* autolink literal syntax.
*/
export function gfmAutolinkLiteral(): Extension
export type Code = import('micromark-util-types').Code
export type ConstructRecord = import('micromark-util-types').ConstructRecord
export type Event = import('micromark-util-types').Event
export type Extension = import('micromark-util-types').Extension
export type Previous = import('micromark-util-types').Previous
export type State = import('micromark-util-types').State
export type TokenizeContext = import('micromark-util-types').TokenizeContext
export type Tokenizer = import('micromark-util-types').Tokenizer

View File

@@ -0,0 +1,957 @@
/**
* @typedef {import('micromark-util-types').Code} Code
* @typedef {import('micromark-util-types').ConstructRecord} ConstructRecord
* @typedef {import('micromark-util-types').Event} Event
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').Previous} Previous
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
import {
asciiAlpha,
asciiAlphanumeric,
asciiControl,
markdownLineEndingOrSpace,
unicodePunctuation,
unicodeWhitespace
} from 'micromark-util-character'
const wwwPrefix = {
tokenize: tokenizeWwwPrefix,
partial: true
}
const domain = {
tokenize: tokenizeDomain,
partial: true
}
const path = {
tokenize: tokenizePath,
partial: true
}
const trail = {
tokenize: tokenizeTrail,
partial: true
}
const emailDomainDotTrail = {
tokenize: tokenizeEmailDomainDotTrail,
partial: true
}
const wwwAutolink = {
tokenize: tokenizeWwwAutolink,
previous: previousWww
}
const protocolAutolink = {
tokenize: tokenizeProtocolAutolink,
previous: previousProtocol
}
const emailAutolink = {
tokenize: tokenizeEmailAutolink,
previous: previousEmail
}
/** @type {ConstructRecord} */
const text = {}
/**
* Create an extension for `micromark` to support GitHub autolink literal
* syntax.
*
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable GFM
* autolink literal syntax.
*/
export function gfmAutolinkLiteral() {
return {
text
}
}
/** @type {Code} */
let code = 48
// Add alphanumerics.
while (code < 123) {
text[code] = emailAutolink
code++
if (code === 58) code = 65
else if (code === 91) code = 97
}
text[43] = emailAutolink
text[45] = emailAutolink
text[46] = emailAutolink
text[95] = emailAutolink
text[72] = [emailAutolink, protocolAutolink]
text[104] = [emailAutolink, protocolAutolink]
text[87] = [emailAutolink, wwwAutolink]
text[119] = [emailAutolink, wwwAutolink]
// To do: perform email autolink literals on events, afterwards.
// Thats where `markdown-rs` and `cmark-gfm` perform it.
// It should look for `@`, then for atext backwards, and then for a label
// forwards.
// To do: `mailto:`, `xmpp:` protocol as prefix.
/**
* Email autolink literal.
*
* ```markdown
* > | a contact@example.org b
* ^^^^^^^^^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeEmailAutolink(effects, ok, nok) {
const self = this
/** @type {boolean | undefined} */
let dot
/** @type {boolean} */
let data
return start
/**
* Start of email autolink literal.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function start(code) {
if (
!gfmAtext(code) ||
!previousEmail.call(self, self.previous) ||
previousUnbalanced(self.events)
) {
return nok(code)
}
effects.enter('literalAutolink')
effects.enter('literalAutolinkEmail')
return atext(code)
}
/**
* In email atext.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function atext(code) {
if (gfmAtext(code)) {
effects.consume(code)
return atext
}
if (code === 64) {
effects.consume(code)
return emailDomain
}
return nok(code)
}
/**
* In email domain.
*
* The reference code is a bit overly complex as it handles the `@`, of which
* there may be just one.
* Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L318>
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function emailDomain(code) {
// Dot followed by alphanumerical (not `-` or `_`).
if (code === 46) {
return effects.check(
emailDomainDotTrail,
emailDomainAfter,
emailDomainDot
)(code)
}
// Alphanumerical, `-`, and `_`.
if (code === 45 || code === 95 || asciiAlphanumeric(code)) {
data = true
effects.consume(code)
return emailDomain
}
// To do: `/` if xmpp.
// Note: normally wed truncate trailing punctuation from the link.
// However, email autolink literals cannot contain any of those markers,
// except for `.`, but that can only occur if it isnt trailing.
// So we can ignore truncating!
return emailDomainAfter(code)
}
/**
* In email domain, on dot that is not a trail.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function emailDomainDot(code) {
effects.consume(code)
dot = true
return emailDomain
}
/**
* After email domain.
*
* ```markdown
* > | a contact@example.org b
* ^
* ```
*
* @type {State}
*/
function emailDomainAfter(code) {
// Domain must not be empty, must include a dot, and must end in alphabetical.
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L332>.
if (data && dot && asciiAlpha(self.previous)) {
effects.exit('literalAutolinkEmail')
effects.exit('literalAutolink')
return ok(code)
}
return nok(code)
}
}
/**
* `www` autolink literal.
*
* ```markdown
* > | a www.example.org b
* ^^^^^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeWwwAutolink(effects, ok, nok) {
const self = this
return wwwStart
/**
* Start of www autolink literal.
*
* ```markdown
* > | www.example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function wwwStart(code) {
if (
(code !== 87 && code !== 119) ||
!previousWww.call(self, self.previous) ||
previousUnbalanced(self.events)
) {
return nok(code)
}
effects.enter('literalAutolink')
effects.enter('literalAutolinkWww')
// Note: we *check*, so we can discard the `www.` we parsed.
// If it worked, we consider it as a part of the domain.
return effects.check(
wwwPrefix,
effects.attempt(domain, effects.attempt(path, wwwAfter), nok),
nok
)(code)
}
/**
* After a www autolink literal.
*
* ```markdown
* > | www.example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function wwwAfter(code) {
effects.exit('literalAutolinkWww')
effects.exit('literalAutolink')
return ok(code)
}
}
/**
* Protocol autolink literal.
*
* ```markdown
* > | a https://example.org b
* ^^^^^^^^^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeProtocolAutolink(effects, ok, nok) {
const self = this
let buffer = ''
let seen = false
return protocolStart
/**
* Start of protocol autolink literal.
*
* ```markdown
* > | https://example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function protocolStart(code) {
if (
(code === 72 || code === 104) &&
previousProtocol.call(self, self.previous) &&
!previousUnbalanced(self.events)
) {
effects.enter('literalAutolink')
effects.enter('literalAutolinkHttp')
buffer += String.fromCodePoint(code)
effects.consume(code)
return protocolPrefixInside
}
return nok(code)
}
/**
* In protocol.
*
* ```markdown
* > | https://example.com/a?b#c
* ^^^^^
* ```
*
* @type {State}
*/
function protocolPrefixInside(code) {
// `5` is size of `https`
if (asciiAlpha(code) && buffer.length < 5) {
// @ts-expect-error: definitely number.
buffer += String.fromCodePoint(code)
effects.consume(code)
return protocolPrefixInside
}
if (code === 58) {
const protocol = buffer.toLowerCase()
if (protocol === 'http' || protocol === 'https') {
effects.consume(code)
return protocolSlashesInside
}
}
return nok(code)
}
/**
* In slashes.
*
* ```markdown
* > | https://example.com/a?b#c
* ^^
* ```
*
* @type {State}
*/
function protocolSlashesInside(code) {
if (code === 47) {
effects.consume(code)
if (seen) {
return afterProtocol
}
seen = true
return protocolSlashesInside
}
return nok(code)
}
/**
* After protocol, before domain.
*
* ```markdown
* > | https://example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function afterProtocol(code) {
// To do: this is different from `markdown-rs`:
// https://github.com/wooorm/markdown-rs/blob/b3a921c761309ae00a51fe348d8a43adbc54b518/src/construct/gfm_autolink_literal.rs#L172-L182
return code === null ||
asciiControl(code) ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code) ||
unicodePunctuation(code)
? nok(code)
: effects.attempt(domain, effects.attempt(path, protocolAfter), nok)(code)
}
/**
* After a protocol autolink literal.
*
* ```markdown
* > | https://example.com/a?b#c
* ^
* ```
*
* @type {State}
*/
function protocolAfter(code) {
effects.exit('literalAutolinkHttp')
effects.exit('literalAutolink')
return ok(code)
}
}
/**
* `www` prefix.
*
* ```markdown
* > | a www.example.org b
* ^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeWwwPrefix(effects, ok, nok) {
let size = 0
return wwwPrefixInside
/**
* In www prefix.
*
* ```markdown
* > | www.example.com
* ^^^^
* ```
*
* @type {State}
*/
function wwwPrefixInside(code) {
if ((code === 87 || code === 119) && size < 3) {
size++
effects.consume(code)
return wwwPrefixInside
}
if (code === 46 && size === 3) {
effects.consume(code)
return wwwPrefixAfter
}
return nok(code)
}
/**
* After www prefix.
*
* ```markdown
* > | www.example.com
* ^
* ```
*
* @type {State}
*/
function wwwPrefixAfter(code) {
// If there is *anything*, we can link.
return code === null ? nok(code) : ok(code)
}
}
/**
* Domain.
*
* ```markdown
* > | a https://example.org b
* ^^^^^^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeDomain(effects, ok, nok) {
/** @type {boolean | undefined} */
let underscoreInLastSegment
/** @type {boolean | undefined} */
let underscoreInLastLastSegment
/** @type {boolean | undefined} */
let seen
return domainInside
/**
* In domain.
*
* ```markdown
* > | https://example.com/a
* ^^^^^^^^^^^
* ```
*
* @type {State}
*/
function domainInside(code) {
// Check whether this marker, which is a trailing punctuation
// marker, optionally followed by more trailing markers, and then
// followed by an end.
if (code === 46 || code === 95) {
return effects.check(trail, domainAfter, domainAtPunctuation)(code)
}
// GH documents that only alphanumerics (other than `-`, `.`, and `_`) can
// occur, which sounds like ASCII only, but they also support `www.點看.com`,
// so thats Unicode.
// Instead of some new production for Unicode alphanumerics, markdown
// already has that for Unicode punctuation and whitespace, so use those.
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
if (
code === null ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code) ||
(code !== 45 && unicodePunctuation(code))
) {
return domainAfter(code)
}
seen = true
effects.consume(code)
return domainInside
}
/**
* In domain, at potential trailing punctuation, that was not trailing.
*
* ```markdown
* > | https://example.com
* ^
* ```
*
* @type {State}
*/
function domainAtPunctuation(code) {
// There is an underscore in the last segment of the domain
if (code === 95) {
underscoreInLastSegment = true
}
// Otherwise, its a `.`: save the last segment underscore in the
// penultimate segment slot.
else {
underscoreInLastLastSegment = underscoreInLastSegment
underscoreInLastSegment = undefined
}
effects.consume(code)
return domainInside
}
/**
* After domain.
*
* ```markdown
* > | https://example.com/a
* ^
* ```
*
* @type {State} */
function domainAfter(code) {
// Note: thats GH says a dot is needed, but its not true:
// <https://github.com/github/cmark-gfm/issues/279>
if (underscoreInLastLastSegment || underscoreInLastSegment || !seen) {
return nok(code)
}
return ok(code)
}
}
/**
* Path.
*
* ```markdown
* > | a https://example.org/stuff b
* ^^^^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizePath(effects, ok) {
let sizeOpen = 0
let sizeClose = 0
return pathInside
/**
* In path.
*
* ```markdown
* > | https://example.com/a
* ^^
* ```
*
* @type {State}
*/
function pathInside(code) {
if (code === 40) {
sizeOpen++
effects.consume(code)
return pathInside
}
// To do: `markdown-rs` also needs this.
// If this is a paren, and there are less closings than openings,
// we dont check for a trail.
if (code === 41 && sizeClose < sizeOpen) {
return pathAtPunctuation(code)
}
// Check whether this trailing punctuation marker is optionally
// followed by more trailing markers, and then followed
// by an end.
if (
code === 33 ||
code === 34 ||
code === 38 ||
code === 39 ||
code === 41 ||
code === 42 ||
code === 44 ||
code === 46 ||
code === 58 ||
code === 59 ||
code === 60 ||
code === 63 ||
code === 93 ||
code === 95 ||
code === 126
) {
return effects.check(trail, ok, pathAtPunctuation)(code)
}
if (
code === null ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code)
) {
return ok(code)
}
effects.consume(code)
return pathInside
}
/**
* In path, at potential trailing punctuation, that was not trailing.
*
* ```markdown
* > | https://example.com/a"b
* ^
* ```
*
* @type {State}
*/
function pathAtPunctuation(code) {
// Count closing parens.
if (code === 41) {
sizeClose++
}
effects.consume(code)
return pathInside
}
}
/**
* Trail.
*
* This calls `ok` if this *is* the trail, followed by an end, which means
* the entire trail is not part of the link.
* It calls `nok` if this *is* part of the link.
*
* ```markdown
* > | https://example.com").
* ^^^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeTrail(effects, ok, nok) {
return trail
/**
* In trail of domain or path.
*
* ```markdown
* > | https://example.com").
* ^
* ```
*
* @type {State}
*/
function trail(code) {
// Regular trailing punctuation.
if (
code === 33 ||
code === 34 ||
code === 39 ||
code === 41 ||
code === 42 ||
code === 44 ||
code === 46 ||
code === 58 ||
code === 59 ||
code === 63 ||
code === 95 ||
code === 126
) {
effects.consume(code)
return trail
}
// `&` followed by one or more alphabeticals and then a `;`, is
// as a whole considered as trailing punctuation.
// In all other cases, it is considered as continuation of the URL.
if (code === 38) {
effects.consume(code)
return trailCharRefStart
}
// Needed because we allow literals after `[`, as we fix:
// <https://github.com/github/cmark-gfm/issues/278>.
// Check that it is not followed by `(` or `[`.
if (code === 93) {
effects.consume(code)
return trailBracketAfter
}
if (
// `<` is an end.
code === 60 ||
// So is whitespace.
code === null ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code)
) {
return ok(code)
}
return nok(code)
}
/**
* In trail, after `]`.
*
* > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug.
* > See end of <https://github.com/github/cmark-gfm/issues/278> for more.
*
* ```markdown
* > | https://example.com](
* ^
* ```
*
* @type {State}
*/
function trailBracketAfter(code) {
// Whitespace or something that could start a resource or reference is the end.
// Switch back to trail otherwise.
if (
code === null ||
code === 40 ||
code === 91 ||
markdownLineEndingOrSpace(code) ||
unicodeWhitespace(code)
) {
return ok(code)
}
return trail(code)
}
/**
* In character-reference like trail, after `&`.
*
* ```markdown
* > | https://example.com&amp;).
* ^
* ```
*
* @type {State}
*/
function trailCharRefStart(code) {
// When non-alpha, its not a trail.
return asciiAlpha(code) ? trailCharRefInside(code) : nok(code)
}
/**
* In character-reference like trail.
*
* ```markdown
* > | https://example.com&amp;).
* ^
* ```
*
* @type {State}
*/
function trailCharRefInside(code) {
// Switch back to trail if this is well-formed.
if (code === 59) {
effects.consume(code)
return trail
}
if (asciiAlpha(code)) {
effects.consume(code)
return trailCharRefInside
}
// Its not a trail.
return nok(code)
}
}
/**
* Dot in email domain trail.
*
* This calls `ok` if this *is* the trail, followed by an end, which means
* the trail is not part of the link.
* It calls `nok` if this *is* part of the link.
*
* ```markdown
* > | contact@example.org.
* ^
* ```
*
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeEmailDomainDotTrail(effects, ok, nok) {
return start
/**
* Dot.
*
* ```markdown
* > | contact@example.org.
* ^ ^
* ```
*
* @type {State}
*/
function start(code) {
// Must be dot.
effects.consume(code)
return after
}
/**
* After dot.
*
* ```markdown
* > | contact@example.org.
* ^ ^
* ```
*
* @type {State}
*/
function after(code) {
// Not a trail if alphanumeric.
return asciiAlphanumeric(code) ? nok(code) : ok(code)
}
}
/**
* See:
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
*
* @type {Previous}
*/
function previousWww(code) {
return (
code === null ||
code === 40 ||
code === 42 ||
code === 95 ||
code === 91 ||
code === 93 ||
code === 126 ||
markdownLineEndingOrSpace(code)
)
}
/**
* See:
* <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>.
*
* @type {Previous}
*/
function previousProtocol(code) {
return !asciiAlpha(code)
}
/**
* @this {TokenizeContext}
* @type {Previous}
*/
function previousEmail(code) {
// Do not allow a slash “inside” atext.
// The reference code is a bit weird, but thats what it results in.
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>.
// Other than slash, every preceding character is allowed.
return !(code === 47 || gfmAtext(code))
}
/**
* @param {Code} code
* @returns {boolean}
*/
function gfmAtext(code) {
return (
code === 43 ||
code === 45 ||
code === 46 ||
code === 95 ||
asciiAlphanumeric(code)
)
}
/**
* @param {Array<Event>} events
* @returns {boolean}
*/
function previousUnbalanced(events) {
let index = events.length
let result = false
while (index--) {
const token = events[index][1]
if (
(token.type === 'labelLink' || token.type === 'labelImage') &&
!token._balanced
) {
result = true
break
}
// If weve seen this token, and it was marked as not having any unbalanced
// bracket before it, we can exit.
if (token._gfmAutolinkLiteralWalkedInto) {
result = false
break
}
}
if (events.length > 0 && !result) {
// Mark the last token as “walked into” w/o finding
// anything.
events[events.length - 1][1]._gfmAutolinkLiteralWalkedInto = true
}
return result
}

View File

@@ -0,0 +1,22 @@
(The MIT License)
Copyright (c) 2020 Titus Wormer <tituswormer@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
'Software'), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,64 @@
/**
* Check whether a character code is an ASCII control character.
*
* An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
* to U+001F (US), or U+007F (DEL).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function asciiControl(code: Code): boolean;
/**
* Check whether a character code is a markdown line ending.
*
* A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
* LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
*
* In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
* RETURN (CR) are replaced by these virtual characters depending on whether
* they occurred together.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEnding(code: Code): boolean;
/**
* Check whether a character code is a markdown line ending (see
* `markdownLineEnding`) or markdown space (see `markdownSpace`).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEndingOrSpace(code: Code): boolean;
/**
* Check whether a character code is a markdown space.
*
* A **markdown space** is the concrete character U+0020 SPACE (SP) and the
* virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
*
* In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
* replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
* SPACE (VS) characters, depending on the column at which the tab occurred.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownSpace(code: Code): boolean;
export function asciiAlpha(code: Code): boolean;
export function asciiAlphanumeric(code: Code): boolean;
export function asciiAtext(code: Code): boolean;
export function asciiDigit(code: Code): boolean;
export function asciiHexDigit(code: Code): boolean;
export function asciiPunctuation(code: Code): boolean;
export function unicodePunctuation(code: Code): boolean;
export function unicodeWhitespace(code: Code): boolean;
export type Code = import('micromark-util-types').Code;
//# sourceMappingURL=index.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.js"],"names":[],"mappings":"AA8DA;;;;;;;;;;GAUG;AACH,mCALW,IAAI,GAEF,OAAO,CASnB;AAkDD;;;;;;;;;;;;;;GAcG;AACH,yCALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;GAQG;AACH,gDALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;;;;;;;GAcG;AACH,oCALW,IAAI,GAEF,OAAO,CASnB;AAmDY,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,oCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,uCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,yCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;mBAxOvB,OAAO,sBAAsB,EAAE,IAAI"}

View File

@@ -0,0 +1,250 @@
/**
* @typedef {import('micromark-util-types').Code} Code
*/
import {codes} from 'micromark-util-symbol'
/**
* Check whether the character code represents an ASCII alpha (`a` through `z`,
* case insensitive).
*
* An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
*
* An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
* to U+005A (`Z`).
*
* An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
* to U+007A (`z`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiAlpha = regexCheck(/[A-Za-z]/)
/**
* Check whether the character code represents an ASCII alphanumeric (`a`
* through `z`, case insensitive, or `0` through `9`).
*
* An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
* (see `asciiAlpha`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/)
/**
* Check whether the character code represents an ASCII atext.
*
* atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
* the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
* U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F
* SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
* CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
* (`{`) to U+007E TILDE (`~`).
*
* See:
* **\[RFC5322]**:
* [Internet Message Format](https://tools.ietf.org/html/rfc5322).
* P. Resnick.
* IETF.
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/)
/**
* Check whether a character code is an ASCII control character.
*
* An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
* to U+001F (US), or U+007F (DEL).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function asciiControl(code) {
return (
// Special whitespace codes (which have negative values), C0 and Control
// character DEL
code !== null && (code < codes.space || code === codes.del)
)
}
/**
* Check whether the character code represents an ASCII digit (`0` through `9`).
*
* An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
* U+0039 (`9`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiDigit = regexCheck(/\d/)
/**
* Check whether the character code represents an ASCII hex digit (`a` through
* `f`, case insensitive, or `0` through `9`).
*
* An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
* digit, or an ASCII lower hex digit.
*
* An **ASCII upper hex digit** is a character in the inclusive range U+0041
* (`A`) to U+0046 (`F`).
*
* An **ASCII lower hex digit** is a character in the inclusive range U+0061
* (`a`) to U+0066 (`f`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/)
/**
* Check whether the character code represents ASCII punctuation.
*
* An **ASCII punctuation** is a character in the inclusive ranges U+0021
* EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
* SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
* (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/)
/**
* Check whether a character code is a markdown line ending.
*
* A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
* LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
*
* In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
* RETURN (CR) are replaced by these virtual characters depending on whether
* they occurred together.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEnding(code) {
return code !== null && code < codes.horizontalTab
}
/**
* Check whether a character code is a markdown line ending (see
* `markdownLineEnding`) or markdown space (see `markdownSpace`).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEndingOrSpace(code) {
return code !== null && (code < codes.nul || code === codes.space)
}
/**
* Check whether a character code is a markdown space.
*
* A **markdown space** is the concrete character U+0020 SPACE (SP) and the
* virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
*
* In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
* replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
* SPACE (VS) characters, depending on the column at which the tab occurred.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownSpace(code) {
return (
code === codes.horizontalTab ||
code === codes.virtualSpace ||
code === codes.space
)
}
// Size note: removing ASCII from the regex and using `asciiPunctuation` here
// In fact adds to the bundle size.
/**
* Check whether the character code represents Unicode punctuation.
*
* A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
* Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
* (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
* (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
* punctuation (see `asciiPunctuation`).
*
* See:
* **\[UNICODE]**:
* [The Unicode Standard](https://www.unicode.org/versions/).
* Unicode Consortium.
*
* @param code
* Code.
* @returns
* Whether it matches.
*/
export const unicodePunctuation = regexCheck(/\p{P}|\p{S}/u)
/**
* Check whether the character code represents Unicode whitespace.
*
* Note that this does handle micromark specific markdown whitespace characters.
* See `markdownLineEndingOrSpace` to check that.
*
* A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
* Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
* U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
*
* See:
* **\[UNICODE]**:
* [The Unicode Standard](https://www.unicode.org/versions/).
* Unicode Consortium.
*
* @param code
* Code.
* @returns
* Whether it matches.
*/
export const unicodeWhitespace = regexCheck(/\s/)
/**
* Create a code check from a regex.
*
* @param {RegExp} regex
* @returns {(code: Code) => boolean}
*/
function regexCheck(regex) {
return check
/**
* Check whether a code matches the bound regex.
*
* @param {Code} code
* Character code.
* @returns {boolean}
* Whether the character code matches the bound regex.
*/
function check(code) {
return code !== null && code > -1 && regex.test(String.fromCharCode(code))
}
}

View File

@@ -0,0 +1,64 @@
/**
* Check whether a character code is an ASCII control character.
*
* An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
* to U+001F (US), or U+007F (DEL).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function asciiControl(code: Code): boolean;
/**
* Check whether a character code is a markdown line ending.
*
* A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
* LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
*
* In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
* RETURN (CR) are replaced by these virtual characters depending on whether
* they occurred together.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEnding(code: Code): boolean;
/**
* Check whether a character code is a markdown line ending (see
* `markdownLineEnding`) or markdown space (see `markdownSpace`).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEndingOrSpace(code: Code): boolean;
/**
* Check whether a character code is a markdown space.
*
* A **markdown space** is the concrete character U+0020 SPACE (SP) and the
* virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
*
* In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
* replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
* SPACE (VS) characters, depending on the column at which the tab occurred.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownSpace(code: Code): boolean;
export function asciiAlpha(code: Code): boolean;
export function asciiAlphanumeric(code: Code): boolean;
export function asciiAtext(code: Code): boolean;
export function asciiDigit(code: Code): boolean;
export function asciiHexDigit(code: Code): boolean;
export function asciiPunctuation(code: Code): boolean;
export function unicodePunctuation(code: Code): boolean;
export function unicodeWhitespace(code: Code): boolean;
export type Code = import('micromark-util-types').Code;
//# sourceMappingURL=index.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.js"],"names":[],"mappings":"AA8DA;;;;;;;;;;GAUG;AACH,mCALW,IAAI,GAEF,OAAO,CASnB;AAkDD;;;;;;;;;;;;;;GAcG;AACH,yCALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;GAQG;AACH,gDALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;;;;;;;GAcG;AACH,oCALW,IAAI,GAEF,OAAO,CASnB;AAmDY,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,oCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,uCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,yCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;mBAxOvB,OAAO,sBAAsB,EAAE,IAAI"}

View File

@@ -0,0 +1,244 @@
/**
* @typedef {import('micromark-util-types').Code} Code
*/
/**
* Check whether the character code represents an ASCII alpha (`a` through `z`,
* case insensitive).
*
* An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
*
* An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
* to U+005A (`Z`).
*
* An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
* to U+007A (`z`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiAlpha = regexCheck(/[A-Za-z]/);
/**
* Check whether the character code represents an ASCII alphanumeric (`a`
* through `z`, case insensitive, or `0` through `9`).
*
* An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
* (see `asciiAlpha`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/);
/**
* Check whether the character code represents an ASCII atext.
*
* atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
* the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
* U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F
* SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
* CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
* (`{`) to U+007E TILDE (`~`).
*
* See:
* **\[RFC5322]**:
* [Internet Message Format](https://tools.ietf.org/html/rfc5322).
* P. Resnick.
* IETF.
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/);
/**
* Check whether a character code is an ASCII control character.
*
* An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
* to U+001F (US), or U+007F (DEL).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function asciiControl(code) {
return (
// Special whitespace codes (which have negative values), C0 and Control
// character DEL
code !== null && (code < 32 || code === 127)
);
}
/**
* Check whether the character code represents an ASCII digit (`0` through `9`).
*
* An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
* U+0039 (`9`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiDigit = regexCheck(/\d/);
/**
* Check whether the character code represents an ASCII hex digit (`a` through
* `f`, case insensitive, or `0` through `9`).
*
* An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
* digit, or an ASCII lower hex digit.
*
* An **ASCII upper hex digit** is a character in the inclusive range U+0041
* (`A`) to U+0046 (`F`).
*
* An **ASCII lower hex digit** is a character in the inclusive range U+0061
* (`a`) to U+0066 (`f`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/);
/**
* Check whether the character code represents ASCII punctuation.
*
* An **ASCII punctuation** is a character in the inclusive ranges U+0021
* EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
* SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
* (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
*
* @param code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/);
/**
* Check whether a character code is a markdown line ending.
*
* A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
* LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
*
* In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
* RETURN (CR) are replaced by these virtual characters depending on whether
* they occurred together.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEnding(code) {
return code !== null && code < -2;
}
/**
* Check whether a character code is a markdown line ending (see
* `markdownLineEnding`) or markdown space (see `markdownSpace`).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEndingOrSpace(code) {
return code !== null && (code < 0 || code === 32);
}
/**
* Check whether a character code is a markdown space.
*
* A **markdown space** is the concrete character U+0020 SPACE (SP) and the
* virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
*
* In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
* replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
* SPACE (VS) characters, depending on the column at which the tab occurred.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownSpace(code) {
return code === -2 || code === -1 || code === 32;
}
// Size note: removing ASCII from the regex and using `asciiPunctuation` here
// In fact adds to the bundle size.
/**
* Check whether the character code represents Unicode punctuation.
*
* A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
* Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
* (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
* (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
* punctuation (see `asciiPunctuation`).
*
* See:
* **\[UNICODE]**:
* [The Unicode Standard](https://www.unicode.org/versions/).
* Unicode Consortium.
*
* @param code
* Code.
* @returns
* Whether it matches.
*/
export const unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
/**
* Check whether the character code represents Unicode whitespace.
*
* Note that this does handle micromark specific markdown whitespace characters.
* See `markdownLineEndingOrSpace` to check that.
*
* A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
* Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
* U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
*
* See:
* **\[UNICODE]**:
* [The Unicode Standard](https://www.unicode.org/versions/).
* Unicode Consortium.
*
* @param code
* Code.
* @returns
* Whether it matches.
*/
export const unicodeWhitespace = regexCheck(/\s/);
/**
* Create a code check from a regex.
*
* @param {RegExp} regex
* @returns {(code: Code) => boolean}
*/
function regexCheck(regex) {
return check;
/**
* Check whether a code matches the bound regex.
*
* @param {Code} code
* Character code.
* @returns {boolean}
* Whether the character code matches the bound regex.
*/
function check(code) {
return code !== null && code > -1 && regex.test(String.fromCharCode(code));
}
}

View File

@@ -0,0 +1,57 @@
{
"name": "micromark-util-character",
"version": "2.1.0",
"description": "micromark utility to handle character codes",
"license": "MIT",
"keywords": [
"micromark",
"util",
"utility",
"character"
],
"repository": "https://github.com/micromark/micromark/tree/main/packages/micromark-util-character",
"bugs": "https://github.com/micromark/micromark/issues",
"funding": [
{
"type": "GitHub Sponsors",
"url": "https://github.com/sponsors/unifiedjs"
},
{
"type": "OpenCollective",
"url": "https://opencollective.com/unified"
}
],
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"files": [
"dev/",
"lib/",
"index.d.ts.map",
"index.d.ts",
"index.js"
],
"exports": {
"development": "./dev/index.js",
"default": "./index.js"
},
"dependencies": {
"micromark-util-symbol": "^2.0.0",
"micromark-util-types": "^2.0.0"
},
"scripts": {
"build": "micromark-build"
},
"xo": {
"envs": [
"shared-node-browser"
],
"prettier": true,
"rules": {
"unicorn/prefer-code-point": "off"
}
}
}

View File

@@ -0,0 +1,446 @@
# micromark-util-character
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][bundle-size-badge]][bundle-size]
[![Sponsors][sponsors-badge]][opencollective]
[![Backers][backers-badge]][opencollective]
[![Chat][chat-badge]][chat]
[micromark][] utility to handle [character codes][code].
## Contents
* [What is this?](#what-is-this)
* [When should I use this?](#when-should-i-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [`asciiAlpha(code)`](#asciialphacode)
* [`asciiAlphanumeric(code)`](#asciialphanumericcode)
* [`asciiAtext(code)`](#asciiatextcode)
* [`asciiControl(code)`](#asciicontrolcode)
* [`asciiDigit(code)`](#asciidigitcode)
* [`asciiHexDigit(code)`](#asciihexdigitcode)
* [`asciiPunctuation(code)`](#asciipunctuationcode)
* [`markdownLineEnding(code)`](#markdownlineendingcode)
* [`markdownLineEndingOrSpace(code)`](#markdownlineendingorspacecode)
* [`markdownSpace(code)`](#markdownspacecode)
* [`unicodePunctuation(code)`](#unicodepunctuationcode)
* [`unicodeWhitespace(code)`](#unicodewhitespacecode)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package exposes algorithms to check whether characters match groups.
## When should I use this?
This package might be useful when you are making your own micromark extensions.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-util-character
```
In Deno with [`esm.sh`][esmsh]:
```js
import * as character from 'https://esm.sh/micromark-util-character@1'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import * as character from 'https://esm.sh/micromark-util-character@1?bundle'
</script>
```
## Use
```js
import {asciiAlpha} from 'micromark-util-character'
console.log(asciiAlpha(64)) // false
console.log(asciiAlpha(65)) // true
```
## API
This module exports the identifiers
[`asciiAlpha`][api-ascii-alpha],
[`asciiAlphanumeric`][api-ascii-alphanumeric],
[`asciiAtext`][api-ascii-atext],
[`asciiControl`][api-ascii-control],
[`asciiDigit`][api-ascii-digit],
[`asciiHexDigit`][api-ascii-hex-digit],
[`asciiPunctuation`][api-ascii-punctuation],
[`markdownLineEnding`][api-markdown-line-ending],
[`markdownLineEndingOrSpace`][api-markdown-line-ending-or-space],
[`markdownSpace`][api-markdown-space],
[`unicodePunctuation`][api-unicode-punctuation],
[`unicodeWhitespace`][api-unicode-whitespace].
There is no default export.
### `asciiAlpha(code)`
Check whether the [character code][code] represents an ASCII alpha (`a` through
`z`, case insensitive).
An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
to U+005A (`Z`).
An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
to U+007A (`z`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiAlphanumeric(code)`
Check whether the [character code][code] represents an ASCII alphanumeric (`a`
through `z`, case insensitive, or `0` through `9`).
An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
(see `asciiAlpha`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiAtext(code)`
Check whether the [character code][code] represents an ASCII atext.
atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F
SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
(`{`) to U+007E TILDE (`~`) (**\[RFC5322]**).
See **\[RFC5322]**:\
[Internet Message Format](https://tools.ietf.org/html/rfc5322).\
P. Resnick.\
IETF.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiControl(code)`
Check whether a [character code][code] is an ASCII control character.
An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
to U+001F (US), or U+007F (DEL).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiDigit(code)`
Check whether the [character code][code] represents an ASCII digit (`0` through
`9`).
An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
U+0039 (`9`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiHexDigit(code)`
Check whether the [character code][code] represents an ASCII hex digit (`a`
through `f`, case insensitive, or `0` through `9`).
An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
digit, or an ASCII lower hex digit.
An **ASCII upper hex digit** is a character in the inclusive range U+0041
(`A`) to U+0046 (`F`).
An **ASCII lower hex digit** is a character in the inclusive range U+0061
(`a`) to U+0066 (`f`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiPunctuation(code)`
Check whether the [character code][code] represents ASCII punctuation.
An **ASCII punctuation** is a character in the inclusive ranges U+0021
EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
(`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `markdownLineEnding(code)`
Check whether a [character code][code] is a markdown line ending.
A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
RETURN (CR) are replaced by these virtual characters depending on whether
they occurred together.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `markdownLineEndingOrSpace(code)`
Check whether a [character code][code] is a markdown line ending (see
`markdownLineEnding`) or markdown space (see `markdownSpace`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `markdownSpace(code)`
Check whether a [character code][code] is a markdown space.
A **markdown space** is the concrete character U+0020 SPACE (SP) and the
virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
SPACE (VS) characters, depending on the column at which the tab occurred.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `unicodePunctuation(code)`
Check whether the [character code][code] represents Unicode punctuation.
A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
(Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
(Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
punctuation (see `asciiPunctuation`) (**\[UNICODE]**).
See **\[UNICODE]**:\
[The Unicode Standard](https://www.unicode.org/versions/).\
Unicode Consortium.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `unicodeWhitespace(code)`
Check whether the [character code][code] represents Unicode whitespace.
Note that this does handle micromark specific markdown whitespace characters.
See `markdownLineEndingOrSpace` to check that.
A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
See **\[UNICODE]**:\
[The Unicode Standard](https://www.unicode.org/versions/).\
Unicode Consortium.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
## Types
This package is fully typed with [TypeScript][].
It exports no additional types.
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-util-character@^2`, compatible with Node.js 16.
This package works with `micromark@^3`.
## Security
This package is safe.
See [`security.md`][securitymd] in [`micromark/.github`][health] for how to
submit a security report.
## Contribute
See [`contributing.md`][contributing] in [`micromark/.github`][health] for ways
to get started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organisation, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark.svg
[coverage]: https://codecov.io/github/micromark/micromark
[downloads-badge]: https://img.shields.io/npm/dm/micromark-util-character.svg
[downloads]: https://www.npmjs.com/package/micromark-util-character
[bundle-size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-util-character
[bundle-size]: https://bundlejs.com/?q=micromark-util-character
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[opencollective]: https://opencollective.com/unified
[npm]: https://docs.npmjs.com/cli/install
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[esmsh]: https://esm.sh
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[license]: https://github.com/micromark/micromark/blob/main/license
[author]: https://wooorm.com
[health]: https://github.com/micromark/.github
[securitymd]: https://github.com/micromark/.github/blob/main/security.md
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[typescript]: https://www.typescriptlang.org
[micromark]: https://github.com/micromark/micromark
[code]: https://github.com/micromark/micromark#preprocess
[api-ascii-alpha]: #asciialphacode
[api-ascii-alphanumeric]: #asciialphanumericcode
[api-ascii-atext]: #asciiatextcode
[api-ascii-control]: #asciicontrolcode
[api-ascii-digit]: #asciidigitcode
[api-ascii-hex-digit]: #asciihexdigitcode
[api-ascii-punctuation]: #asciipunctuationcode
[api-markdown-line-ending]: #markdownlineendingcode
[api-markdown-line-ending-or-space]: #markdownlineendingorspacecode
[api-markdown-space]: #markdownspacecode
[api-unicode-punctuation]: #unicodepunctuationcode
[api-unicode-whitespace]: #unicodewhitespacecode

View File

@@ -0,0 +1,138 @@
export namespace codes {
let carriageReturn: -5
let lineFeed: -4
let carriageReturnLineFeed: -3
let horizontalTab: -2
let virtualSpace: -1
let eof: null
let nul: 0
let soh: 1
let stx: 2
let etx: 3
let eot: 4
let enq: 5
let ack: 6
let bel: 7
let bs: 8
let ht: 9
let lf: 10
let vt: 11
let ff: 12
let cr: 13
let so: 14
let si: 15
let dle: 16
let dc1: 17
let dc2: 18
let dc3: 19
let dc4: 20
let nak: 21
let syn: 22
let etb: 23
let can: 24
let em: 25
let sub: 26
let esc: 27
let fs: 28
let gs: 29
let rs: 30
let us: 31
let space: 32
let exclamationMark: 33
let quotationMark: 34
let numberSign: 35
let dollarSign: 36
let percentSign: 37
let ampersand: 38
let apostrophe: 39
let leftParenthesis: 40
let rightParenthesis: 41
let asterisk: 42
let plusSign: 43
let comma: 44
let dash: 45
let dot: 46
let slash: 47
let digit0: 48
let digit1: 49
let digit2: 50
let digit3: 51
let digit4: 52
let digit5: 53
let digit6: 54
let digit7: 55
let digit8: 56
let digit9: 57
let colon: 58
let semicolon: 59
let lessThan: 60
let equalsTo: 61
let greaterThan: 62
let questionMark: 63
let atSign: 64
let uppercaseA: 65
let uppercaseB: 66
let uppercaseC: 67
let uppercaseD: 68
let uppercaseE: 69
let uppercaseF: 70
let uppercaseG: 71
let uppercaseH: 72
let uppercaseI: 73
let uppercaseJ: 74
let uppercaseK: 75
let uppercaseL: 76
let uppercaseM: 77
let uppercaseN: 78
let uppercaseO: 79
let uppercaseP: 80
let uppercaseQ: 81
let uppercaseR: 82
let uppercaseS: 83
let uppercaseT: 84
let uppercaseU: 85
let uppercaseV: 86
let uppercaseW: 87
let uppercaseX: 88
let uppercaseY: 89
let uppercaseZ: 90
let leftSquareBracket: 91
let backslash: 92
let rightSquareBracket: 93
let caret: 94
let underscore: 95
let graveAccent: 96
let lowercaseA: 97
let lowercaseB: 98
let lowercaseC: 99
let lowercaseD: 100
let lowercaseE: 101
let lowercaseF: 102
let lowercaseG: 103
let lowercaseH: 104
let lowercaseI: 105
let lowercaseJ: 106
let lowercaseK: 107
let lowercaseL: 108
let lowercaseM: 109
let lowercaseN: 110
let lowercaseO: 111
let lowercaseP: 112
let lowercaseQ: 113
let lowercaseR: 114
let lowercaseS: 115
let lowercaseT: 116
let lowercaseU: 117
let lowercaseV: 118
let lowercaseW: 119
let lowercaseX: 120
let lowercaseY: 121
let lowercaseZ: 122
let leftCurlyBrace: 123
let verticalBar: 124
let rightCurlyBrace: 125
let tilde: 126
let del: 127
let byteOrderMarker: 65279
let replacementCharacter: 65533
}

View File

@@ -0,0 +1,158 @@
/**
* Character codes.
*
* This module is compiled away!
*
* micromark works based on character codes.
* This module contains constants for the ASCII block and the replacement
* character.
* A couple of them are handled in a special way, such as the line endings
* (CR, LF, and CR+LF, commonly known as end-of-line: EOLs), the tab (horizontal
* tab) and its expansion based on what column its at (virtual space),
* and the end-of-file (eof) character.
* As values are preprocessed before handling them, the actual characters LF,
* CR, HT, and NUL (which is present as the replacement character), are
* guaranteed to not exist.
*
* Unicode basic latin block.
*/
export const codes = /** @type {const} */ ({
carriageReturn: -5,
lineFeed: -4,
carriageReturnLineFeed: -3,
horizontalTab: -2,
virtualSpace: -1,
eof: null,
nul: 0,
soh: 1,
stx: 2,
etx: 3,
eot: 4,
enq: 5,
ack: 6,
bel: 7,
bs: 8,
ht: 9, // `\t`
lf: 10, // `\n`
vt: 11, // `\v`
ff: 12, // `\f`
cr: 13, // `\r`
so: 14,
si: 15,
dle: 16,
dc1: 17,
dc2: 18,
dc3: 19,
dc4: 20,
nak: 21,
syn: 22,
etb: 23,
can: 24,
em: 25,
sub: 26,
esc: 27,
fs: 28,
gs: 29,
rs: 30,
us: 31,
space: 32,
exclamationMark: 33, // `!`
quotationMark: 34, // `"`
numberSign: 35, // `#`
dollarSign: 36, // `$`
percentSign: 37, // `%`
ampersand: 38, // `&`
apostrophe: 39, // `'`
leftParenthesis: 40, // `(`
rightParenthesis: 41, // `)`
asterisk: 42, // `*`
plusSign: 43, // `+`
comma: 44, // `,`
dash: 45, // `-`
dot: 46, // `.`
slash: 47, // `/`
digit0: 48, // `0`
digit1: 49, // `1`
digit2: 50, // `2`
digit3: 51, // `3`
digit4: 52, // `4`
digit5: 53, // `5`
digit6: 54, // `6`
digit7: 55, // `7`
digit8: 56, // `8`
digit9: 57, // `9`
colon: 58, // `:`
semicolon: 59, // `;`
lessThan: 60, // `<`
equalsTo: 61, // `=`
greaterThan: 62, // `>`
questionMark: 63, // `?`
atSign: 64, // `@`
uppercaseA: 65, // `A`
uppercaseB: 66, // `B`
uppercaseC: 67, // `C`
uppercaseD: 68, // `D`
uppercaseE: 69, // `E`
uppercaseF: 70, // `F`
uppercaseG: 71, // `G`
uppercaseH: 72, // `H`
uppercaseI: 73, // `I`
uppercaseJ: 74, // `J`
uppercaseK: 75, // `K`
uppercaseL: 76, // `L`
uppercaseM: 77, // `M`
uppercaseN: 78, // `N`
uppercaseO: 79, // `O`
uppercaseP: 80, // `P`
uppercaseQ: 81, // `Q`
uppercaseR: 82, // `R`
uppercaseS: 83, // `S`
uppercaseT: 84, // `T`
uppercaseU: 85, // `U`
uppercaseV: 86, // `V`
uppercaseW: 87, // `W`
uppercaseX: 88, // `X`
uppercaseY: 89, // `Y`
uppercaseZ: 90, // `Z`
leftSquareBracket: 91, // `[`
backslash: 92, // `\`
rightSquareBracket: 93, // `]`
caret: 94, // `^`
underscore: 95, // `_`
graveAccent: 96, // `` ` ``
lowercaseA: 97, // `a`
lowercaseB: 98, // `b`
lowercaseC: 99, // `c`
lowercaseD: 100, // `d`
lowercaseE: 101, // `e`
lowercaseF: 102, // `f`
lowercaseG: 103, // `g`
lowercaseH: 104, // `h`
lowercaseI: 105, // `i`
lowercaseJ: 106, // `j`
lowercaseK: 107, // `k`
lowercaseL: 108, // `l`
lowercaseM: 109, // `m`
lowercaseN: 110, // `n`
lowercaseO: 111, // `o`
lowercaseP: 112, // `p`
lowercaseQ: 113, // `q`
lowercaseR: 114, // `r`
lowercaseS: 115, // `s`
lowercaseT: 116, // `t`
lowercaseU: 117, // `u`
lowercaseV: 118, // `v`
lowercaseW: 119, // `w`
lowercaseX: 120, // `x`
lowercaseY: 121, // `y`
lowercaseZ: 122, // `z`
leftCurlyBrace: 123, // `{`
verticalBar: 124, // `|`
rightCurlyBrace: 125, // `}`
tilde: 126, // `~`
del: 127,
// Unicode Specials block.
byteOrderMarker: 65279,
// Unicode Specials block.
replacementCharacter: 65533 // `<60>`
})

View File

@@ -0,0 +1,36 @@
export namespace constants {
let attentionSideBefore: 1
let attentionSideAfter: 2
let atxHeadingOpeningFenceSizeMax: 6
let autolinkDomainSizeMax: 63
let autolinkSchemeSizeMax: 32
let cdataOpeningString: 'CDATA['
let characterGroupWhitespace: 1
let characterGroupPunctuation: 2
let characterReferenceDecimalSizeMax: 7
let characterReferenceHexadecimalSizeMax: 6
let characterReferenceNamedSizeMax: 31
let codeFencedSequenceSizeMin: 3
let contentTypeDocument: 'document'
let contentTypeFlow: 'flow'
let contentTypeContent: 'content'
let contentTypeString: 'string'
let contentTypeText: 'text'
let hardBreakPrefixSizeMin: 2
let htmlRaw: 1
let htmlComment: 2
let htmlInstruction: 3
let htmlDeclaration: 4
let htmlCdata: 5
let htmlBasic: 6
let htmlComplete: 7
let htmlRawSizeMax: 8
let linkResourceDestinationBalanceMax: 32
let linkReferenceSizeMax: 999
let listItemValueSizeMax: 10
let numericBaseDecimal: 10
let numericBaseHexadecimal: 16
let tabSize: 4
let thematicBreakMarkerCountMin: 3
let v8MaxSafeChunkSize: 10000
}

View File

@@ -0,0 +1,44 @@
/**
* This module is compiled away!
*
* Parsing markdown comes with a couple of constants, such as minimum or maximum
* sizes of certain sequences.
* Additionally, there are a couple symbols used inside micromark.
* These are all defined here, but compiled away by scripts.
*/
export const constants = /** @type {const} */ ({
attentionSideBefore: 1, // Symbol to mark an attention sequence as before content: `*a`
attentionSideAfter: 2, // Symbol to mark an attention sequence as after content: `a*`
atxHeadingOpeningFenceSizeMax: 6, // 6 number signs is fine, 7 isnt.
autolinkDomainSizeMax: 63, // 63 characters is fine, 64 is too many.
autolinkSchemeSizeMax: 32, // 32 characters is fine, 33 is too many.
cdataOpeningString: 'CDATA[', // And preceded by `<![`.
characterGroupWhitespace: 1, // Symbol used to indicate a character is whitespace
characterGroupPunctuation: 2, // Symbol used to indicate a character is punctuation
characterReferenceDecimalSizeMax: 7, // `&#9999999;`.
characterReferenceHexadecimalSizeMax: 6, // `&#xff9999;`.
characterReferenceNamedSizeMax: 31, // `&CounterClockwiseContourIntegral;`.
codeFencedSequenceSizeMin: 3, // At least 3 ticks or tildes are needed.
contentTypeDocument: 'document',
contentTypeFlow: 'flow',
contentTypeContent: 'content',
contentTypeString: 'string',
contentTypeText: 'text',
hardBreakPrefixSizeMin: 2, // At least 2 trailing spaces are needed.
htmlRaw: 1, // Symbol for `<script>`
htmlComment: 2, // Symbol for `<!---->`
htmlInstruction: 3, // Symbol for `<?php?>`
htmlDeclaration: 4, // Symbol for `<!doctype>`
htmlCdata: 5, // Symbol for `<![CDATA[]]>`
htmlBasic: 6, // Symbol for `<div`
htmlComplete: 7, // Symbol for `<x>`
htmlRawSizeMax: 8, // Length of `textarea`.
linkResourceDestinationBalanceMax: 32, // See: <https://spec.commonmark.org/0.30/#link-destination>, <https://github.com/remarkjs/react-markdown/issues/658#issuecomment-984345577>
linkReferenceSizeMax: 999, // See: <https://spec.commonmark.org/0.30/#link-label>
listItemValueSizeMax: 10, // See: <https://spec.commonmark.org/0.30/#ordered-list-marker>
numericBaseDecimal: 10,
numericBaseHexadecimal: 0x10,
tabSize: 4, // Tabs have a hard-coded size of 4, per CommonMark.
thematicBreakMarkerCountMin: 3, // At least 3 asterisks, dashes, or underscores are needed.
v8MaxSafeChunkSize: 10000 // V8 (and potentially others) have problems injecting giant arrays into other arrays, hence we operate in chunks.
})

View File

@@ -0,0 +1,4 @@
export {codes} from './codes.js'
export {constants} from './constants.js'
export {types} from './types.js'
export {values} from './values.js'

View File

@@ -0,0 +1,4 @@
export {codes} from './codes.js'
export {constants} from './constants.js'
export {types} from './types.js'
export {values} from './values.js'

View File

@@ -0,0 +1,105 @@
export namespace types {
let data: 'data'
let whitespace: 'whitespace'
let lineEnding: 'lineEnding'
let lineEndingBlank: 'lineEndingBlank'
let linePrefix: 'linePrefix'
let lineSuffix: 'lineSuffix'
let atxHeading: 'atxHeading'
let atxHeadingSequence: 'atxHeadingSequence'
let atxHeadingText: 'atxHeadingText'
let autolink: 'autolink'
let autolinkEmail: 'autolinkEmail'
let autolinkMarker: 'autolinkMarker'
let autolinkProtocol: 'autolinkProtocol'
let characterEscape: 'characterEscape'
let characterEscapeValue: 'characterEscapeValue'
let characterReference: 'characterReference'
let characterReferenceMarker: 'characterReferenceMarker'
let characterReferenceMarkerNumeric: 'characterReferenceMarkerNumeric'
let characterReferenceMarkerHexadecimal: 'characterReferenceMarkerHexadecimal'
let characterReferenceValue: 'characterReferenceValue'
let codeFenced: 'codeFenced'
let codeFencedFence: 'codeFencedFence'
let codeFencedFenceSequence: 'codeFencedFenceSequence'
let codeFencedFenceInfo: 'codeFencedFenceInfo'
let codeFencedFenceMeta: 'codeFencedFenceMeta'
let codeFlowValue: 'codeFlowValue'
let codeIndented: 'codeIndented'
let codeText: 'codeText'
let codeTextData: 'codeTextData'
let codeTextPadding: 'codeTextPadding'
let codeTextSequence: 'codeTextSequence'
let content: 'content'
let definition: 'definition'
let definitionDestination: 'definitionDestination'
let definitionDestinationLiteral: 'definitionDestinationLiteral'
let definitionDestinationLiteralMarker: 'definitionDestinationLiteralMarker'
let definitionDestinationRaw: 'definitionDestinationRaw'
let definitionDestinationString: 'definitionDestinationString'
let definitionLabel: 'definitionLabel'
let definitionLabelMarker: 'definitionLabelMarker'
let definitionLabelString: 'definitionLabelString'
let definitionMarker: 'definitionMarker'
let definitionTitle: 'definitionTitle'
let definitionTitleMarker: 'definitionTitleMarker'
let definitionTitleString: 'definitionTitleString'
let emphasis: 'emphasis'
let emphasisSequence: 'emphasisSequence'
let emphasisText: 'emphasisText'
let escapeMarker: 'escapeMarker'
let hardBreakEscape: 'hardBreakEscape'
let hardBreakTrailing: 'hardBreakTrailing'
let htmlFlow: 'htmlFlow'
let htmlFlowData: 'htmlFlowData'
let htmlText: 'htmlText'
let htmlTextData: 'htmlTextData'
let image: 'image'
let label: 'label'
let labelText: 'labelText'
let labelLink: 'labelLink'
let labelImage: 'labelImage'
let labelMarker: 'labelMarker'
let labelImageMarker: 'labelImageMarker'
let labelEnd: 'labelEnd'
let link: 'link'
let paragraph: 'paragraph'
let reference: 'reference'
let referenceMarker: 'referenceMarker'
let referenceString: 'referenceString'
let resource: 'resource'
let resourceDestination: 'resourceDestination'
let resourceDestinationLiteral: 'resourceDestinationLiteral'
let resourceDestinationLiteralMarker: 'resourceDestinationLiteralMarker'
let resourceDestinationRaw: 'resourceDestinationRaw'
let resourceDestinationString: 'resourceDestinationString'
let resourceMarker: 'resourceMarker'
let resourceTitle: 'resourceTitle'
let resourceTitleMarker: 'resourceTitleMarker'
let resourceTitleString: 'resourceTitleString'
let setextHeading: 'setextHeading'
let setextHeadingText: 'setextHeadingText'
let setextHeadingLine: 'setextHeadingLine'
let setextHeadingLineSequence: 'setextHeadingLineSequence'
let strong: 'strong'
let strongSequence: 'strongSequence'
let strongText: 'strongText'
let thematicBreak: 'thematicBreak'
let thematicBreakSequence: 'thematicBreakSequence'
let blockQuote: 'blockQuote'
let blockQuotePrefix: 'blockQuotePrefix'
let blockQuoteMarker: 'blockQuoteMarker'
let blockQuotePrefixWhitespace: 'blockQuotePrefixWhitespace'
let listOrdered: 'listOrdered'
let listUnordered: 'listUnordered'
let listItemIndent: 'listItemIndent'
let listItemMarker: 'listItemMarker'
let listItemPrefix: 'listItemPrefix'
let listItemPrefixWhitespace: 'listItemPrefixWhitespace'
let listItemValue: 'listItemValue'
let chunkDocument: 'chunkDocument'
let chunkContent: 'chunkContent'
let chunkFlow: 'chunkFlow'
let chunkText: 'chunkText'
let chunkString: 'chunkString'
}

View File

@@ -0,0 +1,453 @@
/**
* This module is compiled away!
*
* Here is the list of all types of tokens exposed by micromark, with a short
* explanation of what they include and where they are found.
* In picking names, generally, the rule is to be as explicit as possible
* instead of reusing names.
* For example, there is a `definitionDestination` and a `resourceDestination`,
* instead of one shared name.
*/
// Note: when changing the next record, you must also change `TokenTypeMap`
// in `micromark-util-types/index.d.ts`.
export const types = /** @type {const} */ ({
// Generic type for data, such as in a title, a destination, etc.
data: 'data',
// Generic type for syntactic whitespace (tabs, virtual spaces, spaces).
// Such as, between a fenced code fence and an info string.
whitespace: 'whitespace',
// Generic type for line endings (line feed, carriage return, carriage return +
// line feed).
lineEnding: 'lineEnding',
// A line ending, but ending a blank line.
lineEndingBlank: 'lineEndingBlank',
// Generic type for whitespace (tabs, virtual spaces, spaces) at the start of a
// line.
linePrefix: 'linePrefix',
// Generic type for whitespace (tabs, virtual spaces, spaces) at the end of a
// line.
lineSuffix: 'lineSuffix',
// Whole ATX heading:
//
// ```markdown
// #
// ## Alpha
// ### Bravo ###
// ```
//
// Includes `atxHeadingSequence`, `whitespace`, `atxHeadingText`.
atxHeading: 'atxHeading',
// Sequence of number signs in an ATX heading (`###`).
atxHeadingSequence: 'atxHeadingSequence',
// Content in an ATX heading (`alpha`).
// Includes text.
atxHeadingText: 'atxHeadingText',
// Whole autolink (`<https://example.com>` or `<admin@example.com>`)
// Includes `autolinkMarker` and `autolinkProtocol` or `autolinkEmail`.
autolink: 'autolink',
// Email autolink w/o markers (`admin@example.com`)
autolinkEmail: 'autolinkEmail',
// Marker around an `autolinkProtocol` or `autolinkEmail` (`<` or `>`).
autolinkMarker: 'autolinkMarker',
// Protocol autolink w/o markers (`https://example.com`)
autolinkProtocol: 'autolinkProtocol',
// A whole character escape (`\-`).
// Includes `escapeMarker` and `characterEscapeValue`.
characterEscape: 'characterEscape',
// The escaped character (`-`).
characterEscapeValue: 'characterEscapeValue',
// A whole character reference (`&amp;`, `&#8800;`, or `&#x1D306;`).
// Includes `characterReferenceMarker`, an optional
// `characterReferenceMarkerNumeric`, in which case an optional
// `characterReferenceMarkerHexadecimal`, and a `characterReferenceValue`.
characterReference: 'characterReference',
// The start or end marker (`&` or `;`).
characterReferenceMarker: 'characterReferenceMarker',
// Mark reference as numeric (`#`).
characterReferenceMarkerNumeric: 'characterReferenceMarkerNumeric',
// Mark reference as numeric (`x` or `X`).
characterReferenceMarkerHexadecimal: 'characterReferenceMarkerHexadecimal',
// Value of character reference w/o markers (`amp`, `8800`, or `1D306`).
characterReferenceValue: 'characterReferenceValue',
// Whole fenced code:
//
// ````markdown
// ```js
// alert(1)
// ```
// ````
codeFenced: 'codeFenced',
// A fenced code fence, including whitespace, sequence, info, and meta
// (` ```js `).
codeFencedFence: 'codeFencedFence',
// Sequence of grave accent or tilde characters (` ``` `) in a fence.
codeFencedFenceSequence: 'codeFencedFenceSequence',
// Info word (`js`) in a fence.
// Includes string.
codeFencedFenceInfo: 'codeFencedFenceInfo',
// Meta words (`highlight="1"`) in a fence.
// Includes string.
codeFencedFenceMeta: 'codeFencedFenceMeta',
// A line of code.
codeFlowValue: 'codeFlowValue',
// Whole indented code:
//
// ```markdown
// alert(1)
// ```
//
// Includes `lineEnding`, `linePrefix`, and `codeFlowValue`.
codeIndented: 'codeIndented',
// A text code (``` `alpha` ```).
// Includes `codeTextSequence`, `codeTextData`, `lineEnding`, and can include
// `codeTextPadding`.
codeText: 'codeText',
codeTextData: 'codeTextData',
// A space or line ending right after or before a tick.
codeTextPadding: 'codeTextPadding',
// A text code fence (` `` `).
codeTextSequence: 'codeTextSequence',
// Whole content:
//
// ```markdown
// [a]: b
// c
// =
// d
// ```
//
// Includes `paragraph` and `definition`.
content: 'content',
// Whole definition:
//
// ```markdown
// [micromark]: https://github.com/micromark/micromark
// ```
//
// Includes `definitionLabel`, `definitionMarker`, `whitespace`,
// `definitionDestination`, and optionally `lineEnding` and `definitionTitle`.
definition: 'definition',
// Destination of a definition (`https://github.com/micromark/micromark` or
// `<https://github.com/micromark/micromark>`).
// Includes `definitionDestinationLiteral` or `definitionDestinationRaw`.
definitionDestination: 'definitionDestination',
// Enclosed destination of a definition
// (`<https://github.com/micromark/micromark>`).
// Includes `definitionDestinationLiteralMarker` and optionally
// `definitionDestinationString`.
definitionDestinationLiteral: 'definitionDestinationLiteral',
// Markers of an enclosed definition destination (`<` or `>`).
definitionDestinationLiteralMarker: 'definitionDestinationLiteralMarker',
// Unenclosed destination of a definition
// (`https://github.com/micromark/micromark`).
// Includes `definitionDestinationString`.
definitionDestinationRaw: 'definitionDestinationRaw',
// Text in an destination (`https://github.com/micromark/micromark`).
// Includes string.
definitionDestinationString: 'definitionDestinationString',
// Label of a definition (`[micromark]`).
// Includes `definitionLabelMarker` and `definitionLabelString`.
definitionLabel: 'definitionLabel',
// Markers of a definition label (`[` or `]`).
definitionLabelMarker: 'definitionLabelMarker',
// Value of a definition label (`micromark`).
// Includes string.
definitionLabelString: 'definitionLabelString',
// Marker between a label and a destination (`:`).
definitionMarker: 'definitionMarker',
// Title of a definition (`"x"`, `'y'`, or `(z)`).
// Includes `definitionTitleMarker` and optionally `definitionTitleString`.
definitionTitle: 'definitionTitle',
// Marker around a title of a definition (`"`, `'`, `(`, or `)`).
definitionTitleMarker: 'definitionTitleMarker',
// Data without markers in a title (`z`).
// Includes string.
definitionTitleString: 'definitionTitleString',
// Emphasis (`*alpha*`).
// Includes `emphasisSequence` and `emphasisText`.
emphasis: 'emphasis',
// Sequence of emphasis markers (`*` or `_`).
emphasisSequence: 'emphasisSequence',
// Emphasis text (`alpha`).
// Includes text.
emphasisText: 'emphasisText',
// The character escape marker (`\`).
escapeMarker: 'escapeMarker',
// A hard break created with a backslash (`\\n`).
// Note: does not include the line ending.
hardBreakEscape: 'hardBreakEscape',
// A hard break created with trailing spaces (` \n`).
// Does not include the line ending.
hardBreakTrailing: 'hardBreakTrailing',
// Flow HTML:
//
// ```markdown
// <div
// ```
//
// Inlcudes `lineEnding`, `htmlFlowData`.
htmlFlow: 'htmlFlow',
htmlFlowData: 'htmlFlowData',
// HTML in text (the tag in `a <i> b`).
// Includes `lineEnding`, `htmlTextData`.
htmlText: 'htmlText',
htmlTextData: 'htmlTextData',
// Whole image (`![alpha](bravo)`, `![alpha][bravo]`, `![alpha][]`, or
// `![alpha]`).
// Includes `label` and an optional `resource` or `reference`.
image: 'image',
// Whole link label (`[*alpha*]`).
// Includes `labelLink` or `labelImage`, `labelText`, and `labelEnd`.
label: 'label',
// Text in an label (`*alpha*`).
// Includes text.
labelText: 'labelText',
// Start a link label (`[`).
// Includes a `labelMarker`.
labelLink: 'labelLink',
// Start an image label (`![`).
// Includes `labelImageMarker` and `labelMarker`.
labelImage: 'labelImage',
// Marker of a label (`[` or `]`).
labelMarker: 'labelMarker',
// Marker to start an image (`!`).
labelImageMarker: 'labelImageMarker',
// End a label (`]`).
// Includes `labelMarker`.
labelEnd: 'labelEnd',
// Whole link (`[alpha](bravo)`, `[alpha][bravo]`, `[alpha][]`, or `[alpha]`).
// Includes `label` and an optional `resource` or `reference`.
link: 'link',
// Whole paragraph:
//
// ```markdown
// alpha
// bravo.
// ```
//
// Includes text.
paragraph: 'paragraph',
// A reference (`[alpha]` or `[]`).
// Includes `referenceMarker` and an optional `referenceString`.
reference: 'reference',
// A reference marker (`[` or `]`).
referenceMarker: 'referenceMarker',
// Reference text (`alpha`).
// Includes string.
referenceString: 'referenceString',
// A resource (`(https://example.com "alpha")`).
// Includes `resourceMarker`, an optional `resourceDestination` with an optional
// `whitespace` and `resourceTitle`.
resource: 'resource',
// A resource destination (`https://example.com`).
// Includes `resourceDestinationLiteral` or `resourceDestinationRaw`.
resourceDestination: 'resourceDestination',
// A literal resource destination (`<https://example.com>`).
// Includes `resourceDestinationLiteralMarker` and optionally
// `resourceDestinationString`.
resourceDestinationLiteral: 'resourceDestinationLiteral',
// A resource destination marker (`<` or `>`).
resourceDestinationLiteralMarker: 'resourceDestinationLiteralMarker',
// A raw resource destination (`https://example.com`).
// Includes `resourceDestinationString`.
resourceDestinationRaw: 'resourceDestinationRaw',
// Resource destination text (`https://example.com`).
// Includes string.
resourceDestinationString: 'resourceDestinationString',
// A resource marker (`(` or `)`).
resourceMarker: 'resourceMarker',
// A resource title (`"alpha"`, `'alpha'`, or `(alpha)`).
// Includes `resourceTitleMarker` and optionally `resourceTitleString`.
resourceTitle: 'resourceTitle',
// A resource title marker (`"`, `'`, `(`, or `)`).
resourceTitleMarker: 'resourceTitleMarker',
// Resource destination title (`alpha`).
// Includes string.
resourceTitleString: 'resourceTitleString',
// Whole setext heading:
//
// ```markdown
// alpha
// bravo
// =====
// ```
//
// Includes `setextHeadingText`, `lineEnding`, `linePrefix`, and
// `setextHeadingLine`.
setextHeading: 'setextHeading',
// Content in a setext heading (`alpha\nbravo`).
// Includes text.
setextHeadingText: 'setextHeadingText',
// Underline in a setext heading, including whitespace suffix (`==`).
// Includes `setextHeadingLineSequence`.
setextHeadingLine: 'setextHeadingLine',
// Sequence of equals or dash characters in underline in a setext heading (`-`).
setextHeadingLineSequence: 'setextHeadingLineSequence',
// Strong (`**alpha**`).
// Includes `strongSequence` and `strongText`.
strong: 'strong',
// Sequence of strong markers (`**` or `__`).
strongSequence: 'strongSequence',
// Strong text (`alpha`).
// Includes text.
strongText: 'strongText',
// Whole thematic break:
//
// ```markdown
// * * *
// ```
//
// Includes `thematicBreakSequence` and `whitespace`.
thematicBreak: 'thematicBreak',
// A sequence of one or more thematic break markers (`***`).
thematicBreakSequence: 'thematicBreakSequence',
// Whole block quote:
//
// ```markdown
// > a
// >
// > b
// ```
//
// Includes `blockQuotePrefix` and flow.
blockQuote: 'blockQuote',
// The `>` or `> ` of a block quote.
blockQuotePrefix: 'blockQuotePrefix',
// The `>` of a block quote prefix.
blockQuoteMarker: 'blockQuoteMarker',
// The optional ` ` of a block quote prefix.
blockQuotePrefixWhitespace: 'blockQuotePrefixWhitespace',
// Whole unordered list:
//
// ```markdown
// - a
// b
// ```
//
// Includes `listItemPrefix`, flow, and optionally `listItemIndent` on further
// lines.
listOrdered: 'listOrdered',
// Whole ordered list:
//
// ```markdown
// 1. a
// b
// ```
//
// Includes `listItemPrefix`, flow, and optionally `listItemIndent` on further
// lines.
listUnordered: 'listUnordered',
// The indent of further list item lines.
listItemIndent: 'listItemIndent',
// A marker, as in, `*`, `+`, `-`, `.`, or `)`.
listItemMarker: 'listItemMarker',
// The thing that starts a list item, such as `1. `.
// Includes `listItemValue` if ordered, `listItemMarker`, and
// `listItemPrefixWhitespace` (unless followed by a line ending).
listItemPrefix: 'listItemPrefix',
// The whitespace after a marker.
listItemPrefixWhitespace: 'listItemPrefixWhitespace',
// The numerical value of an ordered item.
listItemValue: 'listItemValue',
// Internal types used for subtokenizers, compiled away
chunkDocument: 'chunkDocument',
chunkContent: 'chunkContent',
chunkFlow: 'chunkFlow',
chunkText: 'chunkText',
chunkString: 'chunkString'
})

View File

@@ -0,0 +1,101 @@
export namespace values {
let ht: '\t'
let lf: '\n'
let cr: '\r'
let space: ' '
let exclamationMark: '!'
let quotationMark: '"'
let numberSign: '#'
let dollarSign: '$'
let percentSign: '%'
let ampersand: '&'
let apostrophe: "'"
let leftParenthesis: '('
let rightParenthesis: ')'
let asterisk: '*'
let plusSign: '+'
let comma: ','
let dash: '-'
let dot: '.'
let slash: '/'
let digit0: '0'
let digit1: '1'
let digit2: '2'
let digit3: '3'
let digit4: '4'
let digit5: '5'
let digit6: '6'
let digit7: '7'
let digit8: '8'
let digit9: '9'
let colon: ':'
let semicolon: ';'
let lessThan: '<'
let equalsTo: '='
let greaterThan: '>'
let questionMark: '?'
let atSign: '@'
let uppercaseA: 'A'
let uppercaseB: 'B'
let uppercaseC: 'C'
let uppercaseD: 'D'
let uppercaseE: 'E'
let uppercaseF: 'F'
let uppercaseG: 'G'
let uppercaseH: 'H'
let uppercaseI: 'I'
let uppercaseJ: 'J'
let uppercaseK: 'K'
let uppercaseL: 'L'
let uppercaseM: 'M'
let uppercaseN: 'N'
let uppercaseO: 'O'
let uppercaseP: 'P'
let uppercaseQ: 'Q'
let uppercaseR: 'R'
let uppercaseS: 'S'
let uppercaseT: 'T'
let uppercaseU: 'U'
let uppercaseV: 'V'
let uppercaseW: 'W'
let uppercaseX: 'X'
let uppercaseY: 'Y'
let uppercaseZ: 'Z'
let leftSquareBracket: '['
let backslash: '\\'
let rightSquareBracket: ']'
let caret: '^'
let underscore: '_'
let graveAccent: '`'
let lowercaseA: 'a'
let lowercaseB: 'b'
let lowercaseC: 'c'
let lowercaseD: 'd'
let lowercaseE: 'e'
let lowercaseF: 'f'
let lowercaseG: 'g'
let lowercaseH: 'h'
let lowercaseI: 'i'
let lowercaseJ: 'j'
let lowercaseK: 'k'
let lowercaseL: 'l'
let lowercaseM: 'm'
let lowercaseN: 'n'
let lowercaseO: 'o'
let lowercaseP: 'p'
let lowercaseQ: 'q'
let lowercaseR: 'r'
let lowercaseS: 's'
let lowercaseT: 't'
let lowercaseU: 'u'
let lowercaseV: 'v'
let lowercaseW: 'w'
let lowercaseX: 'x'
let lowercaseY: 'y'
let lowercaseZ: 'z'
let leftCurlyBrace: '{'
let verticalBar: '|'
let rightCurlyBrace: '}'
let tilde: '~'
let replacementCharacter: '<27>'
}

View File

@@ -0,0 +1,109 @@
/**
* This module is compiled away!
*
* While micromark works based on character codes, this module includes the
* string versions of em.
* The C0 block, except for LF, CR, HT, and w/ the replacement character added,
* are available here.
*/
export const values = /** @type {const} */ ({
ht: '\t',
lf: '\n',
cr: '\r',
space: ' ',
exclamationMark: '!',
quotationMark: '"',
numberSign: '#',
dollarSign: '$',
percentSign: '%',
ampersand: '&',
apostrophe: "'",
leftParenthesis: '(',
rightParenthesis: ')',
asterisk: '*',
plusSign: '+',
comma: ',',
dash: '-',
dot: '.',
slash: '/',
digit0: '0',
digit1: '1',
digit2: '2',
digit3: '3',
digit4: '4',
digit5: '5',
digit6: '6',
digit7: '7',
digit8: '8',
digit9: '9',
colon: ':',
semicolon: ';',
lessThan: '<',
equalsTo: '=',
greaterThan: '>',
questionMark: '?',
atSign: '@',
uppercaseA: 'A',
uppercaseB: 'B',
uppercaseC: 'C',
uppercaseD: 'D',
uppercaseE: 'E',
uppercaseF: 'F',
uppercaseG: 'G',
uppercaseH: 'H',
uppercaseI: 'I',
uppercaseJ: 'J',
uppercaseK: 'K',
uppercaseL: 'L',
uppercaseM: 'M',
uppercaseN: 'N',
uppercaseO: 'O',
uppercaseP: 'P',
uppercaseQ: 'Q',
uppercaseR: 'R',
uppercaseS: 'S',
uppercaseT: 'T',
uppercaseU: 'U',
uppercaseV: 'V',
uppercaseW: 'W',
uppercaseX: 'X',
uppercaseY: 'Y',
uppercaseZ: 'Z',
leftSquareBracket: '[',
backslash: '\\',
rightSquareBracket: ']',
caret: '^',
underscore: '_',
graveAccent: '`',
lowercaseA: 'a',
lowercaseB: 'b',
lowercaseC: 'c',
lowercaseD: 'd',
lowercaseE: 'e',
lowercaseF: 'f',
lowercaseG: 'g',
lowercaseH: 'h',
lowercaseI: 'i',
lowercaseJ: 'j',
lowercaseK: 'k',
lowercaseL: 'l',
lowercaseM: 'm',
lowercaseN: 'n',
lowercaseO: 'o',
lowercaseP: 'p',
lowercaseQ: 'q',
lowercaseR: 'r',
lowercaseS: 's',
lowercaseT: 't',
lowercaseU: 'u',
lowercaseV: 'v',
lowercaseW: 'w',
lowercaseX: 'x',
lowercaseY: 'y',
lowercaseZ: 'z',
leftCurlyBrace: '{',
verticalBar: '|',
rightCurlyBrace: '}',
tilde: '~',
replacementCharacter: '<27>'
})

View File

@@ -0,0 +1,35 @@
{
"name": "micromark-util-symbol",
"version": "2.0.0",
"description": "micromark utility with symbols",
"license": "MIT",
"keywords": [
"micromark",
"util",
"utility",
"symbol"
],
"repository": "https://github.com/micromark/micromark/tree/main/packages/micromark-util-symbol",
"bugs": "https://github.com/micromark/micromark/issues",
"funding": [
{
"type": "GitHub Sponsors",
"url": "https://github.com/sponsors/unifiedjs"
},
{
"type": "OpenCollective",
"url": "https://opencollective.com/unified"
}
],
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"files": [
"lib/"
],
"exports": "./lib/default.js",
"xo": false
}

View File

@@ -0,0 +1,168 @@
# micromark-util-symbol
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][bundle-size-badge]][bundle-size]
[![Sponsors][sponsors-badge]][opencollective]
[![Backers][backers-badge]][opencollective]
[![Chat][chat-badge]][chat]
[micromark][] utility with symbols.
## Contents
* [What is this?](#what-is-this)
* [When should I use this?](#when-should-i-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package exposes constants used throughout the micromark ecosystem.
## When should I use this?
This package is useful when you are making your own micromark extensions.
Its useful to reference these constants by name instead of value while
developing.
[`micromark-build`][micromark-build] compiles them away for production code.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-util-symbol
```
In Deno with [`esm.sh`][esmsh]:
```js
import * as symbol from 'https://esm.sh/micromark-util-symbol@1'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import * as symbol from 'https://esm.sh/micromark-util-symbol@1?bundle'
</script>
```
## Use
```js
import {codes, constants, types, values} from 'micromark-util-symbol'
console.log(codes.atSign) // 64
console.log(constants.characterReferenceNamedSizeMax) // 31
console.log(types.definitionDestinationRaw) // 'definitionDestinationRaw'
console.log(values.atSign) // '@'
```
## API
This package exports the identifiers `codes`, `constants`, `types`, and
`values`.
There is no default export.
Each identifier is an object mapping strings to values.
See the code for the exposed data.
## Types
This package is fully typed with [TypeScript][].
It exports no additional types.
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-util-symbol@^2`, compatible with Node.js 16.
This package works with `micromark@^3`.
## Security
This package is safe.
See [`security.md`][securitymd] in [`micromark/.github`][health] for how to
submit a security report.
## Contribute
See [`contributing.md`][contributing] in [`micromark/.github`][health] for ways
to get started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organisation, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark.svg
[coverage]: https://codecov.io/github/micromark/micromark
[downloads-badge]: https://img.shields.io/npm/dm/micromark-util-symbol.svg
[downloads]: https://www.npmjs.com/package/micromark-util-symbol
[bundle-size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-util-symbol
[bundle-size]: https://bundlejs.com/?q=micromark-util-symbol
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[opencollective]: https://opencollective.com/unified
[npm]: https://docs.npmjs.com/cli/install
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[esmsh]: https://esm.sh
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[license]: https://github.com/micromark/micromark/blob/main/license
[author]: https://wooorm.com
[health]: https://github.com/micromark/.github
[securitymd]: https://github.com/micromark/.github/blob/main/security.md
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[typescript]: https://www.typescriptlang.org
[micromark]: https://github.com/micromark/micromark
[micromark-build]: https://github.com/micromark/micromark/tree/main/packages/micromark-build

View File

@@ -0,0 +1,114 @@
{
"name": "micromark-extension-gfm-autolink-literal",
"version": "2.0.0",
"description": "micromark extension to support GFM autolink literals",
"license": "MIT",
"keywords": [
"micromark",
"micromark-extension",
"literal",
"url",
"autolink",
"auto",
"link",
"gfm",
"markdown",
"unified"
],
"repository": "micromark/micromark-extension-gfm-autolink-literal",
"bugs": "https://github.com/micromark/micromark-extension-gfm-autolink-literal/issues",
"funding": {
"type": "opencollective",
"url": "https://opencollective.com/unified"
},
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"files": [
"dev/",
"lib/",
"index.d.ts",
"index.js"
],
"exports": {
"development": "./dev/index.js",
"default": "./index.js"
},
"dependencies": {
"micromark-util-character": "^2.0.0",
"micromark-util-sanitize-uri": "^2.0.0",
"micromark-util-symbol": "^2.0.0",
"micromark-util-types": "^2.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"c8": "^8.0.0",
"create-gfm-fixtures": "^1.0.0",
"micromark": "^4.0.0",
"micromark-build": "^2.0.0",
"prettier": "^2.0.0",
"rehype": "^12.0.0",
"remark-cli": "^11.0.0",
"remark-preset-wooorm": "^9.0.0",
"type-coverage": "^2.0.0",
"typescript": "^5.0.0",
"xo": "^0.54.0"
},
"scripts": {
"prepack": "npm run build && npm run format",
"build": "tsc --build --clean && tsc --build && type-coverage && micromark-build",
"format": "remark . -qfo && prettier . -w --loglevel warn && xo --fix",
"test-api-prod": "node --conditions production test/index.js",
"test-api-dev": "node --conditions development test/index.js",
"test-api": "npm run test-api-dev && npm run test-api-prod",
"test-coverage": "c8 --100 --reporter lcov npm run test-api",
"test": "npm run build && npm run format && npm run test-coverage"
},
"prettier": {
"bracketSpacing": false,
"semi": false,
"singleQuote": true,
"tabWidth": 2,
"trailingComma": "none",
"useTabs": false
},
"remarkConfig": {
"plugins": [
"remark-preset-wooorm"
]
},
"typeCoverage": {
"atLeast": 100,
"detail": true,
"ignoreCatch": true,
"strict": true
},
"xo": {
"prettier": true,
"rules": {
"complexity": "off",
"unicorn/no-this-assignment": "off"
},
"overrides": [
{
"files": [
"**/*.ts"
],
"rules": {
"@typescript-eslint/consistent-type-definitions": 0
}
},
{
"files": [
"test/**/*.js"
],
"rules": {
"no-await-in-loop": 0
}
}
]
}
}

View File

@@ -0,0 +1,422 @@
# micromark-extension-gfm-autolink-literal
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][size-badge]][size]
[![Sponsors][sponsors-badge]][collective]
[![Backers][backers-badge]][collective]
[![Chat][chat-badge]][chat]
[micromark][] extensions to support GFM [literal autolinks][spec].
## Contents
* [What is this?](#what-is-this)
* [When to use this](#when-to-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [`gfmAutolinkLiteral()`](#gfmautolinkliteral)
* [`gfmAutolinkLiteralHtml()`](#gfmautolinkliteralhtml)
* [Bugs](#bugs)
* [Authoring](#authoring)
* [HTML](#html)
* [CSS](#css)
* [Syntax](#syntax)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Related](#related)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package contains extensions that add support for the extra autolink syntax
enabled by GFM to [`micromark`][micromark].
GitHub employs different algorithms to autolink: one at parse time and one at
transform time (similar to how @mentions are done at transform time).
This difference can be observed because character references and escapes are
handled differently.
But also because issues/PRs/comments omit (perhaps by accident?) the second
algorithm for `www.`, `http://`, and `https://` links (but not for email links).
As this is a syntax extension, it focuses on the first algorithm.
The second algorithm is performed by
[`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal].
The `html` part of this micromark extension does not operate on an AST and hence
cant perform the second algorithm.
The implementation of autolink literal on github.com is currently buggy.
The bugs have been reported on [`cmark-gfm`][cmark-gfm].
This micromark extension matches github.com except for its bugs.
## When to use this
This project is useful when you want to support autolink literals in markdown.
You can use these extensions when you are working with [`micromark`][micromark].
To support all GFM features, use
[`micromark-extension-gfm`][micromark-extension-gfm] instead.
When you need a syntax tree, combine this package with
[`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal].
All these packages are used in [`remark-gfm`][remark-gfm], which focusses on
making it easier to transform content by abstracting these internals away.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-extension-gfm-autolink-literal
```
In Deno with [`esm.sh`][esmsh]:
```js
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://esm.sh/micromark-extension-gfm-autolink-literal@2'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import {gfmAutolinkLiteral, gfmAutolinkLiteralHtml} from 'https://esm.sh/micromark-extension-gfm-autolink-literal@2?bundle'
</script>
```
## Use
```js
import {micromark} from 'micromark'
import {
gfmAutolinkLiteral,
gfmAutolinkLiteralHtml
} from 'micromark-extension-gfm-autolink-literal'
const output = micromark('Just a URL: www.example.com.', {
extensions: [gfmAutolinkLiteral()],
htmlExtensions: [gfmAutolinkLiteralHtml()]
})
console.log(output)
```
Yields:
```html
<p>Just a URL: <a href="http://www.example.com">www.example.com</a>.</p>
```
## API
This package exports the identifiers
[`gfmAutolinkLiteral`][api-gfm-autolink-literal] and
[`gfmAutolinkLiteralHtml`][api-gfm-autolink-literal-html].
There is no default export.
The export map supports the [`development` condition][development].
Run `node --conditions development module.js` to get instrumented dev code.
Without this condition, production code is loaded.
### `gfmAutolinkLiteral()`
Create an extension for `micromark` to support GitHub autolink literal
syntax.
###### Parameters
Extension for `micromark` that can be passed in `extensions` to enable GFM
autolink literal syntax ([`Extension`][micromark-extension]).
### `gfmAutolinkLiteralHtml()`
Create an HTML extension for `micromark` to support GitHub autolink literal
when serializing to HTML.
###### Parameters
Extension for `micromark` that can be passed in `htmlExtensions` to support
GitHub autolink literal when serializing to HTML
([`HtmlExtension`][micromark-html-extension]).
## Bugs
GitHubs own algorithm to parse autolink literals contains three bugs.
A smaller bug is left unfixed in this project for consistency.
Two main bugs are not present in this project.
The issues relating to autolink literals are:
* [GFM autolink extension (`www.`, `https?://` parts): links dont work when
after bracket](https://github.com/github/cmark-gfm/issues/278)\
fixed here ✅
* [GFM autolink extension (`www.` part): uppercase does not match on
issues/PRs/comments](https://github.com/github/cmark-gfm/issues/280)\
fixed here ✅
* [GFM autolink extension (`www.` part): the word `www`
matches](https://github.com/github/cmark-gfm/issues/279)\
present here for consistency
## Authoring
It is recommended to use labels, either with a resource or a definition,
instead of autolink literals, as those allow relative URLs and descriptive
text to explain the URL in prose.
## HTML
GFM autolink literals relate to the `<a>` element in HTML.
See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info.
When an email autolink is used, the string `mailto:` is prepended when
generating the `href` attribute of the hyperlink.
When a www autolink is used, the string `http://` is prepended.
## CSS
As hyperlinks are the fundamental thing that makes the web, you will most
definitely have CSS for `a` elements already.
The same CSS can be used for autolink literals, too.
GitHub itself does not apply interesting CSS to autolink literals.
For any link, it currently (June 2022) [uses][css]:
```css
a {
background-color: transparent;
color: #58a6ff;
text-decoration: none;
}
a:active,
a:hover {
outline-width: 0;
}
a:hover {
text-decoration: underline;
}
a:not([href]) {
color: inherit;
text-decoration: none;
}
```
## Syntax
Autolink literals form with, roughly, the following BNF:
```bnf
gfm_autolink_literal ::= gfm_protocol_autolink | gfm_www_autolink | gfm_email_autolink
; Restriction: the code before must be `www_autolink_before`.
; Restriction: the code after `.` must not be eof.
www_autolink ::= 3('w' | 'W') '.' [domain [path]]
www_autolink_before ::= eof | eol | space_or_tab | '(' | '*' | '_' | '[' | ']' | '~'
; Restriction: the code before must be `http_autolink_before`.
; Restriction: the code after the protocol must be `http_autolink_protocol_after`.
http_autolink ::= ('h' | 'H') 2('t' | 'T') ('p' | 'P') ['s' | 'S'] ':' 2'/' domain [path]
http_autolink_before ::= byte - ascii_alpha
http_autolink_protocol_after ::= byte - eof - eol - ascii_control - unicode_whitespace - ode_punctuation
; Restriction: the code before must be `email_autolink_before`.
; Restriction: `ascii_digit` may not occur in the last label part of the label.
email_autolink ::= 1*('+' | '-' | '.' | '_' | ascii_alphanumeric) '@' 1*(1*label_segment l_dot_cont) 1*label_segment
email_autolink_before ::= byte - ascii_alpha - '/'
; Restriction: `_` may not occur in the last two domain parts.
domain ::= 1*(url_ampt_cont | domain_punct_cont | '-' | byte - eof - ascii_control - ode_whitespace - unicode_punctuation)
; Restriction: must not be followed by `punct`.
domain_punct_cont ::= '.' | '_'
; Restriction: must not be followed by `char-ref`.
url_ampt_cont ::= '&'
; Restriction: a counter `balance = 0` is increased for every `(`, and decreased for every `)`.
; Restriction: `)` must not be `paren_at_end`.
path ::= 1*(url_ampt_cont | path_punctuation_cont | '(' | ')' | byte - eof - eol - space_or_tab)
; Restriction: must not be followed by `punct`.
path_punctuation_cont ::= trailing_punctuation - '<'
; Restriction: must be followed by `punct` and `balance` must be less than `0`.
paren_at_end ::= ')'
label_segment ::= label_dash_underscore_cont | ascii_alpha | ascii_digit
; Restriction: if followed by `punct`, the whole email autolink is invalid.
label_dash_underscore_cont ::= '-' | '_'
; Restriction: must not be followed by `punct`.
label_dot_cont ::= '.'
punct ::= *trailing_punctuation ( byte - eof - eol - space_or_tab - '<' )
char_ref ::= *ascii_alpha ';' path_end
trailing_punctuation ::= '!' | '"' | '\'' | ')' | '*' | ',' | '.' | ':' | ';' | '<' | '?' | '_' | '~'
```
The grammar for GFM autolink literal is very relaxed: basically anything
except for whitespace is allowed after a prefix.
To use whitespace characters and otherwise impossible characters, in URLs,
you can use percent encoding:
```markdown
https://example.com/alpha%20bravo
```
Yields:
```html
<p><a href="https://example.com/alpha%20bravo">https://example.com/alpha%20bravo</a></p>
```
There are several cases where incorrect encoding of URLs would, in other
languages, result in a parse error.
In markdown, there are no errors, and URLs are normalized.
In addition, many characters are percent encoded
([`sanitizeUri`][micromark-util-sanitize-uri]).
For example:
```markdown
www.a👍b%
```
Yields:
```html
<p><a href="http://www.a%F0%9F%91%8Db%25">www.a👍b%</a></p>
```
There is a big difference between how www and protocol literals work
compared to how email literals work.
The first two are done when parsing, and work like anything else in
markdown.
But email literals are handled afterwards: when everything is parsed, we
look back at the events to figure out if there were email addresses.
This particularly affects how they interleave with character escapes and
character references.
## Types
This package is fully typed with [TypeScript][].
It exports no additional types.
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-extension-gfm-autolink-literal@^2`, compatible with Node.js 16.
This package works with `micromark` version `3` and later.
## Security
This package is safe.
Unlike other links in CommonMark, which allow arbitrary protocols, this
construct always produces safe links.
## Related
* [`micromark-extension-gfm`][micromark-extension-gfm]
— support all of GFM
* [`mdast-util-gfm-autolink-literal`][mdast-util-gfm-autolink-literal]
— support all of GFM in mdast
* [`mdast-util-gfm`][mdast-util-gfm]
— support all of GFM in mdast
* [`remark-gfm`][remark-gfm]
— support all of GFM in remark
## Contribute
See [`contributing.md` in `micromark/.github`][contributing] for ways to get
started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organization, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark-extension-gfm-autolink-literal/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark-extension-gfm-autolink-literal/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark-extension-gfm-autolink-literal.svg
[coverage]: https://codecov.io/github/micromark/micromark-extension-gfm-autolink-literal
[downloads-badge]: https://img.shields.io/npm/dm/micromark-extension-gfm-autolink-literal.svg
[downloads]: https://www.npmjs.com/package/micromark-extension-gfm-autolink-literal
[size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-extension-gfm-autolink-literal
[size]: https://bundlejs.com/?q=micromark-extension-gfm-autolink-literal
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[collective]: https://opencollective.com/unified
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[npm]: https://docs.npmjs.com/cli/install
[esmsh]: https://esm.sh
[license]: license
[author]: https://wooorm.com
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[typescript]: https://www.typescriptlang.org
[development]: https://nodejs.org/api/packages.html#packages_resolving_user_conditions
[micromark]: https://github.com/micromark/micromark
[micromark-extension-gfm]: https://github.com/micromark/micromark-extension-gfm
[micromark-util-sanitize-uri]: https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri
[micromark-extension]: https://github.com/micromark/micromark#syntaxextension
[micromark-html-extension]: https://github.com/micromark/micromark#htmlextension
[mdast-util-gfm]: https://github.com/syntax-tree/mdast-util-gfm
[mdast-util-gfm-autolink-literal]: https://github.com/syntax-tree/mdast-util-gfm-autolink-literal
[remark-gfm]: https://github.com/remarkjs/remark-gfm
[spec]: https://github.github.com/gfm/#autolinks-extension-
[html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
[css]: https://github.com/sindresorhus/github-markdown-css
[cmark-gfm]: https://github.com/github/cmark-gfm
[api-gfm-autolink-literal]: #gfmautolinkliteral
[api-gfm-autolink-literal-html]: #gfmautolinkliteralhtml