This commit is contained in:
2024-03-22 03:47:51 +05:30
parent 8bcf3d211e
commit 89819f6fe2
28440 changed files with 3211033 additions and 2 deletions

View File

@@ -0,0 +1,21 @@
import type {Program} from 'estree'
export {mdxExpression, type Options} from './lib/syntax.js'
// Module augmentation: extends the interfaces of `micromark-util-types` with
// the token field and the token type names used by this extension.
declare module 'micromark-util-types' {
// Extra, optional field on tokens.
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
interface Token {
/**
* Program (estree) attached to a token; only added when the `addResult`
* option is used (it holds results from acorn).
*/
estree?: Program
}
// Token type names that the flow and text expression tokenizers pass to
// `micromark-factory-mdx-expression`.
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
interface TokenTypeMap {
mdxFlowExpression: 'mdxFlowExpression'
mdxFlowExpressionMarker: 'mdxFlowExpressionMarker'
mdxFlowExpressionChunk: 'mdxFlowExpressionChunk'
mdxTextExpression: 'mdxTextExpression'
mdxTextExpressionMarker: 'mdxTextExpressionMarker'
mdxTextExpressionChunk: 'mdxTextExpressionChunk'
}
}

View File

@@ -0,0 +1,5 @@
/**
* @typedef {import('./lib/syntax.js').Options} Options
*/
export {mdxExpression} from './lib/syntax.js'

View File

@@ -0,0 +1,49 @@
/**
* Create an extension for `micromark` to enable MDX expression syntax.
*
* @param {Options | null | undefined} [options]
* Configuration (optional).
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable MDX
* expression syntax.
* @throws {Error}
* When `options.acorn` has no `parseExpressionAt`, or when
* `options.acornOptions` or `options.addResult` is passed without
* `options.acorn`.
*/
export function mdxExpression(options?: Options | null | undefined): Extension;
export type Acorn = import('micromark-util-events-to-acorn').Acorn;
export type AcornOptions = import('micromark-util-events-to-acorn').AcornOptions;
export type Extension = import('micromark-util-types').Extension;
export type State = import('micromark-util-types').State;
export type TokenizeContext = import('micromark-util-types').TokenizeContext;
export type Tokenizer = import('micromark-util-types').Tokenizer;
/**
* Configuration (optional).
*/
export type Options = {
/**
* Acorn parser to use (optional).
*/
acorn?: Acorn | null | undefined;
/**
* Configuration for acorn (default: `{ecmaVersion: 2024, locations: true,
* sourceType: 'module'}`).
*
* All fields except `locations` can be set.
*/
acornOptions?: AcornOptions | null | undefined;
/**
* Whether to add `estree` fields to tokens with results from acorn (default:
* `false`).
*/
addResult?: boolean | null | undefined;
/**
* Undocumented option to parse only a spread (used by
* `micromark-extension-mdx-jsx` to parse spread attributes) (default:
* `false`).
*/
spread?: boolean | null | undefined;
/**
* Undocumented option: whether empty expressions are allowed; pass `false` to
* disallow them (used by `micromark-extension-mdx-jsx` to prohibit empty
* attribute values) (default: `true`).
*/
allowEmpty?: boolean | null | undefined;
};

View File

@@ -0,0 +1,273 @@
/**
* @typedef {import('micromark-util-events-to-acorn').Acorn} Acorn
* @typedef {import('micromark-util-events-to-acorn').AcornOptions} AcornOptions
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
/**
* @typedef Options
* Configuration (optional).
* @property {Acorn | null | undefined} [acorn]
* Acorn parser to use (optional).
* @property {AcornOptions | null | undefined} [acornOptions]
* Configuration for acorn (default: `{ecmaVersion: 2024, locations: true,
* sourceType: 'module'}`).
*
* All fields except `locations` can be set.
* @property {boolean | null | undefined} [addResult=false]
* Whether to add `estree` fields to tokens with results from acorn (default:
* `false`).
* @property {boolean | null | undefined} [spread=false]
* Undocumented option to parse only a spread (used by
* `micromark-extension-mdx-jsx` to parse spread attributes) (default:
* `false`).
* @property {boolean | null | undefined} [allowEmpty=true]
* Undocumented option: whether empty expressions are allowed; pass `false` to
* disallow them (used by `micromark-extension-mdx-jsx` to prohibit empty
* attribute values) (default: `true`).
*/
import {ok as assert} from 'devlop'
import {factoryMdxExpression} from 'micromark-factory-mdx-expression'
import {factorySpace} from 'micromark-factory-space'
import {markdownLineEnding, markdownSpace} from 'micromark-util-character'
import {codes, types} from 'micromark-util-symbol'
/**
 * Build the `micromark` syntax extension that adds MDX expressions (`{…}`),
 * both as a flow (block) construct and as a text (inline) construct.
 *
 * @param {Options | null | undefined} [options]
 *   Configuration (optional).
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to enable MDX
 *   expression syntax.
 * @throws {Error}
 *   When `options.acorn` has no `parseExpressionAt`, or when
 *   `options.acornOptions` or `options.addResult` is passed without
 *   `options.acorn`.
 */
export function mdxExpression(options) {
  const settings = options || {}
  const addResult = settings.addResult
  const acorn = settings.acorn
  // Hidden: `micromark-extension-mdx-jsx` supports expressions in tags, some
  // of which may only contain a “spread”, and some of which may not be empty
  // (`<x y={}/>`).
  // Rather than duplicating code there, these two small hidden options exist
  // here so that behavior can be tested.
  const spread = settings.spread
  const allowEmptySetting = settings.allowEmpty
  // Empty expressions are allowed unless explicitly configured otherwise.
  const allowEmpty =
    allowEmptySetting === null || allowEmptySetting === undefined
      ? true
      : allowEmptySetting
  /** @type {AcornOptions} */
  let acornOptions

  if (!acorn) {
    // Without a parser, options that need one make no sense.
    if (settings.acornOptions || settings.addResult) {
      throw new Error('Expected an `acorn` instance passed in as `options.acorn`')
    }
  } else if (acorn.parseExpressionAt) {
    // User options override the defaults.
    const defaults = {ecmaVersion: 2024, sourceType: 'module'}
    acornOptions = Object.assign(defaults, settings.acornOptions)
  } else {
    throw new Error(
      'Expected a proper `acorn` instance passed in as `options.acorn`'
    )
  }

  const flowConstruct = {
    name: 'mdxFlowExpression',
    tokenize: tokenizeFlowExpression,
    concrete: true
  }
  const textConstruct = {
    name: 'mdxTextExpression',
    tokenize: tokenizeTextExpression
  }

  // Both constructs are triggered by `{`.
  return {
    flow: {[codes.leftCurlyBrace]: flowConstruct},
    text: {[codes.leftCurlyBrace]: textConstruct}
  }

  /**
   * Tokenizer for an MDX expression in flow (block) content.
   *
   * ```markdown
   * > | {Math.PI}
   *     ^^^^^^^^^
   * ```
   *
   * @this {TokenizeContext}
   * @type {Tokenizer}
   */
  function tokenizeFlowExpression(effects, ok, nok) {
    const context = this

    return start

    /**
     * At the opening brace of a flow expression.
     *
     * ```markdown
     * > | {Math.PI}
     *     ^
     * ```
     *
     * @type {State}
     */
    function start(code) {
      // To do: in `markdown-rs`, constructs need to parse the indent themselves.
      // This should also be introduced in `micromark-js`.
      assert(code === codes.leftCurlyBrace, 'expected `{`')
      return before(code)
    }

    /**
     * Before the expression, after optional whitespace.
     *
     * ```markdown
     * > | {Math.PI}
     *     ^
     * ```
     *
     * @type {State}
     */
    function before(code) {
      const expression = factoryMdxExpression.call(
        context,
        effects,
        after,
        'mdxFlowExpression',
        'mdxFlowExpressionMarker',
        'mdxFlowExpressionChunk',
        acorn,
        acornOptions,
        addResult,
        spread,
        allowEmpty
      )

      return expression(code)
    }

    /**
     * Directly after the expression: eat trailing whitespace, if any.
     *
     * ```markdown
     * > | {Math.PI}
     *              ^
     * ```
     *
     * @type {State}
     */
    function after(code) {
      if (markdownSpace(code)) {
        return factorySpace(effects, end, types.whitespace)(code)
      }

      return end(code)
    }

    /**
     * After the expression and optional trailing whitespace.
     *
     * ```markdown
     * > | {Math.PI}␠␊
     *               ^
     * ```
     *
     * @type {State}
     */
    function end(code) {
      // Tags are allowed directly after expressions.
      //
      // Useful:
      //
      // ```mdx
      // <a>{b}</a>
      // ```
      //
      // Not (very?) useful:
      //
      // ```mdx
      // {a}<b/>
      // ```
      //
      // …but disallowing that would be tougher than needed.
      //
      // To support it, the flow construct of `micromark-extension-mdx-jsx` is
      // called from here, and that construct calls this one.
      //
      // Testing JSX together with expressions here would introduce a cyclical
      // interdependency; as the JSX extension already uses parts of this
      // monorepo, it is tested there instead.
      let candidates = context.parser.constructs.flow[codes.lessThan]

      /* c8 ignore next 3 -- always a list when normalized. */
      if (!Array.isArray(candidates)) {
        candidates = candidates ? [candidates] : []
      }

      const jsxTag = candidates.find(
        (construct) => construct.name === 'mdxJsxFlowTag'
      )

      /* c8 ignore next 3 -- this is tested in `micromark-extension-mdx-jsx` */
      if (jsxTag && code === codes.lessThan) {
        return effects.attempt(jsxTag, end, nok)(code)
      }

      // Otherwise, only the end of the line (or of the document) may follow.
      if (code === codes.eof || markdownLineEnding(code)) {
        return ok(code)
      }

      return nok(code)
    }
  }

  /**
   * Tokenizer for an MDX expression in text (inline) content.
   *
   * ```markdown
   * > | a {Math.PI} c.
   *       ^^^^^^^^^
   * ```
   *
   * @this {TokenizeContext}
   * @type {Tokenizer}
   */
  function tokenizeTextExpression(effects, ok) {
    const context = this

    return start

    /**
     * At the opening brace of a text expression.
     *
     * ```markdown
     * > | a {Math.PI} c.
     *       ^
     * ```
     *
     * @type {State}
     */
    function start(code) {
      assert(code === codes.leftCurlyBrace, 'expected `{`')

      const expression = factoryMdxExpression.call(
        context,
        effects,
        ok,
        'mdxTextExpression',
        'mdxTextExpressionMarker',
        'mdxTextExpressionChunk',
        acorn,
        acornOptions,
        addResult,
        spread,
        allowEmpty,
        true
      )

      return expression(code)
    }
  }
}

View File

@@ -0,0 +1,21 @@
import type {Program} from 'estree'
export {mdxExpression, type Options} from './lib/syntax.js'
// Module augmentation: extends the interfaces of `micromark-util-types` with
// the token field and the token type names used by this extension.
declare module 'micromark-util-types' {
// Extra, optional field on tokens.
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
interface Token {
/**
* Program (estree) attached to a token; only added when the `addResult`
* option is used (it holds results from acorn).
*/
estree?: Program
}
// Token type names that the flow and text expression tokenizers pass to
// `micromark-factory-mdx-expression`.
// eslint-disable-next-line @typescript-eslint/consistent-type-definitions
interface TokenTypeMap {
mdxFlowExpression: 'mdxFlowExpression'
mdxFlowExpressionMarker: 'mdxFlowExpressionMarker'
mdxFlowExpressionChunk: 'mdxFlowExpressionChunk'
mdxTextExpression: 'mdxTextExpression'
mdxTextExpressionMarker: 'mdxTextExpressionMarker'
mdxTextExpressionChunk: 'mdxTextExpressionChunk'
}
}

View File

@@ -0,0 +1,5 @@
/**
* @typedef {import('./lib/syntax.js').Options} Options
*/
export { mdxExpression } from './lib/syntax.js';

View File

@@ -0,0 +1,49 @@
/**
* Create an extension for `micromark` to enable MDX expression syntax.
*
* @param {Options | null | undefined} [options]
* Configuration (optional).
* @returns {Extension}
* Extension for `micromark` that can be passed in `extensions` to enable MDX
* expression syntax.
* @throws {Error}
* When `options.acorn` has no `parseExpressionAt`, or when
* `options.acornOptions` or `options.addResult` is passed without
* `options.acorn`.
*/
export function mdxExpression(options?: Options | null | undefined): Extension;
export type Acorn = import('micromark-util-events-to-acorn').Acorn;
export type AcornOptions = import('micromark-util-events-to-acorn').AcornOptions;
export type Extension = import('micromark-util-types').Extension;
export type State = import('micromark-util-types').State;
export type TokenizeContext = import('micromark-util-types').TokenizeContext;
export type Tokenizer = import('micromark-util-types').Tokenizer;
/**
* Configuration (optional).
*/
export type Options = {
/**
* Acorn parser to use (optional).
*/
acorn?: Acorn | null | undefined;
/**
* Configuration for acorn (default: `{ecmaVersion: 2024, locations: true,
* sourceType: 'module'}`).
*
* All fields except `locations` can be set.
*/
acornOptions?: AcornOptions | null | undefined;
/**
* Whether to add `estree` fields to tokens with results from acorn (default:
* `false`).
*/
addResult?: boolean | null | undefined;
/**
* Undocumented option to parse only a spread (used by
* `micromark-extension-mdx-jsx` to parse spread attributes) (default:
* `false`).
*/
spread?: boolean | null | undefined;
/**
* Undocumented option: whether empty expressions are allowed; pass `false` to
* disallow them (used by `micromark-extension-mdx-jsx` to prohibit empty
* attribute values) (default: `true`).
*/
allowEmpty?: boolean | null | undefined;
};

View File

@@ -0,0 +1,227 @@
/**
* @typedef {import('micromark-util-events-to-acorn').Acorn} Acorn
* @typedef {import('micromark-util-events-to-acorn').AcornOptions} AcornOptions
* @typedef {import('micromark-util-types').Extension} Extension
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
* @typedef {import('micromark-util-types').Tokenizer} Tokenizer
*/
/**
* @typedef Options
* Configuration (optional).
* @property {Acorn | null | undefined} [acorn]
* Acorn parser to use (optional).
* @property {AcornOptions | null | undefined} [acornOptions]
* Configuration for acorn (default: `{ecmaVersion: 2024, locations: true,
* sourceType: 'module'}`).
*
* All fields except `locations` can be set.
* @property {boolean | null | undefined} [addResult=false]
* Whether to add `estree` fields to tokens with results from acorn (default:
* `false`).
* @property {boolean | null | undefined} [spread=false]
* Undocumented option to parse only a spread (used by
* `micromark-extension-mdx-jsx` to parse spread attributes) (default:
* `false`).
* @property {boolean | null | undefined} [allowEmpty=true]
* Undocumented option: whether empty expressions are allowed; pass `false` to
* disallow them (used by `micromark-extension-mdx-jsx` to prohibit empty
* attribute values) (default: `true`).
*/
import { factoryMdxExpression } from 'micromark-factory-mdx-expression';
import { factorySpace } from 'micromark-factory-space';
import { markdownLineEnding, markdownSpace } from 'micromark-util-character';
/**
 * Build the `micromark` syntax extension that adds MDX expressions (`{…}`),
 * both as a flow (block) construct and as a text (inline) construct.
 *
 * @param {Options | null | undefined} [options]
 *   Configuration (optional).
 * @returns {Extension}
 *   Extension for `micromark` that can be passed in `extensions` to enable MDX
 *   expression syntax.
 * @throws {Error}
 *   When `options.acorn` has no `parseExpressionAt`, or when
 *   `options.acornOptions` or `options.addResult` is passed without
 *   `options.acorn`.
 */
export function mdxExpression(options) {
  const settings = options || {};
  const addResult = settings.addResult;
  const acorn = settings.acorn;
  // Hidden: `micromark-extension-mdx-jsx` supports expressions in tags, some
  // of which may only contain a “spread”, and some of which may not be empty
  // (`<x y={}/>`).
  // Rather than duplicating code there, these two small hidden options exist
  // here so that behavior can be tested.
  const spread = settings.spread;
  const allowEmptySetting = settings.allowEmpty;
  // Empty expressions are allowed unless explicitly configured otherwise.
  const allowEmpty =
    allowEmptySetting === null || allowEmptySetting === undefined
      ? true
      : allowEmptySetting;
  /** @type {AcornOptions} */
  let acornOptions;

  if (!acorn) {
    // Without a parser, options that need one make no sense.
    if (settings.acornOptions || settings.addResult) {
      throw new Error('Expected an `acorn` instance passed in as `options.acorn`');
    }
  } else if (acorn.parseExpressionAt) {
    // User options override the defaults.
    const defaults = {
      ecmaVersion: 2024,
      sourceType: 'module'
    };
    acornOptions = Object.assign(defaults, settings.acornOptions);
  } else {
    throw new Error('Expected a proper `acorn` instance passed in as `options.acorn`');
  }

  const flowConstruct = {
    name: 'mdxFlowExpression',
    tokenize: tokenizeFlowExpression,
    concrete: true
  };
  const textConstruct = {
    name: 'mdxTextExpression',
    tokenize: tokenizeTextExpression
  };

  // Both constructs are triggered by `{` (character code 123).
  return {
    flow: {
      [123]: flowConstruct
    },
    text: {
      [123]: textConstruct
    }
  };

  /**
   * Tokenizer for an MDX expression in flow (block) content.
   *
   * ```markdown
   * > | {Math.PI}
   *     ^^^^^^^^^
   * ```
   *
   * @this {TokenizeContext}
   * @type {Tokenizer}
   */
  function tokenizeFlowExpression(effects, ok, nok) {
    const context = this;

    return start;

    /**
     * At the opening brace of a flow expression.
     *
     * ```markdown
     * > | {Math.PI}
     *     ^
     * ```
     *
     * @type {State}
     */
    function start(code) {
      // To do: in `markdown-rs`, constructs need to parse the indent themselves.
      // This should also be introduced in `micromark-js`.
      return before(code);
    }

    /**
     * Before the expression, after optional whitespace.
     *
     * ```markdown
     * > | {Math.PI}
     *     ^
     * ```
     *
     * @type {State}
     */
    function before(code) {
      const expression = factoryMdxExpression.call(
        context,
        effects,
        after,
        'mdxFlowExpression',
        'mdxFlowExpressionMarker',
        'mdxFlowExpressionChunk',
        acorn,
        acornOptions,
        addResult,
        spread,
        allowEmpty
      );

      return expression(code);
    }

    /**
     * Directly after the expression: eat trailing whitespace, if any.
     *
     * ```markdown
     * > | {Math.PI}
     *              ^
     * ```
     *
     * @type {State}
     */
    function after(code) {
      if (markdownSpace(code)) {
        return factorySpace(effects, end, "whitespace")(code);
      }

      return end(code);
    }

    /**
     * After the expression and optional trailing whitespace.
     *
     * ```markdown
     * > | {Math.PI}␠␊
     *               ^
     * ```
     *
     * @type {State}
     */
    function end(code) {
      // Tags are allowed directly after expressions.
      //
      // Useful:
      //
      // ```mdx
      // <a>{b}</a>
      // ```
      //
      // Not (very?) useful:
      //
      // ```mdx
      // {a}<b/>
      // ```
      //
      // …but disallowing that would be tougher than needed.
      //
      // To support it, the flow construct of `micromark-extension-mdx-jsx` is
      // called from here, and that construct calls this one.
      //
      // Testing JSX together with expressions here would introduce a cyclical
      // interdependency; as the JSX extension already uses parts of this
      // monorepo, it is tested there instead.
      //
      // 60 is the character code of `<`.
      let candidates = context.parser.constructs.flow[60];

      /* c8 ignore next 3 -- always a list when normalized. */
      if (!Array.isArray(candidates)) {
        candidates = candidates ? [candidates] : [];
      }

      const jsxTag = candidates.find(
        (construct) => construct.name === 'mdxJsxFlowTag'
      );

      /* c8 ignore next 3 -- this is tested in `micromark-extension-mdx-jsx` */
      if (jsxTag && code === 60) {
        return effects.attempt(jsxTag, end, nok)(code);
      }

      // Otherwise, only the end of the line (or of the document, `null`) may
      // follow.
      if (code === null || markdownLineEnding(code)) {
        return ok(code);
      }

      return nok(code);
    }
  }

  /**
   * Tokenizer for an MDX expression in text (inline) content.
   *
   * ```markdown
   * > | a {Math.PI} c.
   *       ^^^^^^^^^
   * ```
   *
   * @this {TokenizeContext}
   * @type {Tokenizer}
   */
  function tokenizeTextExpression(effects, ok) {
    const context = this;

    return start;

    /**
     * At the opening brace of a text expression.
     *
     * ```markdown
     * > | a {Math.PI} c.
     *       ^
     * ```
     *
     * @type {State}
     */
    function start(code) {
      const expression = factoryMdxExpression.call(
        context,
        effects,
        ok,
        'mdxTextExpression',
        'mdxTextExpressionMarker',
        'mdxTextExpressionChunk',
        acorn,
        acornOptions,
        addResult,
        spread,
        allowEmpty,
        true
      );

      return expression(code);
    }
  }
}

View File

@@ -0,0 +1,41 @@
/**
* Parse spaces and tabs.
*
* There is no `nok` parameter:
*
* * spaces in markdown are often optional, in which case this factory can be
* used and `ok` will be switched to whether spaces were found or not
* * when at least one space or tab is required, check `markdownSpace(code)`
* right before using `factorySpace`
*
* ###### Examples
*
* Where `␉` represents a tab (plus how much it expands) and `␠` represents a
* single space.
*
* ```markdown
* ␉
* ␠␠␠␠
* ␉␠
* ```
*
* @param {Effects} effects
* Context.
* @param {State} ok
* State switched to when successful.
* @param {TokenType} type
* Token type to enter and exit around the consumed spaces and tabs.
* @param {number | undefined} [max=Infinity]
* Max (exclusive): at most `max - 1` characters are consumed; there is no
* limit when omitted.
* @returns {State}
* Start state.
*/
export function factorySpace(
effects: Effects,
ok: State,
type: TokenType,
max?: number | undefined
): State
export type Effects = import('micromark-util-types').Effects
export type State = import('micromark-util-types').State
export type TokenType = import('micromark-util-types').TokenType

View File

@@ -0,0 +1,69 @@
/**
* @typedef {import('micromark-util-types').Effects} Effects
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenType} TokenType
*/
import {markdownSpace} from 'micromark-util-character'
// To do: implement `spaceOrTab`, `spaceOrTabMinMax`, `spaceOrTabWithOptions`.
/**
 * Create a state that eats a run of markdown spaces and tabs.
 *
 * The factory takes no `nok` state:
 *
 * * whitespace in markdown is usually optional, so `ok` is switched to both
 *   when spaces were found and when they were not
 * * when at least one space or tab is required, check `markdownSpace(code)`
 *   right before using `factorySpace`
 *
 * ###### Examples
 *
 * Where `␉` represents a tab (plus how much it expands) and `␠` represents a
 * single space.
 *
 * ```markdown
 * ␉
 * ␠␠␠␠
 * ␉␠
 * ```
 *
 * @param {Effects} effects
 *   Context.
 * @param {State} ok
 *   State switched to when done.
 * @param {TokenType} type
 *   Token type to enter and exit around the consumed spaces and tabs.
 * @param {number | undefined} [max=Infinity]
 *   Max (exclusive): at most `max - 1` characters are consumed.
 * @returns {State}
 *   Start state.
 */
export function factorySpace(effects, ok, type, max) {
  // A falsy `max` (omitted, or `0`) means there is no limit.
  const budget = max ? max - 1 : Number.POSITIVE_INFINITY
  let consumed = 0

  return start

  /** @type {State} */
  function start(code) {
    // No whitespace here: hand over right away, without creating a token.
    if (!markdownSpace(code)) {
      return ok(code)
    }

    effects.enter(type)
    return prefix(code)
  }

  /** @type {State} */
  function prefix(code) {
    // The counter only advances for whitespace characters.
    const keepGoing = markdownSpace(code) && consumed++ < budget

    if (!keepGoing) {
      effects.exit(type)
      return ok(code)
    }

    effects.consume(code)
    return prefix
  }
}

View File

@@ -0,0 +1,41 @@
/**
* Parse spaces and tabs.
*
* There is no `nok` parameter:
*
* * spaces in markdown are often optional, in which case this factory can be
* used and `ok` will be switched to whether spaces were found or not
* * when at least one space or tab is required, check `markdownSpace(code)`
* right before using `factorySpace`
*
* ###### Examples
*
* Where `␉` represents a tab (plus how much it expands) and `␠` represents a
* single space.
*
* ```markdown
* ␉
* ␠␠␠␠
* ␉␠
* ```
*
* @param {Effects} effects
* Context.
* @param {State} ok
* State switched to when successful.
* @param {TokenType} type
* Token type to enter and exit around the consumed spaces and tabs.
* @param {number | undefined} [max=Infinity]
* Max (exclusive): at most `max - 1` characters are consumed; there is no
* limit when omitted.
* @returns {State}
* Start state.
*/
export function factorySpace(
effects: Effects,
ok: State,
type: TokenType,
max?: number | undefined
): State
export type Effects = import('micromark-util-types').Effects
export type State = import('micromark-util-types').State
export type TokenType = import('micromark-util-types').TokenType

View File

@@ -0,0 +1,66 @@
/**
* @typedef {import('micromark-util-types').Effects} Effects
* @typedef {import('micromark-util-types').State} State
* @typedef {import('micromark-util-types').TokenType} TokenType
*/
import {markdownSpace} from 'micromark-util-character'
// To do: implement `spaceOrTab`, `spaceOrTabMinMax`, `spaceOrTabWithOptions`.
/**
 * Create a state that eats a run of markdown spaces and tabs.
 *
 * The factory takes no `nok` state:
 *
 * * whitespace in markdown is usually optional, so `ok` is switched to both
 *   when spaces were found and when they were not
 * * when at least one space or tab is required, check `markdownSpace(code)`
 *   right before using `factorySpace`
 *
 * ###### Examples
 *
 * Where `␉` represents a tab (plus how much it expands) and `␠` represents a
 * single space.
 *
 * ```markdown
 * ␉
 * ␠␠␠␠
 * ␉␠
 * ```
 *
 * @param {Effects} effects
 *   Context.
 * @param {State} ok
 *   State switched to when done.
 * @param {TokenType} type
 *   Token type to enter and exit around the consumed spaces and tabs.
 * @param {number | undefined} [max=Infinity]
 *   Max (exclusive): at most `max - 1` characters are consumed.
 * @returns {State}
 *   Start state.
 */
export function factorySpace(effects, ok, type, max) {
  // A falsy `max` (omitted, or `0`) means there is no limit.
  const budget = max ? max - 1 : Number.POSITIVE_INFINITY
  let consumed = 0

  return start

  /** @type {State} */
  function start(code) {
    // No whitespace here: hand over right away, without creating a token.
    if (!markdownSpace(code)) {
      return ok(code)
    }

    effects.enter(type)
    return prefix(code)
  }

  /** @type {State} */
  function prefix(code) {
    // The counter only advances for whitespace characters.
    const keepGoing = markdownSpace(code) && consumed++ < budget

    if (!keepGoing) {
      effects.exit(type)
      return ok(code)
    }

    effects.consume(code)
    return prefix
  }
}

View File

@@ -0,0 +1,46 @@
{
"name": "micromark-factory-space",
"version": "2.0.0",
"description": "micromark factory to parse markdown space (found in lots of places)",
"license": "MIT",
"keywords": [
"micromark",
"factory",
"space"
],
"repository": "https://github.com/micromark/micromark/tree/main/packages/micromark-factory-space",
"bugs": "https://github.com/micromark/micromark/issues",
"funding": [
{
"type": "GitHub Sponsors",
"url": "https://github.com/sponsors/unifiedjs"
},
{
"type": "OpenCollective",
"url": "https://opencollective.com/unified"
}
],
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"files": [
"dev/",
"index.d.ts",
"index.js"
],
"exports": {
"development": "./dev/index.js",
"default": "./index.js"
},
"dependencies": {
"micromark-util-character": "^2.0.0",
"micromark-util-types": "^2.0.0"
},
"scripts": {
"build": "micromark-build"
},
"xo": false
}

View File

@@ -0,0 +1,224 @@
# micromark-factory-space
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][bundle-size-badge]][bundle-size]
[![Sponsors][sponsors-badge]][opencollective]
[![Backers][backers-badge]][opencollective]
[![Chat][chat-badge]][chat]
[micromark][] factory to parse [markdown space][markdown-space] (found in lots
of places).
## Contents
* [What is this?](#what-is-this)
* [When should I use this?](#when-should-i-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [`factorySpace(…)`](#factoryspace)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package exposes states to parse spaces and/or tabs.
## When should I use this?
This package is useful when you are making your own micromark extensions.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-factory-space
```
In Deno with [`esm.sh`][esmsh]:
```js
import {factorySpace} from 'https://esm.sh/micromark-factory-space@2'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import {factorySpace} from 'https://esm.sh/micromark-factory-space@2?bundle'
</script>
```
## Use
```js
import {factorySpace} from 'micromark-factory-space'
import {codes, types} from 'micromark-util-symbol'
// A micromark tokenizer that uses the factory:
/**
* @this {TokenizeContext}
* @type {Tokenizer}
*/
function tokenizeCodeFenced(effects, ok, nok) {
return start
// …
/** @type {State} */
function info(code) {
if (code === codes.eof || markdownLineEndingOrSpace(code)) {
effects.exit(types.chunkString)
effects.exit(types.codeFencedFenceInfo)
return factorySpace(effects, infoAfter, types.whitespace)(code)
}
if (code === codes.graveAccent && code === marker) return nok(code)
effects.consume(code)
return info
}
// …
}
```
## API
This module exports the identifier [`factorySpace`][api-factory-space].
There is no default export.
### `factorySpace(…)`
Parse spaces and tabs.
There is no `nok` parameter:
* spaces in markdown are often optional, in which case this factory can be
used and `ok` will be switched to whether spaces were found or not
* when at least one space or tab is required, check `markdownSpace(code)`
right before using `factorySpace`
###### Examples
Where `␉` represents a tab (plus how much it expands) and `␠` represents a
single space.
```markdown
␠␠␠␠
␉␠
```
###### Parameters
* `effects` (`Effects`)
— context
* `ok` (`State`)
— state switched to when successful
* `type` (`TokenType`)
— token type to enter and exit around the consumed spaces and tabs (such as
`types.whitespace`)
* `max` (`number`, default: `Infinity`)
— max (exclusive)
###### Returns
Start state (`State`).
## Types
This package is fully typed with [TypeScript][].
It exports no additional types.
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-factory-space@^2`, compatible with Node.js 16.
This package works with `micromark@^4`.
## Security
This package is safe.
See [`security.md`][securitymd] in [`micromark/.github`][health] for how to
submit a security report.
## Contribute
See [`contributing.md`][contributing] in [`micromark/.github`][health] for ways
to get started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organisation, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark.svg
[coverage]: https://codecov.io/github/micromark/micromark
[downloads-badge]: https://img.shields.io/npm/dm/micromark-factory-space.svg
[downloads]: https://www.npmjs.com/package/micromark-factory-space
[bundle-size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-factory-space
[bundle-size]: https://bundlejs.com/?q=micromark-factory-space
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[opencollective]: https://opencollective.com/unified
[npm]: https://docs.npmjs.com/cli/install
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[esmsh]: https://esm.sh
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[license]: https://github.com/micromark/micromark/blob/main/license
[author]: https://wooorm.com
[health]: https://github.com/micromark/.github
[securitymd]: https://github.com/micromark/.github/blob/main/security.md
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[markdown-space]: https://github.com/micromark/micromark/tree/main/packages/micromark-util-character#markdownspacecode
[typescript]: https://www.typescriptlang.org
[micromark]: https://github.com/micromark/micromark
[api-factory-space]: #factoryspace

View File

@@ -0,0 +1,64 @@
/**
* Check whether a character code is an ASCII control character.
*
* An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
* to U+001F (US), or U+007F (DEL).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function asciiControl(code: Code): boolean;
/**
* Check whether a character code is a markdown line ending.
*
* A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
* LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
*
* In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
* RETURN (CR) are replaced by these virtual characters depending on whether
* they occurred together.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEnding(code: Code): boolean;
/**
* Check whether a character code is a markdown line ending (see
* `markdownLineEnding`) or markdown space (see `markdownSpace`).
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownLineEndingOrSpace(code: Code): boolean;
/**
* Check whether a character code is a markdown space.
*
* A **markdown space** is the concrete character U+0020 SPACE (SP) and the
* virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
*
* In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
* replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
* SPACE (VS) characters, depending on the column at which the tab occurred.
*
* @param {Code} code
* Code.
* @returns {boolean}
* Whether it matches.
*/
export function markdownSpace(code: Code): boolean;
/** Check whether the character code represents an ASCII alpha (`a` through `z`, case insensitive). */ export function asciiAlpha(code: Code): boolean;
/** Check whether the character code represents an ASCII alphanumeric (`a` through `z`, case insensitive, or `0` through `9`). */ export function asciiAlphanumeric(code: Code): boolean;
/** Check whether the character code represents an ASCII atext (see RFC 5322, Internet Message Format). */ export function asciiAtext(code: Code): boolean;
/** Check whether the character code represents an ASCII digit (`0` through `9`). */ export function asciiDigit(code: Code): boolean;
/** Check whether the character code represents an ASCII hex digit (`a` through `f`, case insensitive, or `0` through `9`). */ export function asciiHexDigit(code: Code): boolean;
export function asciiPunctuation(code: Code): boolean;
export function unicodePunctuation(code: Code): boolean;
export function unicodeWhitespace(code: Code): boolean;
export type Code = import('micromark-util-types').Code;
//# sourceMappingURL=index.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.js"],"names":[],"mappings":"AA8DA;;;;;;;;;;GAUG;AACH,mCALW,IAAI,GAEF,OAAO,CASnB;AAkDD;;;;;;;;;;;;;;GAcG;AACH,yCALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;GAQG;AACH,gDALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;;;;;;;GAcG;AACH,oCALW,IAAI,GAEF,OAAO,CASnB;AAmDY,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,oCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,uCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,yCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;mBAxOvB,OAAO,sBAAsB,EAAE,IAAI"}

View File

@@ -0,0 +1,250 @@
/**
* @typedef {import('micromark-util-types').Code} Code
*/
import {codes} from 'micromark-util-symbol'
/**
 * Check whether the character code represents an ASCII alpha (`a` through `z`,
 * case insensitive).
 *
 * An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
 *
 * An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
 * to U+005A (`Z`).
 *
 * An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
 * to U+007A (`z`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlpha = regexCheck(/[A-Za-z]/)
/**
 * Check whether the character code represents an ASCII alphanumeric (`a`
 * through `z`, case insensitive, or `0` through `9`).
 *
 * An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
 * (see `asciiAlpha`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/)
/**
 * Check whether the character code represents an ASCII atext.
 *
 * atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
 * the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
 * U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`) to U+002F
 * SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
 * CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
 * (`{`) to U+007E TILDE (`~`).
 *
 * The `\--9` range in the expression spans U+002D (`-`) through U+0039 (`9`),
 * so U+002E DOT (`.`) matches too.
 * NOTE(review): U+0021 EXCLAMATION MARK (`!`) is not matched by the
 * expression although RFC 5322 lists it as atext — confirm this is intended.
 *
 * See:
 * **\[RFC5322]**:
 * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
 * P. Resnick.
 * IETF.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/)
/**
 * Tell whether a character code is an ASCII control character.
 *
 * **ASCII controls** are the characters U+0000 NULL (NUL) through U+001F (US)
 * and U+007F (DEL).
 * micromark's special whitespace codes, which are negative, match as well
 * because they sort below `codes.space`.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function asciiControl(code) {
  // The end-of-file code is never a control.
  if (code === null) {
    return false
  }

  // DEL, or anything ordered before the space: C0 and the virtual codes.
  return code === codes.del || code < codes.space
}
/**
 * Check whether the character code represents an ASCII digit (`0` through `9`).
 *
 * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
 * U+0039 (`9`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiDigit = regexCheck(/\d/)
/**
 * Check whether the character code represents an ASCII hex digit (`a` through
 * `f`, case insensitive, or `0` through `9`).
 *
 * An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
 * digit, or an ASCII lower hex digit.
 *
 * An **ASCII upper hex digit** is a character in the inclusive range U+0041
 * (`A`) to U+0046 (`F`).
 *
 * An **ASCII lower hex digit** is a character in the inclusive range U+0061
 * (`a`) to U+0066 (`f`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/)
/**
 * Check whether the character code represents ASCII punctuation.
 *
 * An **ASCII punctuation** is a character in the inclusive ranges U+0021
 * EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
 * SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
 * (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/)
/**
 * Tell whether a character code is a markdown line ending.
 *
 * **Markdown line endings** are the virtual characters M-0003 CARRIAGE RETURN
 * LINE FEED (CRLF), M-0004 LINE FEED (LF), and M-0005 CARRIAGE RETURN (CR).
 *
 * micromark replaces the actual characters U+000A LINE FEED (LF) and U+000D
 * CARRIAGE RETURN (CR) with these virtual characters, depending on whether
 * they occurred together.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEnding(code) {
  // The end-of-file code is not a line ending.
  if (code === null) {
    return false
  }

  // Only the line ending codes are ordered before the virtual tab.
  return code < codes.horizontalTab
}
/**
 * Tell whether a character code is either a markdown line ending (see
 * `markdownLineEnding`) or a markdown space (see `markdownSpace`).
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEndingOrSpace(code) {
  // The end-of-file code is neither.
  if (code === null) {
    return false
  }

  // A real space, or any of the (negative) virtual codes.
  return code === codes.space || code < codes.nul
}
/**
 * Tell whether a character code is a markdown space.
 *
 * **Markdown spaces** are the concrete character U+0020 SPACE (SP) and the
 * virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
 *
 * micromark replaces the actual character U+0009 CHARACTER TABULATION (HT)
 * with one M-0002 HORIZONTAL TAB (HT) followed by between 0 and 3 M-0001
 * VIRTUAL SPACE (VS) characters, depending on the column at which the tab
 * occurred.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownSpace(code) {
  if (code === codes.horizontalTab) {
    return true
  }

  if (code === codes.virtualSpace) {
    return true
  }

  return code === codes.space
}
// Size note: removing ASCII from the regex and using `asciiPunctuation` here
// in fact adds to the bundle size.
/**
 * Check whether the character code represents Unicode punctuation or a
 * Unicode symbol.
 *
 * A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
 * Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
 * (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
 * (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
 * punctuation (see `asciiPunctuation`).
 *
 * The expression also matches every character in the Unicode symbol
 * categories (`\p{S}`), not only the ASCII ones.
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param code
 *   Code.
 * @returns
 *   Whether it matches.
 */
export const unicodePunctuation = regexCheck(/\p{P}|\p{S}/u)
/**
 * Check whether the character code represents Unicode whitespace.
 *
 * Note that this does **not** handle micromark specific markdown whitespace
 * characters: `regexCheck` rejects every negative (virtual) code.
 * See `markdownLineEndingOrSpace` to check that.
 *
 * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
 * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
 * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
 *
 * The check is implemented with the regular expression `\s`, so it follows
 * the ECMAScript definition of whitespace, which also covers a few characters
 * not listed above (such as U+000B and U+FEFF).
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param code
 *   Code.
 * @returns
 *   Whether it matches.
 */
export const unicodeWhitespace = regexCheck(/\s/)
/**
 * Build a character code predicate backed by a regular expression.
 *
 * @param {RegExp} regex
 *   Expression tested against the single character made from the code.
 * @returns {(code: Code) => boolean}
 *   Predicate that is always `false` for the end-of-file code (`null`) and
 *   for negative (virtual) codes.
 */
function regexCheck(regex) {
  /**
   * Test a code against the bound regex.
   *
   * @param {Code} code
   *   Character code.
   * @returns {boolean}
   *   Whether the character code matches the bound regex.
   */
  function check(code) {
    // End-of-file and virtual codes have no character to test.
    if (code === null || !(code > -1)) {
      return false
    }

    return regex.test(String.fromCharCode(code))
  }

  return check
}

View File

@@ -0,0 +1,64 @@
/**
 * Check whether a character code is an ASCII control character.
 *
 * An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
 * to U+001F (US), or U+007F (DEL).
 *
 * As implemented, micromark's special whitespace codes (which have negative
 * values) match as well; the end-of-file code (`null`) does not.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function asciiControl(code: Code): boolean;
/**
 * Check whether a character code is a markdown line ending.
 *
 * A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
 * LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
 *
 * In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
 * RETURN (CR) are replaced by these virtual characters depending on whether
 * they occurred together.
 *
 * As implemented, every code below `-2` matches; the end-of-file code (`null`)
 * never does.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEnding(code: Code): boolean;
/**
 * Check whether a character code is a markdown line ending (see
 * `markdownLineEnding`) or markdown space (see `markdownSpace`).
 *
 * As implemented, every negative (virtual) code and U+0020 SPACE (SP) match;
 * the end-of-file code (`null`) never does.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEndingOrSpace(code: Code): boolean;
/**
 * Check whether a character code is a markdown space.
 *
 * A **markdown space** is the concrete character U+0020 SPACE (SP) and the
 * virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
 *
 * In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
 * replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
 * SPACE (VS) characters, depending on the column at which the tab occurred.
 *
 * As implemented, exactly the codes `-2`, `-1`, and `32` match.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownSpace(code: Code): boolean;
/** Check whether the character code represents an ASCII alpha (`A`–`Z`, `a`–`z`). */
export function asciiAlpha(code: Code): boolean;
/** Check whether the character code represents an ASCII alphanumeric (`0`–`9`, `A`–`Z`, `a`–`z`). */
export function asciiAlphanumeric(code: Code): boolean;
/** Check whether the character code represents an ASCII atext (characters allowed in an email local part). */
export function asciiAtext(code: Code): boolean;
/** Check whether the character code represents an ASCII digit (`0`–`9`). */
export function asciiDigit(code: Code): boolean;
/** Check whether the character code represents an ASCII hex digit (`0`–`9`, `A`–`F`, `a`–`f`). */
export function asciiHexDigit(code: Code): boolean;
/** Check whether the character code represents ASCII punctuation. */
export function asciiPunctuation(code: Code): boolean;
/** Check whether the character code represents Unicode punctuation or a Unicode symbol. */
export function unicodePunctuation(code: Code): boolean;
/** Check whether the character code represents Unicode whitespace (micromark's virtual codes never match). */
export function unicodeWhitespace(code: Code): boolean;
export type Code = import('micromark-util-types').Code;
//# sourceMappingURL=index.d.ts.map

View File

@@ -0,0 +1 @@
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["index.js"],"names":[],"mappings":"AA8DA;;;;;;;;;;GAUG;AACH,mCALW,IAAI,GAEF,OAAO,CASnB;AAkDD;;;;;;;;;;;;;;GAcG;AACH,yCALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;GAQG;AACH,gDALW,IAAI,GAEF,OAAO,CAKnB;AAED;;;;;;;;;;;;;;GAcG;AACH,oCALW,IAAI,GAEF,OAAO,CASnB;AAmDY,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,iCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,oCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,uCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,yCAAO,IAAI,GAAK,OAAO,CAAA;AAAvB,wCAAO,IAAI,GAAK,OAAO,CAAA;mBAxOvB,OAAO,sBAAsB,EAAE,IAAI"}

View File

@@ -0,0 +1,244 @@
/**
* @typedef {import('micromark-util-types').Code} Code
*/
/**
 * Check whether the character code represents an ASCII alpha (`a` through `z`,
 * case insensitive).
 *
 * An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
 *
 * An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
 * to U+005A (`Z`).
 *
 * An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
 * to U+007A (`z`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlpha = regexCheck(/[A-Za-z]/);
/**
 * Check whether the character code represents an ASCII alphanumeric (`a`
 * through `z`, case insensitive, or `0` through `9`).
 *
 * An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
 * (see `asciiAlpha`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAlphanumeric = regexCheck(/[\dA-Za-z]/);
/**
 * Check whether the character code represents an ASCII atext.
 *
 * atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
 * the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
 * U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`) to U+002F
 * SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
 * CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
 * (`{`) to U+007E TILDE (`~`).
 *
 * The `\--9` range in the expression spans U+002D (`-`) through U+0039 (`9`),
 * so U+002E DOT (`.`) matches too.
 * NOTE(review): U+0021 EXCLAMATION MARK (`!`) is not matched by the
 * expression although RFC 5322 lists it as atext — confirm this is intended.
 *
 * See:
 * **\[RFC5322]**:
 * [Internet Message Format](https://tools.ietf.org/html/rfc5322).
 * P. Resnick.
 * IETF.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiAtext = regexCheck(/[#-'*+\--9=?A-Z^-~]/);
/**
 * Tell whether a character code is an ASCII control character.
 *
 * **ASCII controls** are the characters U+0000 NULL (NUL) through U+001F (US)
 * and U+007F (DEL).
 * micromark's special whitespace codes, which are negative, match as well
 * because they sort below U+0020 SPACE.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function asciiControl(code) {
  // The end-of-file code is never a control.
  if (code === null) {
    return false;
  }

  // DEL, or anything ordered before the space: C0 and the virtual codes.
  return code === 127 || code < 32;
}
/**
 * Check whether the character code represents an ASCII digit (`0` through `9`).
 *
 * An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
 * U+0039 (`9`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiDigit = regexCheck(/\d/);
/**
 * Check whether the character code represents an ASCII hex digit (`a` through
 * `f`, case insensitive, or `0` through `9`).
 *
 * An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
 * digit, or an ASCII lower hex digit.
 *
 * An **ASCII upper hex digit** is a character in the inclusive range U+0041
 * (`A`) to U+0046 (`F`).
 *
 * An **ASCII lower hex digit** is a character in the inclusive range U+0061
 * (`a`) to U+0066 (`f`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiHexDigit = regexCheck(/[\dA-Fa-f]/);
/**
 * Check whether the character code represents ASCII punctuation.
 *
 * An **ASCII punctuation** is a character in the inclusive ranges U+0021
 * EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
 * SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
 * (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * @param code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export const asciiPunctuation = regexCheck(/[!-/:-@[-`{-~]/);
/**
 * Tell whether a character code is a markdown line ending.
 *
 * **Markdown line endings** are the virtual characters M-0003 CARRIAGE RETURN
 * LINE FEED (CRLF), M-0004 LINE FEED (LF), and M-0005 CARRIAGE RETURN (CR).
 *
 * micromark replaces the actual characters U+000A LINE FEED (LF) and U+000D
 * CARRIAGE RETURN (CR) with these virtual characters, depending on whether
 * they occurred together.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEnding(code) {
  // The end-of-file code is not a line ending.
  if (code === null) {
    return false;
  }

  // Only the line ending codes are ordered before the virtual tab (`-2`).
  return code < -2;
}
/**
 * Tell whether a character code is either a markdown line ending (see
 * `markdownLineEnding`) or a markdown space (see `markdownSpace`).
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownLineEndingOrSpace(code) {
  // The end-of-file code is neither.
  if (code === null) {
    return false;
  }

  // A real space (`32`), or any of the negative virtual codes.
  return code === 32 || code < 0;
}
/**
 * Tell whether a character code is a markdown space.
 *
 * **Markdown spaces** are the concrete character U+0020 SPACE (SP) and the
 * virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
 *
 * micromark replaces the actual character U+0009 CHARACTER TABULATION (HT)
 * with one M-0002 HORIZONTAL TAB (HT) followed by between 0 and 3 M-0001
 * VIRTUAL SPACE (VS) characters, depending on the column at which the tab
 * occurred.
 *
 * @param {Code} code
 *   Code.
 * @returns {boolean}
 *   Whether it matches.
 */
export function markdownSpace(code) {
  switch (code) {
    // Virtual horizontal tab, virtual space, and the real space.
    case -2:
    case -1:
    case 32: {
      return true;
    }

    default: {
      return false;
    }
  }
}
// Size note: removing ASCII from the regex and using `asciiPunctuation` here
// in fact adds to the bundle size.
/**
 * Check whether the character code represents Unicode punctuation or a
 * Unicode symbol.
 *
 * A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
 * Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
 * (Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
 * (Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
 * punctuation (see `asciiPunctuation`).
 *
 * The expression also matches every character in the Unicode symbol
 * categories (`\p{S}`), not only the ASCII ones.
 *
 * The end-of-file code (`null`) and micromark's negative virtual codes never
 * match, because `regexCheck` rejects them before the expression is tested.
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param code
 *   Code.
 * @returns
 *   Whether it matches.
 */
export const unicodePunctuation = regexCheck(/\p{P}|\p{S}/u);
/**
 * Check whether the character code represents Unicode whitespace.
 *
 * Note that this does **not** handle micromark specific markdown whitespace
 * characters: `regexCheck` rejects every negative (virtual) code.
 * See `markdownLineEndingOrSpace` to check that.
 *
 * A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
 * Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
 * U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
 *
 * The check is implemented with the regular expression `\s`, so it follows
 * the ECMAScript definition of whitespace, which also covers a few characters
 * not listed above (such as U+000B and U+FEFF).
 *
 * See:
 * **\[UNICODE]**:
 * [The Unicode Standard](https://www.unicode.org/versions/).
 * Unicode Consortium.
 *
 * @param code
 *   Code.
 * @returns
 *   Whether it matches.
 */
export const unicodeWhitespace = regexCheck(/\s/);
/**
 * Build a character code predicate backed by a regular expression.
 *
 * @param {RegExp} regex
 *   Expression tested against the single character made from the code.
 * @returns {(code: Code) => boolean}
 *   Predicate that is always `false` for the end-of-file code (`null`) and
 *   for negative (virtual) codes.
 */
function regexCheck(regex) {
  /**
   * Test a code against the bound regex.
   *
   * @param {Code} code
   *   Character code.
   * @returns {boolean}
   *   Whether the character code matches the bound regex.
   */
  function check(code) {
    // End-of-file and virtual codes have no character to test.
    if (code === null || !(code > -1)) {
      return false;
    }

    return regex.test(String.fromCharCode(code));
  }

  return check;
}

View File

@@ -0,0 +1,57 @@
{
"name": "micromark-util-character",
"version": "2.1.0",
"description": "micromark utility to handle character codes",
"license": "MIT",
"keywords": [
"micromark",
"util",
"utility",
"character"
],
"repository": "https://github.com/micromark/micromark/tree/main/packages/micromark-util-character",
"bugs": "https://github.com/micromark/micromark/issues",
"funding": [
{
"type": "GitHub Sponsors",
"url": "https://github.com/sponsors/unifiedjs"
},
{
"type": "OpenCollective",
"url": "https://opencollective.com/unified"
}
],
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"files": [
"dev/",
"lib/",
"index.d.ts.map",
"index.d.ts",
"index.js"
],
"exports": {
"development": "./dev/index.js",
"default": "./index.js"
},
"dependencies": {
"micromark-util-symbol": "^2.0.0",
"micromark-util-types": "^2.0.0"
},
"scripts": {
"build": "micromark-build"
},
"xo": {
"envs": [
"shared-node-browser"
],
"prettier": true,
"rules": {
"unicorn/prefer-code-point": "off"
}
}
}

View File

@@ -0,0 +1,446 @@
# micromark-util-character
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][bundle-size-badge]][bundle-size]
[![Sponsors][sponsors-badge]][opencollective]
[![Backers][backers-badge]][opencollective]
[![Chat][chat-badge]][chat]
[micromark][] utility to handle [character codes][code].
## Contents
* [What is this?](#what-is-this)
* [When should I use this?](#when-should-i-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [`asciiAlpha(code)`](#asciialphacode)
* [`asciiAlphanumeric(code)`](#asciialphanumericcode)
* [`asciiAtext(code)`](#asciiatextcode)
* [`asciiControl(code)`](#asciicontrolcode)
* [`asciiDigit(code)`](#asciidigitcode)
* [`asciiHexDigit(code)`](#asciihexdigitcode)
* [`asciiPunctuation(code)`](#asciipunctuationcode)
* [`markdownLineEnding(code)`](#markdownlineendingcode)
* [`markdownLineEndingOrSpace(code)`](#markdownlineendingorspacecode)
* [`markdownSpace(code)`](#markdownspacecode)
* [`unicodePunctuation(code)`](#unicodepunctuationcode)
* [`unicodeWhitespace(code)`](#unicodewhitespacecode)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package exposes algorithms to check whether characters match groups.
## When should I use this?
This package might be useful when you are making your own micromark extensions.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-util-character
```
In Deno with [`esm.sh`][esmsh]:
```js
import * as character from 'https://esm.sh/micromark-util-character@2'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import * as character from 'https://esm.sh/micromark-util-character@2?bundle'
</script>
```
## Use
```js
import {asciiAlpha} from 'micromark-util-character'
console.log(asciiAlpha(64)) // false
console.log(asciiAlpha(65)) // true
```
## API
This module exports the identifiers
[`asciiAlpha`][api-ascii-alpha],
[`asciiAlphanumeric`][api-ascii-alphanumeric],
[`asciiAtext`][api-ascii-atext],
[`asciiControl`][api-ascii-control],
[`asciiDigit`][api-ascii-digit],
[`asciiHexDigit`][api-ascii-hex-digit],
[`asciiPunctuation`][api-ascii-punctuation],
[`markdownLineEnding`][api-markdown-line-ending],
[`markdownLineEndingOrSpace`][api-markdown-line-ending-or-space],
[`markdownSpace`][api-markdown-space],
[`unicodePunctuation`][api-unicode-punctuation],
[`unicodeWhitespace`][api-unicode-whitespace].
There is no default export.
### `asciiAlpha(code)`
Check whether the [character code][code] represents an ASCII alpha (`a` through
`z`, case insensitive).
An **ASCII alpha** is an ASCII upper alpha or ASCII lower alpha.
An **ASCII upper alpha** is a character in the inclusive range U+0041 (`A`)
to U+005A (`Z`).
An **ASCII lower alpha** is a character in the inclusive range U+0061 (`a`)
to U+007A (`z`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiAlphanumeric(code)`
Check whether the [character code][code] represents an ASCII alphanumeric (`a`
through `z`, case insensitive, or `0` through `9`).
An **ASCII alphanumeric** is an ASCII digit (see `asciiDigit`) or ASCII alpha
(see `asciiAlpha`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiAtext(code)`
Check whether the [character code][code] represents an ASCII atext.
atext is an ASCII alphanumeric (see `asciiAlphanumeric`), or a character in
the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`) to U+002F
SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
(`{`) to U+007E TILDE (`~`) (**\[RFC5322]**).
See **\[RFC5322]**:\
[Internet Message Format](https://tools.ietf.org/html/rfc5322).\
P. Resnick.\
IETF.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiControl(code)`
Check whether a [character code][code] is an ASCII control character.
An **ASCII control** is a character in the inclusive range U+0000 NULL (NUL)
to U+001F (US), or U+007F (DEL).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiDigit(code)`
Check whether the [character code][code] represents an ASCII digit (`0` through
`9`).
An **ASCII digit** is a character in the inclusive range U+0030 (`0`) to
U+0039 (`9`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiHexDigit(code)`
Check whether the [character code][code] represents an ASCII hex digit (`a`
through `f`, case insensitive, or `0` through `9`).
An **ASCII hex digit** is an ASCII digit (see `asciiDigit`), ASCII upper hex
digit, or an ASCII lower hex digit.
An **ASCII upper hex digit** is a character in the inclusive range U+0041
(`A`) to U+0046 (`F`).
An **ASCII lower hex digit** is a character in the inclusive range U+0061
(`a`) to U+0066 (`f`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `asciiPunctuation(code)`
Check whether the [character code][code] represents ASCII punctuation.
An **ASCII punctuation** is a character in the inclusive ranges U+0021
EXCLAMATION MARK (`!`) to U+002F SLASH (`/`), U+003A COLON (`:`) to U+0040 AT
SIGN (`@`), U+005B LEFT SQUARE BRACKET (`[`) to U+0060 GRAVE ACCENT
(`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE (`~`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `markdownLineEnding(code)`
Check whether a [character code][code] is a markdown line ending.
A **markdown line ending** is the virtual characters M-0003 CARRIAGE RETURN
LINE FEED (CRLF), M-0004 LINE FEED (LF) and M-0005 CARRIAGE RETURN (CR).
In micromark, the actual character U+000A LINE FEED (LF) and U+000D CARRIAGE
RETURN (CR) are replaced by these virtual characters depending on whether
they occurred together.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `markdownLineEndingOrSpace(code)`
Check whether a [character code][code] is a markdown line ending (see
`markdownLineEnding`) or markdown space (see `markdownSpace`).
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `markdownSpace(code)`
Check whether a [character code][code] is a markdown space.
A **markdown space** is the concrete character U+0020 SPACE (SP) and the
virtual characters M-0001 VIRTUAL SPACE (VS) and M-0002 HORIZONTAL TAB (HT).
In micromark, the actual character U+0009 CHARACTER TABULATION (HT) is
replaced by one M-0002 HORIZONTAL TAB (HT) and between 0 and 3 M-0001 VIRTUAL
SPACE (VS) characters, depending on the column at which the tab occurred.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `unicodePunctuation(code)`
Check whether the [character code][code] represents Unicode punctuation.
A **Unicode punctuation** is a character in the Unicode `Pc` (Punctuation,
Connector), `Pd` (Punctuation, Dash), `Pe` (Punctuation, Close), `Pf`
(Punctuation, Final quote), `Pi` (Punctuation, Initial quote), `Po`
(Punctuation, Other), or `Ps` (Punctuation, Open) categories, or an ASCII
punctuation (see `asciiPunctuation`) (**\[UNICODE]**).
Characters in the Unicode symbol categories (`S`) match as well.
See **\[UNICODE]**:\
[The Unicode Standard](https://www.unicode.org/versions/).\
Unicode Consortium.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
### `unicodeWhitespace(code)`
Check whether the [character code][code] represents Unicode whitespace.
Note that this does **not** handle micromark specific markdown whitespace characters.
See `markdownLineEndingOrSpace` to check that.
A **Unicode whitespace** is a character in the Unicode `Zs` (Separator,
Space) category, or U+0009 CHARACTER TABULATION (HT), U+000A LINE FEED (LF),
U+000C (FF), or U+000D CARRIAGE RETURN (CR) (**\[UNICODE]**).
See **\[UNICODE]**:\
[The Unicode Standard](https://www.unicode.org/versions/).\
Unicode Consortium.
###### Parameters
* `code` (`Code`)
— code
###### Returns
Whether it matches (`boolean`).
## Types
This package is fully typed with [TypeScript][].
It exports no additional types.
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-util-character@^2`, compatible with Node.js 16.
This package works with `micromark@^3`.
## Security
This package is safe.
See [`security.md`][securitymd] in [`micromark/.github`][health] for how to
submit a security report.
## Contribute
See [`contributing.md`][contributing] in [`micromark/.github`][health] for ways
to get started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organisation, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark.svg
[coverage]: https://codecov.io/github/micromark/micromark
[downloads-badge]: https://img.shields.io/npm/dm/micromark-util-character.svg
[downloads]: https://www.npmjs.com/package/micromark-util-character
[bundle-size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-util-character
[bundle-size]: https://bundlejs.com/?q=micromark-util-character
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[opencollective]: https://opencollective.com/unified
[npm]: https://docs.npmjs.com/cli/install
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[esmsh]: https://esm.sh
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[license]: https://github.com/micromark/micromark/blob/main/license
[author]: https://wooorm.com
[health]: https://github.com/micromark/.github
[securitymd]: https://github.com/micromark/.github/blob/main/security.md
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[typescript]: https://www.typescriptlang.org
[micromark]: https://github.com/micromark/micromark
[code]: https://github.com/micromark/micromark#preprocess
[api-ascii-alpha]: #asciialphacode
[api-ascii-alphanumeric]: #asciialphanumericcode
[api-ascii-atext]: #asciiatextcode
[api-ascii-control]: #asciicontrolcode
[api-ascii-digit]: #asciidigitcode
[api-ascii-hex-digit]: #asciihexdigitcode
[api-ascii-punctuation]: #asciipunctuationcode
[api-markdown-line-ending]: #markdownlineendingcode
[api-markdown-line-ending-or-space]: #markdownlineendingorspacecode
[api-markdown-space]: #markdownspacecode
[api-unicode-punctuation]: #unicodepunctuationcode
[api-unicode-whitespace]: #unicodewhitespacecode

View File

@@ -0,0 +1,138 @@
/**
 * Character codes that micromark works with.
 *
 * Negative values are virtual characters (the line endings, the tab, and the
 * tab's expansion into virtual spaces), `eof` is `null`, `0` through `127`
 * are the ASCII block, and the last two entries are the byte order marker and
 * the replacement character.
 */
export namespace codes {
  // Virtual characters.
  let carriageReturn: -5
  let lineFeed: -4
  let carriageReturnLineFeed: -3
  let horizontalTab: -2
  let virtualSpace: -1
  // End of file.
  let eof: null
  // ASCII controls (C0).
  let nul: 0
  let soh: 1
  let stx: 2
  let etx: 3
  let eot: 4
  let enq: 5
  let ack: 6
  let bel: 7
  let bs: 8
  let ht: 9
  let lf: 10
  let vt: 11
  let ff: 12
  let cr: 13
  let so: 14
  let si: 15
  let dle: 16
  let dc1: 17
  let dc2: 18
  let dc3: 19
  let dc4: 20
  let nak: 21
  let syn: 22
  let etb: 23
  let can: 24
  let em: 25
  let sub: 26
  let esc: 27
  let fs: 28
  let gs: 29
  let rs: 30
  let us: 31
  // Space and ASCII punctuation.
  let space: 32
  let exclamationMark: 33
  let quotationMark: 34
  let numberSign: 35
  let dollarSign: 36
  let percentSign: 37
  let ampersand: 38
  let apostrophe: 39
  let leftParenthesis: 40
  let rightParenthesis: 41
  let asterisk: 42
  let plusSign: 43
  let comma: 44
  let dash: 45
  let dot: 46
  let slash: 47
  // ASCII digits.
  let digit0: 48
  let digit1: 49
  let digit2: 50
  let digit3: 51
  let digit4: 52
  let digit5: 53
  let digit6: 54
  let digit7: 55
  let digit8: 56
  let digit9: 57
  let colon: 58
  let semicolon: 59
  let lessThan: 60
  let equalsTo: 61
  let greaterThan: 62
  let questionMark: 63
  let atSign: 64
  // ASCII uppercase letters.
  let uppercaseA: 65
  let uppercaseB: 66
  let uppercaseC: 67
  let uppercaseD: 68
  let uppercaseE: 69
  let uppercaseF: 70
  let uppercaseG: 71
  let uppercaseH: 72
  let uppercaseI: 73
  let uppercaseJ: 74
  let uppercaseK: 75
  let uppercaseL: 76
  let uppercaseM: 77
  let uppercaseN: 78
  let uppercaseO: 79
  let uppercaseP: 80
  let uppercaseQ: 81
  let uppercaseR: 82
  let uppercaseS: 83
  let uppercaseT: 84
  let uppercaseU: 85
  let uppercaseV: 86
  let uppercaseW: 87
  let uppercaseX: 88
  let uppercaseY: 89
  let uppercaseZ: 90
  let leftSquareBracket: 91
  let backslash: 92
  let rightSquareBracket: 93
  let caret: 94
  let underscore: 95
  let graveAccent: 96
  // ASCII lowercase letters.
  let lowercaseA: 97
  let lowercaseB: 98
  let lowercaseC: 99
  let lowercaseD: 100
  let lowercaseE: 101
  let lowercaseF: 102
  let lowercaseG: 103
  let lowercaseH: 104
  let lowercaseI: 105
  let lowercaseJ: 106
  let lowercaseK: 107
  let lowercaseL: 108
  let lowercaseM: 109
  let lowercaseN: 110
  let lowercaseO: 111
  let lowercaseP: 112
  let lowercaseQ: 113
  let lowercaseR: 114
  let lowercaseS: 115
  let lowercaseT: 116
  let lowercaseU: 117
  let lowercaseV: 118
  let lowercaseW: 119
  let lowercaseX: 120
  let lowercaseY: 121
  let lowercaseZ: 122
  let leftCurlyBrace: 123
  let verticalBar: 124
  let rightCurlyBrace: 125
  let tilde: 126
  let del: 127
  // Outside ASCII: byte order marker and replacement character.
  let byteOrderMarker: 65279
  let replacementCharacter: 65533
}

View File

@@ -0,0 +1,158 @@
/**
* Character codes.
*
* This module is compiled away!
*
* micromark works based on character codes.
* This module contains constants for the ASCII block and the replacement
* character.
* A couple of them are handled in a special way, such as the line endings
* (CR, LF, and CR+LF, commonly known as end-of-line: EOLs), the tab (horizontal
* tab) and its expansion based on what column it is at (virtual space),
* and the end-of-file (eof) character.
* As values are preprocessed before handling them, the actual characters LF,
* CR, HT, and NUL (which is present as the replacement character), are
* guaranteed to not exist.
*
* Unicode basic latin block.
*/
export const codes = /** @type {const} */ ({
// Special codes for the preprocessed line endings, tab, virtual space, and
// end-of-file described above.
// They are negative (or `null`), so they never equal a real character code.
carriageReturn: -5,
lineFeed: -4,
carriageReturnLineFeed: -3,
horizontalTab: -2,
virtualSpace: -1,
eof: null,
// Control characters (0 through 31).
nul: 0,
soh: 1,
stx: 2,
etx: 3,
eot: 4,
enq: 5,
ack: 6,
bel: 7,
bs: 8,
ht: 9, // `\t`
lf: 10, // `\n`
vt: 11, // `\v`
ff: 12, // `\f`
cr: 13, // `\r`
so: 14,
si: 15,
dle: 16,
dc1: 17,
dc2: 18,
dc3: 19,
dc4: 20,
nak: 21,
syn: 22,
etb: 23,
can: 24,
em: 25,
sub: 26,
esc: 27,
fs: 28,
gs: 29,
rs: 30,
us: 31,
// Space and the printable ASCII characters (32 through 126).
space: 32,
exclamationMark: 33, // `!`
quotationMark: 34, // `"`
numberSign: 35, // `#`
dollarSign: 36, // `$`
percentSign: 37, // `%`
ampersand: 38, // `&`
apostrophe: 39, // `'`
leftParenthesis: 40, // `(`
rightParenthesis: 41, // `)`
asterisk: 42, // `*`
plusSign: 43, // `+`
comma: 44, // `,`
dash: 45, // `-`
dot: 46, // `.`
slash: 47, // `/`
digit0: 48, // `0`
digit1: 49, // `1`
digit2: 50, // `2`
digit3: 51, // `3`
digit4: 52, // `4`
digit5: 53, // `5`
digit6: 54, // `6`
digit7: 55, // `7`
digit8: 56, // `8`
digit9: 57, // `9`
colon: 58, // `:`
semicolon: 59, // `;`
lessThan: 60, // `<`
equalsTo: 61, // `=`
greaterThan: 62, // `>`
questionMark: 63, // `?`
atSign: 64, // `@`
uppercaseA: 65, // `A`
uppercaseB: 66, // `B`
uppercaseC: 67, // `C`
uppercaseD: 68, // `D`
uppercaseE: 69, // `E`
uppercaseF: 70, // `F`
uppercaseG: 71, // `G`
uppercaseH: 72, // `H`
uppercaseI: 73, // `I`
uppercaseJ: 74, // `J`
uppercaseK: 75, // `K`
uppercaseL: 76, // `L`
uppercaseM: 77, // `M`
uppercaseN: 78, // `N`
uppercaseO: 79, // `O`
uppercaseP: 80, // `P`
uppercaseQ: 81, // `Q`
uppercaseR: 82, // `R`
uppercaseS: 83, // `S`
uppercaseT: 84, // `T`
uppercaseU: 85, // `U`
uppercaseV: 86, // `V`
uppercaseW: 87, // `W`
uppercaseX: 88, // `X`
uppercaseY: 89, // `Y`
uppercaseZ: 90, // `Z`
leftSquareBracket: 91, // `[`
backslash: 92, // `\`
rightSquareBracket: 93, // `]`
caret: 94, // `^`
underscore: 95, // `_`
graveAccent: 96, // `` ` ``
lowercaseA: 97, // `a`
lowercaseB: 98, // `b`
lowercaseC: 99, // `c`
lowercaseD: 100, // `d`
lowercaseE: 101, // `e`
lowercaseF: 102, // `f`
lowercaseG: 103, // `g`
lowercaseH: 104, // `h`
lowercaseI: 105, // `i`
lowercaseJ: 106, // `j`
lowercaseK: 107, // `k`
lowercaseL: 108, // `l`
lowercaseM: 109, // `m`
lowercaseN: 110, // `n`
lowercaseO: 111, // `o`
lowercaseP: 112, // `p`
lowercaseQ: 113, // `q`
lowercaseR: 114, // `r`
lowercaseS: 115, // `s`
lowercaseT: 116, // `t`
lowercaseU: 117, // `u`
lowercaseV: 118, // `v`
lowercaseW: 119, // `w`
lowercaseX: 120, // `x`
lowercaseY: 121, // `y`
lowercaseZ: 122, // `z`
leftCurlyBrace: 123, // `{`
verticalBar: 124, // `|`
rightCurlyBrace: 125, // `}`
tilde: 126, // `~`
del: 127,
// U+FEFF (65279 is 0xFEFF).
byteOrderMarker: 65279,
// U+FFFD (65533 is 0xFFFD), in the Unicode Specials block.
replacementCharacter: 65533
})

View File

@@ -0,0 +1,36 @@
/**
* Type declarations for `constants`.
*
* Every member is declared with the literal type of the value it holds, so
* consumers see exact numbers and strings rather than `number` or `string`.
*/
export namespace constants {
// Which side of content an attention sequence is on.
let attentionSideBefore: 1
let attentionSideAfter: 2
// Size limits of individual constructs.
let atxHeadingOpeningFenceSizeMax: 6
let autolinkDomainSizeMax: 63
let autolinkSchemeSizeMax: 32
let cdataOpeningString: 'CDATA['
// Character groups.
let characterGroupWhitespace: 1
let characterGroupPunctuation: 2
let characterReferenceDecimalSizeMax: 7
let characterReferenceHexadecimalSizeMax: 6
let characterReferenceNamedSizeMax: 31
let codeFencedSequenceSizeMin: 3
// Content type names.
let contentTypeDocument: 'document'
let contentTypeFlow: 'flow'
let contentTypeContent: 'content'
let contentTypeString: 'string'
let contentTypeText: 'text'
let hardBreakPrefixSizeMin: 2
// Kinds of HTML.
let htmlRaw: 1
let htmlComment: 2
let htmlInstruction: 3
let htmlDeclaration: 4
let htmlCdata: 5
let htmlBasic: 6
let htmlComplete: 7
let htmlRawSizeMax: 8
let linkResourceDestinationBalanceMax: 32
let linkReferenceSizeMax: 999
let listItemValueSizeMax: 10
// Numeric bases.
let numericBaseDecimal: 10
let numericBaseHexadecimal: 16
let tabSize: 4
let thematicBreakMarkerCountMin: 3
let v8MaxSafeChunkSize: 10000
}

View File

@@ -0,0 +1,44 @@
/**
* This module is compiled away!
*
* Parsing markdown comes with a couple of constants, such as minimum or maximum
* sizes of certain sequences.
* Additionally, there are a couple symbols used inside micromark.
* These are all defined here, but compiled away by scripts.
*/
export const constants = /** @type {const} */ ({
attentionSideBefore: 1, // Symbol to mark an attention sequence as before content: `*a`
attentionSideAfter: 2, // Symbol to mark an attention sequence as after content: `a*`
atxHeadingOpeningFenceSizeMax: 6, // 6 number signs is fine, 7 is not.
autolinkDomainSizeMax: 63, // 63 characters is fine, 64 is too many.
autolinkSchemeSizeMax: 32, // 32 characters is fine, 33 is too many.
cdataOpeningString: 'CDATA[', // And preceded by `<![`.
characterGroupWhitespace: 1, // Symbol used to indicate a character is whitespace
characterGroupPunctuation: 2, // Symbol used to indicate a character is punctuation
characterReferenceDecimalSizeMax: 7, // `&#9999999;`.
characterReferenceHexadecimalSizeMax: 6, // `&#xff9999;`.
characterReferenceNamedSizeMax: 31, // `&CounterClockwiseContourIntegral;`.
codeFencedSequenceSizeMin: 3, // At least 3 ticks or tildes are needed.
// Names of the content types.
contentTypeDocument: 'document',
contentTypeFlow: 'flow',
contentTypeContent: 'content',
contentTypeString: 'string',
contentTypeText: 'text',
hardBreakPrefixSizeMin: 2, // At least 2 trailing spaces are needed.
htmlRaw: 1, // Symbol for `<script>`
htmlComment: 2, // Symbol for `<!---->`
htmlInstruction: 3, // Symbol for `<?php?>`
htmlDeclaration: 4, // Symbol for `<!doctype>`
htmlCdata: 5, // Symbol for `<![CDATA[]]>`
htmlBasic: 6, // Symbol for `<div`
htmlComplete: 7, // Symbol for `<x>`
htmlRawSizeMax: 8, // Length of `textarea`.
linkResourceDestinationBalanceMax: 32, // See: <https://spec.commonmark.org/0.30/#link-destination>, <https://github.com/remarkjs/react-markdown/issues/658#issuecomment-984345577>
linkReferenceSizeMax: 999, // See: <https://spec.commonmark.org/0.30/#link-label>
listItemValueSizeMax: 10, // See: <https://spec.commonmark.org/0.30/#ordered-list-marker>
numericBaseDecimal: 10, // Base 10.
numericBaseHexadecimal: 0x10, // Base 16.
tabSize: 4, // Tabs have a hard-coded size of 4, per CommonMark.
thematicBreakMarkerCountMin: 3, // At least 3 asterisks, dashes, or underscores are needed.
v8MaxSafeChunkSize: 10000 // V8 (and potentially others) have problems injecting giant arrays into other arrays, hence we operate in chunks.
})

View File

@@ -0,0 +1,4 @@
export {codes} from './codes.js'
export {constants} from './constants.js'
export {types} from './types.js'
export {values} from './values.js'

View File

@@ -0,0 +1,4 @@
export {codes} from './codes.js'
export {constants} from './constants.js'
export {types} from './types.js'
export {values} from './values.js'

View File

@@ -0,0 +1,105 @@
/**
* Type declarations for `types`, the token type names.
*
* Every member is declared with a string literal type equal to its own name.
*/
export namespace types {
let data: 'data'
let whitespace: 'whitespace'
let lineEnding: 'lineEnding'
let lineEndingBlank: 'lineEndingBlank'
let linePrefix: 'linePrefix'
let lineSuffix: 'lineSuffix'
let atxHeading: 'atxHeading'
let atxHeadingSequence: 'atxHeadingSequence'
let atxHeadingText: 'atxHeadingText'
let autolink: 'autolink'
let autolinkEmail: 'autolinkEmail'
let autolinkMarker: 'autolinkMarker'
let autolinkProtocol: 'autolinkProtocol'
let characterEscape: 'characterEscape'
let characterEscapeValue: 'characterEscapeValue'
let characterReference: 'characterReference'
let characterReferenceMarker: 'characterReferenceMarker'
let characterReferenceMarkerNumeric: 'characterReferenceMarkerNumeric'
let characterReferenceMarkerHexadecimal: 'characterReferenceMarkerHexadecimal'
let characterReferenceValue: 'characterReferenceValue'
let codeFenced: 'codeFenced'
let codeFencedFence: 'codeFencedFence'
let codeFencedFenceSequence: 'codeFencedFenceSequence'
let codeFencedFenceInfo: 'codeFencedFenceInfo'
let codeFencedFenceMeta: 'codeFencedFenceMeta'
let codeFlowValue: 'codeFlowValue'
let codeIndented: 'codeIndented'
let codeText: 'codeText'
let codeTextData: 'codeTextData'
let codeTextPadding: 'codeTextPadding'
let codeTextSequence: 'codeTextSequence'
let content: 'content'
let definition: 'definition'
let definitionDestination: 'definitionDestination'
let definitionDestinationLiteral: 'definitionDestinationLiteral'
let definitionDestinationLiteralMarker: 'definitionDestinationLiteralMarker'
let definitionDestinationRaw: 'definitionDestinationRaw'
let definitionDestinationString: 'definitionDestinationString'
let definitionLabel: 'definitionLabel'
let definitionLabelMarker: 'definitionLabelMarker'
let definitionLabelString: 'definitionLabelString'
let definitionMarker: 'definitionMarker'
let definitionTitle: 'definitionTitle'
let definitionTitleMarker: 'definitionTitleMarker'
let definitionTitleString: 'definitionTitleString'
let emphasis: 'emphasis'
let emphasisSequence: 'emphasisSequence'
let emphasisText: 'emphasisText'
let escapeMarker: 'escapeMarker'
let hardBreakEscape: 'hardBreakEscape'
let hardBreakTrailing: 'hardBreakTrailing'
let htmlFlow: 'htmlFlow'
let htmlFlowData: 'htmlFlowData'
let htmlText: 'htmlText'
let htmlTextData: 'htmlTextData'
let image: 'image'
let label: 'label'
let labelText: 'labelText'
let labelLink: 'labelLink'
let labelImage: 'labelImage'
let labelMarker: 'labelMarker'
let labelImageMarker: 'labelImageMarker'
let labelEnd: 'labelEnd'
let link: 'link'
let paragraph: 'paragraph'
let reference: 'reference'
let referenceMarker: 'referenceMarker'
let referenceString: 'referenceString'
let resource: 'resource'
let resourceDestination: 'resourceDestination'
let resourceDestinationLiteral: 'resourceDestinationLiteral'
let resourceDestinationLiteralMarker: 'resourceDestinationLiteralMarker'
let resourceDestinationRaw: 'resourceDestinationRaw'
let resourceDestinationString: 'resourceDestinationString'
let resourceMarker: 'resourceMarker'
let resourceTitle: 'resourceTitle'
let resourceTitleMarker: 'resourceTitleMarker'
let resourceTitleString: 'resourceTitleString'
let setextHeading: 'setextHeading'
let setextHeadingText: 'setextHeadingText'
let setextHeadingLine: 'setextHeadingLine'
let setextHeadingLineSequence: 'setextHeadingLineSequence'
let strong: 'strong'
let strongSequence: 'strongSequence'
let strongText: 'strongText'
let thematicBreak: 'thematicBreak'
let thematicBreakSequence: 'thematicBreakSequence'
let blockQuote: 'blockQuote'
let blockQuotePrefix: 'blockQuotePrefix'
let blockQuoteMarker: 'blockQuoteMarker'
let blockQuotePrefixWhitespace: 'blockQuotePrefixWhitespace'
let listOrdered: 'listOrdered'
let listUnordered: 'listUnordered'
let listItemIndent: 'listItemIndent'
let listItemMarker: 'listItemMarker'
let listItemPrefix: 'listItemPrefix'
let listItemPrefixWhitespace: 'listItemPrefixWhitespace'
let listItemValue: 'listItemValue'
let chunkDocument: 'chunkDocument'
let chunkContent: 'chunkContent'
let chunkFlow: 'chunkFlow'
let chunkText: 'chunkText'
let chunkString: 'chunkString'
}

View File

@@ -0,0 +1,453 @@
/**
* This module is compiled away!
*
* Here is the list of all types of tokens exposed by micromark, with a short
* explanation of what they include and where they are found.
* In picking names, generally, the rule is to be as explicit as possible
* instead of reusing names.
* For example, there is a `definitionDestination` and a `resourceDestination`,
* instead of one shared name.
*/
// Note: when changing the next record, you must also change `TokenTypeMap`
// in `micromark-util-types/index.d.ts`.
export const types = /** @type {const} */ ({
// Generic type for data, such as in a title, a destination, etc.
data: 'data',
// Generic type for syntactic whitespace (tabs, virtual spaces, spaces).
// Such as, between a fenced code fence and an info string.
whitespace: 'whitespace',
// Generic type for line endings (line feed, carriage return, carriage return +
// line feed).
lineEnding: 'lineEnding',
// A line ending, but ending a blank line.
lineEndingBlank: 'lineEndingBlank',
// Generic type for whitespace (tabs, virtual spaces, spaces) at the start of a
// line.
linePrefix: 'linePrefix',
// Generic type for whitespace (tabs, virtual spaces, spaces) at the end of a
// line.
lineSuffix: 'lineSuffix',
// Whole ATX heading:
//
// ```markdown
// #
// ## Alpha
// ### Bravo ###
// ```
//
// Includes `atxHeadingSequence`, `whitespace`, `atxHeadingText`.
atxHeading: 'atxHeading',
// Sequence of number signs in an ATX heading (`###`).
atxHeadingSequence: 'atxHeadingSequence',
// Content in an ATX heading (`alpha`).
// Includes text.
atxHeadingText: 'atxHeadingText',
// Whole autolink (`<https://example.com>` or `<admin@example.com>`)
// Includes `autolinkMarker` and `autolinkProtocol` or `autolinkEmail`.
autolink: 'autolink',
// Email autolink w/o markers (`admin@example.com`)
autolinkEmail: 'autolinkEmail',
// Marker around an `autolinkProtocol` or `autolinkEmail` (`<` or `>`).
autolinkMarker: 'autolinkMarker',
// Protocol autolink w/o markers (`https://example.com`)
autolinkProtocol: 'autolinkProtocol',
// A whole character escape (`\-`).
// Includes `escapeMarker` and `characterEscapeValue`.
characterEscape: 'characterEscape',
// The escaped character (`-`).
characterEscapeValue: 'characterEscapeValue',
// A whole character reference (`&amp;`, `&#8800;`, or `&#x1D306;`).
// Includes `characterReferenceMarker`, an optional
// `characterReferenceMarkerNumeric`, in which case an optional
// `characterReferenceMarkerHexadecimal`, and a `characterReferenceValue`.
characterReference: 'characterReference',
// The start or end marker (`&` or `;`).
characterReferenceMarker: 'characterReferenceMarker',
// Mark reference as numeric (`#`).
characterReferenceMarkerNumeric: 'characterReferenceMarkerNumeric',
// Mark reference as hexadecimal (`x` or `X`).
characterReferenceMarkerHexadecimal: 'characterReferenceMarkerHexadecimal',
// Value of character reference w/o markers (`amp`, `8800`, or `1D306`).
characterReferenceValue: 'characterReferenceValue',
// Whole fenced code:
//
// ````markdown
// ```js
// alert(1)
// ```
// ````
codeFenced: 'codeFenced',
// A fenced code fence, including whitespace, sequence, info, and meta
// (` ```js `).
codeFencedFence: 'codeFencedFence',
// Sequence of grave accent or tilde characters (` ``` `) in a fence.
codeFencedFenceSequence: 'codeFencedFenceSequence',
// Info word (`js`) in a fence.
// Includes string.
codeFencedFenceInfo: 'codeFencedFenceInfo',
// Meta words (`highlight="1"`) in a fence.
// Includes string.
codeFencedFenceMeta: 'codeFencedFenceMeta',
// A line of code.
codeFlowValue: 'codeFlowValue',
// Whole indented code:
//
// ```markdown
// alert(1)
// ```
//
// Includes `lineEnding`, `linePrefix`, and `codeFlowValue`.
codeIndented: 'codeIndented',
// A text code (``` `alpha` ```).
// Includes `codeTextSequence`, `codeTextData`, `lineEnding`, and can include
// `codeTextPadding`.
codeText: 'codeText',
codeTextData: 'codeTextData',
// A space or line ending right after or before a tick.
codeTextPadding: 'codeTextPadding',
// A text code fence (` `` `).
codeTextSequence: 'codeTextSequence',
// Whole content:
//
// ```markdown
// [a]: b
// c
// =
// d
// ```
//
// Includes `paragraph` and `definition`.
content: 'content',
// Whole definition:
//
// ```markdown
// [micromark]: https://github.com/micromark/micromark
// ```
//
// Includes `definitionLabel`, `definitionMarker`, `whitespace`,
// `definitionDestination`, and optionally `lineEnding` and `definitionTitle`.
definition: 'definition',
// Destination of a definition (`https://github.com/micromark/micromark` or
// `<https://github.com/micromark/micromark>`).
// Includes `definitionDestinationLiteral` or `definitionDestinationRaw`.
definitionDestination: 'definitionDestination',
// Enclosed destination of a definition
// (`<https://github.com/micromark/micromark>`).
// Includes `definitionDestinationLiteralMarker` and optionally
// `definitionDestinationString`.
definitionDestinationLiteral: 'definitionDestinationLiteral',
// Markers of an enclosed definition destination (`<` or `>`).
definitionDestinationLiteralMarker: 'definitionDestinationLiteralMarker',
// Unenclosed destination of a definition
// (`https://github.com/micromark/micromark`).
// Includes `definitionDestinationString`.
definitionDestinationRaw: 'definitionDestinationRaw',
// Text in a destination (`https://github.com/micromark/micromark`).
// Includes string.
definitionDestinationString: 'definitionDestinationString',
// Label of a definition (`[micromark]`).
// Includes `definitionLabelMarker` and `definitionLabelString`.
definitionLabel: 'definitionLabel',
// Markers of a definition label (`[` or `]`).
definitionLabelMarker: 'definitionLabelMarker',
// Value of a definition label (`micromark`).
// Includes string.
definitionLabelString: 'definitionLabelString',
// Marker between a label and a destination (`:`).
definitionMarker: 'definitionMarker',
// Title of a definition (`"x"`, `'y'`, or `(z)`).
// Includes `definitionTitleMarker` and optionally `definitionTitleString`.
definitionTitle: 'definitionTitle',
// Marker around a title of a definition (`"`, `'`, `(`, or `)`).
definitionTitleMarker: 'definitionTitleMarker',
// Data without markers in a title (`z`).
// Includes string.
definitionTitleString: 'definitionTitleString',
// Emphasis (`*alpha*`).
// Includes `emphasisSequence` and `emphasisText`.
emphasis: 'emphasis',
// Sequence of emphasis markers (`*` or `_`).
emphasisSequence: 'emphasisSequence',
// Emphasis text (`alpha`).
// Includes text.
emphasisText: 'emphasisText',
// The character escape marker (`\`).
escapeMarker: 'escapeMarker',
// A hard break created with a backslash (`\\n`).
// Note: does not include the line ending.
hardBreakEscape: 'hardBreakEscape',
// A hard break created with trailing spaces (` \n`).
// Does not include the line ending.
hardBreakTrailing: 'hardBreakTrailing',
// Flow HTML:
//
// ```markdown
// <div
// ```
//
// Includes `lineEnding`, `htmlFlowData`.
htmlFlow: 'htmlFlow',
htmlFlowData: 'htmlFlowData',
// HTML in text (the tag in `a <i> b`).
// Includes `lineEnding`, `htmlTextData`.
htmlText: 'htmlText',
htmlTextData: 'htmlTextData',
// Whole image (`![alpha](bravo)`, `![alpha][bravo]`, `![alpha][]`, or
// `![alpha]`).
// Includes `label` and an optional `resource` or `reference`.
image: 'image',
// Whole link label (`[*alpha*]`).
// Includes `labelLink` or `labelImage`, `labelText`, and `labelEnd`.
label: 'label',
// Text in a label (`*alpha*`).
// Includes text.
labelText: 'labelText',
// Start a link label (`[`).
// Includes a `labelMarker`.
labelLink: 'labelLink',
// Start an image label (`![`).
// Includes `labelImageMarker` and `labelMarker`.
labelImage: 'labelImage',
// Marker of a label (`[` or `]`).
labelMarker: 'labelMarker',
// Marker to start an image (`!`).
labelImageMarker: 'labelImageMarker',
// End a label (`]`).
// Includes `labelMarker`.
labelEnd: 'labelEnd',
// Whole link (`[alpha](bravo)`, `[alpha][bravo]`, `[alpha][]`, or `[alpha]`).
// Includes `label` and an optional `resource` or `reference`.
link: 'link',
// Whole paragraph:
//
// ```markdown
// alpha
// bravo.
// ```
//
// Includes text.
paragraph: 'paragraph',
// A reference (`[alpha]` or `[]`).
// Includes `referenceMarker` and an optional `referenceString`.
reference: 'reference',
// A reference marker (`[` or `]`).
referenceMarker: 'referenceMarker',
// Reference text (`alpha`).
// Includes string.
referenceString: 'referenceString',
// A resource (`(https://example.com "alpha")`).
// Includes `resourceMarker`, an optional `resourceDestination` with an optional
// `whitespace` and `resourceTitle`.
resource: 'resource',
// A resource destination (`https://example.com`).
// Includes `resourceDestinationLiteral` or `resourceDestinationRaw`.
resourceDestination: 'resourceDestination',
// A literal resource destination (`<https://example.com>`).
// Includes `resourceDestinationLiteralMarker` and optionally
// `resourceDestinationString`.
resourceDestinationLiteral: 'resourceDestinationLiteral',
// A resource destination marker (`<` or `>`).
resourceDestinationLiteralMarker: 'resourceDestinationLiteralMarker',
// A raw resource destination (`https://example.com`).
// Includes `resourceDestinationString`.
resourceDestinationRaw: 'resourceDestinationRaw',
// Resource destination text (`https://example.com`).
// Includes string.
resourceDestinationString: 'resourceDestinationString',
// A resource marker (`(` or `)`).
resourceMarker: 'resourceMarker',
// A resource title (`"alpha"`, `'alpha'`, or `(alpha)`).
// Includes `resourceTitleMarker` and optionally `resourceTitleString`.
resourceTitle: 'resourceTitle',
// A resource title marker (`"`, `'`, `(`, or `)`).
resourceTitleMarker: 'resourceTitleMarker',
// Resource destination title (`alpha`).
// Includes string.
resourceTitleString: 'resourceTitleString',
// Whole setext heading:
//
// ```markdown
// alpha
// bravo
// =====
// ```
//
// Includes `setextHeadingText`, `lineEnding`, `linePrefix`, and
// `setextHeadingLine`.
setextHeading: 'setextHeading',
// Content in a setext heading (`alpha\nbravo`).
// Includes text.
setextHeadingText: 'setextHeadingText',
// Underline in a setext heading, including whitespace suffix (`==`).
// Includes `setextHeadingLineSequence`.
setextHeadingLine: 'setextHeadingLine',
// Sequence of equals or dash characters in underline in a setext heading (`-`).
setextHeadingLineSequence: 'setextHeadingLineSequence',
// Strong (`**alpha**`).
// Includes `strongSequence` and `strongText`.
strong: 'strong',
// Sequence of strong markers (`**` or `__`).
strongSequence: 'strongSequence',
// Strong text (`alpha`).
// Includes text.
strongText: 'strongText',
// Whole thematic break:
//
// ```markdown
// * * *
// ```
//
// Includes `thematicBreakSequence` and `whitespace`.
thematicBreak: 'thematicBreak',
// A sequence of one or more thematic break markers (`***`).
thematicBreakSequence: 'thematicBreakSequence',
// Whole block quote:
//
// ```markdown
// > a
// >
// > b
// ```
//
// Includes `blockQuotePrefix` and flow.
blockQuote: 'blockQuote',
// The `>` or `> ` of a block quote.
blockQuotePrefix: 'blockQuotePrefix',
// The `>` of a block quote prefix.
blockQuoteMarker: 'blockQuoteMarker',
// The optional ` ` of a block quote prefix.
blockQuotePrefixWhitespace: 'blockQuotePrefixWhitespace',
// Whole ordered list:
//
// ```markdown
// 1. a
// b
// ```
//
// Includes `listItemPrefix`, flow, and optionally `listItemIndent` on further
// lines.
listOrdered: 'listOrdered',
// Whole unordered list:
//
// ```markdown
// - a
// b
// ```
//
// Includes `listItemPrefix`, flow, and optionally `listItemIndent` on further
// lines.
listUnordered: 'listUnordered',
// The indent of further list item lines.
listItemIndent: 'listItemIndent',
// A marker, as in, `*`, `+`, `-`, `.`, or `)`.
listItemMarker: 'listItemMarker',
// The thing that starts a list item, such as `1. `.
// Includes `listItemValue` if ordered, `listItemMarker`, and
// `listItemPrefixWhitespace` (unless followed by a line ending).
listItemPrefix: 'listItemPrefix',
// The whitespace after a marker.
listItemPrefixWhitespace: 'listItemPrefixWhitespace',
// The numerical value of an ordered item.
listItemValue: 'listItemValue',
// Internal types used for subtokenizers, compiled away
chunkDocument: 'chunkDocument',
chunkContent: 'chunkContent',
chunkFlow: 'chunkFlow',
chunkText: 'chunkText',
chunkString: 'chunkString'
})

View File

@@ -0,0 +1,101 @@
/**
 * Type declarations for `values`, the string forms of characters.
 *
 * Every member is declared with the string literal type of the single
 * character it holds.
 */
export namespace values {
  let ht: '\t'
  let lf: '\n'
  let cr: '\r'
  let space: ' '
  let exclamationMark: '!'
  let quotationMark: '"'
  let numberSign: '#'
  let dollarSign: '$'
  let percentSign: '%'
  let ampersand: '&'
  let apostrophe: "'"
  let leftParenthesis: '('
  let rightParenthesis: ')'
  let asterisk: '*'
  let plusSign: '+'
  let comma: ','
  let dash: '-'
  let dot: '.'
  let slash: '/'
  let digit0: '0'
  let digit1: '1'
  let digit2: '2'
  let digit3: '3'
  let digit4: '4'
  let digit5: '5'
  let digit6: '6'
  let digit7: '7'
  let digit8: '8'
  let digit9: '9'
  let colon: ':'
  let semicolon: ';'
  let lessThan: '<'
  let equalsTo: '='
  let greaterThan: '>'
  let questionMark: '?'
  let atSign: '@'
  let uppercaseA: 'A'
  let uppercaseB: 'B'
  let uppercaseC: 'C'
  let uppercaseD: 'D'
  let uppercaseE: 'E'
  let uppercaseF: 'F'
  let uppercaseG: 'G'
  let uppercaseH: 'H'
  let uppercaseI: 'I'
  let uppercaseJ: 'J'
  let uppercaseK: 'K'
  let uppercaseL: 'L'
  let uppercaseM: 'M'
  let uppercaseN: 'N'
  let uppercaseO: 'O'
  let uppercaseP: 'P'
  let uppercaseQ: 'Q'
  let uppercaseR: 'R'
  let uppercaseS: 'S'
  let uppercaseT: 'T'
  let uppercaseU: 'U'
  let uppercaseV: 'V'
  let uppercaseW: 'W'
  let uppercaseX: 'X'
  let uppercaseY: 'Y'
  let uppercaseZ: 'Z'
  let leftSquareBracket: '['
  let backslash: '\\'
  let rightSquareBracket: ']'
  let caret: '^'
  let underscore: '_'
  let graveAccent: '`'
  let lowercaseA: 'a'
  let lowercaseB: 'b'
  let lowercaseC: 'c'
  let lowercaseD: 'd'
  let lowercaseE: 'e'
  let lowercaseF: 'f'
  let lowercaseG: 'g'
  let lowercaseH: 'h'
  let lowercaseI: 'i'
  let lowercaseJ: 'j'
  let lowercaseK: 'k'
  let lowercaseL: 'l'
  let lowercaseM: 'm'
  let lowercaseN: 'n'
  let lowercaseO: 'o'
  let lowercaseP: 'p'
  let lowercaseQ: 'q'
  let lowercaseR: 'r'
  let lowercaseS: 's'
  let lowercaseT: 't'
  let lowercaseU: 'u'
  let lowercaseV: 'v'
  let lowercaseW: 'w'
  let lowercaseX: 'x'
  let lowercaseY: 'y'
  let lowercaseZ: 'z'
  let leftCurlyBrace: '{'
  let verticalBar: '|'
  let rightCurlyBrace: '}'
  let tilde: '~'
  // U+FFFD, written as an escape so the literal cannot be mangled by a
  // lossy re-encoding of this file.
  let replacementCharacter: '\uFFFD'
}

View File

@@ -0,0 +1,109 @@
/**
 * This module is compiled away!
 *
 * While micromark works based on character codes, this module includes the
 * string versions of them.
 * Available here are HT, LF, and CR, the characters from space through tilde,
 * and the replacement character.
 */
export const values = /** @type {const} */ ({
  ht: '\t',
  lf: '\n',
  cr: '\r',
  space: ' ',
  exclamationMark: '!',
  quotationMark: '"',
  numberSign: '#',
  dollarSign: '$',
  percentSign: '%',
  ampersand: '&',
  apostrophe: "'",
  leftParenthesis: '(',
  rightParenthesis: ')',
  asterisk: '*',
  plusSign: '+',
  comma: ',',
  dash: '-',
  dot: '.',
  slash: '/',
  digit0: '0',
  digit1: '1',
  digit2: '2',
  digit3: '3',
  digit4: '4',
  digit5: '5',
  digit6: '6',
  digit7: '7',
  digit8: '8',
  digit9: '9',
  colon: ':',
  semicolon: ';',
  lessThan: '<',
  equalsTo: '=',
  greaterThan: '>',
  questionMark: '?',
  atSign: '@',
  uppercaseA: 'A',
  uppercaseB: 'B',
  uppercaseC: 'C',
  uppercaseD: 'D',
  uppercaseE: 'E',
  uppercaseF: 'F',
  uppercaseG: 'G',
  uppercaseH: 'H',
  uppercaseI: 'I',
  uppercaseJ: 'J',
  uppercaseK: 'K',
  uppercaseL: 'L',
  uppercaseM: 'M',
  uppercaseN: 'N',
  uppercaseO: 'O',
  uppercaseP: 'P',
  uppercaseQ: 'Q',
  uppercaseR: 'R',
  uppercaseS: 'S',
  uppercaseT: 'T',
  uppercaseU: 'U',
  uppercaseV: 'V',
  uppercaseW: 'W',
  uppercaseX: 'X',
  uppercaseY: 'Y',
  uppercaseZ: 'Z',
  leftSquareBracket: '[',
  backslash: '\\',
  rightSquareBracket: ']',
  caret: '^',
  underscore: '_',
  graveAccent: '`',
  lowercaseA: 'a',
  lowercaseB: 'b',
  lowercaseC: 'c',
  lowercaseD: 'd',
  lowercaseE: 'e',
  lowercaseF: 'f',
  lowercaseG: 'g',
  lowercaseH: 'h',
  lowercaseI: 'i',
  lowercaseJ: 'j',
  lowercaseK: 'k',
  lowercaseL: 'l',
  lowercaseM: 'm',
  lowercaseN: 'n',
  lowercaseO: 'o',
  lowercaseP: 'p',
  lowercaseQ: 'q',
  lowercaseR: 'r',
  lowercaseS: 's',
  lowercaseT: 't',
  lowercaseU: 'u',
  lowercaseV: 'v',
  lowercaseW: 'w',
  lowercaseX: 'x',
  lowercaseY: 'y',
  lowercaseZ: 'z',
  leftCurlyBrace: '{',
  verticalBar: '|',
  rightCurlyBrace: '}',
  tilde: '~',
  // U+FFFD (character code 65533), written as an escape so the value cannot
  // be mangled by a lossy re-encoding of this file.
  replacementCharacter: '\uFFFD'
})

View File

@@ -0,0 +1,35 @@
{
"name": "micromark-util-symbol",
"version": "2.0.0",
"description": "micromark utility with symbols",
"license": "MIT",
"keywords": [
"micromark",
"util",
"utility",
"symbol"
],
"repository": "https://github.com/micromark/micromark/tree/main/packages/micromark-util-symbol",
"bugs": "https://github.com/micromark/micromark/issues",
"funding": [
{
"type": "GitHub Sponsors",
"url": "https://github.com/sponsors/unifiedjs"
},
{
"type": "OpenCollective",
"url": "https://opencollective.com/unified"
}
],
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"files": [
"lib/"
],
"exports": "./lib/default.js",
"xo": false
}

View File

@@ -0,0 +1,168 @@
# micromark-util-symbol
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][bundle-size-badge]][bundle-size]
[![Sponsors][sponsors-badge]][opencollective]
[![Backers][backers-badge]][opencollective]
[![Chat][chat-badge]][chat]
[micromark][] utility with symbols.
## Contents
* [What is this?](#what-is-this)
* [When should I use this?](#when-should-i-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package exposes constants used throughout the micromark ecosystem.
## When should I use this?
This package is useful when you are making your own micromark extensions.
It's useful to reference these constants by name instead of value while
developing.
[`micromark-build`][micromark-build] compiles them away for production code.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-util-symbol
```
In Deno with [`esm.sh`][esmsh]:
```js
import * as symbol from 'https://esm.sh/micromark-util-symbol@2'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import * as symbol from 'https://esm.sh/micromark-util-symbol@2?bundle'
</script>
```
## Use
```js
import {codes, constants, types, values} from 'micromark-util-symbol'
console.log(codes.atSign) // 64
console.log(constants.characterReferenceNamedSizeMax) // 31
console.log(types.definitionDestinationRaw) // 'definitionDestinationRaw'
console.log(values.atSign) // '@'
```
## API
This package exports the identifiers `codes`, `constants`, `types`, and
`values`.
There is no default export.
Each identifier is an object mapping strings to values.
See the code for the exposed data.
## Types
This package is fully typed with [TypeScript][].
It exports no additional types.
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-util-symbol@^2`, compatible with Node.js 16.
This package works with `micromark@^3`.
## Security
This package is safe.
See [`security.md`][securitymd] in [`micromark/.github`][health] for how to
submit a security report.
## Contribute
See [`contributing.md`][contributing] in [`micromark/.github`][health] for ways
to get started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organisation, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark.svg
[coverage]: https://codecov.io/github/micromark/micromark
[downloads-badge]: https://img.shields.io/npm/dm/micromark-util-symbol.svg
[downloads]: https://www.npmjs.com/package/micromark-util-symbol
[bundle-size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-util-symbol
[bundle-size]: https://bundlejs.com/?q=micromark-util-symbol
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[opencollective]: https://opencollective.com/unified
[npm]: https://docs.npmjs.com/cli/install
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[esmsh]: https://esm.sh
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[license]: https://github.com/micromark/micromark/blob/main/license
[author]: https://wooorm.com
[health]: https://github.com/micromark/.github
[securitymd]: https://github.com/micromark/.github/blob/main/security.md
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[typescript]: https://www.typescriptlang.org
[micromark]: https://github.com/micromark/micromark
[micromark-build]: https://github.com/micromark/micromark/tree/main/packages/micromark-build

View File

@@ -0,0 +1,72 @@
{
"name": "micromark-extension-mdx-expression",
"version": "3.0.0",
"description": "micromark extension to support MDX or MDX JS expressions",
"license": "MIT",
"keywords": [
"micromark",
"micromark-extension",
"mdx",
"mdxjs",
"expression",
"js",
"javascript",
"es",
"ecmascript",
"markdown",
"unified"
],
"repository": "https://github.com/micromark/micromark-extension-mdx-expression/tree/main/packages/micromark-extension-mdx-expression",
"bugs": "https://github.com/micromark/micromark-extension-mdx-expression/issues",
"funding": [
{
"type": "GitHub Sponsors",
"url": "https://github.com/sponsors/unifiedjs"
},
{
"type": "OpenCollective",
"url": "https://opencollective.com/unified"
}
],
"author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
"contributors": [
"Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
],
"sideEffects": false,
"type": "module",
"exports": {
"development": "./dev/index.js",
"default": "./index.js"
},
"files": [
"dev/",
"lib/",
"index.d.ts",
"index.js"
],
"dependencies": {
"@types/estree": "^1.0.0",
"devlop": "^1.0.0",
"micromark-factory-mdx-expression": "^2.0.0",
"micromark-factory-space": "^2.0.0",
"micromark-util-character": "^2.0.0",
"micromark-util-events-to-acorn": "^2.0.0",
"micromark-util-symbol": "^2.0.0",
"micromark-util-types": "^2.0.0"
},
"scripts": {
"build": "micromark-build"
},
"typeCoverage": {
"atLeast": 100,
"detail": true,
"ignoreCatch": true,
"strict": true
},
"xo": {
"prettier": true,
"rules": {
"unicorn/no-this-assignment": "off"
}
}
}

View File

@@ -0,0 +1,433 @@
# micromark-extension-mdx-expression
[![Build][build-badge]][build]
[![Coverage][coverage-badge]][coverage]
[![Downloads][downloads-badge]][downloads]
[![Size][size-badge]][size]
[![Sponsors][sponsors-badge]][collective]
[![Backers][backers-badge]][collective]
[![Chat][chat-badge]][chat]
[micromark][] extension to support [MDX][mdxjs] expressions (`{Math.PI}`).
## Contents
* [What is this?](#what-is-this)
* [When to use this](#when-to-use-this)
* [Install](#install)
* [Use](#use)
* [API](#api)
* [`mdxExpression(options?)`](#mdxexpressionoptions)
* [Options](#options)
* [Authoring](#authoring)
* [Syntax](#syntax)
* [Errors](#errors)
* [Unexpected end of file in expression, expected a corresponding closing brace for `{`](#unexpected-end-of-file-in-expression-expected-a-corresponding-closing-brace-for-)
* [Unexpected lazy line in expression in container, expected line to be prefixed…](#unexpected-lazy-line-in-expression-in-container-expected-line-to-be-prefixed)
* [Unexpected `$type` in code: expected an object spread (`{...spread}`)](#unexpected-type-in-code-expected-an-object-spread-spread)
* [Unexpected extra content in spread: only a single spread is supported](#unexpected-extra-content-in-spread-only-a-single-spread-is-supported)
* [Could not parse expression with acorn](#could-not-parse-expression-with-acorn)
* [Tokens](#tokens)
* [Types](#types)
* [Compatibility](#compatibility)
* [Security](#security)
* [Related](#related)
* [Contribute](#contribute)
* [License](#license)
## What is this?
This package contains an extension that adds support for the expression syntax
enabled by [MDX][mdxjs] to [`micromark`][micromark].
These extensions are used inside MDX.
This package can be made aware or unaware of JavaScript syntax.
When unaware, expressions could include Rust or variables or whatnot.
## When to use this
This project is useful when you want to support expressions in markdown.
You can use this extension when you are working with [`micromark`][micromark].
To support all MDX features, use
[`micromark-extension-mdxjs`][micromark-extension-mdxjs] instead.
When you need a syntax tree, combine this package with
[`mdast-util-mdx-expression`][mdast-util-mdx-expression].
All these packages are used in [`remark-mdx`][remark-mdx], which focusses on
making it easier to transform content by abstracting these internals away.
When you are using [`mdx-js/mdx`][mdxjs], all of this is already included.
## Install
This package is [ESM only][esm].
In Node.js (version 16+), install with [npm][]:
```sh
npm install micromark-extension-mdx-expression
```
In Deno with [`esm.sh`][esmsh]:
```js
import {mdxExpression} from 'https://esm.sh/micromark-extension-mdx-expression@3'
```
In browsers with [`esm.sh`][esmsh]:
```html
<script type="module">
import {mdxExpression} from 'https://esm.sh/micromark-extension-mdx-expression@3?bundle'
</script>
```
## Use
```js
import {Parser} from 'acorn'
import acornJsx from 'acorn-jsx'
import {micromark} from 'micromark'
import {mdxExpression} from 'micromark-extension-mdx-expression'
// Unaware of JavaScript (“agnostic”) (balanced braces):
const output = micromark('a {1 + 1} b', {extensions: [mdxExpression()]})
console.log(output)
// Aware of JavaScript:
micromark('a {!} b', {extensions: [mdxExpression({acorn: Parser.extend(acornJsx())})]})
```
Yields:
```html
<p>a b</p>
```
```txt
[1:5: Could not parse expression with acorn] {
ancestors: undefined,
cause: SyntaxError: Unexpected token
at pp$4.raise (file:///Users/tilde/Projects/oss/micromark-extension-mdx-expression/node_modules/acorn/dist/acorn.mjs:3547:13)
at pp$9.unexpected (file:///Users/tilde/Projects/oss/micromark-extension-mdx-expression/node_modules/acorn/dist/acorn.mjs:758:8)
pos: 4,
loc: { line: 1, column: 4 },
raisedAt: 1
},
column: 5,
fatal: undefined,
line: 1,
place: { line: 1, column: 5, offset: 4 },
reason: 'Could not parse expression with acorn',
ruleId: 'acorn',
source: 'micromark-extension-mdx-expression',
url: 'https://github.com/micromark/micromark-extension-mdx-expression/tree/main/packages/micromark-extension-mdx-expression#could-not-parse-expression-with-acorn'
}
```
…which is useless: go to a syntax tree with
[`mdast-util-from-markdown`][mdast-util-from-markdown] and
[`mdast-util-mdx-expression`][mdast-util-mdx-expression] instead.
## API
This package exports the identifier [`mdxExpression`][api-mdx-expression].
There is no default export.
The export map supports the [`development` condition][development].
Run `node --conditions development module.js` to get instrumented dev code.
Without this condition, production code is loaded.
### `mdxExpression(options?)`
Create an extension for `micromark` to enable MDX expression syntax.
###### Parameters
* `options` ([`Options`][api-options], optional)
— configuration
###### Returns
Extension for `micromark` that can be passed in `extensions` to enable MDX
expression syntax ([`Extension`][micromark-extension]).
### Options
Configuration (TypeScript type).
###### Fields
* `acorn` ([`Acorn`][acorn], optional)
— acorn parser to use
* `acornOptions` ([`AcornOptions`][acorn-options], default:
`{ecmaVersion: 2024, locations: true, sourceType: 'module'}`)
— configuration for acorn; all fields except `locations` can be set
* `addResult` (`boolean`, default: `false`)
— whether to add `estree` fields to tokens with results from acorn
<!-- Note: `spread` and `allowEmpty` are intentionally not documented. -->
## Authoring
When authoring markdown with JavaScript, keep in mind that MDX is a whitespace
sensitive and line-based language, while JavaScript is insensitive to
whitespace.
This affects how markdown and JavaScript interleave with each other in MDX.
For more info on how it works, see [§ Interleaving][mdxjs-interleaving] on the
MDX site.
## Syntax
This extension supports MDX both aware and unaware of JavaScript (respectively
gnostic and agnostic).
Depending on whether acorn is passed, either valid JavaScript must be used in
expressions, or arbitrary text (such as Rust code or so) can be used.
There are two types of expressions: in text (inline, span) or in flow (block).
They start with `{`.
Depending on whether `acorn` is passed, expressions are either parsed in several
tries until whole JavaScript is found (as in, nested curly braces depend on JS
expression nesting), or they are counted and must be balanced.
Expressions end with `}`.
For flow (block) expressions, optional markdown spaces (` ` or `\t`) can occur
after the closing brace, and finally a markdown line ending (`\r`, `\n`) or the
end of the file must follow.
While markdown typically knows no errors, for MDX it is decided to instead
throw on invalid syntax.
```mdx
Here is an expression in a heading:
## Hello, {1 + 1}!
In agnostic mode, balanced braces can occur: {a + {b} + c}.
In gnostic mode, the value of the expression must be JavaScript, so
this would fail: {!}.
But, in gnostic mode, braces can be in comments, strings, or in other
places: {1 /* { */ + 2}.
The previous examples were text (inline, span) expressions, they can
also be flow (block):
{
1 + 1
}
This is incorrect, because there are further characters:
{
1 + 1
}!
```
```mdx-broken
Blank lines cannot occur in text, because markdown has already split them in
separate constructs, so this is incorrect: {1 +
1}
```
```mdx
In flow, you can have blank lines:
{
1 +
2
}
```
## Errors
### Unexpected end of file in expression, expected a corresponding closing brace for `{`
This error occurs if a `{` was seen without a `}` (source:
`micromark-extension-mdx-expression`, rule id: `unexpected-eof`).
For example:
```mdx-broken
a { b
```
### Unexpected lazy line in expression in container, expected line to be prefixed…
This error occurs if a `{` was seen in a container which then has lazy content
(source: `micromark-extension-mdx-expression`, rule id: `unexpected-lazy`).
For example:
```mdx-broken
> {a
b}
```
### Unexpected `$type` in code: expected an object spread (`{...spread}`)
This error occurs if a spread was expected but something else was found
(source: `micromark-extension-mdx-expression`, rule id: `non-spread`).
For example:
```mdx-broken
<a {b=c}={} d>
```
### Unexpected extra content in spread: only a single spread is supported
This error occurs if a spread was expected but more was found after it
(source: `micromark-extension-mdx-expression`, rule id: `spread-extra`).
For example:
```mdx-broken
<a {...b,c} d>
```
### Could not parse expression with acorn
This error occurs if acorn crashes or when there is more content after a JS
expression (source: `micromark-extension-mdx-expression`, rule id: `acorn`).
For example:
```mdx-broken
a {"b" "c"} d
```
```mdx-broken
a {var b = "c"} d
```
## Tokens
Two tokens are used, `mdxFlowExpression` and `mdxTextExpression`, to reflect
flow and text expressions.
They include:
* `lineEnding` for the markdown line endings `\r`, `\n`, and `\r\n`
* `mdxFlowExpressionMarker` and `mdxTextExpressionMarker` for the braces
* `whitespace` for markdown spaces and tabs in blank lines
* `mdxFlowExpressionChunk` and `mdxTextExpressionChunk` for chunks of
expression content
## Types
This package is fully typed with [TypeScript][].
It exports the additional type [`Options`][api-options].
## Compatibility
Projects maintained by the unified collective are compatible with maintained
versions of Node.js.
When we cut a new major release, we drop support for unmaintained versions of
Node.
This means we try to keep the current release line,
`micromark-extension-mdx-expression@^3`, compatible with Node.js 16.
This package works with `micromark` version `3` and later.
## Security
This package is safe.
## Related
* [`micromark-extension-mdxjs`][micromark-extension-mdxjs]
— support all MDX syntax
* [`mdast-util-mdx-expression`][mdast-util-mdx-expression]
— support MDX expressions in mdast
* [`remark-mdx`][remark-mdx]
— support all MDX syntax in remark
## Contribute
See [`contributing.md` in `micromark/.github`][contributing] for ways to get
started.
See [`support.md`][support] for ways to get help.
This project has a [code of conduct][coc].
By interacting with this repository, organization, or community you agree to
abide by its terms.
## License
[MIT][license] © [Titus Wormer][author]
<!-- Definitions -->
[build-badge]: https://github.com/micromark/micromark-extension-mdx-expression/workflows/main/badge.svg
[build]: https://github.com/micromark/micromark-extension-mdx-expression/actions
[coverage-badge]: https://img.shields.io/codecov/c/github/micromark/micromark-extension-mdx-expression.svg
[coverage]: https://codecov.io/github/micromark/micromark-extension-mdx-expression
[downloads-badge]: https://img.shields.io/npm/dm/micromark-extension-mdx-expression.svg
[downloads]: https://www.npmjs.com/package/micromark-extension-mdx-expression
[size-badge]: https://img.shields.io/badge/dynamic/json?label=minzipped%20size&query=$.size.compressedSize&url=https://deno.bundlejs.com/?q=micromark-extension-mdx-expression
[size]: https://bundlejs.com/?q=micromark-extension-mdx-expression
[sponsors-badge]: https://opencollective.com/unified/sponsors/badge.svg
[backers-badge]: https://opencollective.com/unified/backers/badge.svg
[collective]: https://opencollective.com/unified
[chat-badge]: https://img.shields.io/badge/chat-discussions-success.svg
[chat]: https://github.com/micromark/micromark/discussions
[npm]: https://docs.npmjs.com/cli/install
[esmsh]: https://esm.sh
[license]: https://github.com/micromark/micromark-extension-mdx-expression/blob/main/license
[author]: https://wooorm.com
[contributing]: https://github.com/micromark/.github/blob/main/contributing.md
[support]: https://github.com/micromark/.github/blob/main/support.md
[coc]: https://github.com/micromark/.github/blob/main/code-of-conduct.md
[esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
[typescript]: https://www.typescriptlang.org
[development]: https://nodejs.org/api/packages.html#packages_resolving_user_conditions
[micromark]: https://github.com/micromark/micromark
[micromark-extension]: https://github.com/micromark/micromark#syntaxextension
[micromark-extension-mdxjs]: https://github.com/micromark/micromark-extension-mdxjs
[mdast-util-mdx-expression]: https://github.com/syntax-tree/mdast-util-mdx-expression
[mdast-util-from-markdown]: https://github.com/syntax-tree/mdast-util-from-markdown
[remark-mdx]: https://mdxjs.com/packages/remark-mdx/
[mdxjs]: https://mdxjs.com
[mdxjs-interleaving]: https://mdxjs.com/docs/what-is-mdx/#interleaving
[acorn]: https://github.com/acornjs/acorn
[acorn-options]: https://github.com/acornjs/acorn/blob/96c721dbf89d0ccc3a8c7f39e69ef2a6a3c04dfa/acorn/dist/acorn.d.ts#L16
[api-mdx-expression]: #mdxexpressionoptions
[api-options]: #options