'use strict'

// Expose the tokenizer factory as the module's single export.
module.exports = factory

// Construct a tokenizer. This creates both `tokenizeInline` and `tokenizeBlock`.
//
// `type` is the prefix used to look up two properties on the `this` value the
// returned function is called with: `this[type + 'Methods']` (an ordered list
// of tokenizer names) and `this[type + 'Tokenizers']` (name → function).
function factory(type) {
  return tokenize

  // Tokenizer for a bound `type`.
  //
  // Must be invoked with the parser as `this`. Besides the two lookups above
  // it reads `offset`, `file`, `toOffset`, `atStart`, `inList`, `inBlock`,
  // `inLink` and `exitStart` from `this`, and writes `this.eof`.
  //
  // `value` is the string to tokenize; `location` is an object with the
  // `line` and `column` at which `value` starts.
  //
  // Returns the list of top-level nodes that tokenizers added through `eat`.
  function tokenize(value, location) {
    var self = this
    var offset = self.offset
    var tokens = []
    var methods = self[type + 'Methods']
    var tokenizers = self[type + 'Tokenizers']
    var line = location.line
    var column = location.column
    var index
    var length
    var method
    var name
    var matched
    var valueLength

    // Nothing to tokenize: hand back the (empty) token list untouched.
    if (!value) {
      return tokens
    }

    // Expose on `eat`.
    eat.now = now
    eat.file = self.file

    // Sync initial offset.
    updatePosition('')

    // Iterate over `value`, and iterate over all tokenizers. When one eats
    // something, re-iterate with the remaining value. If no tokenizer eats,
    // something failed (should not happen) and an exception is thrown.
    while (value) {
      index = -1
      length = methods.length
      matched = false

      while (++index < length) {
        name = methods[index]
        method = tokenizers[name]

        // Previously, we had constructs such as footnotes and YAML that used
        // these properties.
        // Those are now external (plus there are userland extensions), that may
        // still use them.
        if (
          method &&
          /* istanbul ignore next */ (!method.onlyAtStart || self.atStart) &&
          /* istanbul ignore next */ (!method.notInList || !self.inList) &&
          /* istanbul ignore next */ (!method.notInBlock || !self.inBlock) &&
          (!method.notInLink || !self.inLink)
        ) {
          valueLength = value.length

          method.apply(self, [eat, value])

          // A tokenizer "matched" when calling `eat` shortened `value`.
          matched = valueLength !== value.length

          if (matched) {
            break
          }
        }
      }

      /* istanbul ignore if */
      if (!matched) {
        // Relies on `file.fail` throwing; otherwise this loop never ends.
        self.file.fail(new Error('Infinite loop'), eat.now())
      }
    }

    self.eof = now()

    return tokens

    // Update line, column, and offset based on `value`.
    //
    // Each `\n` in `subvalue` advances `line`. Without a line feed, `column`
    // grows by the length of `subvalue`; with one, `column` restarts from the
    // characters following the last line feed. A per-line entry in `offset`,
    // when present, is then applied on top.
    function updatePosition(subvalue) {
      var lastIndex = -1
      var index = subvalue.indexOf('\n')

      while (index !== -1) {
        line++
        lastIndex = index
        index = subvalue.indexOf('\n', index + 1)
      }

      if (lastIndex === -1) {
        column += subvalue.length
      } else {
        column = subvalue.length - lastIndex
      }

      if (line in offset) {
        if (lastIndex !== -1) {
          // A new line was entered: shift by that line's recorded offset.
          column += offset[line]
        } else if (column <= offset[line]) {
          // Same line: make sure the column lies past the recorded offset.
          column = offset[line] + 1
        }
      }
    }

    // Get offset. Called before the first character is eaten to retrieve the
    // range’s offsets.
    //
    // Remembers the line after the current one, and returns a function that
    // later collects `(offset[line] || 0) + 1` for every line from there up to
    // and including the then-current line.
    function getOffset() {
      var indentation = []
      var pos = line + 1

      // Done. Called when the last character is eaten to retrieve the range’s
      // offsets.
      return function () {
        var last = line + 1

        while (pos < last) {
          indentation.push((offset[pos] || 0) + 1)

          pos++
        }

        return indentation
      }
    }

    // Get the current position: `line`, `column`, and the `offset` computed
    // by `self.toOffset` for that line/column pair.
    function now() {
      var pos = {line: line, column: column}

      pos.offset = self.toOffset(pos)

      return pos
    }

    // Store position information for a node. `end` is the position at the
    // moment of construction.
    function Position(start) {
      this.start = start
      this.end = now()
    }

    // Throw when a value is incorrectly eaten. This shouldn’t happen but will
    // throw on new, incorrect rules.
    function validateEat(subvalue) {
      /* istanbul ignore if */
      if (value.slice(0, subvalue.length) !== subvalue) {
        // Capture stack-trace.
        self.file.fail(
          new Error(
            'Incorrectly eaten value: please report this warning on https://git.io/vg5Ft'
          ),
          now()
        )
      }
    }

    // Mark position and patch `node.position`.
    //
    // Captures the current position as the start and returns `update`, which
    // stamps a node with a `Position` running from that start to the position
    // at the time `update` is called.
    function position() {
      var before = now()

      return update

      // Add the position to a node. `indent`, when given, is the list of
      // per-line columns to store as `node.position.indent`.
      function update(node, indent) {
        var previous = node.position
        var start = previous ? previous.start : before
        var combined = []
        var n = previous && previous.end.line
        var l = before.line

        node.position = new Position(start)

        // If there was already a `position`, this node was merged. Fixing
        // `start` wasn’t hard, but the indent is different. Especially
        // because some information, the indent between `n` and `l` wasn’t
        // tracked. Luckily, that space is (should be?) empty, so we can
        // safely check for it now.
        if (previous && indent && previous.indent) {
          // Note: this reuses (and may extend) the previous indent array.
          combined = previous.indent

          if (n < l) {
            while (++n < l) {
              combined.push((offset[n] || 0) + 1)
            }

            combined.push(before.column)
          }

          indent = combined.concat(indent)
        }

        node.position.indent = indent || []

        return node
      }
    }

    // Add `node` to `parent`s children or to `tokens`. Performs merges where
    // possible.
    //
    // Returns the node that ended up in the list: either `node` itself or the
    // previous sibling it was merged into.
    function add(node, parent) {
      var children = parent ? parent.children : tokens
      var previous = children[children.length - 1]
      var fn

      if (
        previous &&
        node.type === previous.type &&
        (node.type === 'text' || node.type === 'blockquote') &&
        mergeable(previous) &&
        mergeable(node)
      ) {
        fn = node.type === 'text' ? mergeText : mergeBlockquote
        node = fn.call(self, previous, node)
      }

      // A merge returns `previous`, which is already in `children`.
      if (node !== previous) {
        children.push(node)
      }

      if (self.atStart && tokens.length !== 0) {
        self.exitStart()
      }

      return node
    }

    // Remove `subvalue` from `value`. `subvalue` must be at the start of
    // `value`.
    //
    // Returns `apply`, which also carries `apply.reset` and `apply.test`.
    function eat(subvalue) {
      var indent = getOffset()
      var pos = position()
      var current = now()

      validateEat(subvalue)

      apply.reset = reset
      reset.test = test
      apply.test = test

      value = value.slice(subvalue.length)

      updatePosition(subvalue)

      // Resolve the indentation now that `line` reflects the eaten text.
      indent = indent()

      return apply

      // Add the given arguments, add `position` to the returned node, and
      // return the node.
      function apply(node, parent) {
        return pos(add(pos(node), parent), indent)
      }

      // Functions just like apply, but resets the content: the line and
      // column are reversed, and the eaten value is re-added. This is
      // useful for nodes with a single type of content, such as lists and
      // tables. See `apply` above for what parameters are expected.
      function reset() {
        var node = apply.apply(null, arguments)

        line = current.line
        column = current.column
        value = subvalue + value

        return node
      }

      // Test the position, after eating, and reverse to a not-eaten state.
      function test() {
        var result = pos({})

        line = current.line
        column = current.column
        value = subvalue + value

        return result.position
      }
    }
  }
}

// Check whether a node is mergeable with adjacent nodes.
//
// Nodes that are not of type `text`, and text nodes without a `position`,
// are always reported as mergeable. A positioned text node is mergeable when
// it spans several lines, or when its column span equals the length of its
// `value`.
function mergeable(node) {
  var start
  var end

  if (node.type !== 'text' || !node.position) {
    return true
  }

  start = node.position.start
  end = node.position.end

  // Only merge nodes which occupy the same size as their `value`.
  return (
    start.line !== end.line || end.column - start.column === node.value.length
  )
}

// Merge two text nodes: `node` into `prev`.
//
// Appends `node.value` to `previous.value` in place and returns `previous`.
function mergeText(previous, node) {
  previous.value += node.value

  return previous
}

// Merge two blockquotes: `node` into `prev`, unless in CommonMark or gfm modes.
//
// Called with the parser as `this`. When `this.options.commonmark` or
// `this.options.gfm` is truthy, nothing is merged and `node` is returned.
// Otherwise `previous.children` is replaced by a new array holding its own
// children followed by `node.children`, and `previous` is returned.
function mergeBlockquote(previous, node) {
  if (this.options.commonmark || this.options.gfm) {
    return node
  }

  previous.children = previous.children.concat(node.children)

  return previous
}