This repository has been archived on 2020-11-02. You can view files and clone it, but cannot push or open issues or pull requests.
TripSit_Suite/node_modules/remark-parse/lib/tokenize/url.js
2020-11-01 22:46:04 +00:00

211 lines
4.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'use strict'
var ccount = require('ccount')
var decode = require('parse-entities')
var decimal = require('is-decimal')
var alphabetical = require('is-alphabetical')
var whitespace = require('is-whitespace-character')
var locate = require('../locate/url')
// Export the `url` tokenizer function (declared further down; hoisted).
module.exports = url
// Attach the locator loaded from `../locate/url` to the tokenizer.
// NOTE(review): presumably the inline tokenizer loop calls this to find the
// next candidate position for a URL - confirm against the parser.
url.locator = locate
// NOTE(review): presumably tells the parser to skip this tokenizer while
// already inside a link - confirm against the parser.
url.notInLink = true
// Character codes, compared against `value.charCodeAt(...)` results in `url`.
var exclamationMark = 33 // '!'
var ampersand = 38 // '&'
var rightParenthesis = 41 // ')'
var asterisk = 42 // '*'
var comma = 44 // ','
var dash = 45 // '-'
var dot = 46 // '.'
var colon = 58 // ':'
var semicolon = 59 // ';'
var questionMark = 63 // '?'
var lessThan = 60 // '<'
var underscore = 95 // '_'
var tilde = 126 // '~'
// One-character strings used when balancing parentheses in the path: both are
// passed to `ccount`, and the closing one is also passed to `lastIndexOf`.
var leftParenthesisCharacter = '('
var rightParenthesisCharacter = ')'
/**
 * Tokenize a GFM literal autolink (`www.`, `http://` or `https://`) found at
 * the very start of `value`.
 *
 * Must be called with the parser as `this`: it reads `this.options.gfm` and
 * `this.inlineTokenizers`, and calls `this.enterLink()` and
 * `this.tokenizeInline()`.
 *
 * @param {Function} eat
 *   Eater: `eat(content)(node)` is called with the matched text and the link
 *   node, and `eat.now()` is passed as second argument to `tokenizeInline`.
 * @param {string} value
 *   Text to inspect; a link is only recognised at index 0.
 * @param {boolean} [silent]
 *   When truthy, return `true` as soon as a valid domain is found, without
 *   parsing the path or creating a node.
 * @returns {*}
 *   `undefined` when GFM is off or no autolink starts here, `true` in silent
 *   mode, otherwise whatever `eat(content)(node)` returns.
 */
function url(eat, value, silent) {
  var self = this
  var gfm = self.options.gfm
  var tokenizers = self.inlineTokenizers
  var length = value.length
  var previousDot = -1
  var protocolless = false
  var dots
  var lastTwoPartsStart
  var start
  var index
  var pathStart
  var path
  var code
  var end
  var leftCount
  var rightCount
  var content
  var children
  var href
  var exit

  if (!gfm) {
    return
  }

  // `WWW.` doesn't work: the protocol-less prefix is matched case-sensitively,
  // whereas the `http://` and `https://` prefixes are not.
  if (value.slice(0, 4) === 'www.') {
    protocolless = true
    index = 4
  } else if (value.slice(0, 7).toLowerCase() === 'http://') {
    index = 7
  } else if (value.slice(0, 8).toLowerCase() === 'https://') {
    index = 8
  } else {
    return
  }

  // Act as if the starting boundary is a dot, so a dot directly after the
  // prefix counts as two dots in a row.
  previousDot = index - 1

  // Parse a valid domain, recording the position of every dot inside it.
  start = index
  dots = []

  while (index < length) {
    code = value.charCodeAt(index)

    if (code === dot) {
      // Dots may not appear after each other.
      if (previousDot === index - 1) {
        break
      }

      dots.push(index)
      previousDot = index
      index++
      continue
    }

    if (
      decimal(code) ||
      alphabetical(code) ||
      code === dash ||
      code === underscore
    ) {
      index++
      continue
    }

    break
  }

  // Ignore a final dot. Compare against the last recorded dot rather than the
  // last character read, so that a dot followed by a non-domain character
  // (such as the space in `www.example.com. foo`) is dropped as well, and not
  // only a dot at the end of the input or before a second dot.
  if (dots.length !== 0 && dots[dots.length - 1] === index - 1) {
    dots.pop()
    index--
  }

  // If there are no dots, exit.
  if (dots[0] === undefined) {
    return
  }

  // If there is an underscore in the last two domain parts, exit:
  // `www.example.c_m` and `www.ex_ample.com` are not OK, but
  // `www.sub_domain.example.com` is.
  lastTwoPartsStart = dots.length < 2 ? start : dots[dots.length - 2] + 1

  if (value.slice(lastTwoPartsStart, index).indexOf('_') !== -1) {
    return
  }

  /* istanbul ignore if - never used (yet) */
  if (silent) {
    return true
  }

  end = index
  pathStart = index

  // Parse a path: everything up to whitespace or `<`. `end` only advances
  // past characters that are not trailing punctuation, so a run of such
  // punctuation at the very end is left out of the link.
  while (index < length) {
    code = value.charCodeAt(index)

    if (whitespace(code) || code === lessThan) {
      break
    }

    index++

    if (
      code === exclamationMark ||
      code === asterisk ||
      code === comma ||
      code === dot ||
      code === colon ||
      code === questionMark ||
      code === underscore ||
      code === tilde
    ) {
      // Empty: possibly trailing punctuation, do not move `end` yet.
    } else {
      end = index
    }
  }

  index = end

  // If the path ends in a closing paren, and the count of closing parens is
  // higher than the opening count, then remove the superfluous closing parens.
  if (value.charCodeAt(index - 1) === rightParenthesis) {
    path = value.slice(pathStart, index)
    leftCount = ccount(path, leftParenthesisCharacter)
    rightCount = ccount(path, rightParenthesisCharacter)

    while (rightCount > leftCount) {
      index = pathStart + path.lastIndexOf(rightParenthesisCharacter)
      path = value.slice(pathStart, index)
      rightCount--
    }
  }

  if (value.charCodeAt(index - 1) === semicolon) {
    // GitHub doesn't document this, but final semicolons aren't part of the
    // URL either.
    index--

    // If the path ends in what looks like an entity (`&` followed by letters
    // and the semicolon), it's not part of the path.
    if (alphabetical(value.charCodeAt(index - 1))) {
      end = index - 2

      while (alphabetical(value.charCodeAt(end))) {
        end--
      }

      if (value.charCodeAt(end) === ampersand) {
        index = end
      }
    }
  }

  content = value.slice(0, index)
  href = decode(content, {nonTerminated: false})

  if (protocolless) {
    href = 'http://' + href
  }

  exit = self.enterLink()

  // Temporarily remove all tokenizers except text in url. Restore them (and
  // leave the link context) even when tokenizing throws, so that a failure
  // here does not leave the parser with only the text tokenizer.
  self.inlineTokenizers = {text: tokenizers.text}

  try {
    children = self.tokenizeInline(content, eat.now())
  } finally {
    self.inlineTokenizers = tokenizers
    exit()
  }

  return eat(content)({type: 'link', title: null, url: href, children: children})
}