// Source path: phpBB3/vendor/s9e/text-formatter/src/Plugins/Litedown/Parser/Passes/Blocks.js

// File size: 12.08Kb
// Setext header underlines found by matchSetextLines(), keyed by the offset of the LF character
// that precedes the underline. Each entry holds endPos, endLen, blockDepth and tagName
var setextLines = {};

/**
* Parse the block-level markup of the text, one line at a time
*
* Every line is matched against a single regexp whose captures are:
*
*  - m[1] block marks: one or more ">" or ">!", each preceded by up to three spaces and
*         followed by an optional space. A ">!" that is followed later on the same line by
*         "!<" (with no ">" in between) is not treated as a block mark
*  - m[2] indentation made of spaces and tabs
*  - m[3] a line made of three or more "*", "-" or "_" optionally separated by spaces
*  - m[4] a list item marker: "-", "*", "+" or digits followed by a dot, then whitespace
*  - m[5] an ATX header marker (1 to 6 "#" followed by whitespace) or a code fence (three or
*         more backticks or tildes, optionally followed by other characters)
*
* Depending on those captures and on the state carried over from the previous lines, this
* function creates QUOTE, SPOILER, CODE, LIST, LI, header and HR tags, paragraph breaks,
* ignore tags and boundaries. It relies on the "text" variable and on the tag helpers
* (addStartTag, addEndTag, overwrite, markBoundary, etc...) that are defined outside of this
* section of the file.
*/
function parse()
{
	// Collect the Setext underlines first, before any markup gets overwritten below
	matchSetextLines();

	var blocks       = [],
		blocksCnt    = 0,
		codeFence,
		codeIndent   = 4,
		codeTag,
		lineIsEmpty  = true,
		lists        = [],
		listsCnt     = 0,
		newContext   = false,
		textBoundary = 0,
		breakParagraph,
		continuation,
		ignoreLen,
		indentStr,
		indentLen,
		lfPos,
		listIndex,
		maxIndent,
		minIndent,
		blockDepth,
		tagPos,
		tagLen;

	// Capture all the lines at once so that we can overwrite newlines safely, without preventing
	// further matches
	var matches = [],
		m,
		regexp = /^(?:(?=[-*+\d \t>`~#_])((?: {0,3}>(?:(?!!)|!(?![^\n>]*?!<)) ?)+)?([ \t]+)?(\* *\* *\*[* ]*$|- *- *-[- ]*$|_ *_ *_[_ ]*$)?((?:[-*+]|\d+\.)[ \t]+(?=\S))?[ \t]*(#{1,6}[ \t]+|```+[^`\n]*$|~~~+[^~\n]*$)?)?/gm;
	while (m = regexp.exec(text))
	{
		matches.push(m);

		// Move regexp.lastIndex if the current match is empty
		if (m.index === regexp.lastIndex)
		{
			++regexp.lastIndex;
		}
	}

	matches.forEach(function(m)
	{
		var blockMarks = [],
			matchPos   = m.index,
			matchLen   = m[0].length,
			startPos,
			startLen,
			endPos,
			endLen;

		// Reset the per-line state
		ignoreLen  = 0;
		blockDepth = 0;

		// If the last line was empty then this is not a continuation, and vice-versa
		continuation = !lineIsEmpty;

		// Capture the position of the end of the line and determine whether the line is empty.
		// A line is empty if the match runs up to the LF character without capturing a
		// horizontal rule, a list item or a header/fence
		lfPos       = text.indexOf("\n", matchPos);
		lineIsEmpty = (lfPos === matchPos + matchLen && !m[3] && !m[4] && !m[5]);

		// If the match is empty we need to move the cursor manually
		// NOTE(review): regexp is not executed again after the capture loop above, so this
		// adjustment appears to have no effect here -- confirm before removing
		if (!matchLen)
		{
			++regexp.lastIndex;
		}

		// If the line is empty and it's the first empty line then we break current paragraph.
		breakParagraph = (lineIsEmpty && continuation);

		// Count block marks
		if (m[1])
		{
			blockMarks = getBlockMarks(m[1]);
			blockDepth = blockMarks.length;
			ignoreLen  = m[1].length;

			// Inside of a fenced code block, block marks that are deeper than the depth recorded
			// on the fence's opening line are not counted and are not ignored
			if (codeTag && codeTag.hasAttribute('blockDepth'))
			{
				blockDepth = Math.min(blockDepth, codeTag.getAttribute('blockDepth'));
				ignoreLen  = computeBlockIgnoreLen(m[1], blockDepth);
			}

			// Overwrite block markup
			overwrite(matchPos, ignoreLen);
		}

		// Close supernumerary blocks. This only happens on a line that follows an empty line
		if (blockDepth < blocksCnt && !continuation)
		{
			newContext = true;
			do
			{
				var startTag = blocks.pop();
				addEndTag(startTag.getName(), textBoundary, 0).pairWith(startTag);
			}
			while (blockDepth < --blocksCnt);
		}

		// Open new blocks
		if (blockDepth > blocksCnt && !lineIsEmpty)
		{
			newContext = true;
			do
			{
				// The mark found at this depth decides between a spoiler and a quote
				var tagName = (blockMarks[blocksCnt] === '>!') ? 'SPOILER' : 'QUOTE';
				blocks.push(addStartTag(tagName, matchPos, 0, -999));
			}
			while (blockDepth > ++blocksCnt);
		}

		// Compute the width of the indentation. The indentation is not measured while inside of
		// a fenced code block
		var indentWidth = 0,
			indentPos   = 0;
		if (m[2] && !codeFence)
		{
			indentStr = m[2];
			indentLen = indentStr.length;

			do
			{
				if (indentStr[indentPos] === ' ')
				{
					++indentWidth;
				}
				else
				{
					// A tab advances the width to the next multiple of 4
					indentWidth = (indentWidth + 4) & ~3;
				}
			}
			// Stop counting as soon as the indentation is wide enough for a code block, which
			// leaves indentPos at the number of characters that were consumed
			while (++indentPos < indentLen && indentWidth < codeIndent);
		}

		// Test whether we're out of a code block
		if (codeTag && !codeFence && indentWidth < codeIndent && !lineIsEmpty)
		{
			newContext = true;
		}

		if (newContext)
		{
			newContext = false;

			// Close the code block if applicable
			if (codeTag)
			{
				if (textBoundary > codeTag.getPos())
				{
					// Overwrite the whole block
					overwrite(codeTag.getPos(), textBoundary - codeTag.getPos());
					codeTag.pairWith(addEndTag('CODE', textBoundary, 0, -1));
				}
				else
				{
					// The code block is empty
					codeTag.invalidate();
				}
				codeTag = null;
				codeFence = null;
			}

			// Close all the lists
			lists.forEach(function(list)
			{
				closeList(list, textBoundary);
			});
			lists    = [];
			listsCnt = 0;

			// Mark the block boundary
			if (matchPos)
			{
				markBoundary(matchPos - 1);
			}
		}

		if (indentWidth >= codeIndent)
		{
			// An indented line only starts a code block if it follows an empty line, but it
			// always continues an existing one
			if (codeTag || !continuation)
			{
				// Adjust the amount of text being ignored
				ignoreLen = (m[1] || '').length + indentPos;

				if (!codeTag)
				{
					// Create code block
					codeTag = addStartTag('CODE', matchPos + ignoreLen, 0, -999);
				}

				// Clear the captures to prevent any further processing
				m = {};
			}
		}
		else if (!codeTag)
		{
			var hasListItem = !!m[4];

			if (!indentWidth && !continuation && !hasListItem)
			{
				// Start of a new context
				listIndex = -1;
			}
			else if (continuation && !hasListItem)
			{
				// Continuation of current list item or paragraph
				listIndex = listsCnt - 1;
			}
			else if (!listsCnt)
			{
				// We're not inside of a list already, we can start one if there's a list item
				listIndex = (hasListItem) ? 0 : -1;
			}
			else
			{
				// We're inside of a list but we need to compute the depth
				listIndex = 0;
				while (listIndex < listsCnt && indentWidth > lists[listIndex].maxIndent)
				{
					++listIndex;
				}
			}

			// Close deeper lists
			while (listIndex < listsCnt - 1)
			{
				closeList(lists.pop(), textBoundary);
				--listsCnt;
			}

			// If there's no list item at current index, we'll need to either create one or
			// drop down to previous index, in which case we have to adjust maxIndent
			if (listIndex === listsCnt && !hasListItem)
			{
				--listIndex;
			}

			if (hasListItem && listIndex >= 0)
			{
				breakParagraph = true;

				// Compute the position and amount of text consumed by the item tag
				tagPos = matchPos + ignoreLen + indentPos;
				tagLen = m[4].length;

				// Create a LI tag that consumes its markup
				var itemTag = addStartTag('LI', tagPos, tagLen);

				// Overwrite the markup
				overwrite(tagPos, tagLen);

				// If the list index is within current lists count it means this is not a new
				// list and we have to close the last item. Otherwise, it's a new list that we
				// have to create
				if (listIndex < listsCnt)
				{
					addEndTag('LI', textBoundary, 0).pairWith(lists[listIndex].itemTag);

					// Record the item in the list
					lists[listIndex].itemTag = itemTag;
					lists[listIndex].itemTags.push(itemTag);
				}
				else
				{
					++listsCnt;

					if (listIndex)
					{
						// A nested list starts right after the indentation of its parent
						minIndent = lists[listIndex - 1].maxIndent + 1;
						maxIndent = Math.max(minIndent, listIndex * 4);
					}
					else
					{
						minIndent = 0;
						maxIndent = indentWidth;
					}

					// Create a 0-width LIST tag right before the item tag LI
					var listTag = addStartTag('LIST', tagPos, 0);

					// Test whether the list item ends with a dot, as in "1."
					if (m[4].indexOf('.') > -1)
					{
						listTag.setAttribute('type', 'decimal');

						// The unary plus converts the marker (digits, a dot and trailing
						// whitespace) to a number
						var start = +m[4];
						if (start !== 1)
						{
							listTag.setAttribute('start', start);
						}
					}

					// Record the new list depth
					lists.push({
						listTag   : listTag,
						itemTag   : itemTag,
						itemTags  : [itemTag],
						minIndent : minIndent,
						maxIndent : maxIndent,
						tight     : true
					});
				}
			}

			// If we're in a list, on a non-empty line preceded with a blank line...
			if (listsCnt && !continuation && !lineIsEmpty)
			{
				// ...and this is not the first item of the list...
				if (lists[0].itemTags.length > 1 || !hasListItem)
				{
					// ...every list that is currently open becomes loose
					lists.forEach(function(list)
					{
						list.tight = false;
					});
				}
			}

			// The indentation required for a code block grows with the depth of the lists
			codeIndent = (listsCnt + 1) * 4;
		}

		if (m[5])
		{
			// Headers
			if (m[5][0] === '#')
			{
				// The start tag consumes the "#" marker at the end of the match while the end tag
				// consumes the optional closing markup found right before the LF character
				startLen = m[5].length;
				startPos = matchPos + matchLen - startLen;
				endLen   = getAtxHeaderEndTagLen(matchPos + matchLen, lfPos);
				endPos   = lfPos - endLen;

				// The number of "#" characters gives the level of the header
				addTagPair('H' + /#{1,6}/.exec(m[5])[0].length, startPos, startLen, endPos, endLen);

				// Mark the start and the end of the header as boundaries
				markBoundary(startPos);
				markBoundary(lfPos);

				if (continuation)
				{
					breakParagraph = true;
				}
			}
			// Code fence
			else if (m[5][0] === '`' || m[5][0] === '~')
			{
				tagPos = matchPos + ignoreLen;
				tagLen = lfPos - tagPos;

				// A fence closes the current code block only if it's identical to the fence that
				// opened it
				if (codeTag && m[5] === codeFence)
				{
					codeTag.pairWith(addEndTag('CODE', tagPos, tagLen, -1));
					addIgnoreTag(textBoundary, tagPos - textBoundary);

					// Overwrite the whole block
					overwrite(codeTag.getPos(), tagPos + tagLen - codeTag.getPos());
					codeTag = null;
					codeFence = null;
				}
				else if (!codeTag)
				{
					// Create code block
					codeTag   = addStartTag('CODE', tagPos, tagLen);

					// Keep only the leading run of backticks or tildes as the fence
					codeFence = m[5].replace(/[^`~]+/, '');
					codeTag.setAttribute('blockDepth', blockDepth);

					// Ignore the next character, which should be a newline
					addIgnoreTag(tagPos + tagLen, 1);

					// Add the language if present, e.g. ```php
					var lang = m[5].replace(/^[`~\s]*/, '').replace(/\s+$/, '');
					if (lang !== '')
					{
						codeTag.setAttribute('lang', lang);
					}
				}
			}
		}
		else if (m[3] && !listsCnt && text[matchPos + matchLen] !== "\x17")
		{
			// Horizontal rule
			addSelfClosingTag('HR', matchPos + ignoreLen, matchLen - ignoreLen);
			breakParagraph = true;

			// Mark the end of the line as a boundary
			markBoundary(lfPos);
		}
		else if (setextLines[lfPos] && setextLines[lfPos].blockDepth === blockDepth && !lineIsEmpty && !listsCnt && !codeTag)
		{
			// Setext-style header. The entry stored at the offset of this line's LF character
			// describes the underline found on the next line
			addTagPair(
				setextLines[lfPos].tagName,
				matchPos + ignoreLen,
				0,
				setextLines[lfPos].endPos,
				setextLines[lfPos].endLen
			);

			// Mark the end of the Setext line
			markBoundary(setextLines[lfPos].endPos + setextLines[lfPos].endLen);
		}

		if (breakParagraph)
		{
			addParagraphBreak(textBoundary);
			markBoundary(textBoundary);
		}

		// Empty lines do not move the boundary, which stays at the end of the last line of text
		if (!lineIsEmpty)
		{
			textBoundary = lfPos;
		}

		// Ignore the block marks and/or the indentation found at the start of the line
		if (ignoreLen)
		{
			addIgnoreTag(matchPos, ignoreLen, 1000);
		}
	});
}

/**
* Close a list at given offset
*
* @param {!Object} list
* @param {number}  textBoundary
*/
function closeList(list, textBoundary)
{
	// Both end tags are zero-width and positioned at the boundary. The LIST end tag is created
	// before the LI end tag
	var listEndTag = addEndTag('LIST', textBoundary, 0);
	listEndTag.pairWith(list.listTag);

	var itemEndTag = addEndTag('LI', textBoundary, 0);
	itemEndTag.pairWith(list.itemTag);

	// Loose lists keep their items untouched
	if (!list.tight)
	{
		return;
	}

	// Items of a tight list lose the flag that makes them create paragraphs
	for (var i = 0; i < list.itemTags.length; ++i)
	{
		list.itemTags[i].removeFlags(RULE_CREATE_PARAGRAPHS);
	}
}

/**
* Compute the amount of text to ignore at the start of a block line
*
* @param  {string} str           Original block markup
* @param  {number} maxBlockDepth Maximum block depth
* @return {number}               Number of characters to ignore
*/
function computeBlockIgnoreLen(str, maxBlockDepth)
{
	// One block mark: optional leading spaces, ">" or ">!", and an optional trailing space
	var blockMark = /^ *>!? ?/,
		unconsumed = str;

	// Strip one leading block mark per level of depth
	for (var level = maxBlockDepth - 1; level >= 0; --level)
	{
		unconsumed = unconsumed.replace(blockMark, '');
	}

	// Whatever was stripped is the part that has to be ignored
	return str.length - unconsumed.length;
}

/**
* Return the length of the markup at the end of an ATX header
*
* @param  {number} startPos Start of the header's text
* @param  {number} endPos   End of the header's text
* @return {number}
*/
function getAtxHeaderEndTagLen(startPos, endPos)
{
	// The closing markup is the longest suffix made of optional whitespace, optional "#"
	// characters and optional whitespace, in that order. It is measured by walking backwards
	// from the end of the content in linear time. An unanchored regexp such as
	// /[ \t]*#*[ \t]*$/ gives the same result but backtracks polynomially on long runs of
	// whitespace that are not at the end of the line
	var content = text.substring(startPos, endPos),
		classes = [' \t', '#', ' \t'],
		pos     = content.length;

	for (var i = 0; i < classes.length; ++i)
	{
		while (pos > 0 && classes[i].indexOf(content.charAt(pos - 1)) > -1)
		{
			--pos;
		}
	}

	return content.length - pos;
}

/**
* Capture and return block marks from given string
*
* @param  {string} str Block markup, composed of ">", "!" and whitespace
* @return {!Array<string>}
*/
function getBlockMarks(str)
{
	// A global match returns every ">" or ">!" in order of appearance, or null if there is none
	var found = String(str).match(/>!?/g);

	return found || [];
}

/**
* Capture and store lines that contain a Setext-tyle header
*/
function matchSetextLines()
{
	// Setext underlines are made of "-" or "=", skip the search if the text contains neither
	if (text.indexOf('-') === -1 && text.indexOf('=') === -1)
	{
		return;
	}

	// Capture any series of - or = alone on a line, optionally preceded with the angle brackets
	// notation used in block markup
	var m, regexp = /^(?=[-=>])(?:>!? ?)*(?=[-=])(?:-+|=+) *$/gm;

	while (m = regexp.exec(text))
	{
		var match    = m[0],
			matchPos = m.index;

		// Compute the position of the end tag. We start on the LF character before the
		// match and keep rewinding until we find a non-space character
		var endPos = matchPos - 1;
		while (endPos > 0 && text[endPos - 1] === ' ')
		{
			--endPos;
		}

		// Store at the offset of the LF character
		setextLines[matchPos - 1] = {
			endLen     : matchPos + match.length - endPos,
			endPos     : endPos,
			blockDepth : match.length - match.replace(/>/g, '').length,
			// The match may start with block marks, so the underline character cannot be read
			// from its first character. The regexp only allows "=" as part of the underline,
			// which means that its presence anywhere in the match identifies a H1
			tagName    : (match.indexOf('=') > -1) ? 'H1' : 'H2'
		};
	}
}