347 lines
12 KiB
PHP
347 lines
12 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * Additional code based on commonmark-java (https://github.com/commonmark/commonmark-java)
 *  - (c) Atlassian Pty Ltd
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */
|
|
|
|
namespace League\CommonMark\Parser;
|
|
|
|
use League\CommonMark\Environment\EnvironmentInterface;
|
|
use League\CommonMark\Event\DocumentParsedEvent;
|
|
use League\CommonMark\Event\DocumentPreParsedEvent;
|
|
use League\CommonMark\Exception\CommonMarkException;
|
|
use League\CommonMark\Input\MarkdownInput;
|
|
use League\CommonMark\Node\Block\Document;
|
|
use League\CommonMark\Node\Block\Paragraph;
|
|
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
|
|
use League\CommonMark\Parser\Block\BlockContinueParserWithInlinesInterface;
|
|
use League\CommonMark\Parser\Block\BlockStart;
|
|
use League\CommonMark\Parser\Block\BlockStartParserInterface;
|
|
use League\CommonMark\Parser\Block\DocumentBlockParser;
|
|
use League\CommonMark\Parser\Block\ParagraphParser;
|
|
use League\CommonMark\Reference\ReferenceInterface;
|
|
use League\CommonMark\Reference\ReferenceMap;
|
|
|
|
final class MarkdownParser implements MarkdownParserInterface
|
|
{
|
|
/**
 * Environment used to read configuration, obtain the block start parsers and dispatch events.
 *
 * @psalm-readonly
 */
private EnvironmentInterface $environment;

/**
 * Value of the 'max_nesting_level' configuration option, re-read at the start of every parse.
 * No new block starts are attempted once the last matched block's depth reaches this value.
 *
 * @psalm-readonly-allow-private-mutation
 */
private int $maxNestingLevel;

/**
 * Reference definitions gathered from paragraphs during the current parse; recreated on every parse.
 *
 * @psalm-readonly-allow-private-mutation
 */
private ReferenceMap $referenceMap;

/**
 * Key of the input line currently being parsed, as yielded by the markdown input.
 *
 * @psalm-readonly-allow-private-mutation
 */
private int $lineNumber = 0;

/**
 * Cursor over the line currently being parsed; replaced for every line.
 *
 * @psalm-readonly-allow-private-mutation
 */
private Cursor $cursor;

/**
 * Stack of currently open block parsers: the document parser sits at index 0, the innermost one is last.
 *
 * @var array<int, BlockContinueParserInterface>
 *
 * @psalm-readonly-allow-private-mutation
 */
private array $activeBlockParsers = [];

/**
 * Block parsers that have been closed and still need their inline content parsed.
 *
 * @var array<int, BlockContinueParserWithInlinesInterface>
 *
 * @psalm-readonly-allow-private-mutation
 */
private array $closedBlockParsers = [];
|
|
|
|
/**
 * Stores the environment this parser works with.
 *
 * All per-document state is set up later, when parse() is called.
 *
 * @param EnvironmentInterface $environment Source of configuration, block start parsers and event dispatching
 */
public function __construct(EnvironmentInterface $environment)
{
    $this->environment = $environment;
}
|
|
|
|
/**
 * Resets all per-document state so the same parser instance can be used for another parse.
 */
private function initialize(): void
{
    // Start with empty parser stacks and a fresh reference map
    $this->activeBlockParsers = [];
    $this->closedBlockParsers = [];
    $this->lineNumber         = 0;
    $this->referenceMap       = new ReferenceMap();

    // The nesting limit is looked up on every parse rather than once in the constructor
    $configuration         = $this->environment->getConfiguration();
    $this->maxNestingLevel = $configuration->get('max_nesting_level');
}
|
|
|
|
/**
 * Parses the given markdown text into a Document node.
 *
 * The steps are: reset the parser state, open the document block, dispatch a DocumentPreParsedEvent,
 * parse every input line, close all blocks that are still open, parse the inline content of the closed
 * blocks, and finally dispatch a DocumentParsedEvent.
 *
 * @param string $input Raw markdown text
 *
 * @return Document The root node of the parsed document
 *
 * @throws CommonMarkException
 */
public function parse(string $input): Document
{
    $this->initialize();

    // The document block parser is always the bottom entry of the active parser stack
    $rootParser = new DocumentBlockParser($this->referenceMap);
    $this->activateBlockParser($rootParser);
    $document = $rootParser->getBlock();

    // The markdown is read back from the event after dispatching, not taken from $input directly
    $preParsed = new DocumentPreParsedEvent($document, new MarkdownInput($input));
    $this->environment->dispatch($preParsed);
    $lines = $preParsed->getMarkdown()->getLines();

    foreach ($lines as $number => $text) {
        $this->lineNumber = $number;
        $this->parseLine($text);
    }

    // Close whatever is still open (including the document itself), then handle the inline content
    $this->closeBlockParsers(\count($this->activeBlockParsers), $this->lineNumber);
    $this->processInlines();

    $this->environment->dispatch(new DocumentParsedEvent($document));

    return $document;
}
|
|
|
|
/**
 * Processes a single line of input and updates the block structure accordingly.
 *
 * The line is handled in three phases: first the already-open blocks are asked whether they continue
 * on this line, then new block starts are tried at the current cursor position, and finally whatever
 * text remains is added to the appropriate block.
 *
 * @param string $line One line of the markdown input
 */
private function parseLine(string $line): void
{
    $this->cursor = new Cursor($line);

    $matchedCount = $this->parseBlockContinuation();
    if ($matchedCount === null) {
        // An open block asked to be finalized on this line; the line needs no further handling
        return;
    }

    // Open blocks that did not match this line; they get closed once we know it is not a lazy continuation
    $pendingClosures = \count($this->activeBlockParsers) - $matchedCount;
    $currentParser   = $this->activeBlockParsers[$matchedCount - 1];
    $openedNewBlock  = false;

    // New block starts are only attempted when the last matched block is a paragraph or a container
    $keepTrying = $currentParser->getBlock() instanceof Paragraph || $currentParser->isContainer();
    while ($keepTrying) {
        // Shortcut: nothing can start on a blank remainder
        if ($this->cursor->isBlank()) {
            $this->cursor->advanceToEnd();
            break;
        }

        // Do not nest any deeper than the configured limit
        if ($currentParser->getBlock()->getDepth() >= $this->maxNestingLevel) {
            break;
        }

        $blockStart = $this->findBlockStart($currentParser);
        if ($blockStart === null || $blockStart->isAborting()) {
            $this->cursor->advanceToNextNonSpaceOrTab();
            break;
        }

        $cursorState = $blockStart->getCursorState();
        if ($cursorState !== null) {
            $this->cursor->restoreState($cursorState);
        }

        $openedNewBlock = true;

        // A new block is starting, so blocks that failed to match ended on the previous line
        if ($pendingClosures > 0) {
            $this->closeBlockParsers($pendingClosures, $this->lineNumber - 1);
            $pendingClosures = 0;
        }

        if ($blockStart->isReplaceActiveBlockParser()) {
            $this->prepareActiveBlockParserForReplacement();
        }

        // Only keep looking for nested starts while the most recently added block is a container
        foreach ($blockStart->getBlockParsers() as $childParser) {
            $currentParser = $this->addChild($childParser);
            $keepTrying    = $childParser->isContainer();
        }
    }

    // What remains at the cursor is text. First check for a lazy continuation line:
    $isLazyContinuation = ! $openedNewBlock
        && ! $this->cursor->isBlank()
        && $this->getActiveBlockParser()->canHaveLazyContinuationLines();

    if ($isLazyContinuation) {
        $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());

        return;
    }

    // Not a lazy continuation: the unmatched blocks really are finished
    if ($pendingClosures > 0) {
        $this->closeBlockParsers($pendingClosures, $this->lineNumber);
    }

    if ($currentParser->isContainer()) {
        if ($this->cursor->isBlank()) {
            // Blank remainder inside a container: nothing to add
            return;
        }

        // Text inside a container goes into a new paragraph
        $this->addChild(new ParagraphParser());
    }

    $this->getActiveBlockParser()->addLine($this->cursor->getRemainder());
}
|
|
|
|
/**
 * Asks each open block parser, outermost first, whether its block continues on the current line.
 *
 * @return int|null Number of open block parsers that matched the line (the document always counts as
 *                  one), or null when a block parser asked to be finalized, in which case that parser
 *                  and everything nested inside it has already been closed
 */
private function parseBlockContinuation(): ?int
{
    // The document parser at index 0 always matches, so counting and probing both start at 1.
    // The counter doubles as the index of the next parser to probe.
    $matched = 1;

    while ($matched < \count($this->activeBlockParsers)) {
        $candidate    = $this->activeBlockParsers[$matched];
        $continuation = $candidate->tryContinue(clone $this->cursor, $this->getActiveBlockParser());

        if ($continuation === null) {
            // This block does not continue here, so nothing nested inside it is probed either
            break;
        }

        if ($continuation->isFinalize()) {
            // Close this parser together with everything above it on the stack
            $this->closeBlockParsers(\count($this->activeBlockParsers) - $matched, $this->lineNumber);

            return null;
        }

        // The probe ran on a clone; adopt its cursor position if one was reported
        $cursorState = $continuation->getCursorState();
        if ($cursorState !== null) {
            $this->cursor->restoreState($cursorState);
        }

        $matched++;
    }

    return $matched;
}
|
|
|
|
/**
 * Offers the current cursor position to each registered block start parser and returns the first result.
 *
 * @param BlockContinueParserInterface $lastMatchedBlockParser The innermost block parser that matched the current line
 *
 * @return BlockStart|null The first non-null result, or null if no block start parser returned one
 */
private function findBlockStart(BlockContinueParserInterface $lastMatchedBlockParser): ?BlockStart
{
    $parserState = new MarkdownParserState($this->getActiveBlockParser(), $lastMatchedBlockParser);

    foreach ($this->environment->getBlockStartParsers() as $startParser) {
        \assert($startParser instanceof BlockStartParserInterface);

        // Each attempt works on its own clone, so a failed attempt leaves the real cursor untouched
        $blockStart = $startParser->tryStart(clone $this->cursor, $parserState);
        if ($blockStart === null) {
            continue;
        }

        return $blockStart;
    }

    return null;
}
|
|
|
|
/**
 * Closes the given number of block parsers, starting with the innermost active one.
 *
 * @param int $count         How many block parsers to pop off the active stack
 * @param int $endLineNumber Line number recorded as the end line of every closed block
 */
private function closeBlockParsers(int $count, int $endLineNumber): void
{
    for ($remaining = $count; $remaining > 0; $remaining--) {
        $closedParser = $this->deactivateBlockParser();
        $this->finalize($closedParser, $endLineNumber);

        // phpcs:disable SlevomatCodingStandard.ControlStructures.EarlyExit.EarlyExitNotUsed
        if ($closedParser instanceof BlockContinueParserWithInlinesInterface) {
            // Keep it around so its inline content can be parsed once all blocks are known
            $this->closedBlockParsers[] = $closedParser;
        }
    }
}
|
|
|
|
/**
 * Finalizes a single block: records where it ends and lets its parser close it.
 *
 * For paragraphs, the reference definitions found by the paragraph parser are merged into the
 * reference map first. Any further post-processing is left to the parser's own closeBlock().
 *
 * @param BlockContinueParserInterface $blockParser   Parser of the block being finalized
 * @param int                          $endLineNumber Line number to record as the block's end line
 */
private function finalize(BlockContinueParserInterface $blockParser, int $endLineNumber): void
{
    $isParagraph = $blockParser instanceof ParagraphParser;
    if ($isParagraph) {
        $this->updateReferenceMap($blockParser->getReferences());
    }

    $block = $blockParser->getBlock();
    $block->setEndLine($endLineNumber);

    $blockParser->closeBlock();
}
|
|
|
|
/**
 * Runs inline parsing for every closed block parser that was remembered for it.
 *
 * All of them share one inline parser engine, built from the environment and the reference map
 * collected during block parsing.
 */
private function processInlines(): void
{
    $inlineEngine = new InlineParserEngine($this->environment, $this->referenceMap);

    foreach ($this->closedBlockParsers as $parserWithInlines) {
        $parserWithInlines->parseInlines($inlineEngine);
    }
}
|
|
|
|
/**
 * Attaches the block of the given parser to the innermost active block able to contain it.
 *
 * Active block parsers that cannot contain the new block are closed one at a time, with the previous
 * line as their end line, until one is found that can. The new parser then becomes the active one.
 *
 * @param BlockContinueParserInterface $blockParser Parser of the block being added
 *
 * @return BlockContinueParserInterface The same parser that was passed in
 */
private function addChild(BlockContinueParserInterface $blockParser): BlockContinueParserInterface
{
    $newBlock = $blockParser->getBlock();
    $newBlock->setStartLine($this->lineNumber);

    // Walk up the stack until some block accepts the new one as a child
    while (! $this->getActiveBlockParser()->canContain($newBlock)) {
        $this->closeBlockParsers(1, $this->lineNumber - 1);
    }

    $parentBlock = $this->getActiveBlockParser()->getBlock();
    $parentBlock->appendChild($newBlock);

    $this->activateBlockParser($blockParser);

    return $blockParser;
}
|
|
|
|
/**
 * Pushes a block parser onto the stack of active parsers, making it the innermost one.
 *
 * @param BlockContinueParserInterface $blockParser Parser to make active
 */
private function activateBlockParser(BlockContinueParserInterface $blockParser): void
{
    \array_push($this->activeBlockParsers, $blockParser);
}
|
|
|
|
/**
 * Pops the innermost block parser off the stack of active parsers.
 *
 * @return BlockContinueParserInterface The parser that was removed
 *
 * @throws ParserLogicException if there is no active block parser left to remove
 */
private function deactivateBlockParser(): BlockContinueParserInterface
{
    $innermost = \array_pop($this->activeBlockParsers);

    // array_pop() yields null when the stack was already empty
    if ($innermost !== null) {
        return $innermost;
    }

    throw new ParserLogicException('The last block parser should not be deactivated');
}
|
|
|
|
/**
 * Removes the innermost active block parser and detaches its block because a new block replaces it.
 *
 * The removed block is deliberately neither finalized nor queued for inline parsing. If it was a
 * paragraph, its reference definitions are still merged into the reference map.
 */
private function prepareActiveBlockParserForReplacement(): void
{
    $replacedParser = $this->deactivateBlockParser();

    if ($replacedParser instanceof ParagraphParser) {
        $references = $replacedParser->getReferences();
        $this->updateReferenceMap($references);
    }

    $replacedParser->getBlock()->detach();
}
|
|
|
|
/**
 * Adds the given references to the reference map, skipping labels the map already contains.
 *
 * An earlier definition is therefore never overwritten by a later one with the same label.
 *
 * @param ReferenceInterface[] $references
 */
private function updateReferenceMap(iterable $references): void
{
    foreach ($references as $candidate) {
        $alreadyDefined = $this->referenceMap->contains($candidate->getLabel());
        if ($alreadyDefined) {
            continue;
        }

        $this->referenceMap->add($candidate);
    }
}
|
|
|
|
/**
 * Returns the innermost block parser that is currently active.
 *
 * @return BlockContinueParserInterface The last entry of the active parser stack
 *
 * @throws ParserLogicException if no block parser is active
 */
public function getActiveBlockParser(): BlockContinueParserInterface
{
    $activeCount = \count($this->activeBlockParsers);

    if ($activeCount === 0) {
        throw new ParserLogicException('No active block parsers are available');
    }

    // The stack is only ever appended to and popped from, so its keys run from 0 to count - 1
    return $this->activeBlockParsers[$activeCount - 1];
}
|
|
}
|