Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/Parser/Cursor.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class Cursor
/**
* @param string $line The line being parsed (ASCII or UTF-8)
*/
public function __construct(string $line)
public function __construct(string $line, bool $lineCouldHaveTabs = true)
{
if (! \mb_check_encoding($line, 'UTF-8')) {
throw new UnexpectedEncodingException('Unexpected encoding - UTF-8 or ASCII was expected');
Expand All @@ -68,7 +68,9 @@ public function __construct(string $line)
$this->line = $line;
$this->length = \mb_strlen($line, 'UTF-8') ?: 0;
$this->isMultibyte = $this->length !== \strlen($line);
$this->lastTabPosition = $this->isMultibyte ? \mb_strrpos($line, "\t", 0, 'UTF-8') : \strrpos($line, "\t");
$this->lastTabPosition = $lineCouldHaveTabs
? ($this->isMultibyte ? \mb_strrpos($line, "\t", 0, 'UTF-8') : \strrpos($line, "\t"))
: false;
}

/**
Expand Down
7 changes: 6 additions & 1 deletion src/Parser/MarkdownParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ final class MarkdownParser implements MarkdownParserInterface
/** @psalm-readonly-allow-private-mutation */
private Cursor $cursor;

/** @psalm-readonly-allow-private-mutation */
private bool $documentHasTabs = false;

/**
* @var array<int, BlockContinueParserInterface>
*
Expand Down Expand Up @@ -96,6 +99,8 @@ public function parse(string $input): Document
$this->environment->dispatch($preParsedEvent);
$markdownInput = $preParsedEvent->getMarkdown();

$this->documentHasTabs = \str_contains($markdownInput->getContent(), "\t");

foreach ($markdownInput->getLines() as $lineNumber => $line) {
$this->lineNumber = $lineNumber;
$this->parseLine($line);
Expand All @@ -119,7 +124,7 @@ private function parseLine(string $line): void
// replace NUL characters for security
$line = \str_replace("\0", "\u{FFFD}", $line);

$this->cursor = new Cursor($line);
$this->cursor = new Cursor($line, $this->documentHasTabs);

$matches = $this->parseBlockContinuation();
if ($matches === null) {
Expand Down
2 changes: 1 addition & 1 deletion tests/benchmark/benchmark.php
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@
}
};

if (extension_loaded('xdebug')) {
if (extension_loaded('xdebug') && getenv('XDEBUG_MODE') !== 'off') {
fwrite(STDERR, 'The xdebug extension is loaded, this can significantly skew benchmarks. Disable it for accurate results. For xdebug 3, prefix your command with "XDEBUG_MODE=off"' . PHP_EOL . PHP_EOL);
}

Expand Down
128 changes: 128 additions & 0 deletions tests/benchmark/benchmark_parse.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#!/usr/bin/env php
<?php

/*
* This file is part of the league/commonmark package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

/**
* Warm benchmark: measures only parsing, not converter initialization.
*
* Usage:
* php tests/benchmark/benchmark_parse.php [--iterations N] [--warmup N] [--md file]
*
* Defaults: 100 iterations, 10 warmup rounds, and tests/benchmark/sample.md as the only input file.
*/
require_once __DIR__ . '/../../vendor/autoload.php';

use League\CommonMark\CommonMarkConverter;

// Emit a warning on STDERR when the xdebug extension is loaded and the
// XDEBUG_MODE environment variable is anything other than "off".
$xdebugIsActive = extension_loaded('xdebug') && getenv('XDEBUG_MODE') !== 'off';

if ($xdebugIsActive) {
    fwrite(
        STDERR,
        'Xdebug is active — results will be skewed. Use XDEBUG_MODE=off.' . PHP_EOL . PHP_EOL
    );
}

// --- Argument parsing ---

/**
 * Parses the command-line arguments accepted by this script.
 *
 * Recognised options are --iterations N, --warmup N and --md file (repeatable).
 * Every option requires a value. --iterations must be an integer >= 1 because
 * the statistics computed later divide by the number of timed samples;
 * --warmup must be an integer >= 0.
 *
 * @param array<int, string> $arguments Raw argument vector; element 0 (the script name) is ignored
 *
 * @return array{iterations: int, warmup: int, files: array<int, string>} Parsed options; `files` is empty when no --md was given
 *
 * @throws InvalidArgumentException on an unknown option, a missing value, or an invalid number
 */
function parseBenchmarkArguments(array $arguments): array
{
    $options = ['iterations' => 100, 'warmup' => 10, 'files' => []];
    $count   = \count($arguments);

    for ($i = 1; $i < $count; $i++) {
        $option = $arguments[$i];

        if (! \in_array($option, ['--iterations', '--warmup', '--md'], true)) {
            throw new InvalidArgumentException("Unknown option: {$option}");
        }

        // Every option consumes the argument that follows it.
        $value = $arguments[++$i] ?? null;
        if ($value === null || $value === '') {
            throw new InvalidArgumentException("Missing value for {$option}");
        }

        if ($option === '--md') {
            $options['files'][] = $value;
            continue;
        }

        // A plain (int) cast would silently turn "abc" into 0, so validate explicitly.
        $minimum = $option === '--iterations' ? 1 : 0;
        $number  = \filter_var($value, FILTER_VALIDATE_INT, ['options' => ['min_range' => $minimum]]);
        if ($number === false) {
            throw new InvalidArgumentException("{$option} expects an integer >= {$minimum}, got: {$value}");
        }

        // '--iterations' -> 'iterations', '--warmup' -> 'warmup'
        $options[\ltrim($option, '-')] = $number;
    }

    return $options;
}

try {
    [
        'iterations' => $iterations,
        'warmup'     => $warmup,
        'files'      => $files,
    ] = parseBenchmarkArguments($_SERVER['argv'] ?? []);
} catch (InvalidArgumentException $e) {
    fwrite(STDERR, $e->getMessage() . PHP_EOL);
    fwrite(STDERR, 'Usage: benchmark_parse.php [--iterations N] [--warmup N] [--md file]' . PHP_EOL);
    exit(1);
}

// Fall back to the sample document next to this script when no --md was given.
if ($files === []) {
    $files = [
        __DIR__ . '/sample.md',
    ];
}

// --- Benchmark runner ---

/**
 * Times repeated invocations of a callable and summarises the samples.
 *
 * The callable is first invoked $warmup times without being timed, then
 * $iterations times with each call measured via hrtime().
 *
 * @param callable $fn         Code under measurement; called with no arguments
 * @param int      $warmup     Number of untimed calls made first (values below 1 mean none)
 * @param int      $iterations Number of timed calls; must be at least 1
 *
 * @return array{min: float, median: float, p95: float, mean: float} Durations in milliseconds
 *
 * @throws InvalidArgumentException if $iterations is less than 1
 */
function bench(callable $fn, int $warmup, int $iterations): array
{
    if ($iterations < 1) {
        // With no samples the mean would divide by zero and min/p95 would read missing offsets.
        throw new InvalidArgumentException('bench() needs at least one timed iteration, got ' . $iterations);
    }

    for ($i = 0; $i < $warmup; $i++) {
        $fn();
    }

    $times = [];
    for ($i = 0; $i < $iterations; $i++) {
        $t0 = \hrtime(true);
        $fn();
        $times[] = (\hrtime(true) - $t0) / 1e6; // nanoseconds -> milliseconds
    }

    \sort($times);

    $count  = \count($times);
    $middle = \intdiv($count, 2);

    // Even sample count: average the two central values; odd: take the central one.
    $median = $count % 2 === 0
        ? ($times[$middle - 1] + $times[$middle]) / 2
        : $times[$middle];

    // Nearest-rank 95th percentile, i.e. ceil(0.95 * count), using integer arithmetic.
    $p95Rank = \intdiv($count * 95 + 99, 100);

    return [
        'min'    => $times[0],
        'median' => $median,
        'p95'    => $times[$p95Rank - 1],
        'mean'   => \array_sum($times) / $count,
    ];
}

// --- Run ---

printf(
    'Warm benchmark — %d iterations (+%d warmup)%s%s',
    $iterations,
    $warmup,
    PHP_EOL,
    PHP_EOL
);

// Each statistic in a result row is rendered as "%7.2f ms" (10 characters), so
// the header columns use width 10 and the rule spans 52 + 4 * 11 = 96 characters.
printf("%-52s %10s %10s %10s %10s%s", 'File', 'min', 'median', 'p95', 'mean', PHP_EOL);
printf("%s%s", \str_repeat('-', 96), PHP_EOL);

// One converter instance is shared by all runs so that only convert() is timed.
$converter = new CommonMarkConverter();

foreach ($files as $file) {
    if (! \is_file($file)) {
        fwrite(STDERR, "File not found: {$file}" . PHP_EOL);
        continue;
    }

    $markdown = \file_get_contents($file);
    if ($markdown === false) {
        // Skip instead of benchmarking a value coerced from false to an empty string.
        fwrite(STDERR, "Unable to read file: {$file}" . PHP_EOL);
        continue;
    }

    $size  = \round(\strlen($markdown) / 1024, 1);
    $label = \basename($file) . " ({$size} KB)";

    $stats = bench(
        static fn () => $converter->convert($markdown),
        $warmup,
        $iterations
    );

    printf(
        "%-52s %7.2f ms %7.2f ms %7.2f ms %7.2f ms%s",
        $label,
        $stats['min'],
        $stats['median'],
        $stats['p95'],
        $stats['mean'],
        PHP_EOL
    );
}
16 changes: 16 additions & 0 deletions tests/unit/Parser/CursorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,22 @@ public function testConstructorWithInvalidUTF8(): void
new Cursor(\hex2bin('A5A5A5'));
}

public function testConstructorWithLineCouldHaveTabsFalse(): void
{
    // Passing false as the second constructor argument tells the cursor the
    // line holds no tabs; advancing must still report the correct position
    // and column afterwards.
    $tabFreeCursor = new Cursor('Hello world', false);

    $this->assertSame('Hello world', $tabFreeCursor->getLine());

    // First step: 5 characters in.
    $tabFreeCursor->advanceBy(5);
    $this->assertSame(5, $tabFreeCursor->getPosition());
    $this->assertSame(5, $tabFreeCursor->getColumn());

    // Second step: 3 more characters, 8 in total.
    $tabFreeCursor->advanceBy(3);
    $this->assertSame(8, $tabFreeCursor->getPosition());
    $this->assertSame(8, $tabFreeCursor->getColumn());
}

/**
* @dataProvider dataForTestingNextNonSpaceMethods
*/
Expand Down