Skip to content

Commit 733b51f

Browse files
authored
Merge pull request #952 from thephpleague/fix-mbstring-usage
Fix mbstring calls without explicit encoding
2 parents 8e37e27 + 9f6b98a commit 733b51f

16 files changed

+169
-15
lines changed

.github/workflows/backwards-compatibility.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,13 @@ jobs:
1818
with:
1919
fetch-depth: 0
2020

21+
# User in the container seems to be different than the cloned repo's owner.
22+
# Git doesn't like that as the repo will then be unusable by the owner.
23+
# We don't care about this here since this is only used for running one test.
24+
# See https://github.com/actions/runner/issues/2033
25+
- name: Workaround directory permissions
26+
run: mkdir -p /home/runner/work/_temp/_github_home && printf "[safe]\n\tdirectory = /github/workspace" > /home/runner/work/_temp/_github_home/.gitconfig
27+
2128
- name: "BC Check"
2229
uses: docker://nyholm/roave-bc-check-ga
2330
with:

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi
66

77
## [Unreleased][unreleased]
88

9+
### Fixed
10+
11+
- Fixed parsing issues when `mb_internal_encoding()` is set to something other than `UTF-8` (#951)
12+
913
## [2.3.7] - 2022-11-03
1014

1115
### Fixed

composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@
9494
"autoload-dev": {
9595
"psr-4": {
9696
"League\\CommonMark\\Tests\\Unit\\": "tests/unit",
97-
"League\\CommonMark\\Tests\\Functional\\": "tests/functional"
97+
"League\\CommonMark\\Tests\\Functional\\": "tests/functional",
98+
"League\\CommonMark\\Tests\\PHPStan\\": "tests/phpstan"
9899
}
99100
},
100101
"scripts": {

phpstan.neon.dist

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,5 @@ parameters:
77
message: '#Parameter .+ of class .+Reference constructor expects string, string\|null given#'
88
- path: src/Util/RegexHelper.php
99
message: '#Method .+RegexHelper::unescape\(\) should return string but returns string\|null#'
10+
rules:
11+
- 'League\CommonMark\Tests\PHPStan\MbstringFunctionCallRule'

src/Extension/Autolink/UrlAutolinkParser.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ public function parse(InlineParserContext $inlineContext): bool
105105
$url = \substr($url, 0, -$diff);
106106
}
107107

108-
$cursor->advanceBy(\mb_strlen($url));
108+
$cursor->advanceBy(\mb_strlen($url, 'UTF-8'));
109109

110110
// Auto-prefix 'http://' onto 'www' URLs
111111
if (\substr($url, 0, 4) === 'www.') {

src/Extension/Footnote/Renderer/FootnoteBackrefRenderer.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ public function render(Node $node, ChildNodeRendererInterface $childRenderer): s
4444

4545
$attrs->append('class', $this->config->get('footnote/backref_class'));
4646
$attrs->set('rev', 'footnote');
47-
$attrs->set('href', \mb_strtolower($node->getReference()->getDestination()));
47+
$attrs->set('href', \mb_strtolower($node->getReference()->getDestination(), 'UTF-8'));
4848
$attrs->set('role', 'doc-backlink');
4949

5050
$symbol = $this->config->get('footnote/backref_symbol');

src/Extension/Footnote/Renderer/FootnoteRefRenderer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ public function render(Node $node, ChildNodeRendererInterface $childRenderer): \
4040

4141
$attrs = $node->data->getData('attributes');
4242
$attrs->append('class', $this->config->get('footnote/ref_class'));
43-
$attrs->set('href', \mb_strtolower($node->getReference()->getDestination()));
43+
$attrs->set('href', \mb_strtolower($node->getReference()->getDestination(), 'UTF-8'));
4444
$attrs->set('role', 'doc-noteref');
4545

4646
$idPrefix = $this->config->get('footnote/ref_id_prefix');
4747

4848
return new HtmlElement(
4949
'sup',
5050
[
51-
'id' => $idPrefix . \mb_strtolower($node->getReference()->getLabel()),
51+
'id' => $idPrefix . \mb_strtolower($node->getReference()->getLabel(), 'UTF-8'),
5252
],
5353
new HtmlElement(
5454
'a',

src/Extension/Footnote/Renderer/FootnoteRenderer.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ public function render(Node $node, ChildNodeRendererInterface $childRenderer): \
4141
$attrs = $node->data->getData('attributes');
4242

4343
$attrs->append('class', $this->config->get('footnote/footnote_class'));
44-
$attrs->set('id', $this->config->get('footnote/footnote_id_prefix') . \mb_strtolower($node->getReference()->getLabel()));
44+
$attrs->set('id', $this->config->get('footnote/footnote_id_prefix') . \mb_strtolower($node->getReference()->getLabel(), 'UTF-8'));
4545
$attrs->set('role', 'doc-endnote');
4646

4747
return new HtmlElement(

src/Normalizer/SlugNormalizer.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ public function normalize(string $text, array $context = []): string
4141
// Trim whitespace
4242
$slug = \trim($slug);
4343
// Convert to lowercase
44-
$slug = \mb_strtolower($slug);
44+
$slug = \mb_strtolower($slug, 'UTF-8');
4545
// Try replacing whitespace with a dash
4646
$slug = \preg_replace('/\s+/u', '-', $slug) ?? $slug;
4747
// Try removing characters other than letters, numbers, and marks.
4848
$slug = \preg_replace('/[^\p{L}\p{Nd}\p{Nl}\p{M}-]+/u', '', $slug) ?? $slug;
4949
// Trim to requested length if given
5050
if ($length = $context['length'] ?? $this->defaultMaxLength) {
51-
$slug = \mb_substr($slug, 0, $length);
51+
$slug = \mb_substr($slug, 0, $length, 'UTF-8');
5252
}
5353

5454
return $slug;

src/Parser/InlineParserContext.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public function getFullMatch(): string
8383
*/
8484
public function getFullMatchLength(): int
8585
{
86-
return \mb_strlen($this->matches[0]);
86+
return \mb_strlen($this->matches[0], 'UTF-8');
8787
}
8888

8989
/**

src/Parser/InlineParserEngine.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public function __construct(EnvironmentInterface $environment, ReferenceMapInter
5050
\assert($parser instanceof InlineParserInterface);
5151
$regex = $parser->getMatchDefinition()->getRegex();
5252

53-
$this->parsers[] = [$parser, $regex, \strlen($regex) !== \mb_strlen($regex)];
53+
$this->parsers[] = [$parser, $regex, \strlen($regex) !== \mb_strlen($regex, 'UTF-8')];
5454
}
5555
}
5656

@@ -134,7 +134,7 @@ private function addPlainText(string $text, AbstractBlock $container): void
134134
private function matchParsers(string $contents): array
135135
{
136136
$contents = \trim($contents);
137-
$isMultibyte = \mb_strlen($contents) !== \strlen($contents);
137+
$isMultibyte = \mb_strlen($contents, 'UTF-8') !== \strlen($contents);
138138

139139
$ret = [];
140140

src/Reference/ReferenceParser.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ private function parseLabel(Cursor $cursor): bool
181181
$cursor->advance();
182182

183183
// spec: A link label can have at most 999 characters inside the square brackets
184-
if (\mb_strlen($this->label, 'utf-8') > 999) {
184+
if (\mb_strlen($this->label, 'UTF-8') > 999) {
185185
return false;
186186
}
187187

src/Util/LinkParserHelper.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ public static function parseLinkLabel(Cursor $cursor): int
5858
return 0;
5959
}
6060

61-
$length = \mb_strlen($match, 'utf-8');
61+
$length = \mb_strlen($match, 'UTF-8');
6262

6363
if ($length > 1001) {
6464
return 0;

src/Util/RegexHelper.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,13 @@ public static function isLetter(?string $character): bool
104104
public static function matchAt(string $regex, string $string, int $offset = 0): ?int
105105
{
106106
$matches = [];
107-
$string = \mb_substr($string, $offset, null, 'utf-8');
107+
$string = \mb_substr($string, $offset, null, 'UTF-8');
108108
if (! \preg_match($regex, $string, $matches, \PREG_OFFSET_CAPTURE)) {
109109
return null;
110110
}
111111

112112
// PREG_OFFSET_CAPTURE always returns the byte offset, not the char offset, which is annoying
113-
$charPos = \mb_strlen(\mb_strcut($string, 0, $matches[0][1], 'utf-8'), 'utf-8');
113+
$charPos = \mb_strlen(\mb_strcut($string, 0, $matches[0][1], 'UTF-8'), 'UTF-8');
114114

115115
return $offset + $charPos;
116116
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* This file is part of the league/commonmark package.
7+
*
8+
* (c) Colin O'Dell <colinodell@gmail.com>
9+
*
10+
* For the full copyright and license information, please view the LICENSE
11+
* file that was distributed with this source code.
12+
*/
13+
14+
namespace League\CommonMark\Tests\PHPStan;
15+
16+
use PHPStan\Analyser\Scope;
17+
use PHPStan\Rules\Rule;
18+
use PhpParser\Node;
19+
20+
/**
 * Custom phpstan rule that:
 *
 * 1. Disallows the use of certain mbstring functions that could be problematic
 * 2. Requires an explicit encoding be provided to all `mb_*()` functions that support it
 *
 * Function names are matched case-insensitively, mirroring how PHP itself resolves them.
 */
final class MbstringFunctionCallRule implements Rule
{
    /**
     * Lower-case names of mbstring functions that are rejected outright.
     *
     * @var string[]
     */
    private array $disallowedFunctionsThatAlterGlobalSettings = [
        'mb_internal_encoding',
        'mb_regex_encoding',
        'mb_detect_order',
        'mb_language',
    ];

    /**
     * Memoised result of getEncodingParamPosition(), keyed by lower-case function name.
     * A null value is a valid cached answer meaning "no $encoding parameter found".
     *
     * @var array<string, int|null>
     */
    private array $encodingParamPositionCache = [];

    /**
     * Tells PHPStan that this rule only wants to receive function-call nodes.
     */
    public function getNodeType(): string
    {
        return Node\Expr\FuncCall::class;
    }

    /**
     * Inspects a single function call and reports mbstring misuse.
     *
     * @param Node  $node  The node being analysed; anything other than a FuncCall is ignored
     * @param Scope $scope Unused by this rule
     *
     * @return string[] Error messages for this call (empty when the call is acceptable)
     */
    public function processNode(Node $node, Scope $scope): array
    {
        if (! $node instanceof Node\Expr\FuncCall) {
            return [];
        }

        // Dynamic calls such as $fn() have an expression instead of a name; nothing to check
        if (! $node->name instanceof Node\Name) {
            return [];
        }

        // PHP resolves function names case-insensitively, so normalise before comparing;
        // otherwise MB_STRLEN() or Mb_Internal_Encoding() would slip past this rule.
        $functionName = $node->name->toLowerString();
        if (! \str_starts_with($functionName, 'mb_')) {
            return [];
        }

        if (\in_array($functionName, $this->disallowedFunctionsThatAlterGlobalSettings, true)) {
            return [\sprintf('Use of %s() is not allowed in this library because it alters global settings', $functionName)];
        }

        $encodingParamPosition = $this->getEncodingParamPosition($functionName);
        if ($encodingParamPosition === null) {
            return [];
        }

        $arg = $node->args[$encodingParamPosition] ?? null;
        if ($arg === null) {
            return [\sprintf('%s() is missing the $encoding param (should be "UTF-8")', $functionName)];
        }

        // Anything that is not a plain argument node cannot be inspected here
        if (! $arg instanceof Node\Arg) {
            return [];
        }

        $encodingArg = $arg->value;
        if (! ($encodingArg instanceof Node\Scalar\String_)) {
            return [\sprintf('%s() must define the $encoding as "UTF-8"', $functionName)];
        }

        // Deliberately strict: only the exact spelling "UTF-8" is accepted
        if ($encodingArg->value !== 'UTF-8') {
            return [\sprintf('%s() must define the $encoding as "UTF-8", not "%s"', $functionName, $encodingArg->value)];
        }

        return [];
    }

    /**
     * Finds the zero-based position of the parameter named "encoding" in the given function.
     *
     * @param string $function Function name as it appears at the call site
     *
     * @return int|null The parameter position, or null when the function is not defined
     *                  in the running PHP process or has no parameter named "encoding"
     */
    private function getEncodingParamPosition(string $function): ?int
    {
        // array_key_exists() instead of isset(): isset() is false for a stored null,
        // which would defeat the cache for every function lacking an $encoding parameter.
        if (\array_key_exists($function, $this->encodingParamPositionCache)) {
            return $this->encodingParamPositionCache[$function];
        }

        $encodingParamPosition = null;

        // ReflectionFunction throws for unknown functions; a call that merely starts with
        // "mb_" (user-defined or namespaced helper, function missing from this PHP build)
        // must not abort the whole analysis.
        if (\function_exists($function)) {
            $reflection = new \ReflectionFunction($function);

            foreach ($reflection->getParameters() as $i => $param) {
                if ($param->getName() === 'encoding') {
                    $encodingParamPosition = $i;
                    break;
                }
            }
        }

        $this->encodingParamPositionCache[$function] = $encodingParamPosition;

        return $encodingParamPosition;
    }
}

tests/unit/Parser/InlineParserEngineTest.php

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
namespace League\CommonMark\Tests\Unit\Parser;
1515

1616
use League\CommonMark\Environment\Environment;
17+
use League\CommonMark\Extension\CommonMark\Node\Inline\Link;
18+
use League\CommonMark\Extension\CommonMark\Parser\Inline\CloseBracketParser;
19+
use League\CommonMark\Extension\CommonMark\Parser\Inline\OpenBracketParser;
1720
use League\CommonMark\Node\Block\Paragraph;
1821
use League\CommonMark\Node\Inline\Text;
1922
use League\CommonMark\Parser\Inline\InlineParserMatch;
@@ -76,4 +79,33 @@ public function testParseWithNoInlineParsers(): void
7679
$this->assertTrue($child instanceof Text);
7780
$this->assertSame('The quick brown fox jumps over the lazy dog', $child->getLiteral());
7881
}
82+
83+
/**
84+
* @see https://github.com/thephpleague/commonmark/issues/951
85+
*
86+
* @runInSeparateProcess to avoid polluting the global environment
87+
*/
88+
public function testMultibyteDetectionRegressionFromIssue951(): void
{
    // Switch the global mbstring internal encoding away from UTF-8 before parsing
    \mb_internal_encoding('iso-8859-1'); // @phpstan-ignore-line

    // Only the two bracket parsers are registered, with the same priorities as before
    $env = new Environment();
    $env->addInlineParser(new CloseBracketParser(), 30);
    $env->addInlineParser(new OpenBracketParser(), 20);

    $block = new Paragraph();
    (new InlineParserEngine($env, new ReferenceMap()))->parse('AAA ÀÀ [label](https://url)', $block);

    // Expect exactly two inline children: the leading text and the link
    $this->assertCount(2, $block->children());

    $leadingText = $block->firstChild();
    $this->assertTrue($leadingText instanceof Text);
    $this->assertSame('AAA ÀÀ ', $leadingText->getLiteral());

    $trailingLink = $block->lastChild();
    $this->assertTrue($trailingLink instanceof Link);
    $this->assertSame('label', $trailingLink->firstChild()->getLiteral());
    $this->assertSame('https://url', $trailingLink->getUrl());
}
79111
}

0 commit comments

Comments
 (0)