Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes some BBC and Markdown bugs #8362

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Sources/Actions/Moderation/WatchedUsers.php
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@ public static function list_getWatchedUserPosts(int $start, int $items_per_page,

$row['body'] = Parser::transform(
string: $row['body'],
input_types: Parser::INPUT_BBC | Parser::INPUT_MARKDOWN | ((bool) $row['last_smileys'] ? Parser::INPUT_SMILEYS : 0),
input_types: Parser::INPUT_BBC | Parser::INPUT_MARKDOWN | ((bool) $row['smileys_enabled'] ? Parser::INPUT_SMILEYS : 0),
options: ['cache_id' => (int) $row['id_msg']],
);

Expand Down
2 changes: 1 addition & 1 deletion Sources/Actions/Post.php
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ protected function showPreview(): void
// Do all bulletin board code tags, with or without smileys.
Utils::$context['preview_message'] = Parser::transform(
string: Utils::$context['preview_message'],
input_types: Parser::INPUT_BBC | Parser::INPUT_MARKDOWN | (isset($_REQUEST['ns']) ? Parser::INPUT_SMILEYS : 0),
input_types: Parser::INPUT_BBC | Parser::INPUT_MARKDOWN | (!isset($_REQUEST['ns']) ? Parser::INPUT_SMILEYS : 0),
);

Lang::censorText(Utils::$context['preview_message']);
Expand Down
2 changes: 1 addition & 1 deletion Sources/Mail.php
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public static function send(
}

// Use real tabs.
$message = strtr($message, [Utils::TAB_SUBSTITUTE => $send_html ? '<span style="white-space: pre-wrap;">' . "\t" . '</span>' : "\t"]);
$message = strtr($message, [Utils::TAB_SUBSTITUTE => $send_html ? '<span style="white-space: pre;">' . "\t" . '</span>' : "\t"]);

list(, $from_name) = self::mimespecialchars(addcslashes($from !== null ? $from : Utils::$context['forum_name'], '<>()\'\\"'), true, $hotmail_fix, $line_break);
list(, $subject) = self::mimespecialchars($subject, true, $hotmail_fix, $line_break);
Expand Down
26 changes: 11 additions & 15 deletions Sources/Msg.php
Original file line number Diff line number Diff line change
Expand Up @@ -707,15 +707,13 @@ function ($a) {
}

// Replace code BBC with placeholders. We'll restore them at the end.
$parts = preg_split('~(\[/code\]|\[code(?:=[^\]]+)?\])~i', $message, -1, PREG_SPLIT_DELIM_CAPTURE);
$parts = preg_split('/(\[code(?:=[^\]]+)?\](?:[^\[]|\[(?!\/code\])|(?R))*\[\/code])/i', $message, -1, PREG_SPLIT_DELIM_CAPTURE);

for ($i = 0, $n = count($parts); $i < $n; $i++) {
// It goes 0 = outside, 1 = begin tag, 2 = inside, 3 = close tag, repeat.
if ($i % 4 == 2) {
$code_tag = $parts[$i - 1] . $parts[$i] . $parts[$i + 1];
$substitute = $parts[$i - 1] . $i . $parts[$i + 1];
$code_tags[$substitute] = $code_tag;
$parts[$i] = $i;
if ($i % 2 == 1) {
$substitute = md5($parts[$i]);
$code_tags[$substitute] = $parts[$i];
$parts[$i] = $substitute;
}
}

Expand Down Expand Up @@ -919,16 +917,14 @@ public static function un_preparsecode(string $message): string
// Any hooks want to work here?
IntegrationHook::call('integrate_unpreparsecode', [&$message]);

$parts = preg_split('~(\[/code\]|\[code(?:=[^\]]+)?\])~i', $message, -1, PREG_SPLIT_DELIM_CAPTURE);

// We're going to unparse only the stuff outside [code]...
$parts = preg_split('/(\[code(?:=[^\]]+)?\](?:[^\[]|\[(?!\/code\])|(?R))*\[\/code])/i', $message, -1, PREG_SPLIT_DELIM_CAPTURE);

for ($i = 0, $n = count($parts); $i < $n; $i++) {
// If $i is a multiple of four (0, 4, 8, ...) then it's not a code section...
if ($i % 4 == 2) {
$code_tag = $parts[$i - 1] . $parts[$i] . $parts[$i + 1];
$substitute = $parts[$i - 1] . $i . $parts[$i + 1];
$code_tags[$substitute] = $code_tag;
$parts[$i] = $i;
if ($i % 2 == 1) {
$substitute = md5($parts[$i]);
$code_tags[$substitute] = $parts[$i];
$parts[$i] = $substitute;
}
}

Expand Down
25 changes: 14 additions & 11 deletions Sources/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -371,22 +371,21 @@ public static function highlightPhpCode(string $code): string

$oldlevel = error_reporting(0);

$buffer = str_replace(["\n", "\r"], '', @highlight_string($code, true));
$buffer = @highlight_string($code, true);

error_reporting($oldlevel);

$buffer = preg_replace_callback_array(
return preg_replace_callback_array(
[
'~(?:' . Utils::TAB_SUBSTITUTE . ')+~u' => fn ($matches) => '<span style="white-space: pre-wrap;">' . strtr($matches[0], [Utils::TAB_SUBSTITUTE => "\t"]) . '</span>',
'~<span style="color: #[0-9a-fA-F]{6}">(<span style="white-space: pre-wrap;">\h*</span>)</span>~' => fn ($matches) => $matches[1],
'~(?:' . Utils::TAB_SUBSTITUTE . ')+~u' => fn ($matches) => '<span style="white-space: pre;">' . strtr($matches[0], [Utils::TAB_SUBSTITUTE => "\t"]) . '</span>',
'~<span style="color: #[0-9a-fA-F]{6}">(<span style="white-space: pre;">\h*</span>)</span>~' => fn ($matches) => $matches[1],
'~\R~' => fn ($matches) => '<br>',
'/\'/' => fn ($matches) => '&#039;',
// PHP 8.3 changed the returned HTML.
'/^(<pre>)?<code[^>]*>|<\/code>(<\/pre>)?$/' => fn ($matches) => '',
],
$buffer,
);

// PHP 8.3 changed the returned HTML.
$buffer = preg_replace('/^(<pre>)?<code[^>]*>|<\/code>(<\/pre>)?$/', '', $buffer);

return strtr($buffer, ['\'' => '&#039;']);
}

/**
Expand Down Expand Up @@ -526,6 +525,7 @@ protected function setDisabled(): void
$this->disabled['iurl'] = true;
$this->disabled['email'] = true;
$this->disabled['flash'] = true;
$this->disabled['youtube'] = true;

// @todo Change maybe?
if (!isset($_GET['images'])) {
Expand Down Expand Up @@ -628,10 +628,13 @@ protected static function toHTML(string $string, int $input_types, array $option

// Parse the BBCode.
if ($input_types & self::INPUT_BBC) {
$string = BBcodeParser::load(!empty($options['for_print']))->parse($string, $options['cache_id'], $options['parse_tags']);
$string = BBcodeParser::load(!empty($options['for_print']))->parse($string, !empty($input_types & self::INPUT_SMILEYS), $options['cache_id'], $options['parse_tags']);

// BBCodeParser calls the SmileyParser internally; don't repeat.
$input_types &= ~self::INPUT_SMILEYS;
}

// Parse the smileys.
// Parse the smileys, if we haven't already.
if ($input_types & self::INPUT_SMILEYS) {
$string = SmileyParser::load()->parse($string);
}
Expand Down
32 changes: 29 additions & 3 deletions Sources/Parsers/BBCodeParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ class BBCodeParser extends Parser
*/
protected ?string $alltags_regex = null;

/**
* @var bool
*
* Whether smileys should be parsed while we are parsing BBCode.
*/
protected bool $smileys = true;

/**
* @var array
*
Expand Down Expand Up @@ -824,6 +831,7 @@ public function __construct(bool $for_print = false)
* Parse bulletin board code in a string.
*
* @param string $message The string to parse.
* @param bool $smileys Whether to parse smileys. Default: true.
* @param string|int $cache_id The cache ID.
* If $cache_id is left empty, an ID will be generated automatically.
* Manually specifying an ID is helpful in cases when an integration hook
Expand All @@ -832,7 +840,7 @@ public function __construct(bool $for_print = false)
* @param array $parse_tags If set, only parses these tags rather than all of them.
* @return string The parsed string.
*/
public function parse(string $message, string|int $cache_id = '', array $parse_tags = []): string
public function parse(string $message, bool $smileys = true, string|int $cache_id = '', array $parse_tags = []): string
{
// Don't waste cycles
if (strval($message) === '') {
Expand All @@ -843,6 +851,7 @@ public function parse(string $message, string|int $cache_id = '', array $parse_t
$this->resetRuntimeProperties();

$this->message = $message;
$this->smileys = $smileys;
$this->parse_tags = $parse_tags;

$this->setDisabled();
Expand All @@ -857,6 +866,10 @@ public function parse(string $message, string|int $cache_id = '', array $parse_t
}

if (!self::$enable_bbc) {
if ($this->smileys === true) {
$this->message = SmileyParser::load()->parse($this->message);
}

$this->message = $this->fixHtml($this->message);

return $this->message;
Expand Down Expand Up @@ -1863,7 +1876,7 @@ public static function codeValidate(array &$tag, array|string &$data, array $dis
// Fix the PHP code stuff...
$code = str_replace("<pre style=\"display: inline;\">\t</pre>", "\t", implode('', $php_parts));

$code = str_replace("\t", "<span style=\"white-space: pre-wrap;\">\t</span>", $code);
$code = str_replace("\t", "<span style=\"white-space: pre;\">\t</span>", $code);

if ($add_begin) {
$code = preg_replace(['/^(.+?)&lt;\?.{0,40}?php(?:&nbsp;|\s)/', '/\?&gt;((?:\s*<\/(font|span)>)*)$/m'], '$1', $code, 2);
Expand Down Expand Up @@ -2215,7 +2228,20 @@ protected function parseMessage(): void
$this->message .= "\n" . $tag['after'] . "\n";
}

$this->message = strtr($this->message, ["\n" => '']);
// Parse the smileys within the parts where it can be done safely.
if ($this->smileys === true) {
$message_parts = explode("\n", $this->message);

for ($i = 0, $n = count($message_parts); $i < $n; $i += 2) {
$message_parts[$i] = SmileyParser::load()->parse($message_parts[$i]);
}

$this->message = implode('', $message_parts);
}
// No smileys, just get rid of the markers.
else {
$this->message = strtr($this->message, ["\n" => '']);
}

// Transform the first table row into a table header and wrap the rest
// in table body tags.
Expand Down
94 changes: 68 additions & 26 deletions Sources/Parsers/MarkdownParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class MarkdownParser extends Parser
// or
'|' .
// Non-space, non-control characters.
'[^\s\p{Cc}]+' .
'[^\s\p{Cc}]+?' .
')' .
')';

Expand Down Expand Up @@ -397,7 +397,7 @@ class MarkdownParser extends Parser
'interrupts_p' => true,
'marker_pattern' => '/^((?P<bullet>[*+-])|(?P<number>\d+)(?P<num_punct>[.)]))\h+/u',
'opener_test' => 'testOpensListItem',
'continue_test' => 'testContinuesListItem',
'continue_test' => false,
'closer_test' => 'testClosesListItem',
'add' => 'addListItem',
'append' => null,
Expand Down Expand Up @@ -1226,12 +1226,16 @@ protected function testIsIndentedCode(array $line_info): bool
return true;
}

if ($this->in_code === 2) {
return false;
}

if ($this->testIsBlank($line_info) && $this->in_code === 1) {
return true;
}

if ($line_info['indent'] < 4) {
$this->in_code = $this->in_code === 1 ? 0 : $this->in_code;
$this->in_code = 0;

return false;
}
Expand All @@ -1248,7 +1252,7 @@ protected function testIsIndentedCode(array $line_info): bool
&& $open_block['properties']['indent'] >= $line_info['indent']
)
) {
$this->in_code = $this->in_code === 1 ? 0 : $this->in_code;
$this->in_code = 0;

return false;
}
Expand Down Expand Up @@ -1395,21 +1399,6 @@ protected function testOpensListItem(array $line_info): bool
);
}

/**
 * Checks whether the current line still belongs to an open list item.
 *
 * The line continues the list item only when all of the following hold:
 * the open block at position $o is a 'list_item', the open block directly
 * before it is a 'list', and the line is indented at least as far as the
 * indent recorded in the list item's properties.
 *
 * @param array $line_info Info about the current line. Only its 'indent'
 *    value is read here.
 * @param int $last_container Not read by this test. Presumably the index
 *    of the last matched container block; verify against the caller.
 * @param int $o Index into $this->open of the open block being tested.
 * @return bool Whether this line is part of a list item.
 */
protected function testContinuesListItem(array $line_info, int $last_container, int $o): bool
{
    $candidate = $this->open[$o];

    // Only a list item can be continued by this test.
    if ($candidate['type'] !== 'list_item') {
        return false;
    }

    // The list item must sit directly inside a list.
    if ($this->open[$o - 1]['type'] !== 'list') {
        return false;
    }

    // The line must be indented at least as deep as the item's content.
    return $line_info['indent'] >= $candidate['properties']['indent'];
}

/**
* Tests whether a line closes a list item.
*
Expand Down Expand Up @@ -1827,6 +1816,21 @@ protected function addListItem(array $line_info, int $last_container, int $o): v

$indent = $line_info['indent'] + mb_strlen($marker) + strspn($line_info['content'], ' ', strlen($marker));

// Check for nested lists.
if (
$this->open[$last_container]['type'] === 'list'
&& $line_info['indent'] >= $this->open[$last_container]['properties']['indent']
) {
// Close the open paragraph (or whatever) inside the open list item.
while ($this->open[$o]['type'] !== 'list_item') {
$this->getMethod($this->block_types[$this->open[$o]['type']]['close'] ?? 'closeBlock')($o);
$o--;
}

// Consider the open list item to be our container.
$last_container = $o;
}

// If this list item doesn't match the existing list's type,
// exit the existing list so we can start a new one.
if (
Expand Down Expand Up @@ -2475,8 +2479,41 @@ protected function parseInlineSecondPass(array $content): array
}

// We need more info to make decisions about this run of delimiter chars.
$prev_char = html_entity_decode($chars[$start - 1] ?? ' ');
$next_char = html_entity_decode($chars[$i + 1] ?? ' ');
if (isset($chars[$start - 1])) {
$prev_char = $chars[$start - 1];
} elseif (!isset($content[$c - 1])) {
$prev_char = ' ';
} else {
$temp = $content[$c - 1];

while (isset($temp[array_key_last($temp)]['content'])) {
$temp = $temp[array_key_last($temp)]['content'];
}

if (is_string(end($temp['content']))) {
$prev_char = mb_substr(end($temp['content']), -1);
} else {
$prev_char = ' ';
}
}

if (isset($chars[$i + 1])) {
$next_char = $chars[$i + 1];
} elseif (!isset($content[$c + 1])) {
$next_char = ' ';
} else {
$temp = $content[$c + 1];

while (isset($temp[0]['content'])) {
$temp = $temp[0]['content'];
}

if (is_string(reset($temp['content']))) {
$next_char = mb_substr(reset($temp['content']), 0, 1);
} else {
$next_char = ' ';
}
}

$prev_is_space = preg_match('/\s/u', $prev_char);
$prev_is_punct = $prev_is_space ? false : preg_match('/\pP/u', $prev_char);
Expand Down Expand Up @@ -2660,10 +2697,8 @@ protected function parseLink(array $chars, int &$i, array &$content): void

$str = implode('', array_slice($chars, $delim['properties']['position'], $i - $delim['properties']['position'])) . ']' . mb_substr(implode('', $chars), $i + 1);

$prefix = $delim['type'] === '![' ? '!' : '';

// Inline link/image?
if (preg_match('~^' . $prefix . self::REGEX_LINK_INLINE . '~u', $str, $matches)) {
if (preg_match('~^' . self::REGEX_LINK_INLINE . '~u', $str, $matches)) {
$this->parseEmphasis($content, $c);

$text = array_slice($content, $c + 1);
Expand Down Expand Up @@ -2693,7 +2728,7 @@ protected function parseLink(array $chars, int &$i, array &$content): void
self::REGEX_LINK_REF_COLLAPSED,
self::REGEX_LINK_REF_SHORTCUT,
] as $regex) {
if (preg_match('~' . $prefix . $regex . '~u', $str, $matches)) {
if (preg_match('~' . $regex . '~u', $str, $matches)) {
break;
}
}
Expand Down Expand Up @@ -3256,6 +3291,8 @@ protected function renderBlockquote(array $element): void
*/
protected function renderList(array $element): void
{
static $nesting_level = 0;

switch ($this->output_type) {
case self::OUTPUT_BBC:
if ($element['content'] === []) {
Expand All @@ -3270,7 +3307,10 @@ protected function renderList(array $element): void
return;
}

$style_type = $element['properties']['ordered'] ? 'decimal' : 'disc';
$ordered_styles = ['decimal', 'lower-roman', 'lower-alpha'];
$unordered_styles = ['disc', 'circle', 'square'];

$style_type = $element['properties']['ordered'] ? $ordered_styles[$nesting_level % 3] : $unordered_styles[$nesting_level % 3];

foreach (BBCodeParser::getCodes() as $code) {
if (
Expand All @@ -3297,7 +3337,9 @@ protected function renderList(array $element): void
$this->rendered .= "\n";

foreach ($element['content'] as $content_element) {
$nesting_level++;
$this->render($content_element);
$nesting_level--;
}

switch ($this->output_type) {
Expand Down
Loading
Loading