Skip to content

Commit

Permalink
Merge pull request moodlehq#111 from stronk7/lang_string_sorted_please
Browse files Browse the repository at this point in the history
Lang string sorted please
  • Loading branch information
andrewnicols authored Mar 14, 2024
2 parents 3966e9f + 238d3f4 commit 0b9aeea
Show file tree
Hide file tree
Showing 10 changed files with 737 additions and 0 deletions.
335 changes: 335 additions & 0 deletions moodle/Sniffs/Files/LangFilesOrderingSniff.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,335 @@
<?php

// This file is part of Moodle - https://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <https://www.gnu.org/licenses/>.

/**
* This sniff verifies that lang files are sorted alphabetically by string key.
*
* @copyright 2024 onwards Eloy Lafuente (stronk7) {@link https://stronk7.com}
* @license https://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/

namespace MoodleHQ\MoodleCS\moodle\Sniffs\Files;

use MoodleHQ\MoodleCS\moodle\Util\MoodleUtil;
use PHP_CodeSniffer\Files\File;
use PHP_CodeSniffer\Sniffs\Sniff;

class LangFilesOrderingSniff implements Sniff
{
/**
* @var string|null The previous string that has been processed.
*
* We use this variable to compare the current string with the previous one. And decide
* if the current string is a duplicate or if it's out of order.
*/
protected ?string $previousString = null;

/**
* @var int pointer to the token where we should stop fixing the file (defaults to last token).
*
* When we find a comment that is not a "Deprecated since Moodle" one, we will stop fixing the file.
*/
protected int $stopFixingPtr = 999999999;

/**
* @var array An array which keys are all the known strings, grouped, and the values are the start and end pointers to them.
*
* We use this array to, accurately, know where to move every string on each fixing iteration.
*/
protected array $strings = [];

public function register(): array {
return [T_OPEN_TAG]; // We are going to process the whole file, finding all the strings and comments within it.
}

public function process(File $phpcsFile, $stackPtr): void {
// If the file is not a lang file, return.
if (!MoodleUtil::isLangFile($phpcsFile)) {
return;
}

// Only for Moodle 4.4dev (404) and up.
// Make and exception for unit tests, so they are run always.
if (!MoodleUtil::meetsMinimumMoodleVersion($phpcsFile, 404) && !MoodleUtil::isUnitTestRunning()) {
return; // @codeCoverageIgnore
}

// Get the file tokens, for ease of use.
$tokens = $phpcsFile->getTokens();

// Ensure that we start from scratch on each file and pass.
$this->previousString = null;
$this->stopFixingPtr = 999999999;
$this->strings = [];

// Let's find the first variable and start the process.
$currentPtr = $phpcsFile->findNext(T_VARIABLE, $stackPtr + 1);
if ($currentPtr === false) {
return; // No strings found, nothing to do.
}

// It's time to iterate over all the strings and comments till the end of the file.
// We'll go accumulating all the strings by group, with their start and end pointers as values.
$currentGroup = 'main'; // The default group to start with, we'll change it each time we find a new section.
do {
// Let's manage comments first (so we know if we are changing the current group).

// Correct comments are in new line and begin with "// Deprecated since ".
if (
$tokens[$currentPtr]['code'] === T_COMMENT &&
strpos($tokens[$currentPtr]['content'], '// Deprecated since ') === 0 &&
$tokens[$currentPtr - 1]['content'] === "\n"
) {
$currentGroup = trim($tokens[$currentPtr]['content']);
}

// If we find a comment that is not the standard one, we will stop fixing the file here. And error.
if (
$tokens[$currentPtr]['code'] === T_COMMENT &&
strpos($tokens[$currentPtr]['content'], '// Deprecated since ') === false
) {
$phpcsFile->addWarning(
'Unexpected comment found. Auto-fixing will not work after this comment',
$currentPtr,
'UnexpectedComment'
);
if ($this->stopFixingPtr > $currentPtr) {
$this->stopFixingPtr = $currentPtr; // Update the stop fixing pointer.
}
}

if ($tokens[$currentPtr]['code'] === T_COMMENT) {
continue; // We are done for comment tokens.
}

// Arrived here, all the tokens are variables, so we don't need to check for that.

// If the name of the variable is not "$string", error.
if ($tokens[$currentPtr]['content'] !== '$string') {
$phpcsFile->addError(
'Variable "%s" is not expected in a lang file',
$currentPtr,
'UnexpectedVariable',
[$tokens[$currentPtr]['content']]
);
continue; // We are done for this token.
}

// Get the string key, if any.
if (!$stringKey = $this->getStringKey($phpcsFile, $currentPtr)) {
continue; // Problems with this string key, skip it (has been already reported).
}

// Have found a valid $string[KEY], let's calculate the end and store it.
if ($currentEnd = $this->getStringEnd($phpcsFile, $currentPtr)) {
if (!isset($this->strings[$currentGroup])) {
$this->strings[$currentGroup] = [];
$this->previousString = null; // Reset the previous string on new group.
}
// Check if the string already has been found earlier.
if (isset($this->strings[$currentGroup][$stringKey])) {
$phpcsFile->addError('The string key "%s" is duplicated', $currentPtr, 'DuplicatedKey', [$stringKey]);
continue; // We are done for this string, won't report anything about it till fixed.
} else {
// We can safely add the string to the group, if we are before the last pointer to fix.
if ($currentPtr < $this->stopFixingPtr) {
$this->strings[$currentGroup][$stringKey] = [$currentPtr, $currentEnd];
}
}
}

if (null === $currentEnd) {
// The string end is not as expected, report as error unless the next token
// after the semicolon is a comment. In that case, we won't report it because
// UnexpectedComment will take on it.
$delegateToUnexpectedComment = false;
$semicolonPtr = $phpcsFile->findNext(T_SEMICOLON, $currentPtr + 1);
if (
(
isset($tokens[$semicolonPtr + 1]) && // There is a next token (not the end of the file)
$tokens[$semicolonPtr + 1]['code'] === T_COMMENT // And it's a comment.
) ||
(
isset($tokens[$semicolonPtr + 2]) && // Or there are 2 more tokens (not the end of the file).
$tokens[$semicolonPtr + 1]['code'] === T_WHITESPACE && // And they are whitespace + comment.
$tokens[$semicolonPtr + 2]['code'] === T_COMMENT
)
) {
$delegateToUnexpectedComment = true;
}
if (!$delegateToUnexpectedComment) {
$phpcsFile->addError(
'Unexpected string end, it should be a line feed after a semicolon',
$currentPtr,
'UnexpectedEnd'
);
continue; // We are done for this string, won't report anything about it till fixed.
}
}

// Note: We only issue these warnings if there are previous strings to compare with,
// and, obviously, if the string is out of order.
if ($this->previousString && strcmp($this->previousString, $stringKey) > 0) {
// We are going to add this as fixable warning only if we are
// before the last pointer to fix. This is an unordered string.
$phpcsFile->addWarning(
'The string key "%s" is not in the correct order, it should be before "%s"',
$currentPtr,
'IncorrectOrder',
[$stringKey, $this->previousString],
0,
$currentPtr < $this->stopFixingPtr
);
}

// Feed $previousString with the current string key.
$this->previousString = $stringKey;
} while ($currentPtr = $phpcsFile->findNext([T_VARIABLE, T_COMMENT], $currentPtr + 1)); // Move to next.

// If we are fixing the file, we need to sort the strings and move them to the correct position.
if ($phpcsFile->fixer->enabled) {
$this->sortStringsAndFix($phpcsFile);
}
}

/**
* Given a lang file, fix all the sorting issues found.
*
* This is really similar to the insertion-sort algorithm, but with
* a few optimisations to avoid unnecessary iterations. Should be
* efficient enough against lists that are expected to be not
* too long and already mostly sorted.
*
* @param File $phpcsFile The lang file being processed.
*/
protected function sortStringsAndFix(File $phpcsFile): void {
// Because of hard restrictions in CodeSniffer fixer (we cannot apply more than one change
// to the same token in the same pass), we need to accumulate all the changes and apply them
// at the end of the process. So we are going to build a big changeset to be applied all together.
// Keys will be the token index and values an array, with operation (index, DELETE, INSERT) and content.

// Get the file tokens, for ease of use.
$tokens = $phpcsFile->getTokens();

// We are going to perform the sorting within each detected group/section.
foreach ($this->strings as $group => $strings) {
$changeSet = []; // The changeset to be applied at the end of the iteration.

$strings = $this->strings[$group];
// Let's compare the keys in the array of strings with the sorted version of it.
$sorted = $unSorted = array_keys($strings);
sort($sorted, SORT_STRING);
$count = count($sorted);
for ($i = 0; $i < $count; $i++) {
$sortedKey = $sorted[$i];
$stringsKey = $unSorted[$i];

// The string being checked is not in order (comparing with the sorted array).
if ($sortedKey !== $stringsKey) {
// Apply the changes to the strings array by moving the key to the correct position.
$keyValue = $strings[$sortedKey];
// Remove the element to move.
unset($strings[$sortedKey]);
// Rebuild the array, with the element in new position.
$strings = array_slice($strings, 0, $i, true) +
[$sortedKey => $keyValue] +
array_slice($strings, $i, null, true);
$this->strings[$group] = $strings; // Update the group array with the rebuilt version.
$unSorted = array_keys($strings); // Update the unsorted keys array.

// Now add the required changes to the changeset that we'll be using when fixing the file.
// For every token in the string being moved, delete it and add it in the correct position.
foreach (range($keyValue[0], $keyValue[1]) as $tokenIndex) {
$tempToken = $tokens[$tokenIndex]; // Store the token.
$changeSet[$tokenIndex]['DELETE'] = ''; // Delete the current string token.
// Insert the token before the previous string.
if (!isset($changeSet[$strings[$stringsKey][0] - 1]['INSERT'])) {
$changeSet[$strings[$stringsKey][0] - 1]['INSERT'] = '';
}
$changeSet[$strings[$stringsKey][0] - 1]['INSERT'] .= $tempToken['content'];
}
}
}

// Let's apply the accumulated changes to the file.
if (!empty($changeSet)) {
$phpcsFile->fixer->beginChangeset();
foreach ($changeSet as $tokenIndex => $operations) {
if (isset($operations['DELETE'])) {
$phpcsFile->fixer->replaceToken($tokenIndex, '');
}
if (isset($operations['INSERT'])) {
$phpcsFile->fixer->addContent($tokenIndex, $operations['INSERT']);
}
}
$phpcsFile->fixer->endChangeset();
}
}
}

/**
* Return the string key corresponding to the string at the pointer.
* Note that the key has got any quote (single or double) trimmed.
*
* @param File $phpcsFile
* @param int $stackPtr
* @return string|null
*/
protected function getStringKey(File $phpcsFile, int $stackPtr): ?string {
$tokens = $phpcsFile->getTokens();

// If the structure is not exactly: $string[KEY], add error and return null.
if (
$tokens[$stackPtr + 1]['code'] !== T_OPEN_SQUARE_BRACKET ||
$tokens[$stackPtr + 2]['code'] !== T_CONSTANT_ENCAPSED_STRING ||
$tokens[$stackPtr + 3]['code'] !== T_CLOSE_SQUARE_BRACKET
) {
$phpcsFile->addError(
"Unexpected string syntax, it should be `\$string['key']`",
$stackPtr,
'UnexpectedSyntax'
);
return null;
}

// Now we can safely extract the string key and return it.
return trim($tokens[$stackPtr + 2]['content'], "'\"");
}

/**
* Return the string final pointer, it should be always a \n after a T_SEMICOLON.
*
* @param File $phpcsFile
* @param int $stackPtr
* @return int|null The pointer to the end of the string, or null if it's not an expected string end.
*/
protected function getStringEnd(File $phpcsFile, int $stackPtr): ?int {
$tokens = $phpcsFile->getTokens();
$currentEndToken = $phpcsFile->findNext(T_SEMICOLON, $stackPtr + 1) + 1;

// Verify that the current end token is a line feed, if not, we won't be able to fix (swap).
if (
!isset($tokens[$currentEndToken]) ||
$tokens[$currentEndToken]['code'] !== T_WHITESPACE ||
$tokens[$currentEndToken]['content'] !== "\n"
) {
return null; // This is not an expected string end.
}

return $currentEndToken;
}
}
Loading

0 comments on commit 0b9aeea

Please sign in to comment.