Added utilities for handling tokens and tokens list.

udan11 · udan11 · commit 3d1f0e8faf94 · 2015-09-30T20:03:57.000+03:00
diff --git a/src/Lexer.php b/src/Lexer.php
@@ -159,6 +159,22 @@ class Lexer
      */
     public $errors = array();
 
+    /**
+     * Gets the tokens list parsed by a new instance of a lexer.
+     *
+     * @param string|UtfString $str       The query to be lexed.
+     * @param bool             $strict    Whether strict mode is enabled.
+     * @param string|null      $delimiter The delimiter to be used.
+     *
+     * @return TokensList
+     */
+    public static function getTokens($str, $strict = false, $delimiter = null)
+    {
+        // NOTE(review): assumes the constructor takes ($str, $strict, $delimiter).
+        $lexer = new Lexer($str, $strict, $delimiter);
+        return $lexer->list;
+    }
+
     /**
      * Constructor.
      *
diff --git a/src/Utils/Tokens.php b/src/Utils/Tokens.php
@@ -0,0 +1,178 @@
+<?php
+
+/**
+ * Token utilities.
+ *
+ * @package    SqlParser
+ * @subpackage Utils
+ */
+namespace SqlParser\Utils;
+
+use SqlParser\Lexer;
+use SqlParser\Token;
+use SqlParser\TokensList;
+
+/**
+ * Token utilities.
+ *
+ * @category   Token
+ * @package    SqlParser
+ * @subpackage Utils
+ * @author     Dan Ungureanu <udan1107@gmail.com>
+ * @license    http://opensource.org/licenses/GPL-2.0 GNU Public License
+ */
+class Tokens
+{
+
+    /**
+     * Tells whether a token satisfies every constraint of a pattern.
+     *
+     * Only the keys that are set (and not `null`) in `$pattern` are checked,
+     * so an empty pattern accepts any token. The recognised keys are:
+     *
+     *  - `token`:     must be identical (`===`) to `$token->token`;
+     *  - `value`:     must be identical (`===`) to `$token->value`;
+     *  - `value_str`: compared with `$token->value` through `strcasecmp()`,
+     *                 i.e. ignoring the case;
+     *  - `type`:      must be identical (`===`) to `$token->type`;
+     *  - `flags`:     a bit mask that must have at least one bit in common
+     *                 with `$token->flags`.
+     *
+     * The keys may be combined freely; the token has to pass all the checks
+     * requested by the pattern.
+     *
+     * @param Token $token   The token to be matched.
+     * @param array $pattern The constraints the token must satisfy.
+     *
+     * @return bool
+     */
+    public static function match(Token $token, array $pattern)
+    {
+        // Each operand reads "constraint absent, or constraint satisfied".
+        // The `&&` chain short-circuits, so the checks run in the order they
+        // are written and stop at the first failure.
+        return
+            // Token.
+            (!isset($pattern['token'])
+                || ($token->token === $pattern['token']))
+
+            // Value.
+            && (!isset($pattern['value'])
+                || ($token->value === $pattern['value']))
+            && (!isset($pattern['value_str'])
+                || !strcasecmp($pattern['value_str'], $token->value))
+
+            // Type.
+            && (!isset($pattern['type'])
+                || ($token->type === $pattern['type']))
+
+            // Flags.
+            && (!isset($pattern['flags'])
+                || (($pattern['flags'] & $token->flags) !== 0));
+    }
+
+    /**
+     * Replaces every occurrence of a sequence of tokens with another one.
+     *
+     * The list is scanned from left to right. At each position that is not
+     * a comment, the following tokens are compared one by one (through
+     * `match()`) with the patterns in `$find`. Comments found inside the
+     * candidate sequence are skipped and, if the sequence matches, they are
+     * dropped together with it. A matched sequence is replaced by the tokens
+     * in `$replace`; any other token is copied unchanged.
+     *
+     * An empty `$find` matches nothing, so the tokens are returned as they
+     * are.
+     *
+     * @param TokensList|string|UtfString $list    The list of tokens, or a
+     *                                             query that is lexed first.
+     * @param array                       $find    The patterns (as accepted by
+     *                                             `match()`), one per token.
+     * @param array                       $replace The tokens to be inserted in
+     *                                             place of each occurrence.
+     *
+     * @return TokensList|string A new `TokensList` if a list was given, the
+     *                           result of `TokensList::build()` otherwise.
+     */
+    public static function replaceTokens($list, array $find, array $replace)
+    {
+        // Whether the first parameter is a list; it decides the return type.
+        $isList = $list instanceof TokensList;
+
+        // Anything else is treated as a query and lexed first.
+        if (!$isList) {
+            $list = Lexer::getTokens($list);
+        }
+
+        // The patterns are addressed by position below.
+        $find = array_values($find);
+
+        // The length of the find pattern is calculated only once.
+        $findCount = count($find);
+
+        // The tokens of the list to be returned.
+        $newList = array();
+
+        // The index where the current candidate sequence starts.
+        $i = 0;
+
+        while ($i < $list->count) {
+            // A sequence may not start with a comment.
+            if ($list->tokens[$i]->type === Token::TYPE_COMMENT) {
+                $newList[] = $list->tokens[$i];
+                ++$i;
+                continue;
+            }
+
+            /**
+             * The index used to parse `$list->tokens`.
+             *
+             * This index might be running faster than `$k` because comments
+             * are skipped.
+             */
+            $j = $i;
+
+            // The index used to parse `$find`.
+            $k = 0;
+
+            // Checking if the next tokens match the pattern described.
+            while (($j < $list->count) && ($k < $findCount)) {
+                if ($list->tokens[$j]->type === Token::TYPE_COMMENT) {
+                    // Comments are skipped. The loop condition has to be
+                    // evaluated again because the comment may be the last
+                    // token or may be followed by another comment.
+                    ++$j;
+                    continue;
+                }
+
+                if (!static::match($list->tokens[$j], $find[$k])) {
+                    // This token does not match the pattern.
+                    break;
+                }
+
+                // Going to next token and segment of find pattern.
+                ++$j;
+                ++$k;
+            }
+
+            // An empty pattern never matches; otherwise `$i` would not
+            // advance and the loop would never end.
+            if (($findCount > 0) && ($k === $findCount)) {
+                // The sequence was found: inserting the new tokens and
+                // skipping the matched ones (and the comments among them).
+                foreach ($replace as $token) {
+                    $newList[] = $token;
+                }
+                $i = $j;
+            } else {
+                // Adding the same token.
+                $newList[] = $list->tokens[$i];
+                ++$i;
+            }
+        }
+
+        return $isList ?
+            new TokensList($newList) : TokensList::build($newList);
+    }
+
+}
diff --git a/tests/Utils/TokensTest.php b/tests/Utils/TokensTest.php
@@ -0,0 +1,106 @@
+<?php
+
+namespace SqlParser\Tests\Utils;
+
+use SqlParser\Parser;
+use SqlParser\Token;
+use SqlParser\Utils\Tokens;
+
+use SqlParser\Tests\TestCase;
+
+class TokensTest extends TestCase
+{
+
+    /**
+     * Replacing a sequence of tokens must produce the expected result.
+     *
+     * @param mixed $list     The query (or list of tokens) to be altered.
+     * @param array $find     The patterns describing the sequence.
+     * @param array $replace  The tokens replacing the sequence.
+     * @param mixed $expected The expected result.
+     *
+     * @dataProvider replaceTokensProvider
+     */
+    public function testReplaceTokens($list, $find, $replace, $expected)
+    {
+        $actual = Tokens::replaceTokens($list, $find, $replace);
+        $this->assertEquals($expected, $actual);
+    }
+
+    /**
+     * Data sets for `testReplaceTokens`.
+     *
+     * @return array
+     */
+    public function replaceTokensProvider()
+    {
+        // `a.` is replaced by `c.`; the comment before the sequence is
+        // expected to be kept and the one inside it to be dropped.
+        $query = 'SELECT * FROM /*x*/a/*c*/.b';
+        $find = array(
+            array('value_str' => 'a'),
+            array('token' => '.'),
+        );
+        $replace = array(
+            new Token('c'),
+            new Token('.'),
+        );
+        $expected = 'SELECT * FROM /*x*/c.b';
+
+        return array(
+            array($query, $find, $replace, $expected),
+        );
+    }
+
+    /**
+     * A token must match (or not) a pattern, as stated by the data set.
+     *
+     * @param Token $token    The token to be matched.
+     * @param array $pattern  The pattern to be matched against.
+     * @param bool  $expected Whether the token is expected to match.
+     *
+     * @dataProvider matchProvider
+     */
+    public function testMatch($token, $pattern, $expected)
+    {
+        $actual = Tokens::match($token, $pattern);
+        $this->assertEquals($expected, $actual);
+    }
+
+    /**
+     * Data sets for `testMatch`.
+     *
+     * Each data set is `array($token, $pattern, $expected)`.
+     *
+     * @return array
+     */
+    public function matchProvider()
+    {
+        $string = Token::TYPE_STRING;
+        $number = Token::TYPE_NUMBER;
+        $double = Token::FLAG_STRING_DOUBLE_QUOTES;
+        $single = Token::FLAG_STRING_SINGLE_QUOTES;
+
+        // NOTE(review): only the `token` data sets spell the token `"abc"`;
+        // the other ones use `"abc""` (two closing quotes). The spelling is
+        // kept as it is - confirm whether it is intended.
+        return array(
+            // An empty pattern is expected to match.
+            array(new Token(''), array(), true),
+
+            // Patterns that are expected to match.
+            array(new Token('"abc"', $string, $double), array('token' => '"abc"'), true),
+            array(new Token('"abc""', $string, $double), array('value' => 'abc'), true),
+            array(new Token('"abc""', $string, $double), array('value_str' => 'ABC'), true),
+            array(new Token('"abc""', $string, $double), array('type' => $string), true),
+            array(new Token('"abc""', $string, $double), array('flags' => $double), true),
+
+            // Patterns that are expected not to match.
+            array(new Token('"abc"', $string, $double), array('token' => '"abcd"'), false),
+            array(new Token('"abc""', $string, $double), array('value' => 'abcd'), false),
+            array(new Token('"abc""', $string, $double), array('value_str' => 'ABCd'), false),
+            array(new Token('"abc""', $string, $double), array('type' => $number), false),
+            array(new Token('"abc""', $string, $double), array('flags' => $single), false),
+        );
+    }
+}