Skip to content

Commit 3d1f0e8

Browse files
committed
Added utilities for handling tokens and tokens list.
1 parent 7203302 commit 3d1f0e8

File tree

3 files changed

+300
-0
lines changed

3 files changed

+300
-0
lines changed

src/Lexer.php

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,22 @@ class Lexer
159159
*/
160160
public $errors = array();
161161

162+
/**
 * Gets the tokens list parsed by a new instance of a lexer.
 *
 * The `$strict` and `$delimiter` arguments are forwarded to the lexer's
 * constructor so that they actually take effect; previously they were
 * accepted but silently dropped.
 *
 * NOTE(review): the constructor is assumed to accept
 * `($str, $strict, $delimiter)` in this order - confirm against its
 * signature.
 *
 * @param string|UtfString $str       The query to be lexed.
 * @param bool             $strict    Whether strict mode should be
 *                                    enabled or not.
 * @param string           $delimiter The delimiter to be used.
 *
 * @return TokensList
 */
public static function getTokens($str, $strict = false, $delimiter = null)
{
    $lexer = new Lexer($str, $strict, $delimiter);
    return $lexer->list;
}
177+
162178
/**
163179
* Constructor.
164180
*

src/Utils/Tokens.php

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
<?php
2+
3+
/**
4+
* Token utilities.
5+
*
6+
* @package SqlParser
7+
* @subpackage Utils
8+
*/
9+
namespace SqlParser\Utils;
10+
11+
use SqlParser\Lexer;
12+
use SqlParser\Token;
13+
use SqlParser\TokensList;
14+
15+
/**
16+
* Token utilities.
17+
*
18+
* @category Token
19+
* @package SqlParser
20+
* @subpackage Utils
21+
* @author Dan Ungureanu <[email protected]>
22+
* @license http://opensource.org/licenses/GPL-2.0 GNU Public License
23+
*/
24+
class Tokens
{

    /**
     * Checks if a pattern is a match for the specified token.
     *
     * Only the keys that are set in the pattern are checked, so an empty
     * pattern matches any token. The recognized keys are:
     *
     *  - `token`     - compared strictly with `$token->token`;
     *  - `value`     - compared strictly with `$token->value`;
     *  - `value_str` - compared case-insensitively with `$token->value`;
     *  - `type`      - compared strictly with `$token->type`;
     *  - `flags`     - the token must have at least one of these bits set.
     *
     * @param Token $token   The token to be matched.
     * @param array $pattern The pattern to be matched.
     *
     * @return bool
     */
    public static function match(Token $token, array $pattern)
    {
        // Token.
        if ((isset($pattern['token']))
            && ($pattern['token'] !== $token->token)
        ) {
            return false;
        }

        // Value.
        if ((isset($pattern['value']))
            && ($pattern['value'] !== $token->value)
        ) {
            return false;
        }

        // `strcasecmp` returns a non-zero value when the strings differ.
        if ((isset($pattern['value_str']))
            && (strcasecmp($pattern['value_str'], $token->value) !== 0)
        ) {
            return false;
        }

        // Type.
        if ((isset($pattern['type']))
            && ($pattern['type'] !== $token->type)
        ) {
            return false;
        }

        // Flags.
        if ((isset($pattern['flags']))
            && (($pattern['flags'] & $token->flags) === 0)
        ) {
            return false;
        }

        return true;
    }

    /**
     * Replaces every sequence of tokens that matches a pattern.
     *
     * Comments never start a sequence and are skipped while a sequence is
     * being matched. Comments skipped inside a matched sequence are not
     * copied to the result; all other comments are preserved.
     *
     * An empty `$find` matches nothing and leaves the tokens unchanged.
     *
     * @param TokensList|string $list    The list of tokens or the query to
     *                                   be lexed.
     * @param array             $find    The sequence of patterns (see
     *                                   `match`) to be found.
     * @param array             $replace The tokens inserted in place of each
     *                                   matched sequence.
     *
     * @return mixed A new `TokensList` if a list was given, otherwise the
     *               result of `TokensList::build`.
     */
    public static function replaceTokens($list, array $find, array $replace)
    {

        /**
         * Whether the first parameter is a list.
         *
         * @var bool
         */
        $isList = $list instanceof TokensList;

        // Parsing the tokens.
        if (!$isList) {
            $list = Lexer::getTokens($list);
        }

        /**
         * The list to be returned.
         *
         * @var array
         */
        $newList = array();

        /**
         * The length of the find pattern is calculated only once.
         *
         * @var int
         */
        $findCount = count($find);

        /**
         * The starting index of the pattern.
         *
         * @var int
         */
        $i = 0;

        while ($i < $list->count) {

            // A sequence may not start with a comment.
            if ($list->tokens[$i]->type === Token::TYPE_COMMENT) {
                $newList[] = $list->tokens[$i];
                ++$i;
                continue;
            }

            /**
             * The index used to parse `$list->tokens`.
             *
             * This index might be running faster than `$k` because some
             * tokens are skipped.
             *
             * @var int
             */
            $j = $i;

            /**
             * The index used to parse `$find`.
             *
             * This index might be running slower than `$j` because some
             * tokens are skipped.
             *
             * @var int
             */
            $k = 0;

            // Checking if the next tokens match the pattern described.
            while (($j < $list->count) && ($k < $findCount)) {

                // Comments are being skipped. The loop condition must be
                // checked again afterwards because the comment may be the
                // last token or may be followed by another comment.
                if ($list->tokens[$j]->type === Token::TYPE_COMMENT) {
                    ++$j;
                    continue;
                }

                if (!static::match($list->tokens[$j], $find[$k])) {
                    // This token does not match the pattern.
                    break;
                }

                // Going to next token and segment of find pattern.
                ++$j;
                ++$k;
            }

            // Checking if the sequence was found. An empty pattern is never
            // considered found: it would consume no tokens and the loop
            // would never advance.
            if (($findCount > 0) && ($k === $findCount)) {

                // Inserting new tokens.
                foreach ($replace as $token) {
                    $newList[] = $token;
                }

                // Skipping the tokens that were matched.
                $i = $j;
            } else {
                // Adding the same token.
                $newList[] = $list->tokens[$i];
                ++$i;
            }
        }

        return $isList ?
            new TokensList($newList) : TokensList::build($newList);
    }
}

tests/Utils/TokensTest.php

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
<?php
2+
3+
namespace SqlParser\Tests\Utils;
4+
5+
use SqlParser\Parser;
6+
use SqlParser\Token;
7+
use SqlParser\Utils\Tokens;
8+
9+
use SqlParser\Tests\TestCase;
10+
11+
class TokensTest extends TestCase
{

    /**
     * Runs `Tokens::replaceTokens` and compares its result with the
     * expected one.
     *
     * @param mixed $list     The first argument given to `replaceTokens`.
     * @param array $find     The patterns describing the sequence to find.
     * @param array $replace  The tokens inserted instead of the sequence.
     * @param mixed $expected The expected result.
     *
     * @dataProvider replaceTokensProvider
     *
     * @return void
     */
    public function testReplaceTokens($list, $find, $replace, $expected)
    {
        $actual = Tokens::replaceTokens($list, $find, $replace);
        $this->assertEquals($expected, $actual);
    }

    /**
     * Data sets for `testReplaceTokens`.
     *
     * @return array
     */
    public function replaceTokensProvider()
    {
        $find = array(
            array('value_str' => 'a'),
            array('token' => '.'),
        );

        $replace = array(
            new Token('c'),
            new Token('.'),
        );

        return array(
            array(
                'SELECT * FROM /*x*/a/*c*/.b',
                $find,
                $replace,
                'SELECT * FROM /*x*/c.b',
            ),
        );
    }

    /**
     * Runs `Tokens::match` and compares its result with the expected one.
     *
     * @param Token $token    The token to be matched.
     * @param array $pattern  The pattern used for matching.
     * @param bool  $expected Whether the pattern is expected to match.
     *
     * @dataProvider matchProvider
     *
     * @return void
     */
    public function testMatch($token, $pattern, $expected)
    {
        $actual = Tokens::match($token, $pattern);
        $this->assertEquals($expected, $actual);
    }

    /**
     * Data sets for `testMatch`: the patterns expected to match come first,
     * followed by the patterns expected not to match.
     *
     * @return array
     */
    public function matchProvider()
    {
        $type = Token::TYPE_STRING;
        $flags = Token::FLAG_STRING_DOUBLE_QUOTES;

        $matching = array(
            array(new Token(''), array(), true),
            array(
                new Token('"abc"', $type, $flags),
                array('token' => '"abc"'),
                true,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('value' => 'abc'),
                true,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('value_str' => 'ABC'),
                true,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('type' => Token::TYPE_STRING),
                true,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('flags' => Token::FLAG_STRING_DOUBLE_QUOTES),
                true,
            ),
        );

        $notMatching = array(
            array(
                new Token('"abc"', $type, $flags),
                array('token' => '"abcd"'),
                false,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('value' => 'abcd'),
                false,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('value_str' => 'ABCd'),
                false,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('type' => Token::TYPE_NUMBER),
                false,
            ),
            array(
                new Token('"abc""', $type, $flags),
                array('flags' => Token::FLAG_STRING_SINGLE_QUOTES),
                false,
            ),
        );

        // Both arrays are lists, so the merged keys are sequential.
        return array_merge($matching, $notMatching);
    }
}

0 commit comments

Comments
 (0)