Skip to content

Commit 9130da5

Browse files
alambayman-sigma
authored and committed
Improve parsing speed by avoiding some clones in parse_identifier (apache#1624)
1 parent 115e414 commit 9130da5

File tree

1 file changed

+36
-24
lines changed

1 file changed

+36
-24
lines changed

src/parser/mod.rs

+36-24
Original file line number | Diff line number | Diff line change
@@ -970,15 +970,15 @@ impl<'a> Parser<'a> {
970970
t @ (Token::Word(_) | Token::SingleQuotedString(_)) => {
971971
if self.peek_token().token == Token::Period {
972972
let mut id_parts: Vec<Ident> = vec![match t {
973-
Token::Word(w) => w.to_ident(next_token.span),
973+
Token::Word(w) => w.into_ident(next_token.span),
974974
Token::SingleQuotedString(s) => Ident::with_quote('\'', s),
975975
_ => unreachable!(), // We matched above
976976
}];
977977

978978
while self.consume_token(&Token::Period) {
979979
let next_token = self.next_token();
980980
match next_token.token {
981-
Token::Word(w) => id_parts.push(w.to_ident(next_token.span)),
981+
Token::Word(w) => id_parts.push(w.into_ident(next_token.span)),
982982
Token::SingleQuotedString(s) => {
983983
// SQLite has single-quoted identifiers
984984
id_parts.push(Ident::with_quote('\'', s))
@@ -1108,7 +1108,7 @@ impl<'a> Parser<'a> {
11081108
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
11091109
{
11101110
Ok(Some(Expr::Function(Function {
1111-
name: ObjectName(vec![w.to_ident(w_span)]),
1111+
name: ObjectName(vec![w.clone().into_ident(w_span)]),
11121112
uses_odbc_syntax: false,
11131113
parameters: FunctionArguments::None,
11141114
args: FunctionArguments::None,
@@ -1123,7 +1123,7 @@ impl<'a> Parser<'a> {
11231123
| Keyword::CURRENT_DATE
11241124
| Keyword::LOCALTIME
11251125
| Keyword::LOCALTIMESTAMP => {
1126-
Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?))
1126+
Ok(Some(self.parse_time_functions(ObjectName(vec![w.clone().into_ident(w_span)]))?))
11271127
}
11281128
Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
11291129
Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
@@ -1148,7 +1148,7 @@ impl<'a> Parser<'a> {
11481148
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
11491149
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
11501150
Keyword::POSITION if self.peek_token_ref().token == Token::LParen => {
1151-
Ok(Some(self.parse_position_expr(w.to_ident(w_span))?))
1151+
Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?))
11521152
}
11531153
Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)),
11541154
Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)),
@@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
11671167
let query = self.parse_query()?;
11681168
self.expect_token(&Token::RParen)?;
11691169
Ok(Some(Expr::Function(Function {
1170-
name: ObjectName(vec![w.to_ident(w_span)]),
1170+
name: ObjectName(vec![w.clone().into_ident(w_span)]),
11711171
uses_odbc_syntax: false,
11721172
parameters: FunctionArguments::None,
11731173
args: FunctionArguments::Subquery(query),
@@ -1203,11 +1203,12 @@ impl<'a> Parser<'a> {
12031203
w_span: Span,
12041204
) -> Result<Expr, ParserError> {
12051205
match self.peek_token().token {
1206-
Token::Period => {
1207-
self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![])
1208-
}
1206+
Token::Period => self.parse_compound_field_access(
1207+
Expr::Identifier(w.clone().into_ident(w_span)),
1208+
vec![],
1209+
),
12091210
Token::LParen => {
1210-
let id_parts = vec![w.to_ident(w_span)];
1211+
let id_parts = vec![w.clone().into_ident(w_span)];
12111212
if let Some(expr) = self.parse_outer_join_expr(&id_parts) {
12121213
Ok(expr)
12131214
} else {
@@ -1220,7 +1221,7 @@ impl<'a> Parser<'a> {
12201221
}
12211222
Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) =>
12221223
{
1223-
let ident = Expr::Identifier(w.to_ident(w_span));
1224+
let ident = Expr::Identifier(w.clone().into_ident(w_span));
12241225
let mut fields = vec![];
12251226
self.parse_multi_dim_subscript(&mut fields)?;
12261227
self.parse_compound_field_access(ident, fields)
@@ -1250,11 +1251,11 @@ impl<'a> Parser<'a> {
12501251
Token::Arrow if self.dialect.supports_lambda_functions() => {
12511252
self.expect_token(&Token::Arrow)?;
12521253
Ok(Expr::Lambda(LambdaFunction {
1253-
params: OneOrManyWithParens::One(w.to_ident(w_span)),
1254+
params: OneOrManyWithParens::One(w.clone().into_ident(w_span)),
12541255
body: Box::new(self.parse_expr()?),
12551256
}))
12561257
}
1257-
_ => Ok(Expr::Identifier(w.to_ident(w_span))),
1258+
_ => Ok(Expr::Identifier(w.clone().into_ident(w_span))),
12581259
}
12591260
}
12601261

@@ -1438,7 +1439,7 @@ impl<'a> Parser<'a> {
14381439
} else {
14391440
let tok = self.next_token();
14401441
let key = match tok.token {
1441-
Token::Word(word) => word.to_ident(tok.span),
1442+
Token::Word(word) => word.into_ident(tok.span),
14421443
_ => {
14431444
return parser_err!(
14441445
format!("Expected identifier, found: {tok}"),
@@ -1490,7 +1491,7 @@ impl<'a> Parser<'a> {
14901491
let next_token = self.next_token();
14911492
match next_token.token {
14921493
Token::Word(w) => {
1493-
let expr = Expr::Identifier(w.to_ident(next_token.span));
1494+
let expr = Expr::Identifier(w.into_ident(next_token.span));
14941495
chain.push(AccessExpr::Dot(expr));
14951496
if self.peek_token().token == Token::LBracket {
14961497
if self.dialect.supports_partiql() {
@@ -1670,7 +1671,7 @@ impl<'a> Parser<'a> {
16701671
while p.consume_token(&Token::Period) {
16711672
let tok = p.next_token();
16721673
let name = match tok.token {
1673-
Token::Word(word) => word.to_ident(tok.span),
1674+
Token::Word(word) => word.into_ident(tok.span),
16741675
_ => return p.expected("identifier", tok),
16751676
};
16761677
let func = match p.parse_function(ObjectName(vec![name]))? {
@@ -8252,7 +8253,7 @@ impl<'a> Parser<'a> {
82528253
// This because snowflake allows numbers as placeholders
82538254
let next_token = self.next_token();
82548255
let ident = match next_token.token {
8255-
Token::Word(w) => Ok(w.to_ident(next_token.span)),
8256+
Token::Word(w) => Ok(w.into_ident(next_token.span)),
82568257
Token::Number(w, false) => Ok(Ident::new(w)),
82578258
_ => self.expected("placeholder", next_token),
82588259
}?;
@@ -8763,7 +8764,7 @@ impl<'a> Parser<'a> {
87638764
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
87648765
// not an alias.)
87658766
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
8766-
Ok(Some(w.to_ident(next_token.span)))
8767+
Ok(Some(w.into_ident(next_token.span)))
87678768
}
87688769
// MSSQL supports single-quoted strings as aliases for columns
87698770
// We accept them as table aliases too, although MSSQL does not.
@@ -8930,7 +8931,7 @@ impl<'a> Parser<'a> {
89308931
loop {
89318932
match &self.peek_token_ref().token {
89328933
Token::Word(w) => {
8933-
idents.push(w.to_ident(self.peek_token_ref().span));
8934+
idents.push(w.clone().into_ident(self.peek_token_ref().span));
89348935
}
89358936
Token::EOF | Token::Eq => break,
89368937
_ => {}
@@ -8985,7 +8986,7 @@ impl<'a> Parser<'a> {
89858986
// expecting at least one word for identifier
89868987
let next_token = self.next_token();
89878988
match next_token.token {
8988-
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
8989+
Token::Word(w) => idents.push(w.into_ident(next_token.span)),
89898990
Token::EOF => {
89908991
return Err(ParserError::ParserError(
89918992
"Empty input when parsing identifier".to_string(),
@@ -9005,7 +9006,7 @@ impl<'a> Parser<'a> {
90059006
Token::Period => {
90069007
let next_token = self.next_token();
90079008
match next_token.token {
9008-
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
9009+
Token::Word(w) => idents.push(w.into_ident(next_token.span)),
90099010
Token::EOF => {
90109011
return Err(ParserError::ParserError(
90119012
"Trailing period in identifier".to_string(),
@@ -9034,7 +9035,7 @@ impl<'a> Parser<'a> {
90349035
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
90359036
let next_token = self.next_token();
90369037
match next_token.token {
9037-
Token::Word(w) => Ok(w.to_ident(next_token.span)),
9038+
Token::Word(w) => Ok(w.into_ident(next_token.span)),
90389039
Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
90399040
Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
90409041
_ => self.expected("identifier", next_token),
@@ -9054,9 +9055,10 @@ impl<'a> Parser<'a> {
90549055
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> {
90559056
match self.peek_token().token {
90569057
Token::Word(w) => {
9058+
let quote_style_is_none = w.quote_style.is_none();
90579059
let mut requires_whitespace = false;
9058-
let mut ident = w.to_ident(self.next_token().span);
9059-
if w.quote_style.is_none() {
9060+
let mut ident = w.into_ident(self.next_token().span);
9061+
if quote_style_is_none {
90609062
while matches!(self.peek_token_no_skip().token, Token::Minus) {
90619063
self.next_token();
90629064
ident.value.push('-');
@@ -13485,13 +13487,23 @@ impl<'a> Parser<'a> {
1348513487
}
1348613488

1348713489
impl Word {
13490+
#[deprecated(since = "0.54.0", note = "please use `into_ident` instead")]
1348813491
pub fn to_ident(&self, span: Span) -> Ident {
1348913492
Ident {
1349013493
value: self.value.clone(),
1349113494
quote_style: self.quote_style,
1349213495
span,
1349313496
}
1349413497
}
13498+
13499+
/// Convert this word into an [`Ident`] identifier
13500+
pub fn into_ident(self, span: Span) -> Ident {
13501+
Ident {
13502+
value: self.value,
13503+
quote_style: self.quote_style,
13504+
span,
13505+
}
13506+
}
1349513507
}
1349613508

1349713509
#[cfg(test)]

0 commit comments

Comments (0)