Skip to content

Commit f7444b2

Browse files
committed
Add rewriter for multiple similar APPROX_PERCENTILEs in SELECT clause
1 parent 14b2319 commit f7444b2

File tree

8 files changed

+608
-0
lines changed

8 files changed

+608
-0
lines changed

pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
<modules>
2525
<module>parser</module>
2626
<module>linter</module>
27+
<module>rewriter</module>
2728
</modules>
2829

2930
<build>

rewriter/pom.xml

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<parent>
6+
<groupId>com.facebook.presto</groupId>
7+
<artifactId>presto-coresql</artifactId>
8+
<version>0.2-SNAPSHOT</version>
9+
</parent>
10+
11+
<artifactId>presto-coresql-rewriter</artifactId>
12+
<name>presto-coresql-rewriter</name>
13+
14+
<properties>
    <air.main.basedir>${project.parent.basedir}</air.main.basedir>
    <!-- The rewriter sources use Java 8 features (lambdas, streams, Map.putIfAbsent),
         so the compiler level must be at least 1.8; this also matches the
         modernizer-maven-plugin javaVersion configured in this POM. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
</properties>
19+
20+
<dependencies>
21+
<dependency>
22+
<groupId>junit</groupId>
23+
<artifactId>junit</artifactId>
24+
<version>4.12</version>
25+
<scope>test</scope>
26+
</dependency>
27+
28+
<dependency>
29+
<groupId>org.testng</groupId>
30+
<artifactId>testng</artifactId>
31+
<scope>test</scope>
32+
</dependency>
33+
34+
<dependency>
35+
<groupId>com.facebook.presto</groupId>
36+
<artifactId>presto-coresql-parser</artifactId>
37+
<version>0.2-SNAPSHOT</version>
38+
</dependency>
39+
40+
<dependency>
41+
<groupId>com.google.guava</groupId>
42+
<artifactId>guava</artifactId>
43+
</dependency>
44+
45+
<dependency>
46+
<groupId>com.fasterxml.jackson.core</groupId>
47+
<artifactId>jackson-annotations</artifactId>
48+
</dependency>
49+
</dependencies>
50+
<build>
51+
<plugins>
52+
<plugin>
53+
<groupId>org.codehaus.mojo</groupId>
54+
<artifactId>build-helper-maven-plugin</artifactId>
55+
<executions>
56+
<execution>
57+
<phase>generate-sources</phase>
58+
<goals>
59+
<goal>add-source</goal>
60+
</goals>
61+
<configuration>
62+
<sources>
63+
<source>${project.build.directory}/generated-sources</source>
64+
</sources>
65+
</configuration>
66+
</execution>
67+
</executions>
68+
</plugin>
69+
70+
<plugin>
71+
<groupId>com.facebook.presto</groupId>
72+
<artifactId>presto-maven-plugin</artifactId>
73+
<version>0.3</version>
74+
<extensions>true</extensions>
75+
</plugin>
76+
77+
<plugin>
78+
<groupId>org.apache.maven.plugins</groupId>
79+
<artifactId>maven-shade-plugin</artifactId>
80+
<version>3.1.1</version>
81+
</plugin>
82+
83+
<plugin>
84+
<groupId>org.skife.maven</groupId>
85+
<artifactId>really-executable-jar-maven-plugin</artifactId>
86+
<version>1.0.5</version>
87+
</plugin>
88+
89+
<plugin>
90+
<groupId>org.apache.maven.plugins</groupId>
91+
<artifactId>maven-antrun-plugin</artifactId>
92+
<version>1.8</version>
93+
</plugin>
94+
95+
<plugin>
96+
<groupId>io.airlift.maven.plugins</groupId>
97+
<artifactId>sphinx-maven-plugin</artifactId>
98+
<version>2.1</version>
99+
</plugin>
100+
101+
<plugin>
102+
<groupId>org.apache.maven.plugins</groupId>
103+
<artifactId>maven-enforcer-plugin</artifactId>
104+
<configuration>
105+
<rules>
106+
<requireUpperBoundDeps>
107+
<excludes combine.children="append">
108+
<!-- TODO: fix this in Airlift resolver -->
109+
<exclude>org.alluxio:alluxio-shaded-client</exclude>
110+
<exclude>org.codehaus.plexus:plexus-utils</exclude>
111+
<exclude>com.google.guava:guava</exclude>
112+
</excludes>
113+
</requireUpperBoundDeps>
114+
</rules>
115+
</configuration>
116+
</plugin>
117+
118+
<plugin>
119+
<groupId>org.apache.maven.plugins</groupId>
120+
<artifactId>maven-release-plugin</artifactId>
121+
<configuration>
122+
<preparationGoals>clean verify -DskipTests</preparationGoals>
123+
</configuration>
124+
</plugin>
125+
126+
<plugin>
127+
<groupId>org.apache.maven.plugins</groupId>
128+
<artifactId>maven-compiler-plugin</artifactId>
129+
<configuration combine.children="append">
130+
<fork>true</fork>
131+
<compilerArgs>
132+
<arg>-verbose</arg>
133+
<arg>-J-Xss100M</arg>
134+
</compilerArgs>
135+
</configuration>
136+
</plugin>
137+
138+
<plugin>
139+
<groupId>org.apache.maven.plugins</groupId>
140+
<artifactId>maven-surefire-plugin</artifactId>
141+
<configuration combine.children="append">
142+
<includes>
143+
<include>**/*.java</include>
144+
<include>target/**/*.java</include>
145+
<include>**/Benchmark*.java</include>
146+
</includes>
147+
<excludes>
148+
<exclude>**/*jmhTest*.java</exclude>
149+
<exclude>**/*jmhType*.java</exclude>
150+
</excludes>
151+
</configuration>
152+
</plugin>
153+
154+
<!-- Always build a jar with the test classes -->
155+
<plugin>
156+
<groupId>org.apache.maven.plugins</groupId>
157+
<artifactId>maven-jar-plugin</artifactId>
158+
<configuration>
159+
<!-- do not build an empty jar if the project is
160+
e.g. a pom project -->
161+
<skipIfEmpty>true</skipIfEmpty>
162+
<archive>
163+
<manifest>
164+
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
165+
<addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
166+
<addClasspath>false</addClasspath>
167+
</manifest>
168+
</archive>
169+
</configuration>
170+
</plugin>
171+
</plugins>
172+
<pluginManagement>
173+
<plugins>
174+
<plugin>
175+
<groupId>org.gaul</groupId>
176+
<artifactId>modernizer-maven-plugin</artifactId>
177+
<version>2.1.0</version>
178+
<configuration>
179+
<javaVersion>1.8</javaVersion>
180+
</configuration>
181+
</plugin>
182+
</plugins>
183+
</pluginManagement>
184+
</build>
185+
</project>
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
15+
package com.facebook.coresql.rewriter;
16+
17+
import com.facebook.coresql.parser.AstNode;
18+
import com.facebook.coresql.parser.FunctionCall;
19+
import com.facebook.coresql.parser.SqlParserDefaultVisitor;
20+
import com.facebook.coresql.parser.Unparser;
21+
import com.google.common.collect.ArrayListMultimap;
22+
import com.google.common.collect.ImmutableListMultimap;
23+
import com.google.common.collect.Multimap;
24+
25+
import java.util.ArrayList;
26+
import java.util.Formatter;
27+
import java.util.HashMap;
28+
import java.util.List;
29+
import java.util.Map;
30+
31+
import static com.facebook.coresql.parser.ParserHelper.parseStatement;
32+
import static com.facebook.coresql.parser.SqlParserTreeConstants.JJTARGUMENTLIST;
33+
import static com.facebook.coresql.parser.SqlParserTreeConstants.JJTIDENTIFIER;
34+
import static com.facebook.coresql.parser.SqlParserTreeConstants.JJTUNSIGNEDNUMERICLITERAL;
35+
import static java.util.Collections.binarySearch;
36+
import static java.util.Collections.sort;
37+
import static java.util.Objects.requireNonNull;
38+
39+
public class ApproxPercentileRewriter
40+
extends Rewriter
41+
{
42+
private final PatternMatcher<Multimap<String, AstNode>> matcher;
43+
private static final String REPLACEMENT = " APPROX_PERCENTILE(%s, ARRAY%s)[%d]";
44+
private Multimap<String, AstNode> firstArgMap; // A map of String to the APPROX_PERCENTILE nodes with that String as its first argument
45+
private Map<String, ArrayList<Double>> percentiles;
46+
private static final String REWRITE_NAME = "Multiple APPROX PERCENTILE with same first arg and literal second arg";
47+
48+
public ApproxPercentileRewriter()
49+
{
50+
this.matcher = new ApproxPercentilePatternMatcher();
51+
this.firstArgMap = ArrayListMultimap.create();
52+
this.percentiles = new HashMap<>();
53+
}
54+
55+
@Override
56+
public boolean rewritePatternIsPresent(String sql)
57+
{
58+
AstNode root = requireNonNull(parseStatement(sql));
59+
firstArgMap = matcher.matchPattern(root);
60+
return firstArgMap.keySet().stream().anyMatch(key -> firstArgMap.get(key).size() >= 2);
61+
}
62+
63+
@Override
64+
public RewriteResult rewrite(String sql)
65+
{
66+
AstNode root = requireNonNull(parseStatement(sql));
67+
this.firstArgMap = matcher.matchPattern(root);
68+
getPercentilesFromFirstArgMap();
69+
String rewrittenSql = Unparser.unparse(root, this);
70+
return new RewriteResult(REWRITE_NAME, sql, rewrittenSql);
71+
}
72+
73+
@Override
74+
public void visit(FunctionCall node, Void data)
75+
{
76+
if (canRewrite(node)) {
77+
applyRewrite(node);
78+
}
79+
else {
80+
defaultVisit(node, data);
81+
}
82+
}
83+
84+
/**
85+
* Generates a rewritten version of the current subtree.
86+
*
87+
* @param node The function call node we're rewriting
88+
*/
89+
private void applyRewrite(AstNode node)
90+
{
91+
// First, unparse up to the node. This ensures we don't miss any special tokens
92+
unparseUpto((AstNode) node.jjtGetChild(0));
93+
// Then, add the rewritten version to the Unparser
94+
String firstArg = getFirstArgAsString(node);
95+
Double secondArg = getSecondArgAsDouble(node);
96+
97+
Formatter formatter = new Formatter(stringBuilder);
98+
formatter.format(REPLACEMENT, firstArg, percentiles.get(firstArg), binarySearch(percentiles.get(firstArg), secondArg) + 1);
99+
// Move to end of this node -- we've already put in a rewritten version of it, so we don't need to unparse it
100+
moveToEndOfNode(node);
101+
}
102+
103+
private String getFirstArgAsString(AstNode approxPercentile)
104+
{
105+
AstNode args = approxPercentile.GetFirstChildOfKind(JJTARGUMENTLIST);
106+
AstNode firstArg = (AstNode) args.jjtGetChild(0);
107+
return Unparser.unparse(firstArg);
108+
}
109+
110+
private Double getSecondArgAsDouble(AstNode approxPercentile)
111+
{
112+
AstNode args = approxPercentile.GetFirstChildOfKind(JJTARGUMENTLIST);
113+
AstNode secondArg = (AstNode) args.jjtGetChild(1);
114+
return Double.parseDouble(Unparser.unparse(secondArg));
115+
}
116+
117+
private boolean canRewrite(AstNode node)
118+
{
119+
String firstArg = getFirstArgAsString(node);
120+
return firstArgMap.containsValue(node) && firstArgMap.get(firstArg).size() >= 2;
121+
}
122+
123+
private void getPercentilesFromFirstArgMap()
124+
{
125+
// Map each first argument to a list of the percentiles of the APPROX_PERCENTILE nodes that have that first argument
126+
for (Map.Entry<String, AstNode> entry : firstArgMap.entries()) {
127+
String firstArg = entry.getKey();
128+
AstNode approxPercentileNode = entry.getValue();
129+
percentiles.putIfAbsent(firstArg, new ArrayList<>());
130+
List<Double> percentilesWithThisFirstArg = percentiles.get(firstArg);
131+
percentilesWithThisFirstArg.add(getSecondArgAsDouble(approxPercentileNode));
132+
}
133+
// Sort each percentile list. This will allow binary sort downstream
134+
for (String key : percentiles.keySet()) {
135+
sort(percentiles.get(key));
136+
}
137+
}
138+
139+
private static class ApproxPercentilePatternMatcher
140+
extends SqlParserDefaultVisitor
141+
implements PatternMatcher<Multimap<String, AstNode>>
142+
{
143+
private Multimap<String, AstNode> firstArgMap; // A map of String to the APPROX_PERCENTILE nodes with that String as its first argument
144+
145+
public ApproxPercentilePatternMatcher()
146+
{ }
147+
148+
@Override
149+
public Multimap<String, AstNode> matchPattern(AstNode root)
150+
{
151+
this.firstArgMap = ArrayListMultimap.create();
152+
requireNonNull(root, "AST passed to pattern matcher was null");
153+
root.jjtAccept(this, null);
154+
return ImmutableListMultimap.copyOf(firstArgMap);
155+
}
156+
157+
@Override
158+
public void visit(FunctionCall node, Void data)
159+
{
160+
if (isApproxPercentile(node)) {
161+
AstNode argList = node.GetFirstChildOfKind(JJTARGUMENTLIST);
162+
AstNode secondArg = (AstNode) argList.jjtGetChild(1);
163+
if (!isUnsignedLiteral(secondArg)) {
164+
return;
165+
}
166+
AstNode firstArg = (AstNode) argList.jjtGetChild(0);
167+
String firstArgAsString = Unparser.unparse(firstArg);
168+
firstArgMap.put(firstArgAsString, node);
169+
}
170+
defaultVisit(node, data);
171+
}
172+
173+
public static boolean isUnsignedLiteral(AstNode node)
174+
{
175+
return node.getId() == JJTUNSIGNEDNUMERICLITERAL;
176+
}
177+
178+
private static boolean isApproxPercentile(AstNode node)
179+
{
180+
AstNode identifier = node.GetFirstChildOfKind(JJTIDENTIFIER);
181+
if (identifier == null) {
182+
return false;
183+
}
184+
String image = identifier.GetImage();
185+
return image != null && image.equalsIgnoreCase("APPROX_PERCENTILE");
186+
}
187+
}
188+
}

0 commit comments

Comments
 (0)