Skip to content

Commit 9706a9d

Browse files
zzcclp authored and hit-lacus committed
KYLIN-5187 Support soft affinity and local cache feature
1. Implement LocalDataCacheManager 2. Based on xiaoxiang's PR 3. Implement CacheFileScanRDD 4. Implement AbstractCacheFileSystem 5. Optimize performance 6. Support soft affinity for HDFS 7. Support reading data through a ByteBuffer, to avoid reading data one byte at a time 8. Support caching small files in memory: ByteBufferPageStore extends PageStore to support caching data in memory 9. Pre-init KylinCacheFileSystem to fix s3a issue 10. Upgrade alluxio client version to 2.7.4
1 parent fd4a472 commit 9706a9d

File tree

30 files changed

+2230
-22
lines changed

30 files changed

+2230
-22
lines changed

core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java

+6-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737

3838
import org.apache.commons.lang.StringUtils;
3939
import org.apache.commons.lang.text.StrSubstitutor;
40+
import org.apache.hadoop.conf.Configuration;
4041
import org.apache.hadoop.fs.FileSystem;
4142
import org.apache.hadoop.fs.Path;
4243
import org.apache.kylin.common.annotation.ConfigTag;
@@ -295,6 +296,10 @@ public String getDeployEnv() {
295296
}
296297

297298
public String getHdfsWorkingDirectory() {
299+
return getHdfsWorkingDirectoryInternal(HadoopUtil.getCurrentConfiguration());
300+
}
301+
302+
public String getHdfsWorkingDirectoryInternal(Configuration hadoopConf) {
298303
if (cachedHdfsWorkingDirectory != null) {
299304
return cachedHdfsWorkingDirectory;
300305
}
@@ -306,7 +311,7 @@ public String getHdfsWorkingDirectory() {
306311
throw new IllegalArgumentException("kylin.env.hdfs-working-dir must be absolute, but got " + root);
307312

308313
try {
309-
FileSystem fs = path.getFileSystem(HadoopUtil.getCurrentConfiguration());
314+
FileSystem fs = path.getFileSystem(hadoopConf);
310315
path = fs.makeQualified(path);
311316
} catch (IOException e) {
312317
throw new RuntimeException(e);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one
3+
-- or more contributor license agreements. See the NOTICE file
4+
-- distributed with this work for additional information
5+
-- regarding copyright ownership. The ASF licenses this file
6+
-- to you under the Apache License, Version 2.0 (the
7+
-- "License"); you may not use this file except in compliance
8+
-- with the License. You may obtain a copy of the License at
9+
--
10+
-- http://www.apache.org/licenses/LICENSE-2.0
11+
--
12+
-- Unless required by applicable law or agreed to in writing, software
13+
-- distributed under the License is distributed on an "AS IS" BASIS,
14+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
-- See the License for the specific language governing permissions and
16+
-- limitations under the License.
17+
--
18+
19+
SELECT test_kylin_fact.cal_dt,cast(timestampdiff(DAY,date'2013-01-01',test_kylin_fact.cal_dt) as integer) as x,sum(price) as y
20+
FROM TEST_KYLIN_FACT
21+
22+
inner JOIN edw.test_cal_dt as test_cal_dt
23+
ON test_kylin_fact.cal_dt = test_cal_dt.cal_dt
24+
inner JOIN test_category_groupings
25+
ON test_kylin_fact.leaf_categ_id = test_category_groupings.leaf_categ_id AND test_kylin_fact.lstg_site_id = test_category_groupings.site_id
26+
inner JOIN edw.test_sites as test_sites
27+
ON test_kylin_fact.lstg_site_id = test_sites.site_id
28+
GROUP BY test_kylin_fact.cal_dt
29+
ORDER BY test_kylin_fact.cal_dt
30+
;{"scanRowCount":1462,"scanBytes":215217,"scanFiles":2,"cuboidId":262144}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!--
3+
Licensed to the Apache Software Foundation (ASF) under one
4+
or more contributor license agreements. See the NOTICE file
5+
distributed with this work for additional information
6+
regarding copyright ownership. The ASF licenses this file
7+
to you under the Apache License, Version 2.0 (the
8+
"License"); you may not use this file except in compliance
9+
with the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
-->
19+
20+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
21+
<modelVersion>4.0.0</modelVersion>
22+
23+
<artifactId>kylin-soft-affinity-cache</artifactId>
24+
<packaging>jar</packaging>
25+
<name>Apache Kylin 4.X - Soft Affinity and Cache</name>
26+
27+
<parent>
28+
<groupId>org.apache.kylin</groupId>
29+
<artifactId>kylin-spark-project</artifactId>
30+
<version>4.0.2-SNAPSHOT</version>
31+
<relativePath>../pom.xml</relativePath>
32+
</parent>
33+
34+
<properties>
35+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
36+
<alluxio.version>2.7.4</alluxio.version>
37+
</properties>
38+
39+
<dependencies>
40+
<dependency>
41+
<groupId>org.apache.spark</groupId>
42+
<artifactId>spark-core_${scala.binary.version}</artifactId>
43+
<version>${spark.version}</version>
44+
<scope>provided</scope>
45+
<exclusions>
46+
<exclusion>
47+
<groupId>org.apache.hadoop</groupId>
48+
<artifactId>hadoop-client</artifactId>
49+
</exclusion>
50+
</exclusions>
51+
</dependency>
52+
<dependency>
53+
<groupId>org.alluxio</groupId>
54+
<artifactId>alluxio-shaded-client</artifactId>
55+
<version>${alluxio.version}</version>
56+
<scope>provided</scope>
57+
</dependency>
58+
</dependencies>
59+
60+
<build>
61+
<plugins>
62+
<plugin>
63+
<groupId>org.codehaus.mojo</groupId>
64+
<artifactId>build-helper-maven-plugin</artifactId>
65+
<executions>
66+
<execution>
67+
<id>add-source</id>
68+
<phase>generate-sources</phase>
69+
<goals>
70+
<goal>add-source</goal>
71+
</goals>
72+
<configuration>
73+
<sources>
74+
<source>src/main/${spark.version.dir}</source>
75+
</sources>
76+
</configuration>
77+
</execution>
78+
</executions>
79+
</plugin>
80+
<plugin>
81+
<groupId>net.alchim31.maven</groupId>
82+
<artifactId>scala-maven-plugin</artifactId>
83+
<executions>
84+
<execution>
85+
<id>compile-version-dependent-source</id>
86+
<phase>process-resources</phase>
87+
<goals>
88+
<goal>compile</goal>
89+
</goals>
90+
<configuration>
91+
<sourceDir>${spark.version.dir}</sourceDir>
92+
</configuration>
93+
</execution>
94+
<execution>
95+
<id>compile-common-scala-source</id>
96+
<phase>process-resources</phase>
97+
<goals>
98+
<goal>compile</goal>
99+
</goals>
100+
<configuration>
101+
<sourceDir>scala</sourceDir>
102+
</configuration>
103+
</execution>
104+
<execution>
105+
<id>scala-test-compile</id>
106+
<phase>process-test-resources</phase>
107+
<goals>
108+
<goal>testCompile</goal>
109+
</goals>
110+
</execution>
111+
</executions>
112+
</plugin>
113+
</plugins>
114+
</build>
115+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.kylin.cache;
20+
21+
/**
 * Holder for string constants of the {@code org.apache.kylin.cache} package.
 *
 * <p>The class is declared {@code final} and has a private constructor, so it
 * can be neither subclassed nor instantiated; it exists only to expose
 * {@code static final} values.
 */
public final class KylinCacheConstants {

    /** Private constructor: this class is a constants holder and is never instantiated. */
    private KylinCacheConstants() {
    }

    // TODO: change the param key name
    /**
     * String constant whose value has the form of a fully-qualified class name
     * ending in {@code KylinCacheFileSystem}. How callers use it (e.g. as a
     * configuration key, per the TODO above) is not visible from this file.
     */
    public static final String KYLIN_CACHE_FS =
            "org.apache.kylin.cache.fs.kylin.KylinCacheFileSystem";
}

0 commit comments

Comments
 (0)