Skip to content

Commit eeb9463

Browse files
author
xiaoheng.xxh
committed
open source xml extractor source codes
1 parent a37402a commit eeb9463

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2295
-0
lines changed

language/xml/extractor/LEGAL.md

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Legal Disclaimer
2+
3+
Within this source code, the comments in Chinese shall be the original, governing version. Any comments in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.
4+
5+
法律免责声明
6+
7+
关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。

language/xml/extractor/README.md

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Introduction
2+
The codefuse-query XML extractor transforms XML source files into standardized coref-xml data, which codefuse-query then uses for further analysis.
3+
4+
# Quick Start
5+
1. Set `JAVA_HOME`. Execute `echo $JAVA_HOME` to display its current setting. If the output is empty, `JAVA_HOME` has not been configured yet.
6+
2. Build. Execute `mvn clean install`.
7+
3. Run. Execute `java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`.
8+
9+
After execution, a file named coref_xml_src.db will be generated in the ./db directory.

language/xml/extractor/README_cn.md

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# 简介
2+
Codefuse-query XML 提取器将 XML 文件的源代码转换为标准化的 coref-xml 数据,这些数据用于 codefuse-query 进行进一步分析。
3+
4+
# 快速开始
5+
1. 设置 JAVA_HOME。执行 echo $JAVA_HOME 来显示当前的设置。如果显示为空,则表示尚未配置。
6+
2. 构建。执行 mvn clean install。
7+
3. 运行。执行 java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db。
8+
9+
执行后,一个名为 coref_xml_src.db 的文件将生成在 ./db 目录下。
Binary file not shown.

language/xml/extractor/pom.xml

+170
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<groupId>com.alipay.codequery</groupId>
6+
<artifactId>xml-extractor</artifactId>
7+
<version>1.0-SNAPSHOT</version>
8+
9+
<packaging>jar</packaging>
10+
11+
<name>xml-extractor</name>
12+
<url>http://maven.apache.org</url>
13+
14+
<properties>
15+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16+
</properties>
17+
18+
<dependencies>
19+
<dependency>
20+
<groupId>junit</groupId>
21+
<artifactId>junit</artifactId>
22+
<version>4.12</version>
23+
<scope>test</scope>
24+
</dependency>
25+
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
26+
<dependency>
27+
<groupId>org.apache.commons</groupId>
28+
<artifactId>commons-lang3</artifactId>
29+
<version>3.11</version>
30+
</dependency>
31+
32+
<!-- https://mvnrepository.com/artifact/stax/stax-api -->
33+
<dependency>
34+
<groupId>stax</groupId>
35+
<artifactId>stax-api</artifactId>
36+
<version>1.0.1</version>
37+
</dependency>
38+
39+
<dependency>
40+
<groupId>org.codehaus.woodstox</groupId>
41+
<artifactId>stax2-api</artifactId>
42+
<version>4.2</version>
43+
</dependency>
44+
45+
<dependency>
46+
<groupId>com.fasterxml.woodstox</groupId>
47+
<artifactId>woodstox-core</artifactId>
48+
<version>6.4.1-SNAPSHOT</version>
49+
<scope>system</scope>
50+
<systemPath>${project.basedir}/lib/woodstox-core-6.4.1-SNAPSHOT.jar</systemPath>
51+
</dependency>
52+
53+
<dependency>
54+
<groupId>org.projectlombok</groupId>
55+
<artifactId>lombok</artifactId>
56+
<version>1.18.16</version>
57+
<scope>provided</scope>
58+
</dependency>
59+
60+
<dependency>
61+
<groupId>org.xerial</groupId>
62+
<artifactId>sqlite-jdbc</artifactId>
63+
<version>3.36.0.2</version>
64+
</dependency>
65+
66+
<dependency>
67+
<groupId>org.mybatis</groupId>
68+
<artifactId>mybatis</artifactId>
69+
<version>3.5.6</version>
70+
</dependency>
71+
72+
<dependency>
73+
<groupId>tk.mybatis</groupId>
74+
<artifactId>mapper</artifactId>
75+
<!-- 建议使用最新版本,最新版本请从项目首页查找 -->
76+
<version>4.1.5</version>
77+
</dependency>
78+
79+
<!--
  Log4j 2.14.1 is affected by CVE-2021-44228 (Log4Shell) and the follow-up
  CVE-2021-45046 / CVE-2021-45105 / CVE-2021-44832. 2.17.1 contains the fixes
  and still runs on Java 8. Keep all three Log4j artifacts on the same version.
-->
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-core</artifactId>
    <version>2.17.1</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-api</artifactId>
    <version>2.17.1</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-slf4j-impl</artifactId>
    <version>2.17.1</version>
</dependency>
94+
95+
</dependencies>
96+
<build>
97+
<plugins>
98+
<plugin>
99+
<groupId>org.apache.maven.plugins</groupId>
100+
<artifactId>maven-compiler-plugin</artifactId>
101+
<configuration>
102+
<source>8</source>
103+
<target>8</target>
104+
</configuration>
105+
</plugin>
106+
<plugin>
107+
<groupId>org.apache.maven.plugins</groupId>
108+
<artifactId>maven-surefire-plugin</artifactId>
109+
<version>2.4.2</version>
110+
<configuration>
111+
<skipTests>true</skipTests>
112+
</configuration>
113+
</plugin>
114+
<plugin>
115+
<groupId>org.mybatis.generator</groupId>
116+
<artifactId>mybatis-generator-maven-plugin</artifactId>
117+
<version>1.3.7</version>
118+
<configuration>
119+
<verbose>true</verbose>
120+
<overwrite>true</overwrite>
121+
</configuration>
122+
<dependencies>
123+
<dependency>
124+
<groupId>org.xerial</groupId>
125+
<artifactId>sqlite-jdbc</artifactId>
126+
<version>3.36.0.2</version>
127+
</dependency>
128+
<dependency>
129+
<groupId>tk.mybatis</groupId>
130+
<artifactId>mapper</artifactId>
131+
<version>4.1.5</version>
132+
</dependency>
133+
</dependencies>
134+
<executions>
135+
<execution>
136+
<id>Generate MyBatis Artifacts</id>
137+
<goals>
138+
<goal>generate</goal>
139+
</goals>
140+
</execution>
141+
</executions>
142+
</plugin>
143+
<plugin>
144+
<groupId>org.apache.maven.plugins</groupId>
145+
<artifactId>maven-assembly-plugin</artifactId>
146+
<version>2.5.5</version>
147+
<configuration>
148+
<archive>
149+
<manifest>
150+
<mainClass>com.alipay.codequery.Extractor</mainClass>
151+
</manifest>
152+
</archive>
153+
<descriptorRefs>
154+
<descriptorRef>jar-with-dependencies</descriptorRef>
155+
</descriptorRefs>
156+
</configuration>
157+
<executions>
158+
<execution>
159+
<id>make-assembly</id>
160+
<phase>package</phase>
161+
<goals>
162+
<goal>single</goal>
163+
</goals>
164+
</execution>
165+
</executions>
166+
</plugin>
167+
168+
</plugins>
169+
</build>
170+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
2+
package com.alipay.codequery;
3+
import com.alipay.codequery.stax.StaxCorefExtractor;
4+
import com.alipay.codequery.util.CorefStorage;
5+
import com.alipay.codequery.util.LoggerUtil;
6+
import org.apache.logging.log4j.Level;
7+
import org.apache.logging.log4j.LogManager;
8+
import org.apache.logging.log4j.Logger;
9+
10+
import javax.xml.stream.XMLStreamException;
11+
import java.io.File;
12+
import java.io.IOException;
13+
14+
public class Extractor {
15+
private static final Logger logger = LogManager.getLogger(Extractor.class);
16+
public static final String XML_EXT = ".xml";
17+
public static final String AXML_EXT = ".axml";
18+
public static final String[] FILE_EXT_ARRAY = {
19+
XML_EXT,
20+
AXML_EXT,
21+
};
22+
23+
public static void main(String[] args) throws IOException, XMLStreamException {
24+
LoggerUtil.initLogger(Level.INFO);
25+
26+
long start = System.currentTimeMillis();
27+
// repoDir和destDir是设置的本地测试目录,在生产中会被替换掉
28+
String repoDir = "";
29+
String destDir = "";
30+
if (args.length > 0) {
31+
repoDir = args[0];
32+
}
33+
if (args.length > 1) {
34+
destDir = args[1];
35+
}
36+
if (!destDir.endsWith(File.separator)) {
37+
destDir += File.separator;
38+
}
39+
CorefStorage corefStorage = new CorefStorage(destDir);
40+
File sourceDir = new File(repoDir);
41+
parse(sourceDir, sourceDir, corefStorage);
42+
logger.info("Time to completion (TTC): " + (System.currentTimeMillis() - start));
43+
}
44+
45+
private static void parse(File sourceDir, File rootDir, CorefStorage corefStorage) {
46+
File[] files = rootDir.listFiles();
47+
if (files == null) {
48+
return;
49+
}
50+
for (File file: files) {
51+
if (file.isDirectory()) {
52+
parse(sourceDir, file, corefStorage);
53+
} else {
54+
for (String fileExt: FILE_EXT_ARRAY) {
55+
if (file.getName().endsWith(fileExt)) {
56+
logger.info("Start Extracting xml file: {}", file.getAbsolutePath());
57+
try {
58+
StaxCorefExtractor extractor = new StaxCorefExtractor(file, corefStorage, sourceDir.getAbsolutePath());
59+
extractor.parse();
60+
} catch (Exception e) {
61+
logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath());
62+
}
63+
}
64+
}
65+
}
66+
}
67+
}
68+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
package com.alipay.codequery.dal.mybatis.domain;
2+
3+
import javax.persistence.*;
4+
5+
@Table(name = "xml_attribute")
6+
public class XmlAttribute {
7+
@Id
8+
private Integer id;
9+
10+
@Column(name = "element_id")
11+
private Integer elementId;
12+
13+
private String name;
14+
15+
private String value;
16+
17+
@Column(name = "index_order")
18+
private Integer indexOrder;
19+
20+
@Column(name = "location_id")
21+
private Integer locationId;
22+
23+
public XmlAttribute(Integer id, Integer elementId, String name, String value, Integer indexOrder, Integer locationId) {
24+
this.id = id;
25+
this.elementId = elementId;
26+
this.name = name;
27+
this.value = value;
28+
this.indexOrder = indexOrder;
29+
this.locationId = locationId;
30+
}
31+
32+
public XmlAttribute() {
33+
super();
34+
}
35+
36+
/**
37+
* @return id
38+
*/
39+
public Integer getId() {
40+
return id;
41+
}
42+
43+
/**
44+
* @param id
45+
*/
46+
public void setId(Integer id) {
47+
this.id = id;
48+
}
49+
50+
/**
51+
* @return element_id
52+
*/
53+
public Integer getElementId() {
54+
return elementId;
55+
}
56+
57+
/**
58+
* @param elementId
59+
*/
60+
public void setElementId(Integer elementId) {
61+
this.elementId = elementId;
62+
}
63+
64+
/**
65+
* @return name
66+
*/
67+
public String getName() {
68+
return name;
69+
}
70+
71+
/**
72+
* @param name
73+
*/
74+
public void setName(String name) {
75+
this.name = name == null ? null : name.trim();
76+
}
77+
78+
/**
79+
* @return value
80+
*/
81+
public String getValue() {
82+
return value;
83+
}
84+
85+
/**
86+
* @param value
87+
*/
88+
public void setValue(String value) {
89+
this.value = value == null ? null : value.trim();
90+
}
91+
92+
/**
93+
* @return index_order
94+
*/
95+
public Integer getIndexOrder() {
96+
return indexOrder;
97+
}
98+
99+
/**
100+
* @param indexOrder
101+
*/
102+
public void setIndexOrder(Integer indexOrder) {
103+
this.indexOrder = indexOrder;
104+
}
105+
106+
/**
107+
* @return location_id
108+
*/
109+
public Integer getLocationId() {
110+
return locationId;
111+
}
112+
113+
/**
114+
* @param locationId
115+
*/
116+
public void setLocationId(Integer locationId) {
117+
this.locationId = locationId;
118+
}
119+
}

0 commit comments

Comments
 (0)