Skip to content

Commit 1e2e72b

Browse files
committed
Initial commit from Ant Program Analysis Team
1 parent b75f0e2 commit 1e2e72b

File tree

245 files changed

+58891
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

245 files changed

+58891
-0
lines changed

.gitignore

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
target/
2+
!.mvn/wrapper/maven-wrapper.jar
3+
!**/src/main/**/target/
4+
!**/src/test/**/target/
5+
logs/
6+
*.log
7+
.DS_Store
8+
9+
### STS ###
10+
.apt_generated
11+
.classpath
12+
.factorypath
13+
.project
14+
.settings
15+
.springBeans
16+
.sts4-cache
17+
18+
### IntelliJ IDEA ###
19+
.idea
20+
*.iws
21+
*.iml
22+
*.ipr
23+
24+
### NetBeans ###
25+
/nbproject/private/
26+
/nbbuild/
27+
/dist/
28+
/nbdist/
29+
/.nb-gradle/
30+
build/
31+
!**/src/main/**/build/
32+
!**/src/test/**/build/
33+
34+
### VS Code ###
35+
.vscode/
36+
.cloudide

LEGAL.md

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Legal Disclaimer
2+
3+
Within this source code, the comments in Chinese shall be the original, governing version. Any comments in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail.
4+
5+
法律免责声明
6+
7+
关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。

README.md

Whitespace-only changes.

cli/database/__init__.py

Whitespace-only changes.

cli/database/create.py

+122
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import json
2+
import re
3+
import time
4+
from pathlib import Path
5+
6+
from extractor.extractor import *
7+
8+
9+
def conf_check(args):
    """Validate the paths supplied on the command line.

    Checks that ``args.source_root`` exists, creates the ``args.output``
    directory when it is missing, and, when ``args.extraction_config_file``
    is set, checks that this configuration file exists.

    Args:
        args: Parsed command-line namespace providing ``source_root``,
            ``output`` and ``extraction_config_file``.

    Returns:
        bool: ``True`` when every check passes; ``False`` otherwise (the
        reason is logged).
    """
    src_path = Path(args.source_root).expanduser().resolve()
    if not src_path.exists():
        logging.error("source not exist, Please redefine the directory or file by --source-root or -s")
        return False

    output_path = Path(args.output).expanduser().resolve()
    if not output_path.exists():
        logging.warning("%s not exists, it will be created", str(output_path))
        try:
            # exist_ok avoids a spurious failure if the directory appears
            # between the exists() check and the mkdir() call.
            output_path.mkdir(parents=True, exist_ok=True)
            logging.info("%s success build", str(output_path))
        except OSError as e:
            logging.error("can not to create database directory %s: %s", str(output_path), e)
            return False

    if args.extraction_config_file:
        # Check the configuration file itself. Checking the output directory
        # here would always succeed, because it was created just above.
        extraction_config_file_path = Path(args.extraction_config_file).expanduser().resolve()
        if not extraction_config_file_path.exists():
            logging.error("extraction_config_file: %s not exists, please redefine by --extraction-config-file",
                          args.extraction_config_file)
            return False
    return True
30+
31+
32+
def memory_statistics():
    """Log the total system memory and the JVM ``-Xmx`` value derived from it.

    The total is logged in the largest unit (B .. TB) that keeps the number
    below 1024. The ``-Xmx`` value is always computed and logged in GB with
    the same formula ``jar_extractor_cmd`` uses to build the ``java`` command
    line (total rounded to whole GB, minus 1, but never below 6), so the
    logged value matches the one that is actually passed to the JVM.
    """
    total_bytes = psutil.virtual_memory().total

    # Scale the byte count to a human-readable unit for display only.
    size_units = ["B", "KB", "MB", "GB", "TB"]
    display_value = total_bytes
    unit_index = 0
    while display_value >= 1024 and unit_index < len(size_units) - 1:
        display_value /= 1024
        unit_index += 1
    logging.info(f"current memory is : {display_value:.2f} {size_units[unit_index]}")

    # Compute -Xmx in GB regardless of the display unit; using the display
    # value would yield e.g. "511 MB" or "6 TB" on small or very large hosts.
    total_memory_gb = round(total_bytes / (1024 ** 3))
    xmx_gb = max(total_memory_gb - 1, 6)
    logging.info(f"final -Xmx is : {xmx_gb:.2f} GB")
46+
47+
48+
def conf_option_deal(args):
    """Collect per-language extraction options from the config file and CLI.

    Options are first loaded from the JSON file named by
    ``args.extraction_config_file`` (when set) and then overlaid with the
    ``language.key=value`` entries in ``args.extraction_config``. Every
    language in ``args.language`` is guaranteed to have an entry (possibly
    empty) in the result.

    Args:
        args: Parsed command-line namespace providing
            ``extraction_config_file``, ``language`` and ``extraction_config``.

    Returns:
        dict | int: A mapping ``{language: {key: value}}`` on success, or
        ``-1`` when the config file cannot be read or parsed, or an option is
        malformed or names a language that is not being extracted.
    """
    options = dict()
    if args.extraction_config_file:
        try:
            with open(args.extraction_config_file, "r", encoding="utf-8") as f:
                options = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            logging.error(e)
            return -1
        if not isinstance(options, dict):
            logging.error("extraction config file %s must contain a JSON object",
                          args.extraction_config_file)
            return -1

    for language in args.language:
        # Keep what the config file supplied; only fill in missing entries.
        # Unconditionally assigning an empty dict would discard the file's
        # options for this language.
        if options.get(language) is None:
            options[language] = dict()
        elif not isinstance(options[language], dict):
            logging.error("extraction config for %s must be a JSON object", language)
            return -1

    if args.extraction_config:
        # Each option must look like a.b=c, where a is the language name.
        # The language ends at the first '.', the key at the first '=', so the
        # value may itself contain '.' or '=' (e.g. go.go-build-flag=-tags=x).
        pattern = r'^([^.=]+)\.([^=]+)=(.+)$'
        for tmp in args.extraction_config:
            match = re.match(pattern, tmp)
            if match:
                language = match.group(1)
                key = match.group(2)
                value = match.group(3)
                # An option for a language that is not being extracted is a
                # configuration error.
                if language not in args.language:
                    logging.error("option language error: %s does not need to be extracted", language)
                    return -1
                options[language][key] = value
            else:
                logging.error("option format error: %s, it need like java.a=b", tmp)
                return -1
    return options
77+
78+
79+
def database_create(args):
    """Run the extractor of every requested language into the output directory.

    The configuration is validated first (``conf_check`` and
    ``conf_option_deal``), then the memory statistics are logged, and finally
    ``extractor_run`` is called once per language in ``args.language``. The
    time each run takes is subtracted from the ``args.timeout`` budget.

    Args:
        args: Parsed command-line namespace providing ``source_root``,
            ``output``, ``language``, ``timeout``, ``overwrite`` and the
            extraction configuration fields.

    Raises:
        ValueError: The configuration is invalid.
        RuntimeError: The timeout budget became negative after a run, or at
            least one extractor returned a non-zero code.
    """
    if not conf_check(args):
        logging.error("configuration error, Please check conf")
        raise ValueError("configuration error")

    options = conf_option_deal(args)
    if options == -1:
        logging.error("configuration error, Please check conf")
        raise ValueError("configuration error")

    memory_statistics()

    remaining = args.timeout
    failed_languages = []
    for language in args.language:
        database_dir = Path(args.output).expanduser().resolve()
        source_dir = Path(args.source_root).expanduser().resolve()
        database_file = database_dir / ("coref_" + language + "_src.db")

        # Give the user a chance to back out; with the overwrite option the
        # existing data is replaced without asking.
        if database_file.exists():
            logging.info(f"{database_file} will be create")
            keep_existing = (
                not args.overwrite
                and input(f"file {database_file} Already exists, do you want to overwrite it? (y/n): ").lower() != "y"
            )
            if keep_existing:
                logging.warning(" %s will use old version data", language)
                continue

        started_at = time.time()
        status = extractor_run(language, source_dir, database_dir, remaining, options[language])
        if status != 0:
            failed_languages.append(language)
            logging.error("%s extracting error with %s language Extractor, please check by log",
                          args.source_root, language)
        else:
            logging.info("Finish extracting data source %s with %s language Extractor, extraction is Success, "
                         "execution time is %.2fs.", args.source_root, language, time.time() - started_at)

        # Charge the elapsed time of this run against the shared budget.
        remaining = remaining - time.time() + started_at
        if remaining < 0:
            logging.error("extract fail: timeout")
            raise RuntimeError("timeout")

    if failed_languages:
        for language in failed_languages:
            logging.error("%s extract fail, please check log", language)
        logging.error("extract fail")
        raise RuntimeError("extract fail")

    logging.info("extract success")

cli/extractor/__init__.py

Whitespace-only changes.

cli/extractor/extractor.py

+192
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import logging
2+
3+
import psutil
4+
5+
from run.runner import Runner
6+
from sparrow_schema.schema import sparrow
7+
8+
9+
class Extractor:
    # When adding an extractor, name its attribute <language>_extractor and
    # implement a <language>_extractor_cmd function that builds its command
    # line, so that it can be listed and executed accordingly.
    cfamily_extractor = ""
    go_extractor = ""
    java_extractor = ""
    javascript_extractor = ""
    properties_extractor = ""
    python_extractor = ""
    sql_extractor = ""
    swift_extractor = ""
    xml_extractor = ""

    # Path components of each extractor, relative to <sparrow.home>/language.
    _RELATIVE_PATHS = {
        "cfamily_extractor": ("cfamily", "extractor", "usr", "bin", "coref-cfamily-src-extractor"),
        "go_extractor": ("go", "extractor", "coref-go-src-extractor"),
        "java_extractor": ("java", "extractor", "coref-java-src-extractor_deploy.jar"),
        "javascript_extractor": ("javascript", "extractor", "coref-javascript-src-extractor"),
        "properties_extractor": ("properties", "extractor", "coref-properties-src-extractor_deploy.jar"),
        "python_extractor": ("python", "extractor", "coref-python-src-extractor"),
        "sql_extractor": ("sql", "extractor", "coref-sql-src-extractor_deploy.jar"),
        "swift_extractor": ("swift", "extractor", "usr", "bin", "coref-swift-src-extractor"),
        "xml_extractor": ("xml", "extractor", "coref-xml-extractor_deploy.jar"),
    }

    def __init__(self):
        """Point every class-level extractor attribute at its path under ``sparrow.home``.

        The attributes are set on the class, not the instance, so they are
        visible through ``Extractor.<name>`` once any instance was created.
        """
        for attribute, parts in Extractor._RELATIVE_PATHS.items():
            location = sparrow.home / "language"
            for part in parts:
                location = location / part
            setattr(Extractor, attribute, location)
31+
32+
33+
def cfamily_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the C-family extractor.

    Args:
        source_root: Passed after ``--compile-commands=``.
        database: Passed after ``--output-db-path=``.
        options: Accepted for signature parity with the other builders; unused.

    Returns:
        list[str]: The command as a list of arguments.
    """
    # NOTE(review): each flag (ending in "=") and its value are separate
    # list entries; confirm the runner/extractor expects them split like this.
    return [
        str(Extractor.cfamily_extractor),
        "--compile-commands=", str(source_root),
        "--output-db-path=", str(database),
    ]
39+
40+
41+
def go_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the Go extractor.

    Recognised option keys:
        ``extract-config``: comma-separated; each item is passed as ``-ex <item>``.
        ``go-build-flag``: comma-separated; each item is passed through as-is.
    Any other key only produces a warning.

    Args:
        source_root: Source directory, passed as the last argument.
        database: Output directory; the database file is ``coref_go_src.db`` inside it.
        options: Mapping of option key to string value (may be empty or ``None``).

    Returns:
        list[str]: The command as a list of arguments.
    """
    argv = [str(Extractor.go_extractor)]
    if options:
        for name, raw_value in options.items():
            if name == "extract-config":
                for item in raw_value.split(","):
                    argv.extend(("-ex", item))
            elif name == "go-build-flag":
                argv.extend(raw_value.split(","))
            else:
                logging.warning("unsupported config name: %s for Go extractor.", name)
    argv.extend(["-o", str(database / "coref_go_src.db")])
    argv.append(str(source_root))
    return argv
57+
58+
59+
def java_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the Java extractor jar.

    Starts from the generic ``jar_extractor_cmd`` command and appends flags
    for the recognised option keys:
        ``white-list`` / ``whiteList`` -> ``-w=`` followed by the value
        ``cp`` -> ``-cp=`` followed by the value
        ``classpath`` -> ``--classpath=`` followed by the value
        ``incremental`` (only the value ``"true"`` takes effect) ->
            ``--incremental`` plus the six cache/OSS flags, each defaulting
            to ``"null"`` when the corresponding option is absent.
    ``--parallel`` is appended whenever incremental mode is not enabled.

    Args:
        source_root: Source directory to extract.
        database: Output directory for the database.
        options: Mapping of option key to string value.

    Returns:
        list[str]: The command as a list of arguments.
    """
    # Options that are only consumed together with incremental=true.
    incremental_keys = ("cache-dir", "commit", "remote-cache-type",
                        "oss-bucket", "oss-config-file", "oss-path-prefix")
    # NOTE(review): these flags and their values are emitted as separate list
    # entries, unlike the incremental flags below; confirm this is intended.
    split_flags = {
        "white-list": "-w=",
        "whiteList": "-w=",
        "cp": "-cp=",
        "classpath": "--classpath=",
    }

    argv = list(jar_extractor_cmd(Extractor.java_extractor, source_root, database))
    if options:
        for name, setting in options.items():
            if name in split_flags:
                argv.extend([split_flags[name], setting])
            elif name == "incremental":
                if setting != "true":
                    logging.warning("java.incremental does not take effect, please use java.incremental=true")
                    continue
                argv.append("--incremental")
                for cache_key in incremental_keys:
                    argv.append("--" + cache_key + "=" + options.get(cache_key, "null"))
            elif name not in incremental_keys:
                logging.warning("unsupported config name:%s for java extractor.", name)

    if "incremental" not in options or options["incremental"] != "true":
        argv.append("--parallel")
    return argv
88+
89+
90+
def javascript_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the JavaScript extractor.

    Recognised option keys:
        ``black-list`` / ``blacklist``: comma-separated; passed as
            ``--blacklist item1 item2 ...``.
        ``use-gitignore``, ``extract-dist``, ``extract-deps``: switches that
            are added only when the value is ``"true"``; any other value
            produces a warning.
        ``file-size-limit``: passed as ``--file-size-limit <value>``.
    Any other key only produces a warning.

    Args:
        source_root: Source directory, passed after ``--src``.
        database: Output directory; the database file is
            ``coref_javascript_src.db`` inside it.
        options: Mapping of option key to string value (may be empty or ``None``).

    Returns:
        list[str]: The command as a list of arguments.
    """
    # Boolean switches and the warning shown when their value is not "true".
    switch_warnings = {
        "use-gitignore": "javascript.use-gitignore does not take effect, please use "
                         "javascript.use-gitignore=true",
        "extract-dist": "javascript.extract-dist does not take effect, please use "
                        "javascript.extract-dist=true",
        "extract-deps": "javascript.extract-deps does not take effect, please use "
                        "javascript.extract-deps=true",
    }

    argv = [
        str(Extractor.javascript_extractor), "extract",
        "--src", str(source_root),
        "--db", str(database / "coref_javascript_src.db"),
    ]
    if not options:
        return argv

    for name, setting in options.items():
        if name in ("black-list", "blacklist"):
            argv.append("--blacklist")
            argv.extend(setting.split(','))
        elif name in switch_warnings:
            if setting == "true":
                argv.append("--" + name)
            else:
                logging.warning(switch_warnings[name])
        elif name == "file-size-limit":
            argv.extend(["--file-size-limit", setting])
        else:
            logging.warning("unsupported config name:%s for javascript extractor.", name)
    return argv
124+
125+
126+
def properties_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the properties extractor jar.

    ``options`` is accepted for signature parity with the other builders and
    is not used.
    """
    return jar_extractor_cmd(Extractor.properties_extractor, source_root, database)
129+
130+
131+
def python_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the Python extractor.

    The source directory is passed with ``-s`` and the database location with
    ``-d``. ``options`` is accepted for signature parity and is not used.
    """
    return [
        str(Extractor.python_extractor),
        "-s", str(source_root),
        "-d", str(database),
    ]
135+
136+
137+
def sql_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the SQL extractor jar.

    When ``options`` contains ``sql-dialect-type``, its value is forwarded as
    ``--sql-dialect-type <value>``.

    Args:
        source_root: Source directory to extract.
        database: Output directory for the database.
        options: Mapping of option key to string value.

    Returns:
        list[str]: The command as a list of arguments.
    """
    argv = list(jar_extractor_cmd(Extractor.sql_extractor, source_root, database))
    if "sql-dialect-type" in options:
        argv.extend(["--sql-dialect-type", options["sql-dialect-type"]])
    return argv
143+
144+
145+
def swift_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the Swift extractor.

    The function is named ``swift_extractor_cmd`` because ``extractor_run``
    resolves the builder as ``<language>_extractor_cmd``; under any other
    name the Swift language is reported as unsupported.

    Recognised option keys:
        ``corpus``: comma-separated; each item is passed as ``--corpus <item>``.
    Any other key only produces a warning.

    Args:
        source_root: Source directory, passed as the first positional argument.
        database: Database location, passed as the second positional argument.
        options: Mapping of option key to string value (may be empty or ``None``).

    Returns:
        list[str]: The command as a list of arguments.
    """
    cmd = [str(Extractor.swift_extractor), str(source_root), str(database)]
    if options:
        for (key, value) in options.items():
            if key == "corpus":
                for tmp in value.split(","):
                    cmd += ["--corpus", tmp]
            else:
                logging.warning("unsupported config name:%s for Swift extractor.", key)
    return cmd


# Backward-compatible alias for callers that still use the old function name.
swift_extractor = swift_extractor_cmd
156+
157+
158+
def xml_extractor_cmd(source_root, database, options):
    """Build the argument list that launches the XML extractor jar.

    ``options`` is accepted for signature parity with the other builders and
    is not used.
    """
    return jar_extractor_cmd(Extractor.xml_extractor, source_root, database)
161+
162+
163+
def jar_extractor_cmd(extractor_path, source_root, database):
    """Build the ``java -jar`` argument list shared by all jar-based extractors.

    The JVM heap limit (``-Xmx``) is the total system memory rounded to whole
    GB minus 1, but never less than 6 GB. Both numbers are logged.

    Args:
        extractor_path: Path of the extractor jar.
        source_root: Source directory, passed as the first program argument.
        database: Database location, passed as the second program argument.

    Returns:
        list[str]: The command as a list of arguments.
    """
    # Read the memory information.
    total_memory_gb = round(psutil.virtual_memory().total / (1024 ** 3))
    logging.info("current memory is : %s GB", total_memory_gb)

    heap_gb = max(total_memory_gb - 1, 6)
    logging.info("final -Xmx is: %s GB", heap_gb)

    return [
        "java", "-jar", f"-Xmx{heap_gb}g", str(extractor_path),
        str(source_root), str(database),
    ]
175+
176+
177+
def extractor_run(language, source_root, database, timeout, options):
    """Build and run the extractor command for one language.

    The command builder is looked up by name among this module's globals as
    ``<language>_extractor_cmd`` and called directly to obtain the command.

    Args:
        language: Language name used to resolve the builder function.
        source_root: Forwarded to the builder.
        database: Forwarded to the builder.
        timeout: Forwarded to ``Runner`` together with the command.
        options: Forwarded to the builder.

    Returns:
        The result of ``Runner(cmd, timeout).subrun()``, or ``-1`` when the
        language has no builder or the builder returned ``-1``.
    """
    try:
        build_cmd = globals()[language + "_extractor_cmd"]
    except KeyError:
        logging.error("Not supported language: %s", language)
        return -1

    cmd = build_cmd(source_root, database, options)
    if cmd == -1:
        logging.error("option error")
        logging.error("Failed to obtain the %s extractor", language)
        return -1

    return Runner(cmd, timeout).subrun()
192+

cli/godel/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)