forked from Rust-GPU/Rust-CUDA
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgen_libdevice_json.py
85 lines (73 loc) · 2.72 KB
/
gen_libdevice_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Takes the PDF file of the libdevice docs and generates a JSON file representing it.
# That JSON file is then used to generate internal intrinsics as well as intrinsics docs.
# libdevice has 300+ intrinsics, so scripting this is better for developer
# sanity as well as for extensibility to any future versions of libdevice.
import pdfplumber
import os
import re
import json
# Resolve every data file relative to this script so the script behaves the
# same regardless of the directory it is launched from.
dirname = os.path.dirname(__file__)
filename = os.path.join(dirname, 'data/libdevice.pdf')
out_filename = os.path.join(dirname, 'data/libdevice.json')

# Pull the text out of every page of the PDF and glue the pages together.
# The pages are joined with NO separator on purpose: the footer-stripping regex
# used further down matches a page number immediately followed by
# "Function Reference", which presumably only lines up when one page's footer
# runs straight into the next page's header.
# `extract_text()` may produce nothing for a page without text, hence `or ""`.
with pdfplumber.open(filename) as pdf:
    text = "".join(page.extract_text() or "" for page in pdf.pages)

# Keep a copy of the raw extracted text next to the other data files; useful
# when debugging the regexes. The path is built from `dirname` (instead of a
# path relative to the current working directory) and the file is closed
# deterministically by the `with` block.
with open(os.path.join(dirname, "data/libdevice.txt"), "w", encoding="utf8") as raw_dump:
    raw_dump.write(text)
# A regex is admittedly a blunt tool here, but it is far less work than a real
# parser. One match corresponds to one numbered "3.N." section; the capture
# groups are consumed by the loop that builds the intrinsics list:
#   1 = intrinsic name, 2 = prototype line, 3 = description,
#   4 = "Returns" text, 5 = "Library Availability" text.
# The pattern is split over adjacent raw literals purely for readability; the
# pieces concatenate into a single pattern string.
regex = (
    r"3\.\d+\.\s(\w+)(?!\.)\n"
    r"Prototype:\n(.+)\n"
    r"Description:\n([\s\S]*?(?=Returns:))"
    r"Returns:\n([\s\S]*?(?=Library Availability))"
    r"Library Availability:\n([\s\S]*?(?=(3\.\d+\.)|\Z|www\.nvidia\.com))"
)

# The extracted text still contains the page footer, which would get in the way
# of the section regex, so strip every occurrence of it first.
sanitize_regex = r"www.nvidia.com\nLibdevice User's Guide Part 000 _v8.0 \| \d+Function Reference\n"
text = re.sub(sanitize_regex, "", text)

# Swap the triangular bullet (U+2023) for "-" since that renders better in
# markdown, then squeeze every run of spaces down to a single space.
text = re.sub(" +", " ", text.replace("\u2023", "-"))

# The PDF-to-text conversion mangles the math symbols: x and y seem to come out
# as "\nx" and "\ny". The final ".x" -> "x." swap is carried over as-is; the
# reason it is needed is not known.
for garbled, repaired in (("\nx", "x"), ("\ny", "y"), (".x", "x.")):
    text = text.replace(garbled, repaired)

# Lazy iterator over every intrinsic section found in the cleaned-up text.
matches = re.finditer(regex, text)
# Collects one dict per intrinsic found in the document.
intrinsics = []

# Translation table from the type spellings that appear in the prototypes to
# the type names written into the JSON (these look like Rust types, presumably
# for the generated Rust bindings).
_scalar_types = {
    "float": "f32",
    "double": "f64",
    "i8": "i8",
    "i16": "i16",
    "i32": "i32",
    "i64": "i64",
}
type_map = dict(_scalar_types)
type_map["void"] = "()"
# Every scalar also has a pointer form: `T*` maps to `*mut <mapped T>`.
for _src_ty, _dst_ty in _scalar_types.items():
    type_map[_src_ty + "*"] = "*mut " + _dst_ty
# Patterns for picking a prototype apart: the return type is whatever precedes
# " @", and each parameter is a `<type> %<name>` pair (the type may carry a
# trailing `*`). They are raw strings so that `\w` and `\*` reach the regex
# engine without tripping Python's invalid-escape-sequence warning, and they
# are compiled once here rather than being looked up on every iteration.
_RETURN_TY_RE = re.compile(r".*(?= @)")
_PARAM_RE = re.compile(r"(\w+\*?)(?= %) %(\w+)")

# Turn every regex match (one per intrinsic section) into a JSON-ready dict.
# An unknown type spelling raises KeyError from the `type_map` lookup.
for match in matches:
    sig_txt = match.group(2).strip()

    return_match = _RETURN_TY_RE.search(sig_txt)
    if return_match is None:
        # Fail with the offending prototype instead of an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(
            "could not find a return type in prototype: {!r}".format(sig_txt)
        )
    return_ty = type_map[return_match.group()]

    params = [
        {
            "name": param.group(2).strip(),
            "type": type_map[param.group(1).strip()],
        }
        for param in _PARAM_RE.finditer(sig_txt)
    ]

    intrinsics.append(
        {
            "name": match.group(1).strip(),
            # Key order ("params" before "returns") is kept so the emitted
            # JSON stays the same.
            "sig": {"params": params, "returns": return_ty},
            "description": match.group(3).strip(),
            "returns": match.group(4).strip(),
            "availability": match.group(5).strip(),
        }
    )
# Write the collected intrinsics out as pretty-printed JSON. The `with` block
# makes sure the file is flushed and closed once the write is done (the handle
# was previously left open until interpreter exit).
with open(out_filename, "w", encoding="utf8") as out:
    out.write(json.dumps(intrinsics, indent=2))