-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.jv
329 lines (285 loc) · 8.78 KB
/
model.jv
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
use {
LengthUnit,
PressureUnit,
DateYYYYMMDD
} from "./../../shared/valuetypes.jv";
use {
DecimalParser,
BracesRemover,
SingleQuoteRemover,
BracesAndSingleQuotesRemover
} from "./../../shared/composite-blocktypes.jv";
use {
TakeUntilComma,
ParseDecimal
} from "./../../shared/transforms.jv";
// Allow-list for the "Parsing Method" column: according to the source paper a value
// is obtained either by "Text Parsing" or by "Table Parsing", nothing else.
constraint AllowedParsingMethodList on text: value in ["Text Parsing", "Table Parsing"];
// Text value type whose values must satisfy AllowedParsingMethodList.
valuetype ParsingMethod oftype text {
  constraints: [AllowedParsingMethodList];
}
// Downloads a zip archive over HTTP, picks four CSV files out of it and loads each of
// them into its own table of ./YieldStrengthAndGrainSize.sqlite.
pipeline YieldStrengthAndGrainSizePipeline {

  // --- Pipes -----------------------------------------------------------------------
  // All four branches share FileExtractor -> ZipInterpreter and then fan out.

  // Combined database: the two value columns are read as text and cleaned/parsed
  // after the table has been interpreted.
  FileExtractor
    -> ZipInterpreter
    -> CombinedCSVPicker
    -> CombinedTextFileInterpreter
    -> CombinedCSVInterpreter
    -> CombinedTableInterpreter
    -> CombinedCompoundSanatizer
    -> CombinedGrainSizeValueSanatizer
    -> CombinedFirstGrainSizeValue
    -> CombinedGrainSizeValueParser
    -> CombinedYieldStrengthValueSanatizer
    -> CombinedFirstYieldStrengthValue
    -> CombinedYieldStrengthValueParser
    -> CombinedLoader;

  // Engineering-ready yield strength database.
  ZipInterpreter
    -> EngineeringReadyYieldStrengthCSVPicker
    -> EngineeringReadyYieldStrengthTextFileInterpreter
    -> EngineeringReadyYieldStrengthCSVInterpreter
    -> EngineeringReadyYieldStrengthTableInterpreter
    -> EngineeringReadyCompoundSanatizer
    -> EngineeringReadyAuthorSanatizer
    -> EngineeringReadyYieldStrengthLoader;

  // Grain size database (header cells are rewritten before table interpretation).
  ZipInterpreter
    -> GrainSizeCSVPicker
    -> GrainSizeTextFileInterpreter
    -> GrainSizeCSVInterpreter
    -> GrainSizeRenamer
    -> GrainSizeTableInterpreter
    -> GrainSizeCompoundSanatizer
    -> GrainSizeAuthorSanatizer
    -> GrainSizeLoader;

  // Yield strength database (header cells are rewritten before table interpretation).
  ZipInterpreter
    -> YieldStrengthCSVPicker
    -> YieldStrengthTextFileInterpreter
    -> YieldStrengthCSVInterpreter
    -> YieldStrengthRenamer
    -> YieldStrengthTableInterpreter
    -> YieldStrengthCompoundSanatizer
    -> YieldStrengthAuthorSanatizer
    -> YieldStrengthLoader;

  // --- Extraction ------------------------------------------------------------------

  block FileExtractor oftype HttpExtractor {
    url: "https://figshare.com/ndownloader/files/31626647";
  }

  block ZipInterpreter oftype ArchiveInterpreter {
    archiveType: "zip";
  }

  // --- File selection: one CSV per branch --------------------------------------------

  block CombinedCSVPicker oftype FilePicker {
    path: "/Databases/Combined/Combined_YieldStrength_GrainSize_Database.csv";
  }

  block EngineeringReadyYieldStrengthCSVPicker oftype FilePicker {
    path: "/Databases/Engineering_Ready_YS/EngineeringReady_YieldStrength_Database.csv";
  }

  block GrainSizeCSVPicker oftype FilePicker {
    path: "/Databases/GS/GrainSize_Database.csv";
  }

  block YieldStrengthCSVPicker oftype FilePicker {
    path: "/Databases/YS/YieldStrength_Database.csv";
  }

  // --- Text file interpretation (no properties set) ----------------------------------

  block CombinedTextFileInterpreter oftype TextFileInterpreter { }

  block EngineeringReadyYieldStrengthTextFileInterpreter oftype TextFileInterpreter { }

  block GrainSizeTextFileInterpreter oftype TextFileInterpreter { }

  block YieldStrengthTextFileInterpreter oftype TextFileInterpreter { }

  // --- CSV interpretation ------------------------------------------------------------
  // All four files use the same settings: comma-delimited, double-quote enclosed,
  // with a double quote as the escape character inside enclosed cells.

  block CombinedCSVInterpreter oftype CSVInterpreter {
    delimiter: ",";
    enclosing: '"';
    enclosingEscape: '"';
  }

  block EngineeringReadyYieldStrengthCSVInterpreter oftype CSVInterpreter {
    delimiter: ",";
    enclosing: '"';
    enclosingEscape: '"';
  }

  block GrainSizeCSVInterpreter oftype CSVInterpreter {
    delimiter: ",";
    enclosing: '"';
    enclosingEscape: '"';
  }

  block YieldStrengthCSVInterpreter oftype CSVInterpreter {
    delimiter: ",";
    enclosing: '"';
    enclosingEscape: '"';
  }

  // --- Header rewriting --------------------------------------------------------------
  // Overwrites the cells C1:E1 so that the following TableInterpreter (header: true)
  // can address these columns as 'Old Value', 'Units' and 'Value'.

  block GrainSizeRenamer oftype CellWriter {
    at: range C1:E1;
    write: [
      'Old Value',
      'Units',
      'Value'
    ];
  }

  block YieldStrengthRenamer oftype CellWriter {
    at: range C1:E1;
    write: [
      'Old Value',
      'Units',
      'Value'
    ];
  }

  // --- Table interpretation ----------------------------------------------------------
  // NOTE(review): the `DOI` value type used below is neither defined in this file nor
  // named in its `use` lists - confirm that it resolves.

  block CombinedTableInterpreter oftype TableInterpreter {
    header: true;
    columns: [
      "Compound" oftype text,
      "Blacklisted Compound?" oftype boolean,
      "Yield Strength Value" oftype text, // Includes braces
      "Yield Strength Unit" oftype PressureUnit,
      "Grain Size Value" oftype text, // Includes braces
      "Grain Size Unit" oftype LengthUnit,
      "DOI" oftype DOI,
      "Open Access" oftype boolean,
    ];
  }

  block EngineeringReadyYieldStrengthTableInterpreter oftype TableInterpreter {
    header: true;
    columns: [
      "Compound" oftype text,
      "Blacklisted Compound?" oftype boolean,
      "Value" oftype decimal,
      "Units" oftype PressureUnit,
      // "Raw Value" oftype decimal,
      // "Raw Units" oftype text, // Should only have MPa and GPa as values but are noisy
      "Parsing Method" oftype ParsingMethod,
      "DOI" oftype DOI,
      "Article Title" oftype text,
      "Author" oftype text,
      "Journal" oftype text,
      "Date" oftype DateYYYYMMDD,
      "Open Access" oftype boolean,
    ];
  }

  block GrainSizeTableInterpreter oftype TableInterpreter {
    header: true;
    columns: [
      "Compound" oftype text,
      "Blacklisted Compound?" oftype boolean,
      "Value" oftype decimal,
      "Units" oftype LengthUnit,
      // "Raw Value" oftype decimal,
      // "Raw Units" oftype GrainRawUnits, // Should only be ["µm", "nm", "pm"] but is noisy
      "Parsing Method" oftype ParsingMethod,
      "DOI" oftype DOI,
      "Article Title" oftype text,
      "Author" oftype text,
      "Journal" oftype text,
      "Date" oftype DateYYYYMMDD,
      "Open Access" oftype boolean,
    ];
  }

  block YieldStrengthTableInterpreter oftype TableInterpreter {
    header: true;
    columns: [
      "Compound" oftype text,
      "Blacklisted Compound?" oftype boolean,
      "Value" oftype decimal,
      "Units" oftype PressureUnit,
      // "Raw Value" oftype decimal,
      // "Raw Units" oftype RawUnits, // Should only be ["MPa", "GPa"] but is noisy
      "Parsing Method" oftype ParsingMethod,
      "DOI" oftype DOI,
      "Article Title" oftype text,
      "Author" oftype text,
      "Journal" oftype text,
      "Date" oftype DateYYYYMMDD,
      "Open Access" oftype boolean,
    ];
  }

  // --- Combined branch: clean-up, value parsing and loading --------------------------
  // BracesRemover, BracesAndSingleQuotesRemover and DecimalParser are composite block
  // types imported from the shared library; presumably they do what their names say
  // on the configured column - confirm against their definitions.

  block CombinedCompoundSanatizer oftype BracesRemover {
    columnWithBraces: "Compound";
  }

  block CombinedGrainSizeValueSanatizer oftype BracesRemover {
    columnWithBraces: "Grain Size Value";
  }

  // FIXME: use this block instead of `CombinedFirstGrainSizeValue` once
  // https://github.com/jvalue/jayvee/issues/591 is fixed.
  // Not connected to any pipe at the moment.
  block BROKENCombinedGrainSizeValueParser oftype DecimalParser {
    columnName: "Grain Size Value";
  }

  // INFO: Cells like "11.0, 34.6" will become "11.0"
  // According to the article:
  // "In cases where more than one value is extracted, when a range of values is extracted for example, Value
  // will be a list."
  // This makes the possibility that the values are minimum and maximum much less likely, so it's reasonable to
  // just pick the first one
  block CombinedFirstGrainSizeValue oftype TableTransformer {
    inputColumns: [
      "Grain Size Value"
    ];
    outputColumn: "Grain Size Value";
    uses: TakeUntilComma;
  }

  // Replaces the (text) column in place with the result of the ParseDecimal transform.
  block CombinedGrainSizeValueParser oftype TableTransformer {
    inputColumns: [
      "Grain Size Value"
    ];
    outputColumn: "Grain Size Value";
    uses: ParseDecimal;
  }

  block CombinedYieldStrengthValueSanatizer oftype BracesRemover {
    columnWithBraces: "Yield Strength Value";
  }

  // FIXME: use this block instead of `CombinedFirstYieldStrengthValue` once
  // https://github.com/jvalue/jayvee/issues/591 is fixed.
  // Not connected to any pipe at the moment.
  block BROKENCombinedYieldStrengthValueParser oftype DecimalParser {
    columnName: "Yield Strength Value";
  }

  // INFO: Cells like "11.0, 34.6" will become "11.0"
  // According to the article:
  // "In cases where more than one value is extracted, when a range of values is extracted for example, Value
  // will be a list."
  // This makes the possibility that the values are minimum and maximum much less likely, so it's reasonable to
  // just pick the first one
  block CombinedFirstYieldStrengthValue oftype TableTransformer {
    inputColumns: [
      "Yield Strength Value"
    ];
    outputColumn: "Yield Strength Value";
    uses: TakeUntilComma;
  }

  // Replaces the (text) column in place with the result of the ParseDecimal transform.
  block CombinedYieldStrengthValueParser oftype TableTransformer {
    inputColumns: [
      "Yield Strength Value"
    ];
    outputColumn: "Yield Strength Value";
    uses: ParseDecimal;
  }

  block CombinedLoader oftype SQLiteLoader {
    table: "Combined";
    file: "./YieldStrengthAndGrainSize.sqlite";
  }

  // --- Engineering-ready yield strength branch: clean-up and loading -----------------

  block EngineeringReadyCompoundSanatizer oftype BracesRemover {
    columnWithBraces: "Compound";
  }

  block EngineeringReadyAuthorSanatizer oftype BracesAndSingleQuotesRemover {
    columnName: "Author";
  }

  block EngineeringReadyYieldStrengthLoader oftype SQLiteLoader {
    table: "EngineeringReadyYieldStrength";
    file: "./YieldStrengthAndGrainSize.sqlite";
  }

  // --- Grain size branch: clean-up and loading ---------------------------------------

  block GrainSizeCompoundSanatizer oftype BracesRemover {
    columnWithBraces: "Compound";
  }

  block GrainSizeAuthorSanatizer oftype BracesAndSingleQuotesRemover {
    columnName: "Author";
  }

  block GrainSizeLoader oftype SQLiteLoader {
    table: "GrainSize";
    file: "./YieldStrengthAndGrainSize.sqlite";
  }

  // --- Yield strength branch: clean-up and loading -----------------------------------

  block YieldStrengthCompoundSanatizer oftype BracesRemover {
    columnWithBraces: "Compound";
  }

  block YieldStrengthAuthorSanatizer oftype BracesAndSingleQuotesRemover {
    columnName: "Author";
  }

  block YieldStrengthLoader oftype SQLiteLoader {
    table: "YieldStrength";
    file: "./YieldStrengthAndGrainSize.sqlite";
  }
}