-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsubjects.json
65 lines (64 loc) · 3.07 KB
/
subjects.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
[
{
"redirect": "SKLearn_subjects.json"
},
{
"name": "Load data",
"description": "For processing the data, it is necessary to load it first",
"actions": [
{"name": "Load csv", "state": "load_data?load_file_state"}
]
},
{
"name": ["Classification", "Classifier"],
"children": [
{
"name": "Preprocessing",
"children": [
{
"name": "Tokenization",
"description": "Tokenization splits an input text into a list of tokens",
"actions": [
{"name": "Tokenize", "state": "preprocessing?tokenize_column_state"}
]
},
{
"name": "Transform Cases",
"description": "Transforms cases of characters in a document. This operator transforms all characters in a document to either lower case or upper case, respectively.",
"actions": [
{"name": "To lower case", "state": "preprocessing?to_lowercase_state"},
{"name": "To upper case", "state": "preprocessing?to_uppercase_state"}
]
},
{
"name": "Filter tokens by length",
"description": "Use a length criteria to filter tokens",
"actions": [
{"name": "Minimum length", "state": "preprocessing?minimum_length_state"},
{"name": "Minumum length (inclusive)", "state": "preprocessing?minimum_length_inclusive_state"},
{"name": "Maximum length", "state": "preprocessing?maximum_length_state"},
{"name": "Maximum length (inclusive)", "state": "preprocessing?maximum_length_inclusive_state"},
{"name": "Range", "state": "preprocessing?range_length_state"}
]
},
{
"name": "Remove stopwords",
"description": "This operator filters English stopwords from a document by removing every token which equals a stopword from the built-in stopword list. Please note that, for this operator to work properly, every token should represent a single English word only. To obtain a document with each token representing a single word, you may tokenize a document by applying the Tokenize operator beforehand.",
"actions": [
{"name": "Remove stopwords", "state": "preprocessing?remove_stopwords_state"}
]
}
]
},
{
"name": "Algorithm Specification"
},
{
"name": "Validation"
},
{
"name": "Feature Engineering"
}
]
}
]