-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdata-tech-compare-spec.json
354 lines (354 loc) · 11.3 KB
/
data-tech-compare-spec.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
{
"$schema": "https://json-schema.org/draft/2019-09/schema",
"title": "Data technology comparison specification",
"description": "A data technology service, tool, library or system",
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Name of the technology"
},
"description": {
"type": "string",
"description": "Brief description of the technology"
},
"logo": {
"type": "string",
"pattern": "^assets/logo/[a-z_]+\\.(svg|png|jpeg)$",
"description": "Logo of the technology. Should be a SVG, PNG or JPEG file under the assets/logo directory"
},
"source_code": {
"type": "string",
"pattern": "https://.*|"
},
"website": {
"type": "string",
"pattern": "https://.*|"
},
"license": {
"type": "string",
"enum": [
"Academic Free License v3.0",
"Apache license 2.0",
"Artistic license 2.0",
"Boost Software License 1.0",
"BSD 2-clause \"Simplified\" license",
"BSD 3-clause \"New\" or \"Revised\" license",
"BSD 3-clause Clear license",
"BSD 4-clause \"Original\" or \"Old\" license",
"BSD Zero-Clause license",
"Commercial license",
"Creative Commons license family",
"Creative Commons Zero v1.0 Universal",
"Creative Commons Attribution 4.0",
"Creative Commons Attribution ShareAlike 4.0",
"Do What The F*ck You Want To Public License",
"Educational Community License v2.0",
"Eclipse Public License 1.0",
"Eclipse Public License 2.0",
"European Union Public License 1.1",
"GNU Affero General Public License v3.0",
"GNU General Public License family",
"GNU General Public License v2.0",
"GNU General Public License v3.0",
"GNU Lesser General Public License family",
"GNU Lesser General Public License v2.1",
"GNU Lesser General Public License v3.0",
"ISC",
"LaTeX Project Public License v1.3c",
"Microsoft Public License",
"MIT",
"Mozilla Public License 2.0",
"N/A",
"Open Software License 3.0",
"PostgreSQL License",
"SIL Open Font License 1.1",
"University of Illinois/NCSA Open Source License",
"The Unlicense",
"zLib License"
]
},
"year_created": {
"type": "integer"
},
"company": {
"type": "array",
"items": {
"type": "string"
}
},
"use_cases": {
"type": "array",
"items": {
"type": "string"
}
},
"language_support": {
"type": "array",
"default": [],
"uniqueItems": true,
"items": {
"type": "string",
"enum": ["java", "scala", "python", "r", "c++", "c", "rust", "javascript", "perl", "kotlin", "go", "php", "swift", "ruby", "matlab", "typescript", "c#"]
}
},
"data_validation": {
"$ref": "#/$defs/DataValidationCategory"
},
"data_visualisation": {
"$ref": "#/$defs/DataVisualisationCategory"
},
"file": {
"$ref": "#/$defs/FileCategory"
},
"job_orchestration": {
"$ref": "#/$defs/JobOrchestrationCategory"
},
"messaging": {
"$ref": "#/$defs/MessagingCategory"
},
"ml_pipelines": {
"$ref": "#/$defs/MLPipelinesCategory"
},
"object_store": {
"$ref": "#/$defs/ObjectStoreCategory"
},
"olap": {
"$ref": "#/$defs/OLAPCategory"
}
},
"oneOf": [
{
"required": ["database"]
},
{
"required": ["data_validation"]
},
{
"required": ["data_visualisation"]
},
{
"required": ["file"]
},
{
"required": ["job_orchestration"]
},
{
"required": ["messaging"]
},
{
"required": ["ml_pipelines"]
},
{
"required": ["object_store"]
},
{
"required": ["olap"]
}
],
"required": ["name", "description", "logo", "source_code", "website", "license", "year_created", "company", "use_cases", "language_support"],
"$defs": {
"DatabaseCategory": {
"type": "object"
},
"DataValidationCategory": {
"type": "object"
},
"DataVisualisationCategory": {
"type": "object"
},
"FileCategory": {
"type": "object",
"description": "Technology falls under category of file",
"properties": {
"is_human_readable": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format is human readable."
},
"orientation": {
"type": "object",
"properties": {
"value": {
"type": "string",
"description": "File format is row or column oriented (could depend on underlying storage layer)",
"enum": [
"row",
"column",
"column or row"
]
},
"source": {
"$ref": "#/$defs/Source"
}
}
},
"has_type_system": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format checks and enforces column data types"
},
"has_nested_structure_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format supports nested column structures"
},
"has_native_compression": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format supports compressions algorithms natively (does not rely on external tools/commands)"
},
"has_encoding_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format supports using different encodings for data stored"
},
"has_constraint_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format supports column or table level constraints. An example could be a not null constraint."
},
"has_acid_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format supports ACID transactions (i.e. guarantee that the file will be in a consistent state after running a group of operations)."
},
"has_metadata": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format contains metadata about the file and it's contents. For example summary statistics such as min and max, record count, creation time, etc."
},
"has_encryption_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "File format supports encryption at file and/or column level."
},
"data_processing_framework_support": {
"type": "array",
"default": [],
"uniqueItems": true,
"items": {
"$ref": "#/$defs/DataProcessingFrameworkSupportSource"
}
},
"analytics_query_support": {
"type": "array",
"default": [],
"uniqueItems": true,
"items": {
"$ref": "#/$defs/AnalyticsQuerySupportSource"
}
}
},
"required": ["is_human_readable", "orientation", "has_type_system", "has_nested_structure_support", "has_native_compression", "has_encoding_support", "has_constraint_support", "has_acid_support", "has_metadata", "has_encryption_support", "data_processing_framework_support", "analytics_query_support"]
},
"JobOrchestrationCategory": {
"type": "object",
"description": "Job orchestration technology",
"properties": {
"has_cron_schedule_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "Supports defining cron schedules for jobs"
},
"has_event_based_trigger_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "Supports jobs being triggered from events"
},
"has_api_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "Defines an API from which you can retrieve information about the job orchestrator"
},
"has_access_controls": {
"$ref": "#/$defs/HasSupportSource",
"description": "Ability to define access controls for users of the job orchestrator"
},
"has_workflow_versioning": {
"$ref": "#/$defs/HasSupportSource",
"description": "Ability to define different versions of the same workflow"
},
"has_workflow_configuration_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "Ability to define configuration to inject into workflows to alter their behaviour"
},
"has_audit_logs": {
"$ref": "#/$defs/HasSupportSource",
"description": "Maintains an audit trail of what actions the system or users have taken"
},
"has_cost_tracking": {
"$ref": "#/$defs/HasSupportSource",
"description": "Ability to track costs of workflows"
},
"has_data_source_connection_support": {
"$ref": "#/$defs/HasSupportSource",
"description": "Ability to define and manage connections to data sources"
}
},
"required": ["has_cron_schedule_support", "has_event_based_trigger_support", "has_api_support", "has_access_controls", "has_workflow_versioning", "has_workflow_configuration_support", "has_audit_logs", "has_cost_tracking", "has_data_source_connection_support"]
},
"MessagingCategory": {
"type": "object"
},
"MLPipelinesCategory": {
"type": "object"
},
"ObjectStoreCategory": {
"type": "object"
},
"OLAPCategory": {
"type": "object"
},
"Benchmarking": {
"type": "object",
"properties": {
"is_repeatable": {
"type": "boolean",
"description": "Is it possible without payment or sign-up to run benchmark tests yourself?"
},
"link_to_results": {
"type": "string",
"description": "Link to benchmark results."
},
"link_to_command": {
"type": "string",
"description": "Link to benchmark commands to reproduce benchmarks."
}
}
},
"Source": {
"type": "string",
"pattern": "https://.*",
"description": "Link to the source of this information"
},
"HasSupportSource": {
"type": "object",
"properties": {
"value": {
"type": "string",
"enum": ["yes", "no", "maybe"]
},
"source": {
"$ref": "#/$defs/Source"
},
"notes": {
"type": "string"
}
},
"required": ["value"]
},
"DataProcessingFrameworkSupportSource": {
"type": "object",
"properties": {
"value": {
"type": "string",
"enum": ["Apache Spark", "Apache Drill", "Apache Hadoop", "Apache Pig", "Apache Storm", "Apache Flink", "Apache Beam", "Apache NiFi", "Apache Gobblin"]
},
"source": {
"$ref": "#/$defs/Source"
}
}
},
"AnalyticsQuerySupportSource": {
"type": "object",
"properties": {
"value": {
"type": "string",
"enum": ["Apache Druid", "Apache Hive", "Apache Impala", "Apache Pinot", "AWS Athena", "Azure Synapse", "BigQuery", "Clickhouse", "Dremio", "DuckDB", "Firebolt", "Presto", "Trino"]
},
"source": {
"$ref": "#/$defs/Source"
}
}
}
}
}