-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexcel_to_JSON-LD_converter.py
105 lines (86 loc) · 4.26 KB
/
excel_to_JSON-LD_converter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import argparse
import pandas as pd
import json
def create_jsonld_with_conditions(schemas, item_map, unit_map, context_toplevel, context_connector):
    """Assemble a JSON-LD document from the parsed workbook sheets.

    Args:
        schemas: DataFrame with 'Value', 'Unit' and 'Ontology link' columns.
            Rows whose 'Value' is missing, or whose 'Ontology link' is
            'NotOntologize', are skipped. Otherwise the link is split on '-'
            into a path; the first segment is ignored and the remaining
            segments are nested below the "Battery" node.
        item_map: Mapping of item name -> record; the record's 'Key' entry
            is used as the "@type" of the node created for that item.
        unit_map: Mapping of unit name -> record with 'Label', 'Symbol' and
            'Key' entries.
        context_toplevel: DataFrame with 'Item' and 'Key' columns; every row
            is copied into "@context".
        context_connector: DataFrame with 'Item' and 'Key' columns; every row
            is copied into "@context" and its 'Item' is treated as a
            connector, i.e. a path segment that gets a plain ``{}`` node
            instead of a typed one.

    Returns:
        A dict with the keys "@context" and "Battery".

    Raises:
        ValueError: If a row has a value but no unit, if a row has a value
            but its 'Ontology link' is not a text path, or if a path segment
            is neither a connector nor a key of ``item_map``.
        KeyError: If a unit other than 'No Unit' is not a key of ``unit_map``.
    """
    battery = {"@type": "battery:Battery"}
    context = {}
    jsonld = {"@context": context, "Battery": battery}

    # Build the @context part
    for _, row in context_toplevel.iterrows():
        context[row['Item']] = row['Key']

    connectors = set()
    for _, row in context_connector.iterrows():
        context[row['Item']] = row['Key']
        connectors.add(row['Item'])  # Track connectors to avoid redefining types

    def add_to_structure(path, value, unit):
        """Create the nodes along *path* and store *value* at its last segment."""
        node = battery
        # The first segment is skipped: navigation always starts at "Battery".
        segments = path[1:]
        last_index = len(segments) - 1

        for idx, part in enumerate(segments):
            if part not in node:
                if part in connectors:
                    node[part] = {}
                elif part in item_map:
                    node[part] = {"@type": item_map[part]['Key']}
                else:
                    raise ValueError(f"Connector or item '{part}' is not defined in any relevant sheet.")

            if idx != last_index:
                node = node[part]
                continue

            # Last segment: replace the placeholder with the value node.
            # The type falls back to '' when the segment is not in item_map.
            final_type = item_map.get(part, {}).get('Key', '')
            if unit == 'No Unit':
                node[part] = {"@type": final_type, "value": value}
                continue

            try:
                unit_info = unit_map[unit]
            except KeyError:
                # Same exception type as a plain lookup, but say which unit.
                raise KeyError(f"Unit '{unit}' is not defined in the unit map.") from None

            node[part] = {
                "@type": final_type,
                "hasNumberValue": {
                    "@type": "emmo:hasNumberValue",
                    "value": value,
                    "unit": {
                        "label": unit_info['Label'],
                        "symbol": unit_info['Symbol'],
                        "@type": unit_info['Key'],
                    },
                },
            }

    # Process each schema entry to construct the JSON-LD output
    for _, row in schemas.iterrows():
        value = row['Value']
        link = row['Ontology link']
        if pd.isna(value) or link == 'NotOntologize':
            continue
        if pd.isna(row['Unit']):
            raise ValueError(f"The value '{value}' is filled in the wrong row, please check the schemas")
        if not isinstance(link, str):
            # An empty cell is read as NaN (a float), which has no .split().
            raise ValueError(f"The value '{value}' has no valid 'Ontology link', please check the schemas")
        add_to_structure(link.split('-'), value, row['Unit'])

    return jsonld
def convert_excel_to_jsonld(excel_file):
    """Read the workbook at *excel_file* and convert it to a JSON-LD dict.

    The sheets 'Schemas', 'Ontology - Item', 'Ontology - Unit',
    '@context-TopLevel' and '@context-Connector' are read. The two ontology
    sheets are turned into dicts keyed by their 'Item' column.

    Args:
        excel_file: Path (or other input accepted by ``pd.ExcelFile``) of the
            workbook to convert.

    Returns:
        The value returned by ``create_jsonld_with_conditions`` for the
        parsed sheets.
    """
    # The context manager closes the workbook handle even when reading one of
    # the sheets fails, instead of leaving the file open.
    with pd.ExcelFile(excel_file) as workbook:
        schemas = pd.read_excel(workbook, sheet_name='Schemas')
        item_sheet = pd.read_excel(workbook, sheet_name='Ontology - Item')
        unit_sheet = pd.read_excel(workbook, sheet_name='Ontology - Unit')
        context_toplevel = pd.read_excel(workbook, sheet_name='@context-TopLevel')
        context_connector = pd.read_excel(workbook, sheet_name='@context-Connector')

    item_map = item_sheet.set_index('Item').to_dict(orient='index')
    unit_map = unit_sheet.set_index('Item').to_dict(orient='index')

    return create_jsonld_with_conditions(schemas, item_map, unit_map, context_toplevel, context_connector)
def main():
    """Command-line entry point: convert an Excel workbook to a JSON-LD file.

    Reads the required ``--path_to_excel_file`` argument, converts the
    workbook and writes the result next to it with a ``.json`` extension.
    """
    from pathlib import Path

    parser = argparse.ArgumentParser(description='Convert an Excel file to JSON-LD format.')
    parser.add_argument('--path_to_excel_file', required=True, help='Path to the Excel file to convert.')
    args = parser.parse_args()

    jsonld = convert_excel_to_jsonld(args.path_to_excel_file)

    # Swap only the final extension. A plain str.replace('.xlsx', '.json')
    # leaves the path unchanged for other extensions (.xls, .xlsm, .XLSX),
    # which would make the output overwrite the input workbook.
    excel_path = Path(args.path_to_excel_file)
    jsonld_file_path = excel_path.with_suffix('.json')
    if jsonld_file_path == excel_path:
        # Input already ends in '.json': never write over the source file.
        jsonld_file_path = excel_path.with_name(excel_path.name + '.json')

    with open(jsonld_file_path, 'w', encoding='utf-8') as f:
        json.dump(jsonld, f, indent=4)
    print(f"JSON-LD file has been saved to {jsonld_file_path}")
# Run the command-line converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()