Skip to content

Commit

Permalink
Update BODC_tables_generator.py
Browse files Browse the repository at this point in the history
  • Loading branch information
cyrilrader authored Sep 18, 2024
1 parent 9439481 commit c016b3a
Showing 1 changed file with 11 additions and 23 deletions.
34 changes: 11 additions & 23 deletions files/BODC_tables_generator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import json
import sema.query as kg
import pathlib
import pandas as pd
Expand Down Expand Up @@ -101,9 +100,7 @@ def execute_to_df(name: str, **vars) -> pd.DataFrame:
if bodc_parameters_file.exists():
BODCparameters = pd.read_csv(bodc_parameters_file)
else:
print('Im in the else')
BODCparameters = pd.DataFrame(columns=['id','pref_lang','alt','depr','member','definition','standardUnitID'])
print('df created')
columns_list=['id','pref_lang','alt','depr','member','definition','standardUnitID']
for collection in parametersCollectionList :
columns_list_copy=['id','pref_lang','alt','depr','member','definition','standardUnitID']
Expand All @@ -112,39 +109,30 @@ def execute_to_df(name: str, **vars) -> pd.DataFrame:
if col not in BODCparametersTmp.columns :
columns_list_copy.remove(col)
BODCparameters=pd.concat([BODCparameters,BODCparametersTmp[columns_list_copy]])
print('loop ended')
BODCparameters.columns=['identifier','preflabel','altLabel','deprecated','uri','definition','standardUnitID']
BODCparameters=BODCparameters.reset_index()
BODCparameters=BODCparameters.drop(columns='index')
BODCparameters.insert(len(BODCparameters.columns),"standardunit",np.nan)
BODCparameters=BODCparameters.astype('object',copy=False,errors='ignore')
print('first step ended, rest to find the standard unit')

for rowNumber in range(BODCparameters.shape[0]):
if type(BODCparameters['standardUnitID'][rowNumber]) != float :
if type(BODCparameters['standardunit'][rowNumber]) == float :
query_with_pref_lang = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?pref_lang WHERE {
<%s> skos:prefLabel ?pref_lang .
}
"""%BODCparameters['standardUnitID'][rowNumber]

result: kg.QueryResult = NSV.query(sparql=query_with_pref_lang)
print(result.to_dict()['pref_lang'])
BODCparameters.loc[rowNumber,'standardunit']=result.to_dict()['pref_lang'][0]
if (rowNumber + 1) % 10000 == 0:
print('intermediary csv file created')
BODCparameters.to_csv(bodc_parameters_file, index=False)
# Resolve the preferred label of every distinct standard-unit URI with one
# SPARQL query per URI, then write that label to all rows sharing the URI.
for uri in BODCparameters['standardUnitID'].unique():
    # Missing units show up here as NaN (and always do after read_csv), and
    # `NaN != None` is True, so a plain None comparison would let them through
    # and issue a query for the bogus IRI <nan>. pd.isna covers None and NaN.
    if pd.isna(uri):
        continue
    query_with_pref_lang = """
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
SELECT ?pref_lang WHERE {
<%s> skos:prefLabel ?pref_lang .
}
""" % uri
    result: kg.QueryResult = NSV.query(sparql=query_with_pref_lang)
    # NOTE(review): indexing [0] assumes the query returned at least one
    # prefLabel for this URI -- confirm how an empty result should be handled.
    BODCparameters.loc[BODCparameters['standardUnitID'] == uri, 'standardunit'] = result.to_dict()['pref_lang'][0]

# Append one hand-written entry for the EUNIS habitats vocabulary.
# shape[0] is the current row count; it is used below as the label of the new
# row (assumes the index is the default 0..n-1 range, so that label is unused
# -- TODO confirm for the branch that loads an existing CSV).
rowNumber=BODCparameters.shape[0]
# Each .loc assignment targets the same new row label; only these five columns
# are given a value for the EUNIS entry.
BODCparameters.loc[rowNumber,'identifier']="eunishabitats"
BODCparameters.loc[rowNumber,'preflabel']="EUNIS habitats"
BODCparameters.loc[rowNumber,'definition']="Classification of habitat types according to the EUNIS Biodiversity database"
# Stored as the string "false", not a Python bool.
BODCparameters.loc[rowNumber,'deprecated']="false"
BODCparameters.loc[rowNumber,'uri']="http://dd.eionet.europa.eu/vocabulary/biodiversity/eunishabitats/"
print('Second step ended')

# Final save: write the complete table to bodc_parameters_file without the index column.
BODCparameters.to_csv(bodc_parameters_file, index=False)
Expand Down

0 comments on commit c016b3a

Please sign in to comment.