Skip to content

Commit

Permalink
Various changes:
Browse files Browse the repository at this point in the history
- Model website path corrections
- Removed unused imports
- Removed unnecessary f-string prefixes (strings with no placeholders)
- Removed unused vars
  • Loading branch information
vjf committed Apr 22, 2024
1 parent 2615df2 commit 707fbcc
Show file tree
Hide file tree
Showing 12 changed files with 10 additions and 89 deletions.
6 changes: 3 additions & 3 deletions src/ISO19115_3_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def write_record(self, name, bbox, model_endpath, metadata_url, output_file):
model_online = f"""<mrd:onLine>
<cit:CI_OnlineResource>
<cit:linkage>
<gco:CharacterString>http://geomodels.auscope.org/model/mcarthur</gco:CharacterString>
<gco:CharacterString>https://geomodels.auscope.org/model/{model_endpath}</gco:CharacterString>
</cit:linkage>
<cit:protocol>
<gco:CharacterString>WWW:LINK-1.0-http--link</gco:CharacterString>
Expand Down Expand Up @@ -161,8 +161,8 @@ def write_record(self, name, bbox, model_endpath, metadata_url, output_file):
try:
doc = etree.fromstring(bytes(metadata.text, encoding), parser=parser)
except lxml.etree.XMLSyntaxError as xse:
print(f"Error in {metadata.text}")
sys.exit(0)
print(f"Error in {metadata.text}: {xse}")
return False
xslt_tree = etree.XML(xslt)
# Create XSLT
transform = etree.XSLT(xslt_tree)
Expand Down
3 changes: 1 addition & 2 deletions src/ISO19139_extract.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python3

import sys
import os
import requests
from lxml import etree
Expand Down Expand Up @@ -39,7 +38,7 @@ def write_record(self, name, bbox, model_endpath, metadata_url, output_file):
model_online = f"""<gmd:onLine>
<gmd:CI_OnlineResource>
<gmd:linkage>
<gmd:URL>http://geomodels.auscope.org/model/{model_endpath}</gmd:URL>
<gmd:URL>https://geomodels.auscope.org/model/{model_endpath}</gmd:URL>
</gmd:linkage>
<gmd:protocol>
<gco:CharacterString>WWW:LINK-1.0-http--link</gco:CharacterString>
Expand Down
13 changes: 0 additions & 13 deletions src/add_coords.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,5 @@
#!/usr/bin/env python3

import sys
import os
import requests
import json
from pathlib import Path
import datetime
import geojson
from lxml import etree
from io import BytesIO
from copy import copy
from lxml.builder import ElementMaker
from add_model_keyw import insert
from config import OUTPUT_DIR

"""
Utility functions used to add bounding box coordinates to ISO 19139 & 19115-3 XML
Expand Down
13 changes: 0 additions & 13 deletions src/add_links.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,6 @@
#!/usr/bin/env python3

import sys
import os
import requests
import json
from pathlib import Path
import datetime
import geojson
from lxml import etree
from io import BytesIO
from copy import copy
from lxml.builder import ElementMaker

from add_model_keyw import insert
from config import OUTPUT_DIR


def add_model_link(model_endpath, text):
Expand Down
16 changes: 2 additions & 14 deletions src/add_model_keyw.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
#!/usr/bin/env python3

import sys
import os
import requests
import json
from pathlib import Path
import datetime
import geojson
from lxml import etree
from io import BytesIO
from copy import copy
from lxml.builder import ElementMaker

from config import OUTPUT_DIR

""" Adds keywords to ISO 19139 and ISO 19115-3 XML using XPATH insertion
"""
Expand Down Expand Up @@ -103,7 +91,7 @@ def __add_models_keyword_iso19139(text, encoding):

# XML snippet to be inserted into XML record
# This uses a direct insert, which results in messier XML.
insert_txt = f"""
insert_txt = """
<gmd:descriptiveKeywords xmlns:gmd="http://www.isotc211.org/2005/gmd" xmlns:gco="http://www.isotc211.org/2005/gco" xmlns:srv="http://www.isotc211.org/2005/srv" xmlns:gmx="http://www.isotc211.org/2005/gmx" xmlns:gts="http://www.isotc211.org/2005/gts" xmlns:gsr="http://www.isotc211.org/2005/gsr" xmlns:gmi="http://www.isotc211.org/2005/gmi" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.isotc211.org/2005/gmd http://schemas.opengis.net/csw/2.0.2/profiles/apiso/1.0.0/apiso.xsd">
<gmd:MD_Keywords>
<gmd:keyword>
Expand Down Expand Up @@ -169,7 +157,7 @@ def __add_models_keyword_iso19115_3(text, encoding):
insertpoint_xpath_list = ['mdb:MD_Metadata', 'mdb:identificationInfo', 'mri:MD_DataIdentification', 'mri:BLAH']

# XML snippet to be inserted into XML record
insert_txt = f"""<mri:descriptiveKeywords xmlns:mri="http://standards.iso.org/iso/19115/-3/mri/1.0" xmlns:gco="http://standards.iso.org/iso/19115/-3/gco/1.0">
insert_txt = """<mri:descriptiveKeywords xmlns:mri="http://standards.iso.org/iso/19115/-3/mri/1.0" xmlns:gco="http://standards.iso.org/iso/19115/-3/gco/1.0">
<mri:MD_Keywords>
<mri:keyword>
<gco:CharacterString>AuScope 3D Geological Models</gco:CharacterString>
Expand Down
28 changes: 0 additions & 28 deletions src/bedrock_summary.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#!/usr/bin/env python3
import json
import os
import sys
import glob

import boto3
import botocore
Expand Down Expand Up @@ -70,29 +68,3 @@ def run_model(model_name, brt, text):
**config["params"]
})
return body, config['modelId']


"""
NB: This code is used for testing only
"""
if __name__ == "__main__":
# List foundation models
#boto3_bedrock = boto3.client('bedrock')
#print(boto3_bedrock.list_foundation_models())

# Run claude
brt = boto3.client(service_name='bedrock-runtime')
for file in glob.glob(os.path.join(OUTPUT_DIR,'*.txt')):
print(f"{file=}")
file_stats = os.stat(file)
file_size = file_stats.st_size
print(f"{file_size=}")
if file_size > MODELS["claude"]["maxTokens"]:
print("SKIP - too big")
continue
# Read text file
with open(os.path.join(file), 'r') as fd:
text = fd.read()
print(f"{text[:90]=}")
summary = run_claude(text)
print(f"\nSUMMARY: {summary}")
1 change: 0 additions & 1 deletion src/ckan_extract.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#!/usr/bin/env python3

import sys
import os
import requests
import json
Expand Down
2 changes: 1 addition & 1 deletion src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
},
#
# NT also has an OAI-PMH interface
'nt': { 'method': 'OAIPMH',
'nt2': { 'method': 'OAIPMH',
'params': [ { 'name': 'McArthur Basin',
'model_endpath': 'mcarthur',
'oai_id': 'oai:geoscience.nt.gov.au:1/81751',
Expand Down
2 changes: 1 addition & 1 deletion src/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def __init__(self):
print(f"{OUTPUT_DIR=}")
try:
os.mkdir(OUTPUT_DIR)
except FileExistsError as fee:
except FileExistsError:
pass
self.output_dir = OUTPUT_DIR

Expand Down
5 changes: 1 addition & 4 deletions src/keywords.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
#!/usr/bin/env python3

import glob
import sys
import os
import sqlite3
from contextlib import closing

import yake

from pdf_helper import parse_pdf

"""
Uses yake and USGS vocabulary to create geoscience keywords
Expand Down Expand Up @@ -81,7 +78,7 @@ def extract_db_terms():
child = -1
gchild = -1
ggchild = -1
while parent != 1 and parent != None:
while parent != 1 and parent is not None:
ggchild = gchild
gchild = child
child = parent
Expand Down
2 changes: 0 additions & 2 deletions src/oai_extract.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
#!/usr/bin/env python3
import os
import sys
import datetime

from pygeometa.core import render_j2_template
Expand Down
8 changes: 1 addition & 7 deletions src/pdf_extract.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
#!/usr/bin/env python3

import os
import sys
import datetime

from pygeometa.core import render_j2_template
Expand All @@ -10,9 +7,6 @@
from extractor import Extractor
from keywords import get_keywords
from summary import get_summary
from add_links import add_model_link
from add_coords import add_coords
from config import OUTPUT_DIR

class PDFExtractor(Extractor):
""" Creates an ISO 19115 XML file by reading a PDF file
Expand Down Expand Up @@ -150,6 +144,6 @@ def write_record(self, name, model_endpath, pdf_file, pdf_url, organisation, tit
xml_string = render_j2_template(mcf_dict, template_dir=template_dir)

# write to disk
with open(os.path.join(OUTPUT_DIR, output_file), 'w') as ff:
with open(os.path.join(self.output_dir, output_file), 'w') as ff:
ff.write(xml_string)
return True

0 comments on commit 707fbcc

Please sign in to comment.