Skip to content

Commit

Permalink
Escape angle brackets appearing in the abstract
Browse files Browse the repository at this point in the history
  • Loading branch information
vjf committed Apr 23, 2024
1 parent e37ccbb commit 53ac759
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/pdf_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ def write_record(self, name, model_endpath, pdf_file, pdf_url, organisation, tit
keywords = list(kwset)
bbox_list = [str(bbox['west']), str(bbox['east']), str(bbox['south']), str(bbox['north'])]

# Escape angle brackets in the summary so they cannot break the generated XML
summary = summary.replace('<', '&lt;')
summary = summary.replace('>', '&gt;')

# Assemble dict for jinja template
mcf_dict = {
"mcf": {
Expand Down Expand Up @@ -135,11 +139,12 @@ def write_record(self, name, model_endpath, pdf_file, pdf_url, organisation, tit
"level": "dataset"
},
"lineage": {
"statement": f"This metadata record was reproduced from the PDF report retrieved from {pdf_url} on {datetime.datetime.now():%d %b %Y}. The abstract was generated by Antrhopic Claude V2.0 (https://www.anthropic.com/). Keywords were taken from USGS Thesaurus (https://apps.usgs.gov/thesaurus/) and extracted by yake (https://pypi.org/project/yake)"
"statement": f"This metadata record was reproduced from the PDF report retrieved from {pdf_url} on {datetime.datetime.now():%d %b %Y}. The abstract was generated by Anthropic Claude V2.0 (https://www.anthropic.com/). Keywords were taken from USGS Thesaurus (https://apps.usgs.gov/thesaurus/) and extracted by yake (https://pypi.org/project/yake)"
}
}
}

# Create ISO 19115-3 XML with a modified version of pygeometa's jinja template
template_dir = os.path.join(os.path.dirname(__file__), '../data/templates/ISO19115-3')
xml_string = render_j2_template(mcf_dict, template_dir=template_dir)

Expand Down

0 comments on commit 53ac759

Please sign in to comment.