Skip to content

Commit

Permalink
Merge pull request #5 from cedadev/v2.1.6
Browse files Browse the repository at this point in the history
V2.1.6
  • Loading branch information
dwest77a authored Nov 19, 2024
2 parents c92bd7c + 17ec4db commit 8d5f139
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 63 deletions.
50 changes: 27 additions & 23 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,27 +1,31 @@
BSD Licence
Copyright (c) 2016, Science & Technology Facilities Council (STFC)
BSD 3-Clause License

Copyright (c) 2023-2024, Centre for Environmental Data Analysis Developers,
Science and Technology Facilities Council (STFC),
UK Research and Innovation (UKRI).
All rights reserved.

Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or other
materials provided with the distribution.

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Science & Technology Facilities Council (STFC)
nor the names of its contributors may be used to endorse or promote
products derived from this software without specific prior written
permission.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tag-scanner"
version = "2.1.5"
version = "2.1.6"
description = "This package provides a command line tool moles_esgf_tag to generate dataset tags for both MOLES and ESGF."
license = "{file='LICENSE'}"
readme = "README.md"
Expand Down Expand Up @@ -28,7 +28,7 @@ elasticsearch = "7.6.0"
tqdm = "^4.45.0"
pytest = "^8.3.3"
jinja2 = "^3"
directory-tree = { git = "https://github.com/cedadev/ceda-directory-tree.git", tag = "v1.1.2"}
directory-tree = { git = "https://github.com/cedadev/ceda-directory-tree.git", tag = "v1.1.3"}
sparqlwrapper = "^2.0.0"
netcdf4 = "^1.7.2"
six = "^1.16.0"
Expand Down
36 changes: 6 additions & 30 deletions tag_scanner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,8 @@
'''
BSD Licence
Copyright (c) 2016, Science & Technology Facilities Council (STFC)
All rights reserved.
# Logger setup
import logging

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
logging.basicConfig(level=logging.DEBUG)
logstream = logging.StreamHandler()

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Science & Technology Facilities Council (STFC)
nor the names of its contributors may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
'''

__version__ = '2.0.1'
formatter = logging.Formatter('%(levelname)s [%(name)s]: %(message)s')
logstream.setFormatter(formatter)
5 changes: 5 additions & 0 deletions tag_scanner/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
from tag_scanner.utils.snippets import get_file_subset

verboselogs.install()

from tag_scanner import logstream

logger = logging.getLogger(__name__)
logger.addHandler(logstream)
logger.propagate = False


class Dataset(object):
Expand Down
6 changes: 6 additions & 0 deletions tag_scanner/facets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
from tag_scanner.triple_store import TripleStore, Concept
import re

import logging
from tag_scanner import logstream

logger = logging.getLogger(__name__)
logger.addHandler(logstream)
logger.propagate = False

class Facets(object):
"""
Expand Down
11 changes: 9 additions & 2 deletions tag_scanner/file_handlers/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
verboselogs.install()
logger = logging.getLogger(__name__)

import logging
from tag_scanner import logstream

logger = logging.getLogger(__name__)
logger.addHandler(logstream)
logger.propagate = False


class NetcdfHandler(FileHandler):

Expand Down Expand Up @@ -56,7 +63,7 @@ def get_product_version(self):
def extract_facet_labels(self, proc_level):

if self.nc_data:
logger.verbose(f'GLOBAL ATTRS for {self.filepath}')
logger.debug(f'GLOBAL ATTRS for {self.filepath}')

for global_attr in ALLOWED_GLOBAL_ATTRS:
if global_attr in self.nc_data.ncattrs():
Expand All @@ -65,7 +72,7 @@ def extract_facet_labels(self, proc_level):
self.tags[global_attr] = attr

# Verbose logging
logger.verbose(f'{global_attr}={attr}')
logger.debug(f'{global_attr}={attr}')
else:
logger.warning(f'Required attr {global_attr} not found in {self.filepath}')

Expand Down
1 change: 0 additions & 1 deletion tag_scanner/tagger.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

verboselogs.install()


class ProcessDatasets(object):
"""
This class provides the process_datasets method to process datasets,
Expand Down
10 changes: 10 additions & 0 deletions tag_scanner/triple_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@

from tag_scanner.conf.settings import SPARQL_HOST_NAME

import logging
from tag_scanner import logstream

logger = logging.getLogger(__name__)
logger.addHandler(logstream)
logger.propagate = False


class Concept:
"""
Expand Down Expand Up @@ -89,9 +96,12 @@ def get_concepts_in_scheme(cls, uri):
result_set = graph.query(statement)

concepts = {}
results = 0
for result in result_set:
results += 1
concepts[("" + result.label).lower()] = Concept(result.label, result.concept.toPython())

logger.debug(f'SPARQL Query returned {results} entries for {uri}')
return concepts

@classmethod
Expand Down
13 changes: 8 additions & 5 deletions tag_scanner/utils/dataset_jsons.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@

import logging

logger = logging.getLogger()
from tag_scanner import logstream

logger = logging.getLogger(__name__)
logger.addHandler(logstream)
logger.propagate = False

def nested_get(key_list, input_dict):
"""
Expand Down Expand Up @@ -70,9 +74,7 @@ def __init__(self, json_files=None, json_tagger_root=None):
else:

path_root = os.path.abspath(json_tagger_root)
json_files = glob.glob(f'{path_root}/*')

print(json_files)
json_files = glob.glob(f'{path_root}/**/*.json')

# Read all the json files and build a tree of datasets
i = 0
Expand All @@ -94,7 +96,8 @@ def __init__(self, json_files=None, json_tagger_root=None):
self._json_lookup[dataset] = f
i += 1

logging.info(f'Loaded {i} JSON files')
logger.info(f'Loading JSONs from {json_tagger_root}')
logger.info(f'Loaded {i} JSON files')

def get_dataset(self, path):
"""
Expand Down

0 comments on commit 8d5f139

Please sign in to comment.