diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7e924a8ff..85234c6a8 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -21,10 +21,6 @@ jobs: strategy: matrix: include: - - python-version: "3.8" - toxenv: "py38-sqlite" - - python-version: "3.9" - toxenv: "py39-sqlite" - python-version: "3.10" toxenv: "py310-sqlite" - python-version: "3.11" diff --git a/.github/workflows/vulnerabilities.yml b/.github/workflows/vulnerabilities.yml index 9a41663a2..b2fc1379d 100644 --- a/.github/workflows/vulnerabilities.yml +++ b/.github/workflows/vulnerabilities.yml @@ -14,27 +14,33 @@ on: - released jobs: - clone: - runs-on: ubuntu-22.04 - steps: - - name: Setup Python - uses: actions/setup-python@v1 - with: - python-version: 3.8 - architecture: x64 - - name: Checkout pycsw - uses: actions/checkout@master vulnerabilities: - needs: [clone] runs-on: ubuntu-22.04 - + defaults: + run: + working-directory: . steps: + - name: Checkout pycsw + uses: actions/checkout@v4 - name: Scan vulnerabilities with trivy + uses: aquasecurity/trivy-action@master + with: + scan-type: fs + exit-code: 1 + ignore-unfixed: true + severity: CRITICAL,HIGH + scanners: vuln,misconfig,secret + scan-ref: . + - name: Build locally the image from Dockerfile run: | - sudo apt-get install -y wget apt-transport-https gnupg lsb-release - wget -qO - https://aquasecurity.github.io/trivy-repo/deb/public.key | gpg --dearmor | sudo tee /usr/share/keyrings/trivy.gpg > /dev/null - echo "deb [signed-by=/usr/share/keyrings/trivy.gpg] https://aquasecurity.github.io/trivy-repo/deb $(lsb_release -sc) main" | sudo tee -a /etc/apt/sources.list.d/trivy.list - sudo apt-get update - sudo apt-get install -y trivy - trivy --exit-code 1 fs --scanners vuln,misconfig,secret --severity HIGH,CRITICAL --ignore-unfixed . + docker buildx build -t ${{ github.repository }}:${{ github.sha }} --platform linux/amd64 --no-cache -f Dockerfile . 
+ - name: Scan locally built Docker image for vulnerabilities with trivy + uses: aquasecurity/trivy-action@master + with: + scan-type: image + exit-code: 1 + ignore-unfixed: true + severity: CRITICAL,HIGH + vuln-type: os,library + image-ref: '${{ github.repository }}:${{ github.sha }}' diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 080af4bb6..561df07a3 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -96,7 +96,6 @@ Coding Guidelines - always code with `PEP 8`_ conventions - always run source code through `flake8`_ and `pylint`_, using all pylint defaults except for ``C0111``. ``sbin/pycsw-pylint.sh`` is included for convenience - for exceptions which make their way to OGC ``ExceptionReport`` XML, always specify the appropriate ``locator`` and ``code`` parameters -- the pycsw wiki documents `developer tasks`_ for things like releasing documentation, testing, etc. Submitting a Pull Request ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/Dockerfile b/Dockerfile index 80be013f9..a8f11f4da 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,7 +10,7 @@ # Copyright (c) 2020 Ricardo Garcia Silva # Copyright (c) 2020 Massimo Di Stefano # Copyright (c) 2020 Tom Kralidis -# Copyright (c) 2020 Angelos Tzotsos +# Copyright (c) 2024 Angelos Tzotsos # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -36,7 +36,7 @@ # # ================================================================= -FROM python:3.8-slim-buster +FROM python:3.10-slim-bookworm LABEL maintainer="massimods@met.no,aheimsbakk@met.no,tommkralidis@gmail.com" # Build arguments @@ -44,9 +44,9 @@ LABEL maintainer="massimods@met.no,aheimsbakk@met.no,tommkralidis@gmail.com" ARG BUILD_DEV_IMAGE="false" -RUN apt-get update && apt-get install --yes \ - ca-certificates libexpat1 \ - && rm -rf /var/lib/apt/lists/* +RUN apt-get update --yes && \ + apt-get install --yes --no-install-recommends ca-certificates python3-setuptools && \ + rm -rf 
/var/lib/apt/lists/* RUN adduser --uid 1000 --gecos '' --disabled-password pycsw @@ -63,19 +63,19 @@ COPY --chown=pycsw \ requirements-dev.txt \ ./ -RUN pip install -U pip && \ - python3 -m pip install \ +RUN pip3 install -U pip setuptools && \ + pip3 install \ --requirement requirements.txt \ --requirement requirements-standalone.txt \ psycopg2-binary gunicorn \ - && if [ "$BUILD_DEV_IMAGE" = "true" ] ; then python3 -m pip install -r requirements-dev.txt; fi + && if [ "$BUILD_DEV_IMAGE" = "true" ] ; then pip3 install -r requirements-dev.txt; fi COPY --chown=pycsw . . COPY docker/pycsw.yml ${PYCSW_CONFIG} COPY docker/entrypoint.py /usr/local/bin/entrypoint.py -RUN python3 -m pip install --editable . +RUN pip3 install --editable . WORKDIR /home/pycsw diff --git a/VERSION.txt b/VERSION.txt deleted file mode 100644 index 7a1511416..000000000 --- a/VERSION.txt +++ /dev/null @@ -1 +0,0 @@ -3.0-dev diff --git a/docker/helm/templates/db-statefulset.yaml b/docker/helm/templates/db-statefulset.yaml index 8708a12f9..41ab771c0 100644 --- a/docker/helm/templates/db-statefulset.yaml +++ b/docker/helm/templates/db-statefulset.yaml @@ -36,6 +36,8 @@ spec: volumeMounts: - mountPath: {{ .Values.db.volume_path }} name: {{ .Values.db.volume_name }} + securityContext: + readOnlyRootFilesystem: true restartPolicy: Always volumeClaimTemplates: - metadata: diff --git a/docker/helm/templates/pycsw-deployment.yaml b/docker/helm/templates/pycsw-deployment.yaml index 9180d4a41..464233517 100644 --- a/docker/helm/templates/pycsw-deployment.yaml +++ b/docker/helm/templates/pycsw-deployment.yaml @@ -31,6 +31,8 @@ spec: volumeMounts: - mountPath: {{ .Values.pycsw.volume_path }} name: {{ .Values.pycsw.volume_name }} + securityContext: + readOnlyRootFilesystem: true restartPolicy: Always volumes: - name: {{ .Values.pycsw.volume_name }} diff --git a/docker/kubernetes/db-deployment.yaml b/docker/kubernetes/db-deployment.yaml index 831bf97fa..f15f0fa0a 100644 --- 
a/docker/kubernetes/db-deployment.yaml +++ b/docker/kubernetes/db-deployment.yaml @@ -34,6 +34,8 @@ spec: volumeMounts: - mountPath: /var/lib/postgresql/data/pgdata name: db-data + securityContext: + readOnlyRootFilesystem: true restartPolicy: Always volumes: - name: db-data diff --git a/docker/kubernetes/pycsw-deployment.yaml b/docker/kubernetes/pycsw-deployment.yaml index ecaf2e90f..cd0ef0d4f 100644 --- a/docker/kubernetes/pycsw-deployment.yaml +++ b/docker/kubernetes/pycsw-deployment.yaml @@ -30,6 +30,8 @@ spec: volumeMounts: - mountPath: /etc/pycsw name: pycsw-config + securityContext: + readOnlyRootFilesystem: true restartPolicy: Always volumes: - name: pycsw-config diff --git a/docs/conf.py b/docs/conf.py index 27144fb79..524da85e7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,7 +39,7 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys, os +import sys from unittest.mock import MagicMock # If extensions (or modules to document with autodoc) are in another directory, @@ -84,8 +84,7 @@ # built documents. # # The short X.Y version. -with open('../VERSION.txt') as f: - version = f.read().strip() +version = '3.0-dev' # The full version, including alpha/beta/rc tags. release = version diff --git a/docs/docker.rst b/docs/docker.rst index d02de9881..b4687a493 100644 --- a/docs/docker.rst +++ b/docs/docker.rst @@ -144,7 +144,6 @@ The following instructions set up a fully working development environment:: --detach \ --volume ${PWD}/pycsw:/usr/lib/python3.7/site-packages/pycsw \ --volume ${PWD}/docs:/home/pycsw/docs \ - --volume ${PWD}/VERSION.txt:/home/pycsw/VERSION.txt \ --volume ${PWD}/LICENSE.txt:/home/pycsw/LICENSE.txt \ --volume ${PWD}/COMMITTERS.txt:/home/pycsw/COMMITTERS.txt \ --volume ${PWD}/CONTRIBUTING.rst:/home/pycsw/CONTRIBUTING.rst \ @@ -166,7 +165,7 @@ The following instructions set up a fully working development environment:: .. 
note:: - Please note that the pycsw image only uses python 3.8 and that it also does + The pycsw image uses a specific Python version and does not install pycsw in editable mode. As such it is not possible to use ``tox``. diff --git a/docs/installation.rst b/docs/installation.rst index b4f7fdaa5..50550f005 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -221,9 +221,9 @@ WSGI mode, use ``pycsw/wsgi.py`` in your WSGI server environment. .. note:: - ``mod_wsgi`` supports only the version of python it was compiled with. If the target server - already supports WSGI applications, pycsw will need to use the same python version. - `WSGIDaemonProcess`_ provides a ``python-path`` directive that may allow a virtualenv created from the python version ``mod_wsgi`` uses. + ``mod_wsgi`` supports only the version of Python it was compiled with. If the target server + already supports WSGI applications, pycsw will need to use the same Python version. + `WSGIDaemonProcess`_ provides a ``python-path`` directive that may allow a virtualenv created from the Python version ``mod_wsgi`` uses. Below is an example of configuring with Apache: diff --git a/docs/locale/zh/LC_MESSAGES/contributing.po b/docs/locale/zh/LC_MESSAGES/contributing.po index 8f6c5f220..81678aa55 100644 --- a/docs/locale/zh/LC_MESSAGES/contributing.po +++ b/docs/locale/zh/LC_MESSAGES/contributing.po @@ -320,12 +320,6 @@ msgstr "" "除了OGC ``ExceptionReport`` XML运行方式为个别例外,通常会指定合适的 ``定" "位器`` 和 ``代码`` 参数" -#: ../../../CONTRIBUTING.rst:99 -msgid "" -"the pycsw wiki documents `developer tasks`_ for things like releasing " -"documentation, testing, etc." 
-msgstr "pycsw wiki 文档的 `developer tasks`_ 类似于发布文档,测试等。" - #: ../../../CONTRIBUTING.rst:102 msgid "Submitting a Pull Request" msgstr "提交pull请求" diff --git a/docs/locale/zh/LC_MESSAGES/testing.po b/docs/locale/zh/LC_MESSAGES/testing.po index 50c1ac3aa..f52442332 100644 --- a/docs/locale/zh/LC_MESSAGES/testing.po +++ b/docs/locale/zh/LC_MESSAGES/testing.po @@ -543,12 +543,6 @@ msgstr "导航到 ``http://host/path/to/pycsw/tests/index.html`` 。" #~ msgid "Running Locally" #~ msgstr "本地运行" -#~ msgid "" -#~ "The tests framework can be run from ``tests`` using `Paver`_ (see ``pavement." -#~ "py``) tasks for convenience:" -#~ msgstr "" -#~ "为方便工作,测试框架可以用 `Paver`_ (见 ``pavement.py``) 在 ``tests`` 中运行:" - #~ msgid "" #~ "The tests perform HTTP GET and POST requests against ``http://" #~ "localhost:8000``. The expected output for each test can be found in " @@ -566,12 +560,12 @@ msgstr "导航到 ``http://host/path/to/pycsw/tests/index.html`` 。" #~ "If a given test has failed, the output is saved in ``results``. The " #~ "resulting failure can be analyzed by running ``diff tests/expected/" #~ "name_of_test.xml tests/results/name_of_test.xml`` to find variances. The " -#~ "Paver task returns a status code which indicates the number of tests which " +#~ "task returns a status code which indicates the number of tests which " #~ "have failed (i.e. ``echo $?``)." 
#~ msgstr "" #~ "如果某个测试失败,输出将保存在``结果``中。 运行``diff tests/expected/" #~ "name_of_test.xml tests/results/name_of_test.xml`` ,会自动统计失败结果以找到差" -#~ "异。Paver任务会返回一个状态代码,表示已失败的测试数目(即``echo $?``)。" +#~ "异。任务会返回一个状态代码,表示已失败的测试数目(即``echo $?``)。" #~ msgid "Test Suites" #~ msgstr "测试套件" @@ -631,9 +625,6 @@ msgstr "导航到 ``http://host/path/to/pycsw/tests/index.html`` 。" #~ "文件的名称内)。该 ``request`` 值是HTTP GET请求的值。该 ``PYCSW_SERVER`` 在" #~ "URL pycsw安装运行时会被替换掉的。" -#~ msgid "run ``paver test``" -#~ msgstr "运行`paver test`" - #~ msgid "Ensure that all file paths are relative to ``path/to/pycsw``" #~ msgstr "确保所有文件的路径都关联 ``path/to/pycsw`` " @@ -646,11 +637,6 @@ msgstr "导航到 ``http://host/path/to/pycsw/tests/index.html`` 。" #~ "个sqlite3数据库。该数据库*必须*被写为 ``records.db``,目录 ``tests/suites/foo/" #~ "data`` 也必须存在" -#~ msgid "" -#~ "run ``paver test`` (or ``paver test -s foo`` to test only the new test suite)" -#~ msgstr "" -#~ "运行 ``paver test`` (或 ``paver test-s foo`` , 仅用来测试新的测试套件)" - #~ msgid "" #~ "Pycsw uses `pytest`_ for managing its automated tests. There are a number of " #~ "test suites that perform mostly functional testing. These tests ensure that " diff --git a/docs/testing.rst b/docs/testing.rst index 1cdc6b94d..b7f0ccdf1 100644 --- a/docs/testing.rst +++ b/docs/testing.rst @@ -289,7 +289,7 @@ Examples: .. code:: bash # install tox on your system - sudo pip install tox + sudo pip3 install tox # run all tests on multiple Python versions against all databases, # with default arguments @@ -311,7 +311,8 @@ requests against your pycsw install. The tests are is located in .. code-block:: bash - $ paver gen_tests_html + python3 gen_html.py > index.html + Then navigate to ``http://host/path/to/pycsw/tests/index.html``. @@ -319,7 +320,6 @@ Then navigate to ``http://host/path/to/pycsw/tests/index.html``. .. _Compliance & Interoperability Testing & Evaluation Initiative: https://github.com/opengeospatial/cite/wiki .. _functional tests: https://en.wikipedia.org/wiki/Functional_testing -.. 
_`Paver`: https://pythonhosted.org/Paver/ .. _pytest's invocation documentation: https://docs.pytest.org/en/stable/usage.html .. _pytest: https://docs.pytest.org .. _Github Actions: https://github.com/geopython/pycsw/actions diff --git a/pavement.py b/pavement.py deleted file mode 100644 index a05304af8..000000000 --- a/pavement.py +++ /dev/null @@ -1,142 +0,0 @@ -# -*- coding: utf-8 -*- -# ================================================================= -# -# Authors: Tom Kralidis -# Ricardo Garcia Silva -# -# Copyright (c) 2015 Tom Kralidis -# Copyright (c) 2016 Ricardo Garcia Silva -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. 
-# -# ================================================================= - -import os - -from paver.easy import task, cmdopts, needs, \ - pushd, sh, call_task, path, info - -DOCS = 'docs' -STAGE_DIR = '/tmp' - - -@task -def build_release(): - """Create release package""" - pass - - -@task -def refresh_docs(): - """Build sphinx docs from scratch""" - with pushd(DOCS): - sh('make clean') - sh('make html') - - -@task -@cmdopts([ - ('user=', 'u', 'OSGeo userid'), -]) -def publish_docs(options): - """Publish dev docs to production""" - local_path = '_build/html' - remote_host = 'pycsw.org' - remote_path = '/osgeo/pycsw/pycsw-web/docs/latest' - - user = options.get('user', False) - if not user: - raise Exception('OSGeo userid required') - - call_task('refresh_docs') - - with pushd(DOCS): - # change privs to be group writeable - for root, dirs, files in os.walk(local_path): - for dfile in files: - os.chmod(os.path.join(root, dfile), 0o664) - for ddir in dirs: - os.chmod(os.path.join(root, ddir), 0o775) - - # copy documentation - sh('scp -r %s%s* %s@%s:%s' % (local_path, os.sep, user, remote_host, - remote_path)) - - -@task -def gen_tests_html(): - """Generate tests/index.html for online testing""" - with pushd('tests'): - # ensure manager testsuite is writeable - os.chmod(os.path.join('functionaltests', 'suites', 'manager', 'data'), 0o777) - sh('python3 gen_html.py > index.html') - - -@task -@needs(['distutils.command.sdist']) -def publish_pypi(): - """Publish to PyPI""" - pass - - -@task -def package(): - """Package a distribution .tar.gz/.zip""" - - import pycsw - - version = pycsw.__version__ - - package_name = 'pycsw-%s' % version - - call_task('package_tar_gz', options={'package_name': package_name}) - - -@task -@cmdopts([ - ('package_name=', 'p', 'Name of package'), -]) -def package_tar_gz(options): - """Package a .tar.gz distribution""" - - import tarfile - - package_name = options.get('package_name', None) - - if package_name is None: - raise Exception('Package 
name required') - - filename = path('%s/%s.tar.gz' % (STAGE_DIR, package_name)) - - if filename.exists(): - info('Package %s already exists' % filename) - return - - with pushd(STAGE_DIR): - stage_path = '%s/%s' % (STAGE_DIR, package_name) - - if not path(stage_path).exists(): - raise Exception('Directory %s does not exist' % stage_path) - - tar = tarfile.open(filename, 'w:gz') - tar.add(package_name) - tar.close() diff --git a/pycsw/__init__.py b/pycsw/__init__.py index b017be00c..d2d3d0c65 100644 --- a/pycsw/__init__.py +++ b/pycsw/__init__.py @@ -4,7 +4,7 @@ # Authors: Tom Kralidis # Ricardo Garcia Silva # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # Copyright (c) 2017 Ricardo Garcia Silva # # Permission is hereby granted, free of charge, to any person @@ -30,6 +30,4 @@ # # ================================================================= -import pkg_resources - -__version__ = pkg_resources.require("pycsw")[0].version +__version__ = '3.0-dev' diff --git a/pycsw/core/metadata.py b/pycsw/core/metadata.py index 7645f8ac7..453fb6165 100644 --- a/pycsw/core/metadata.py +++ b/pycsw/core/metadata.py @@ -245,7 +245,8 @@ def _parse_csw(context, repos, record, identifier, pagesize=10): md.getrecords2(typenames=csw_typenames, resulttype='hits', outputschema=csw_outputschema) matches = md.results['matches'] - except: # this is a CSW, but server rejects query + except Exception: # this is a CSW, but server rejects query + LOGGER.debug('CSW query failed') raise RuntimeError(md.response) if pagesize > matches: @@ -1244,7 +1245,8 @@ def _parse_fgdc(context, repos, exml): try: tmp = '%s,%s,%s,%s' % (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy) _set(context, recobj, 'pycsw:BoundingBox', util.bbox2wktpolygon(tmp)) - except: # coordinates are corrupted, do not include + except Exception: + LOGGER.debug('Coordinates are corrupt') _set(context, recobj, 'pycsw:BoundingBox', None) else: _set(context, recobj, 'pycsw:BoundingBox', None) @@ -1324,7 +1326,8 @@ def 
get_value_by_language(pt_group, language, pt_type='text'): data.geographic_bounding_box.east_bound_longitude, data.geographic_bounding_box.north_bound_latitude) _set(context, recobj, 'pycsw:BoundingBox', util.bbox2wktpolygon(tmp)) - except: # coordinates are corrupted, do not include + except Exception: + LOGGER.debug('Coordinates are corrupt') _set(context, recobj, 'pycsw:BoundingBox', None) else: _set(context, recobj, 'pycsw:BoundingBox', None) @@ -1636,7 +1639,8 @@ def _parse_iso(context, repos, exml): try: tmp = '%s,%s,%s,%s' % (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy) _set(context, recobj, 'pycsw:BoundingBox', util.bbox2wktpolygon(tmp)) - except: # coordinates are corrupted, do not include + except Exception: + LOGGER.debug('Coordinates are corrupt') _set(context, recobj, 'pycsw:BoundingBox', None) else: _set(context, recobj, 'pycsw:BoundingBox', None) @@ -1714,7 +1718,8 @@ def _parse_dc(context, repos, exml): try: tmp = '%s,%s,%s,%s' % (bbox.minx, bbox.miny, bbox.maxx, bbox.maxy) _set(context, recobj, 'pycsw:BoundingBox', util.bbox2wktpolygon(tmp)) - except: # coordinates are corrupted, do not include + except Exception: + LOGGER.debug('Coordinates are corrupt') _set(context, recobj, 'pycsw:BoundingBox', None) else: _set(context, recobj, 'pycsw:BoundingBox', None) @@ -1725,6 +1730,8 @@ def _parse_dc(context, repos, exml): def _parse_json_record(context, repos, record): """Parse JSON record""" + recobj = None + if 'http://www.opengis.net/spec/ogcapi-records-1/1.0/req/record-core' in record.get('conformsTo', []): LOGGER.debug('Parsing OGC API - Records record model') recobj = _parse_oarec_record(context, repos, record) @@ -1732,6 +1739,9 @@ def _parse_json_record(context, repos, record): LOGGER.debug('Parsing STAC resource') recobj = _parse_stac_resource(context, repos, record) + if recobj is None: + raise RuntimeError('Unsupported JSON metadata format') + atom_xml = atom.write_record(recobj, 'full', context) _set(context, recobj, 'pycsw:XML', 
etree.tostring(atom_xml)) @@ -1792,7 +1802,8 @@ def _parse_oarec_record(context, repos, record): if links: _set(context, recobj, 'pycsw:Links', json.dumps(links)) - _set(context, recobj, 'pycsw:BoundingBox', util.bbox2wktpolygon(util.geojson_geometry2bbox(record['geometry']))) + if record.get('geometry') is not None: + _set(context, recobj, 'pycsw:BoundingBox', util.bbox2wktpolygon(util.geojson_geometry2bbox(record['geometry']))) if 'temporal' in record['properties'].get('extent', []): _set(context, recobj, 'pycsw:TempExtent_begin', record['properties']['extent']['temporal']['interval'][0]) @@ -1807,6 +1818,7 @@ def _parse_stac_resource(context, repos, record): recobj = repos.dataset() keywords = [] links = [] + bbox_wkt = None stac_type = record.get('type', 'Feature') if stac_type == 'Feature': @@ -1816,7 +1828,8 @@ def _parse_stac_resource(context, repos, record): stype = 'item' title = record['properties'].get('title') abstract = record['properties'].get('description') - bbox_wkt = util.bbox2wktpolygon(util.geojson_geometry2bbox(record['geometry'])) + if record.get('geometry') is not None: + bbox_wkt = util.bbox2wktpolygon(util.geojson_geometry2bbox(record['geometry'])) elif stac_type == 'Collection': LOGGER.debug('Parsing STAC Collection') conformance = 'https://github.com/radiantearth/stac-spec/tree/master/collection-spec/collection-spec.md' @@ -1827,8 +1840,6 @@ def _parse_stac_resource(context, repos, record): if 'extent' in record and 'spatial' in record['extent']: bbox_csv = ','.join(str(t) for t in record['extent']['spatial']['bbox'][0]) bbox_wkt = util.bbox2wktpolygon(bbox_csv) - else: - bbox_wkt = None if 'extent' in record and 'temporal' in record['extent'] and 'interval' in record['extent']['temporal']: _set(context, recobj, 'pycsw:TempExtent_begin', record['extent']['temporal']['interval'][0][0]) _set(context, recobj, 'pycsw:TempExtent_end', record['extent']['temporal']['interval'][0][1]) @@ -1839,7 +1850,6 @@ def _parse_stac_resource(context, repos, 
record): stype = 'catalog' title = record.get('title') abstract = record.get('description') - bbox_wkt = None _set(context, recobj, 'pycsw:Identifier', record['id']) _set(context, recobj, 'pycsw:Typename', typename) diff --git a/pycsw/core/repository.py b/pycsw/core/repository.py index fce204b41..3765f8f8c 100644 --- a/pycsw/core/repository.py +++ b/pycsw/core/repository.py @@ -38,10 +38,7 @@ from time import sleep from shapely.wkt import loads -try: - from shapely.errors import ReadingError -except Exception: - from shapely.geos import ReadingError +from shapely.errors import ShapelyError from sqlalchemy import create_engine, func, __version__, select from sqlalchemy.exc import OperationalError @@ -306,7 +303,8 @@ def describe(self): properties = { 'geometry': { - '$ref': 'https://geojson.org/schema/Polygon.json' + '$ref': 'https://geojson.org/schema/Polygon.json', + 'x-ogc-role': 'primary-geometry' } } @@ -318,6 +316,9 @@ def describe(self): 'title': i.name } + if i.name == 'identifier': + properties[i.name]['x-ogc-role'] = 'id' + try: properties[i.name]['type'] = type_mappings[str(i.type)] except Exception as err: @@ -636,7 +637,7 @@ def query_spatial(bbox_data_wkt, bbox_input_wkt, predicate, distance): else: raise RuntimeError( 'Invalid spatial query predicate: %s' % predicate) - except (AttributeError, ValueError, ReadingError, TypeError): + except (AttributeError, ValueError, ShapelyError, TypeError): result = False return "true" if result else "false" diff --git a/pycsw/ogc/api/records.py b/pycsw/ogc/api/records.py index 86b8c03e8..be3b76957 100644 --- a/pycsw/ogc/api/records.py +++ b/pycsw/ogc/api/records.py @@ -45,7 +45,7 @@ from pycsw.core.pygeofilter_evaluate import to_filter from pycsw.core.util import bind_url, get_today_and_now, jsonify_links, load_custom_repo_mappings, str2bool, wkt2geom from pycsw.ogc.api.oapi import gen_oapi -from pycsw.ogc.api.util import match_env_var, render_j2_template, to_json +from pycsw.ogc.api.util import match_env_var, 
render_j2_template, to_json, to_rfc3339 LOGGER = logging.getLogger(__name__) @@ -109,7 +109,7 @@ def __init__(self, config: dict): try: self.limit = int(self.config['server']['maxrecords']) except KeyError: - self.limit= 10 + self.limit = 10 LOGGER.debug(f'limit: {self.limit}') repo_filter = self.config['repository'].get('filter') @@ -466,7 +466,7 @@ def queryables(self, headers_, args, collection='metadata:main'): headers_['Content-Type'] = 'application/schema+json' if collection not in self.get_all_collections(): - msg = f'Invalid collection' + msg = 'Invalid collection' LOGGER.exception(msg) return self.get_exception(400, headers_, 'InvalidParameterValue', msg) @@ -506,6 +506,11 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'): :returns: tuple of headers, status code, content """ + LOGGER.debug(f'Request args: {args.keys()}') + LOGGER.debug('converting request argument names to lower case') + args = {k.lower(): v for k, v in args.items()} + LOGGER.debug(f'Request args (lower case): {args.keys()}') + headers_['Content-Type'] = self.get_content_type(headers_, args) reserved_query_params = [ @@ -539,7 +544,7 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'): collections = [] if collection not in self.get_all_collections(): - msg = f'Invalid collection' + msg = 'Invalid collection' LOGGER.exception(msg) return self.get_exception(400, headers_, 'InvalidParameterValue', msg) @@ -830,7 +835,7 @@ def item(self, headers_, args, collection, item): headers_['Content-Type'] = self.get_content_type(headers_, args) if collection not in self.get_all_collections(): - msg = f'Invalid collection' + msg = 'Invalid collection' LOGGER.exception(msg) return self.get_exception(400, headers_, 'InvalidParameterValue', msg) @@ -992,6 +997,7 @@ def get_collection_info(self, collection_name: str = 'metadata:main', collection_info = { 'id': id_, + 'type': 'catalog', 'title': title, 'description': description, 'itemType': 'record', 
@@ -1127,22 +1133,29 @@ def record2json(record, url, collection, mode='ogcapi-records'): 'id': record.identifier, 'type': 'Feature', 'geometry': None, - 'time': record.date, 'properties': {}, 'links': [] } + try: + dt, dt_type = to_rfc3339(record.date) + record_dict['time'] = { + dt_type: dt + } + except Exception: + record_dict['time'] = None + # todo; for keywords with a scheme use the theme property - themes = [] if record.topicategory: + themes = [] themes.append({'concepts': [record.topicategory], 'scheme': 'https://standards.iso.org/iso/19139/resources/gmxCodelists.xml#MD_TopicCategoryCode'}) - record_dict['properties']['themes'] = themes + record_dict['properties']['themes'] = themes if record.otherconstraints: - if isinstance(record.otherconstraints, str): + if isinstance(record.otherconstraints, str) and record.otherconstraints not in [None, 'None']: record.otherconstraints = [record.otherconstraints] - record_dict['properties']['license'] = ", ".join(record.otherconstraints) + record_dict['properties']['license'] = ", ".join(record.otherconstraints) record_dict['properties']['updated'] = record.insert_date @@ -1168,7 +1181,7 @@ def record2json(record, url, collection, mode='ogcapi-records'): record_dict['properties']['description'] = record.abstract if record.format: - record_dict['properties']['formats'] = [record.format] + record_dict['properties']['formats'] = [{'name': record.format}] if record.keywords: record_dict['properties']['keywords'] = [x for x in record.keywords.split(',')] @@ -1181,39 +1194,40 @@ def record2json(record, url, collection, mode='ogcapi-records'): rcnt.append({ 'name': cnt['name'], 'organization': cnt.get('organization', ''), - 'positionName': cnt.get('position', ''), - 'roles': [ - {'name': cnt.get('role', '')} - ], - 'contactInfo': { - 'phone': {'work': cnt.get('phone', '')}, - 'email': {'work': cnt.get('email', '')}, - 'address': { - 'work': { - 'deliveryPoint': cnt.get('address', ''), - 'city': cnt.get('city', ''), - 
'administrativeArea': cnt.get('region', ''), - 'postalCode': cnt.get('postcode', ''), - 'country': cnt.get('country', ''), - } - }, - 'url': cnt.get('onlineresource', '') - } + 'position': cnt.get('position', ''), + 'roles': [cnt.get('role', '')], + 'phones': [{ + 'value': cnt.get('phone', '') + }], + 'emails': [{ + 'value': cnt.get('email', '') + }], + 'addresses': [{ + 'deliveryPoint': [cnt.get('address', '')], + 'city': cnt.get('city', ''), + 'administrativeArea': cnt.get('region', ''), + 'postalCode': cnt.get('postcode', ''), + 'country': cnt.get('country', '') + }], + 'links': [{ + 'href': cnt.get('onlineresource') + }] }) except Exception as err: LOGGER.exception(f"failed to parse contact of {record.identifier}: {err}") except Exception as err: LOGGER.exception(f"failed to parse contacts json of {record.identifier}: {err}") - record_dict['properties']['providers'] = rcnt + + record_dict['properties']['contacts'] = rcnt if record.themes not in [None, '', 'null']: - ogcapiThemes = [] + ogcapi_themes = [] # For a scheme, prefer uri over label # OWSlib currently uses .keywords_object for keywords with url, see https://github.com/geopython/OWSLib/pull/765 try: for theme in json.loads(record.themes): try: - ogcapiThemes.append({ + ogcapi_themes.append({ 'scheme': theme['thesaurus'].get('url', theme['thesaurus'].get('title', '')), 'concepts': [c for c in theme.get('keywords_object', []) if c not in [None, '']] }) @@ -1221,18 +1235,26 @@ def record2json(record, url, collection, mode='ogcapi-records'): LOGGER.exception(f"failed to parse theme of {record.identifier}: {err}") except Exception as err: LOGGER.exception(f"failed to parse themes json of {record.identifier}: {err}") - record_dict['properties']['themes'] = ogcapiThemes + + record_dict['properties']['themes'] = ogcapi_themes if record.links: rdl = record_dict['links'] for link in jsonify_links(record.links): + if link['url'] in [None, 'None']: + LOGGER.debug(f'Skipping null link: {link}') + continue + link2 = 
{ - 'href': link['url'], - 'name': link.get('name'), - 'description': link.get('description'), - 'type': link.get('protocol') + 'href': link['url'] } + if link.get('name') not in [None, 'None']: + link2['name'] = link['name'] + if link.get('description') not in [None, 'None']: + link2['description'] = link['description'] + if link.get('protocol') not in [None, 'None']: + link2['procotol'] = link['protocol'] if 'rel' in link: link2['rel'] = link['rel'] elif link['protocol'] == 'WWW:LINK-1.0-http--image-thumbnail': @@ -1289,11 +1311,32 @@ def record2json(record, url, collection, mode='ogcapi-records'): if record.time_begin or record.time_end: if record.time_end not in [None, '']: if record.time_begin not in [None, '']: - record_dict['time'] = [record.time_begin, record.time_end] + begin, _ = to_rfc3339(record.time_begin) + end, _ = to_rfc3339(record.time_end) + record_dict['time'] = { + 'interval': [begin, end] + } else: - record_dict['time'] = record.time_end + end, end_type = to_rfc3339(record.time_end) + record_dict['time'] = { + end_type: end + } else: - record_dict['time'] = record.time_begin + begin, begin_type = to_rfc3339(record.time_begin) + record_dict['time'] = { + begin_type: begin + } + + if mode == 'stac-api': + date_, date_type = to_rfc3339(record.date) + record_dict['properties']['datetime'] = date_ + + if None not in [record.time_begin, record.time_end]: + start_date, start_date_type = to_rfc3339(record.time_begin) + end_date, end_date_type = to_rfc3339(record.time_end) + + record_dict['properties']['start_datetime'] = start_date + record_dict['properties']['end_datetime'] = end_date return record_dict @@ -1315,7 +1358,7 @@ def build_anytext(name, value): tokens = value.split(',') if len(tokens) == 1 and ' ' not in value: # single term - LOGGER.debug(f'Single term with no spaces') + LOGGER.debug('Single term with no spaces') return f"{name} ILIKE '%{value}%'" for token in tokens: diff --git a/pycsw/ogc/api/templates/items.html 
b/pycsw/ogc/api/templates/items.html index 297d6b6ba..a28393675 100644 --- a/pycsw/ogc/api/templates/items.html +++ b/pycsw/ogc/api/templates/items.html @@ -50,11 +50,14 @@ {% endif %} {# update existing url with new key,val. indent prevents spaces in output #} +{# reset pagination, else you may end up in empty result #} {% macro updateurl(key=None,val=None) %}{{ nav_links.self.split('?')[0] }}?{% - for at in attrs.keys() %}{% - if attrs[at] not in [None,''] %}{% - if key not in [None,''] and key == at %}&{{ at }}={{ val }}{% - else %}&{{ at }}={{ attrs[at] }}{% + for at in attrs.keys() %}{% + if at != 'offset' %}{% + if attrs[at] not in [None,''] %}{% + if key not in [None,''] and key == at %}&{{ at }}={{ val }}{% + else %}&{{ at }}={{ attrs[at] }}{% + endif %}{% endif %}{% endif %}{% if key not in attrs.keys() %}&{{ key }}={{ val }}{% endif %}{% @@ -100,7 +103,7 @@
{% if 'facets=true' in nav_links.self %} - Reset + Reset {% endif %}
diff --git a/pycsw/ogc/api/util.py b/pycsw/ogc/api/util.py index e635e838e..63f6cfec5 100644 --- a/pycsw/ogc/api/util.py +++ b/pycsw/ogc/api/util.py @@ -39,7 +39,9 @@ import os import pathlib import re +from typing import Union +from dateutil.parser import parse as dparse from jinja2 import Environment, FileSystemLoader from jinja2.exceptions import TemplateNotFound import yaml @@ -82,12 +84,17 @@ def json_serial(obj): """ helper function to convert to JSON non-default types (source: https://stackoverflow.com/a/22238613) + :param obj: `object` to be evaluated + :returns: JSON non-default type to `str` """ if isinstance(obj, (datetime, date, time)): - return obj.isoformat() + if isinstance(obj, date): + return obj.strftime('%Y-%m-%d') + else: + return obj.isoformat() + 'Z' elif isinstance(obj, bytes): try: LOGGER.debug('Returning as UTF-8 decoded bytes') @@ -218,3 +225,28 @@ def render_j2_template(config, template, data): raise return template.render(config=config, data=data, version=__version__) + + +def to_rfc3339(value: str) -> Union[tuple, None]: + """ + Helper function to convert a date/datetime into + RFC3339 + + :param value: `str` of date/datetime value + + :returns: `tuple` of `datetime` of RFC3339 value and date type + """ + + try: + dt = dparse(value) # TODO TIMEZONE) + except Exception as err: + msg = f'Parse error: {err}' + LOGGER.error(msg) + return 'date', None + + if len(value) < 11: + dt_type = 'date' + else: + dt_type = 'date-time' + + return dt, dt_type diff --git a/pycsw/ogc/csw/csw2.py b/pycsw/ogc/csw/csw2.py index b60f61810..e3182b362 100644 --- a/pycsw/ogc/csw/csw2.py +++ b/pycsw/ogc/csw/csw2.py @@ -31,16 +31,10 @@ # ================================================================= import os -import sys -import cgi -from urllib.parse import quote, unquote -from io import StringIO from pycsw.core.etree import etree -from pycsw import oaipmh, opensearch, sru +from pycsw import opensearch from pycsw.ogc.csw.cql import cql2fes -from 
pycsw.plugins.profiles import profile as pprofile -import pycsw.plugins.outputschemas -from pycsw.core import config, log, metadata, util +from pycsw.core import metadata, util from pycsw.core.formats.fmt_json import xml2dict from pycsw.ogc.fes import fes1 import logging @@ -517,7 +511,8 @@ def getdomain(self): self.parent.context.namespaces)).text = pname try: operation, parameter = pname.split('.') - except: + except Exception as err: + LOGGER.debug(f'Cannot split pname: {err}') return node if (operation in self.parent.context.model['operations'].keys() and parameter in @@ -540,7 +535,8 @@ def getdomain(self): else: # it's a core queryable, map to internal typename model try: pname2 = self.parent.repository.queryables['_all'][pname]['dbcol'] - except: + except Exception as err: + LOGGER.debug(f'pname2 not found: {err}') pname2 = pname # decipher typename @@ -780,7 +776,7 @@ def getrecords(self): try: name, order = tmp.rsplit(':', 1) - except: + except Exception: return self.exceptionreport('InvalidParameterValue', 'sortby', 'Invalid SortBy value: must be in the format\ propertyname:A or propertyname:D') @@ -908,7 +904,7 @@ def getrecords(self): self.parent.context.namespaces), timestamp=timestamp) if 'where' not in self.parent.kvp['constraint'] and \ - self.parent.kvp['resulttype'] is None: + self.parent.kvp['resulttype'] in [None, 'hits']: returned = '0' searchresults = etree.SubElement(node, @@ -1989,7 +1985,8 @@ def exceptionreport(self, code, locator, text): try: language = self.parent.config['server'].get('language') ogc_schemas_base = self.parent.config['server'].get('ogc_schemas_base') - except: + except Exception: + LOGGER.debug('Dropping to default language and OGC schemas base') language = 'en-US' ogc_schemas_base = self.parent.context.ogc_schemas_base @@ -2023,7 +2020,8 @@ def write_boundingbox(bbox, nsmap): if bbox is not None: try: bbox2 = util.wkt2geom(bbox) - except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return 
None if len(bbox2) == 4: diff --git a/pycsw/ogc/csw/csw3.py b/pycsw/ogc/csw/csw3.py index 28bef1826..2438d3255 100644 --- a/pycsw/ogc/csw/csw3.py +++ b/pycsw/ogc/csw/csw3.py @@ -28,19 +28,12 @@ # # ================================================================= -import json import os -import sys -import cgi from time import time -from urllib.parse import quote, unquote -from io import StringIO from pycsw.core.etree import etree from pycsw.ogc.csw.cql import cql2fes -from pycsw import oaipmh, opensearch, sru -from pycsw.plugins.profiles import profile as pprofile -import pycsw.plugins.outputschemas -from pycsw.core import config, log, metadata, util +from pycsw import opensearch +from pycsw.core import metadata, util from pycsw.core.formats.fmt_json import xml2dict from pycsw.ogc.fes import fes1, fes2 import logging @@ -113,7 +106,8 @@ def getcapabilities(self): try: updatesequence = \ util.get_time_iso2unix(self.parent.repository.query_insert()) - except: + except Exception as err: + LOGGER.debug(f'Cannot set updatesequence: {err}') updatesequence = None node = etree.Element(util.nspath_eval('csw30:Capabilities', @@ -512,7 +506,8 @@ def getdomain(self): self.parent.context.namespaces)).text = pname try: operation, parameter = pname.split('.') - except: + except Exception as err: + LOGGER.debug(f'pname2 not found: {err}') return node if (operation in self.parent.context.model['operations'] and parameter in self.parent.context.model['operations'][operation]['parameters']): @@ -534,8 +529,8 @@ def getdomain(self): else: # it's a core queryable, map to internal typename model try: pname2 = self.parent.repository.queryables['_all'][pname]['dbcol'] - except: - pname2 = pname + except Exception as err: + LOGGER.debug(f'pname2 not found: {err}') # decipher typename dvtype = None @@ -810,7 +805,7 @@ def getrecords(self): try: name, order = tmp.rsplit(':', 1) - except: + except Exception: return self.exceptionreport('InvalidParameterValue', 'sortby', 'Invalid SortBy value: 
must be in the format\ propertyname:A or propertyname:D') @@ -2079,7 +2074,8 @@ def exceptionreport(self, code, locator, text): try: language = self.parent.config['server'].get('language') ogc_schemas_base = self.parent.config['server'].get('ogc_schemas_base') - except: + except Exception: + LOGGER.debug('Dropping to default language and OGC schemas base') language = 'en-US' ogc_schemas_base = self.parent.context.ogc_schemas_base @@ -2157,7 +2153,8 @@ def write_boundingbox(bbox, nsmap): if bbox is not None: try: bbox2 = util.wkt2geom(bbox) - except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return None if len(bbox2) == 4: diff --git a/pycsw/ogc/fes/fes1.py b/pycsw/ogc/fes/fes1.py index 2e443d84b..acb79e1e4 100644 --- a/pycsw/ogc/fes/fes1.py +++ b/pycsw/ogc/fes/fes1.py @@ -4,7 +4,7 @@ # Authors: Tom Kralidis # Angelos Tzotsos # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # Copyright (c) 2015 Angelos Tzotsos # # Permission is hereby granted, free of charge, to any person @@ -416,7 +416,6 @@ def set_spatial_ranking(geometry): util.ranking_pass = True util.ranking_query_geometry = geometry.wkt elif geometry.type in ['LineString', 'Point']: - from shapely.geometry.base import BaseGeometry from shapely.geometry import box from shapely.wkt import loads,dumps ls = loads(geometry.wkt) diff --git a/pycsw/ogc/fes/fes2.py b/pycsw/ogc/fes/fes2.py index a29dcb946..7ce00d19b 100644 --- a/pycsw/ogc/fes/fes2.py +++ b/pycsw/ogc/fes/fes2.py @@ -4,7 +4,7 @@ # Authors: Tom Kralidis # Angelos Tzotsos # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # Copyright (c) 2015 Angelos Tzotsos # # Permission is hereby granted, free of charge, to any person @@ -434,7 +434,6 @@ def set_spatial_ranking(geometry): util.ranking_pass = True util.ranking_query_geometry = geometry.wkt elif geometry.type in ['LineString', 'Point']: - from shapely.geometry.base import BaseGeometry from shapely.geometry import box from 
shapely.wkt import loads,dumps ls = loads(geometry.wkt) diff --git a/pycsw/ogc/gml/gml3.py b/pycsw/ogc/gml/gml3.py index 37a005432..ff51c2b8c 100644 --- a/pycsw/ogc/gml/gml3.py +++ b/pycsw/ogc/gml/gml3.py @@ -3,7 +3,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -209,16 +209,17 @@ def transform(self, src, dest): proj_src = 'epsg:%s' % src proj_dst = 'epsg:%s' % dest transformer = Transformer.from_crs(proj_src, proj_dst, always_xy=True) - except: - raise RuntimeError('Invalid projection transformation') + except Exception as err: + msg = f'Invalid projection transformation: {err}' + raise RuntimeError(msg) geom = loads(self.wkt) - if geom.type == 'Point': + if geom.geom_type == 'Point': newgeom = Point(transformer.transform(geom.x, geom.y)) wkt2 = newgeom.wkt - elif geom.type == 'LineString': + elif geom.geom_type == 'LineString': for vertice in list(geom.coords): newgeom = transformer.transform(vertice[0], vertice[1]) vertices.append(newgeom) @@ -227,7 +228,7 @@ def transform(self, src, dest): wkt2 = linestring.wkt - elif geom.type == 'Polygon': + elif geom.geom_type == 'Polygon': for vertice in list(geom.exterior.coords): newgeom = transformer.transform(vertice[0], vertice[1]) vertices.append(newgeom) diff --git a/pycsw/ogc/gml/gml32.py b/pycsw/ogc/gml/gml32.py index 2cc18d900..4fbb07011 100644 --- a/pycsw/ogc/gml/gml32.py +++ b/pycsw/ogc/gml/gml32.py @@ -3,7 +3,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -28,7 +28,6 @@ # # ================================================================= -from copy import deepcopy import logging from owslib import crs @@ -213,8 +212,9 @@ def transform(self, 
src, dest): proj_src = 'epsg:%s' % src proj_dst = 'epsg:%s' % dest transformer = Transformer.from_crs(proj_src, proj_dst, always_xy=True) - except: - raise RuntimeError('Invalid projection transformation') + except Exception as err: + msg = f'Invalid projection transformation: {err}' + raise RuntimeError(msg) geom = loads(self.wkt) diff --git a/pycsw/opensearch.py b/pycsw/opensearch.py index 167ba3143..1b558a153 100644 --- a/pycsw/opensearch.py +++ b/pycsw/opensearch.py @@ -31,7 +31,6 @@ # ================================================================= import logging -from urllib.parse import urlencode from pycsw.core import util from pycsw.core.etree import etree diff --git a/pycsw/plugins/outputschemas/atom.py b/pycsw/plugins/outputschemas/atom.py index 927901706..245296545 100644 --- a/pycsw/plugins/outputschemas/atom.py +++ b/pycsw/plugins/outputschemas/atom.py @@ -3,7 +3,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -28,7 +28,6 @@ # # ================================================================= -import os from pycsw.core import util from pycsw.core.etree import etree @@ -132,7 +131,8 @@ def write_extent(bbox, nsmap): if bbox is not None: try: bbox2 = util.wkt2geom(bbox) - except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return None where = etree.Element(util.nspath_eval('georss:where', NAMESPACES)) envelope = etree.SubElement(where, util.nspath_eval('gml:Envelope', nsmap), srsName='http://www.opengis.net/def/crs/EPSG/0/4326') diff --git a/pycsw/plugins/outputschemas/datacite.py b/pycsw/plugins/outputschemas/datacite.py index da372c04b..b005d63a3 100644 --- a/pycsw/plugins/outputschemas/datacite.py +++ b/pycsw/plugins/outputschemas/datacite.py @@ -11,7 +11,7 @@ # # This module intends to follow DataCite 4.3 # -# PyCSW Copyright (C) 
2015 Tom Kralidis +# PyCSW Copyright (C) 2024 Tom Kralidis # Schema Copyright (C) 2016 CERN # Schema Copyright (C) 2019 Caltech # @@ -341,7 +341,8 @@ def write_record(result, esn, context, url=None): if bbox not in [None, '']: try: bbox2 = util.wkt2geom(bbox) - except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return None bounds = etree.SubElement(node, util.nspath_eval('geoLocations', NAMESPACES)) bound = etree.SubElement(bounds, util.nspath_eval('geoLocation', NAMESPACES)) diff --git a/pycsw/plugins/outputschemas/dif.py b/pycsw/plugins/outputschemas/dif.py index 12c3a2c7e..7ffdcb622 100644 --- a/pycsw/plugins/outputschemas/dif.py +++ b/pycsw/plugins/outputschemas/dif.py @@ -3,7 +3,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -198,12 +198,11 @@ def write_record(result, esn, context, url=None): def write_extent(bbox, nsmap): ''' Generate BBOX extent ''' - from shapely.wkt import loads - if bbox is not None: try: bbox2 = util.wkt2geom(bbox) - except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return None extent = etree.Element(util.nspath_eval('dif:Spatial_Coverage', nsmap)) etree.SubElement(extent, util.nspath_eval('dif:Southernmost_Latitude', nsmap)).text = str(bbox2[1]) diff --git a/pycsw/plugins/outputschemas/fgdc.py b/pycsw/plugins/outputschemas/fgdc.py index 1d43d07d1..bd0dccc49 100644 --- a/pycsw/plugins/outputschemas/fgdc.py +++ b/pycsw/plugins/outputschemas/fgdc.py @@ -3,7 +3,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -166,7 +166,8 @@ def write_extent(bbox): if bbox is not None: try: bbox2 = util.wkt2geom(bbox) - 
except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return None spdom = etree.Element('spdom') diff --git a/pycsw/plugins/outputschemas/gm03.py b/pycsw/plugins/outputschemas/gm03.py index 9cfeeabef..bb207caaa 100644 --- a/pycsw/plugins/outputschemas/gm03.py +++ b/pycsw/plugins/outputschemas/gm03.py @@ -3,7 +3,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -228,7 +228,8 @@ def write_extent(bbox, nsmap): if bbox is not None: try: bbox2 = util.wkt2geom(bbox) - except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return None bounding_box = etree.Element(util.nspath_eval('gm03:GM03_2_1Core.Core.EX_GeographicBoundingBox', NAMESPACES)) etree.SubElement(bounding_box, util.nspath_eval('gm03:northBoundLatitude', nsmap)).text = str(bbox2[3]) diff --git a/pycsw/plugins/profiles/apiso/apiso.py b/pycsw/plugins/profiles/apiso/apiso.py index 03b507feb..c95d045aa 100644 --- a/pycsw/plugins/profiles/apiso/apiso.py +++ b/pycsw/plugins/profiles/apiso/apiso.py @@ -31,7 +31,7 @@ # ================================================================= import os -from pycsw.core import config, util +from pycsw.core import util from pycsw.core.etree import etree from pycsw.plugins.profiles import profile @@ -707,7 +707,8 @@ def write_extent(bbox, nsmap): if bbox is not None: try: bbox2 = util.wkt2geom(bbox) - except: + except Exception as err: + LOGGER.debug(f'Geometry parsing error: {err}') return None extent = etree.Element(util.nspath_eval('gmd:extent', nsmap)) ex_extent = etree.SubElement(extent, util.nspath_eval('gmd:EX_Extent', nsmap)) diff --git a/pycsw/plugins/profiles/ebrim/ebrim.py b/pycsw/plugins/profiles/ebrim/ebrim.py index fb63ed100..53ca832c1 100644 --- a/pycsw/plugins/profiles/ebrim/ebrim.py +++ 
b/pycsw/plugins/profiles/ebrim/ebrim.py @@ -30,7 +30,7 @@ import os from pycsw.core.etree import etree -from pycsw.core import config, util +from pycsw.core import util from pycsw.ogc.csw.csw2 import write_boundingbox from pycsw.plugins.profiles import profile diff --git a/pycsw/plugins/repository/odc/odc.py b/pycsw/plugins/repository/odc/odc.py index c3d6a9592..7a7af2b3f 100644 --- a/pycsw/plugins/repository/odc/odc.py +++ b/pycsw/plugins/repository/odc/odc.py @@ -3,7 +3,7 @@ # # Authors: Tom Kralidis # -# Copyright (c) 2015 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation @@ -28,11 +28,8 @@ # # ================================================================= -import os, sys - -from django.db import models from django.db import connection -from django.db.models import Avg, Max, Min, Count +from django.db.models import Max, Min, Count from django.conf import settings from pycsw.core import repository, util @@ -109,7 +106,6 @@ def query_domain(self, domain, typenames, domainquerytype='list', def query_insert(self, direction='max'): ''' Query to get latest (default) or earliest update to repository ''' - from datetime import datetime if direction == 'min': return Resource.objects.aggregate( Min('last_updated'))['last_updated__min'].strftime('%Y-%m-%dT%H:%M:%SZ') diff --git a/pycsw/server.py b/pycsw/server.py index cb3fad13c..917525240 100644 --- a/pycsw/server.py +++ b/pycsw/server.py @@ -392,8 +392,8 @@ def dispatch(self, writer=sys.stdout, write_headers=True): self.repository = rs_cls(self.context, repo_filter) LOGGER.debug('Custom repository %s loaded (%s)', rs, self.repository.dbtype) connection_done = True - except: - LOGGER.debug(f'Repository not loaded retry connection {max_attempts}') + except Exception as err: + LOGGER.debug(f'Repository not loaded retry connection {max_attempts}: {err}') max_attempts += 1 except Exception as err: 
msg = 'Could not load custom repository %s: %s' % (rs, err) @@ -422,8 +422,8 @@ def dispatch(self, writer=sys.stdout, write_headers=True): LOGGER.debug( 'Repository loaded (local): %s.' % self.repository.dbtype) connection_done = True - except: - LOGGER.debug(f'Repository not loaded retry connection {max_attempts}') + except Exception: + LOGGER.debug(f'Repository not loaded retry connection {max_attempts}: {err}') max_attempts += 1 except Exception as err: msg = 'Could not load repository (local): %s' % err @@ -814,7 +814,8 @@ def _cql_update_queryables_mappings(self, cql, mappings): for key in mappings.keys(): try: cql = cql.replace(key, mappings[key]['dbcol']) - except: + except KeyError: + LOGGER.debug('Setting without dbcol key') cql = cql.replace(key, mappings[key]) LOGGER.debug('Interpolated CQL text = %s.', cql) return cql diff --git a/pycsw/stac/api.py b/pycsw/stac/api.py index a8344bc3f..377a62b2b 100644 --- a/pycsw/stac/api.py +++ b/pycsw/stac/api.py @@ -38,6 +38,7 @@ from pycsw.ogc.api.oapi import gen_oapi from pycsw.ogc.api.records import API from pycsw.core.pygeofilter_evaluate import to_filter +from pycsw.core.util import geojson_geometry2bbox LOGGER = logging.getLogger(__name__) @@ -264,6 +265,9 @@ def collections(self, headers_, args): 'href': self.config['server']['url'] }] + response['numberMatched'] = len(response['collections']) + response['numberReturned'] = len(response['collections']) + return self.get_response(200, headers_, response) def collection(self, headers_, args, collection='metadata:main'): @@ -345,7 +349,7 @@ def items(self, headers_, json_post_data, args, collection='metadata:main'): headers, status, response = super().items(headers_, json_post_data, args, collection) if collection not in self.get_all_collections(): - msg = f'Invalid collection' + msg = 'Invalid collection' LOGGER.exception(msg) return self.get_exception(400, headers_, 'InvalidParameterValue', msg) @@ -373,6 +377,18 @@ def items(self, headers_, json_post_data, 
args, collection='metadata:main'): else: response2['features'].append(record) + if record.get('bbox') is None: + geometry = record.get('geometry') + if geometry is not None: + LOGGER.debug('Calculating bbox from geometry') + bbox = geojson_geometry2bbox(geometry) + record['bbox'] = [float(t) for t in bbox.split(',')] + + for link in record['links']: + if link.get('rel') is None: + LOGGER.debug('Missing link relation; adding rel=related') + link['rel'] = 'related' + for count, value in enumerate(response2['links']): if value['rel'] == 'alternate': response2['links'].pop(count) @@ -402,7 +418,7 @@ def item(self, headers_, args, collection, item): headers, status, response = super().item(headers_, args, collection, item) if collection not in self.get_all_collections(): - msg = f'Invalid collection' + msg = 'Invalid collection' LOGGER.exception(msg) return self.get_exception(400, headers_, 'InvalidParameterValue', msg) @@ -457,8 +473,8 @@ def links2stacassets(collection, record): if 'collection' not in record: record['collection'] = collection - links_assets = [i for i in record['links'] if i['rel'] == 'enclosure'] - links_to_keep = [i for i in record['links'] if i['rel'] != 'enclosure'] + links_assets = [i for i in record['links'] if i.get('rel', '') == 'enclosure'] + links_to_keep = [i for i in record['links'] if i.get('rel', '') != 'enclosure'] record['links'] = links_to_keep diff --git a/requirements-dev.txt b/requirements-dev.txt index f6e164cb2..1892c3ad4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,7 +2,6 @@ -r requirements-standalone.txt apipkg==1.4 -Paver==1.2.4 pytest==6.2.4 pytest-cov==2.12.0 pytest-flake8==1.0.7 diff --git a/requirements.txt b/requirements.txt index c032bc06b..dee47bbc1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ geolinks lxml OWSLib pyproj +python-dateutil PyYAML -Shapely<2.0 +Shapely xmltodict diff --git a/setup.py b/setup.py index 8d62a05bb..357a191bc 100644 --- a/setup.py +++ b/setup.py @@ 
-4,7 +4,7 @@ # Authors: Tom Kralidis # Ricardo Garcia Silva # -# Copyright (c) 2016 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # Copyright (c) 2017 Ricardo Garcia Silva # # Permission is hereby granted, free of charge, to any person @@ -32,6 +32,7 @@ import io import os +import re from setuptools import find_packages, setup @@ -43,14 +44,30 @@ def read(filename, encoding="utf-8"): return contents +def get_package_version(): + """get version from top-level package init""" + + version_file = read('pycsw/__init__.py') + version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", + version_file, re.M) + if version_match: + return version_match.group(1) + raise RuntimeError('Unable to find version string.') + + +DESCRIPTION = ('pycsw is an OGC API - Records and OGC CSW server ' + 'implementation written in Python') + +print("JJJ", DESCRIPTION) + # ensure a fresh MANIFEST file is generated if (os.path.exists('MANIFEST')): os.unlink('MANIFEST') setup( name='pycsw', - version=read("VERSION.txt"), - description='pycsw is an OARec and OGC CSW server implementation written in Python', + version=get_package_version(), + description=DESCRIPTION.strip(), long_description=read("README.md"), long_description_content_type='text/markdown', license='MIT', @@ -81,7 +98,7 @@ def read(filename, encoding="utf-8"): include_package_data=True, entry_points={ 'console_scripts': [ - 'pycsw-admin.py=pycsw.core.admin:cli', + 'pycsw-admin.py=pycsw.core.admin:cli' ] }, classifiers=[ @@ -92,9 +109,8 @@ def read(filename, encoding="utf-8"): 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Topic :: Scientific/Engineering :: GIS', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Topic :: Scientific/Engineering :: GIS' ] ) diff --git 
a/tests/functionaltests/suites/default/expected/get_GetRecords-all.xml b/tests/functionaltests/suites/default/expected/get_GetRecords-all.xml index b2610473f..f5cdc92a8 100644 --- a/tests/functionaltests/suites/default/expected/get_GetRecords-all.xml +++ b/tests/functionaltests/suites/default/expected/get_GetRecords-all.xml @@ -2,5 +2,5 @@ - + diff --git a/tests/functionaltests/suites/default/expected/get_GetRecords-empty-maxrecords.xml b/tests/functionaltests/suites/default/expected/get_GetRecords-empty-maxrecords.xml index b2610473f..f5cdc92a8 100644 --- a/tests/functionaltests/suites/default/expected/get_GetRecords-empty-maxrecords.xml +++ b/tests/functionaltests/suites/default/expected/get_GetRecords-empty-maxrecords.xml @@ -2,5 +2,5 @@ - + diff --git a/tests/functionaltests/suites/default/expected/post_GetRecords-all-resulttype-hits.xml b/tests/functionaltests/suites/default/expected/post_GetRecords-all-resulttype-hits.xml index af07983a5..79ba3b46c 100644 --- a/tests/functionaltests/suites/default/expected/post_GetRecords-all-resulttype-hits.xml +++ b/tests/functionaltests/suites/default/expected/post_GetRecords-all-resulttype-hits.xml @@ -2,5 +2,5 @@ - + diff --git a/tests/functionaltests/suites/oarec/test_oarec_functional.py b/tests/functionaltests/suites/oarec/test_oarec_functional.py index 1f9b6c0c6..1a211fe60 100644 --- a/tests/functionaltests/suites/oarec/test_oarec_functional.py +++ b/tests/functionaltests/suites/oarec/test_oarec_functional.py @@ -4,7 +4,7 @@ # Angelos Tzotsos # Ricardo Garcia Silva # -# Copyright (c) 2023 Tom Kralidis +# Copyright (c) 2024 Tom Kralidis # Copyright (c) 2022 Angelos Tzotsos # Copyright (c) 2023 Ricardo Garcia Silva # @@ -228,6 +228,12 @@ def test_items(config): assert content['numberReturned'] == 10 assert content['features'][5]['properties']['title'] == 'Lorem ipsum dolor sit amet' # noqa + params = {'SoRtBy': '-title'} + content = json.loads(api.items({}, None, params)[2]) + assert content['numberMatched'] == 12 + 
assert content['numberReturned'] == 10 + assert content['features'][5]['properties']['title'] == 'Lorem ipsum dolor sit amet' # noqa + cql_json = {'op': '=', 'args': [{'property': 'title'}, 'Lorem ipsum']} content = json.loads(api.items({}, cql_json, {})[2]) assert content['numberMatched'] == 1 diff --git a/tests/functionaltests/suites/stac_api/test_stac_api_functional.py b/tests/functionaltests/suites/stac_api/test_stac_api_functional.py index 385e65080..102c729fb 100644 --- a/tests/functionaltests/suites/stac_api/test_stac_api_functional.py +++ b/tests/functionaltests/suites/stac_api/test_stac_api_functional.py @@ -83,6 +83,19 @@ def test_conformance(config): assert conformance in content['conformsTo'] +def test_collections(config): + api = STACAPI(config) + headers, status, content = api.collections({}, {'f': 'json'}) + content = json.loads(content) + + assert headers['Content-Type'] == 'application/json' + assert status == 200 + assert len(content['links']) == 3 + + assert len(content['collections']) == 1 + assert len(content['collections']) == content['numberMatched'] + assert len(content['collections']) == content['numberReturned'] + def test_queryables(config): api = STACAPI(config) headers, status, content = api.queryables({}, {}) @@ -118,6 +131,15 @@ def test_items(config): assert record['stac_version'] == '1.0.0' assert record['collection'] == 'metadata:main' + for feature in content['features']: + if feature.get('geometry') is not None: + assert 'bbox' in feature + assert isinstance(feature['bbox'], list) + + for link in feature['links']: + assert 'href' in link + assert 'rel' in link + # test GET KVP requests content = json.loads(api.items({}, None, {'bbox': '-180,-90,180,90'})[2]) assert len(content['features']) == 3 @@ -194,5 +216,9 @@ def test_item(config): assert content['stac_version'] == '1.0.0' assert content['collection'] == 'metadata:main' + for link in content['links']: + assert 'href' in link + assert 'rel' in link + headers, status, content 
= api.item({}, {}, 'foo', item) assert status == 400 diff --git a/tox.ini b/tox.ini index d6c5d3747..fff95d305 100644 --- a/tox.ini +++ b/tox.ini @@ -1,10 +1,10 @@ # Tox (http://tox.testrun.org/) is a tool for running tests # in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" +# test suite on all supported python versions. To use it, "pip3 install tox" # and then run "tox" from this directory. [tox] -envlist = {py38,py39,py310,py311}-sqlite +envlist = {py310,py311}-sqlite skip_missing_interpreters = True [testenv]