diff --git a/.env b/.env index b8400e1b..20db4387 100644 --- a/.env +++ b/.env @@ -61,7 +61,6 @@ CKAN__HARVEST__MQ__HOSTNAME=redis CKAN__HARVEST__MQ__PORT=6379 CKAN__HARVEST__MQ__REDIS_DB=1 CKAN__HARVEST__LOG_LEVEL=info -CKAN__HARVEST__LOG_SCOPE=0 CKAN__HARVEST__STATUS_MAIL__ALL=True @@ -70,7 +69,7 @@ CKANEXT__GEODATAGOV__BUREAU_CSV__URL_DEFAULT=https://resources.data.gov/schemas/ CKAN__SPATIAL__SRID=4326 CKAN__SPATIAL__VALIDATOR__PROFILES=iso19139ngdc -CKANEXT__SPATIAL__SEARCH_BACKEND=solr +CKANEXT__SPATIAL__SEARCH_BACKEND=solr-bbox CKAN___GOOGLEANALYTICS__IDS=UA-1010101-1 UA-1010101-2 CKAN__TRACKING_ENABLED=true diff --git a/.profile b/.profile index 6f75c477..117824f3 100755 --- a/.profile +++ b/.profile @@ -85,6 +85,7 @@ export CKANEXT__SAML2AUTH__IDP_METADATA__LOCAL_PATH="${HOME}/${CKANEXT__SAML2AUT # Export settings for CKAN via ckanext-envvars export CKAN_REDIS_URL=rediss://:$REDIS_PASSWORD@$REDIS_HOST:$REDIS_PORT export CKAN_SQLALCHEMY_URL=$(vcap_get_service db .credentials.uri) +export CKAN_SQLALCHEMY_URL=${CKAN_SQLALCHEMY_URL/postgres/postgresql} export CKAN___SQLALCHEMY__POOL_SIZE=250 export CKAN___SQLALCHEMY__MAX_OVERFLOW=500 @@ -158,7 +159,7 @@ if [[ $MIGRATE_DB = 'True' ]]; then # Run migrations ckan db upgrade ckan harvester initdb - ckan archiver init - ckan report initdb - ckan qa init + # ckan archiver init + # ckan report initdb + # ckan qa init fi diff --git a/ckan/docker-entrypoint.d/10-setup-harvest.sh b/ckan/docker-entrypoint.d/10-setup-harvest.sh index afa48dfe..d9cee2e9 100644 --- a/ckan/docker-entrypoint.d/10-setup-harvest.sh +++ b/ckan/docker-entrypoint.d/10-setup-harvest.sh @@ -4,9 +4,18 @@ echo "Init Harvest database tables" ckan harvester initdb echo "turn on gather and fetch workers" -ckan harvester fetch-consumer & -ckan harvester fetch-consumer & -ckan harvester gather-consumer & +run_fetch () { + until ckan harvester fetch-consumer; do + sleep 1 + done +} +run_gather () { + until ckan harvester gather-consumer; do + sleep 1 + done +} +run_fetch & +run_gather & echo "check harvest job completion every 10 secs" check_harvester () { diff --git a/ckan/requirements.in b/ckan/requirements.in index a512d58b..0b8c5a7e 100644 --- a/ckan/requirements.in +++ b/ckan/requirements.in @@ -1,5 +1,5 @@ # CKAN requirements and extensions -git+https://github.com/gsa/ckan.git@ckan-2.9.8-momentjs#egg=ckan +ckan==2.10.1 git+https://github.com/ckan/ckanext-dcat@master#egg=ckanext-dcat -e git+https://github.com/ckan/ckanext-harvest.git@master#egg=ckanext-harvest -e git+https://github.com/ckan/ckanext-spatial.git@master#egg=ckanext-spatial @@ -8,21 +8,26 @@ git+https://github.com/GSA/ckanext-saml2auth.git@create_user_via_saml#egg=ckanex -e git+https://github.com/ckan/ckanext-archiver.git@master#egg=ckanext-archiver -e git+https://github.com/ckan/ckanext-report.git@master#egg=ckanext-report -ckanext-datagovcatalog>=0.0.3 -ckanext-datagovtheme>=0.1.22, ==0.1.* -ckanext-datajson>=0.1.13 -ckanext-envvars>=0.0.2 -ckanext-geodatagov>=0.1.28 -ckanext-googleanalyticsbasic -ckanext-metrics-dashboard +# ckanext-datagovcatalog>=0.0.3 +# ckanext-datagovtheme>=0.1.22 +# ckanext-datajson>=0.1.13 +ckanext-envvars>=0.0.3 +# ckanext-geodatagov>=0.1.28 +# ckanext-googleanalyticsbasic +# ckanext-metrics-dashboard +-e git+https://github.com/gsa/ckanext-geodatagov.git@ckan-2.10#egg=ckanext-geodatagov +-e git+https://github.com/gsa/ckanext-datagovcatalog.git@ckan-210#egg=ckanext-datagovcatalog +-e git+https://github.com/gsa/ckanext-datagovtheme.git@ckan-210#egg=ckanext-datagovtheme +-e git+https://github.com/gsa/ckanext-datajson.git@ckan-210#egg=ckanext-datajson +-e git+https://github.com/gsa/ckanext-googleanalyticsbasic.git@ckan-2.10#egg=ckanext-googleanalyticsbasic +-e git+https://github.com/gsa/ckanext-metrics-dashboard.git@ckan-2.10#egg=ckanext-metrics-dashboard # Pin for saml2auth to work pysaml2==7.0.1 # ckanext-harvest dependencies ckantoolkit>=0.0.7 -pika>=1.1.0 -pyOpenSSL>=22.0.0 +pika>=1.1.0,<1.3.0 # redis==2.10.6 # included in ckan core # requests>=2.11.1 # included in ckan core # six>=1.12.0 # included in dcat @@ -40,8 +45,9 @@ argparse GeoAlchemy2==0.5.0 owslib>=0.28.1 pyparsing>=2.1.10 -pyproj==2.6.1 -Shapely>=1.2.13 +pyproj==3.4.1 +Shapely==2.0.1 +geojson==3.0.1 # requests>=1.1.0 # included in ckan core # six # included in dcat @@ -55,51 +61,50 @@ messytables==0.15.2 # https://github.com/ckan/ckanext-report/blob/master/requirements.txt #ckanext-dcat -rdflib==4.2.2 +rdflib==6.1.1 # rdflib-jsonld==0.4.0 # ignoring as cannot build on cloud.gov geomet>=0.2.0 future>=0.18.3 -six>=1.15.0 # ckan doesn't advertise its requirements correctly, so let's add them here. # https://github.com/ckan/ckan/blob/ckan-2.9.5/requirements.txt -alembic==1.0.0 -Babel==2.9.1 # Upgraded from CKAN pin +alembic==1.8.1 +Babel==2.10.3 Beaker==1.11.0 bleach==3.3.0 -click==7.1.2 -dominate==2.4.0 -fanstatic==1.1 +blinker==1.5 +click==8.1.3 +dominate==2.7.0 feedgen==0.9.0 -Flask==2.0 +Flask==2.0.3 Flask-Babel==1.0.0 flask-multistatic==1.0 -Jinja2==3.0 -lxml==4.9.1 +Flask-Login==0.6.1 +Jinja2==3.1.2 +Flask-WTF==1.0.1 pyjwt==2.4.0 -Markdown==3.1.1 # Upgraded from CKAN pin -passlib==1.7.3 +Markdown==3.4.1 +passlib==1.7.4 pastedeploy==2.0.1 # manually kept - remove when #4802 is complete pathtools==0.1.2 # via watchdog -polib==1.0.7 -psycopg2==2.8.6 -python-magic==0.4.15 -pysolr==3.6.0 -python-dateutil>=1.5.0 -pytz==2016.7 -PyUtilib==5.7.1 -pyyaml==5.4 -repoze.who==2.3 -Routes==1.13 -rq==1.0 +polib==1.1.1 +psycopg2==2.9.3 +PyJWT==2.4.0 +python-magic==0.4.27 +pysolr==3.9.0 +python-dateutil>=2.8.2 +pytz +PyUtilib==6.0.0 +pyyaml==6.0 +requests==2.28.1 +rq==1.11.0 simplejson==3.18.0 # ckan 2.9.5 requires 3.10.0 only # Following can be ignored: https://github.com/ckan/ckan/pull/4450 # sqlalchemy-migrate==0.12.0 -SQLAlchemy==1.3.5 +SQLAlchemy[mypy]==1.4.41 sqlparse==0.4.4 -tzlocal==1.3 -unicodecsv>=0.9 -webassets==0.12.1 +tzlocal==4.2 +webassets==2.0 Werkzeug==2.0 # zope.interface==4.7.2 diff --git a/ckan/requirements.txt b/ckan/requirements.txt index ee8442a9..355f2a81 100644 --- a/ckan/requirements.txt +++ b/ckan/requirements.txt @@ -1,115 +1,122 @@ -alembic==1.0.0 +alembic==1.8.1 async-timeout==4.0.2 -Babel==2.9.1 +Babel==2.10.3 +backports.zoneinfo==0.2.1 Beaker==1.11.0 bleach==3.3.0 -boto3==1.26.126 -botocore==1.29.126 -certifi==2022.12.7 +blinker==1.5 +boto3==1.26.137 +botocore==1.29.137 +certifi==2023.5.7 cffi==1.15.1 -chardet==3.0.4 -git+https://github.com/gsa/ckan.git@4af8efa19edf7259530ccde5c0331a5ccd7f7066#egg=ckan +chardet==5.1.0 +charset-normalizer==2.1.1 +ckan==2.10.1 -e git+https://github.com/ckan/ckanext-archiver.git@cbfadf9fbf10405958fdef9f77a7faedc05aa20b#egg=ckanext_archiver -ckanext-datagovcatalog==0.0.5 -ckanext-datagovtheme==0.1.27 -ckanext-datajson==0.1.18 -ckanext-dcat @ git+https://github.com/ckan/ckanext-dcat@618928be5a211babafc45103a72b6aab4642e964 +-e git+https://github.com/gsa/ckanext-datagovcatalog.git@9db6fcf6a5b63743c573f567749a870df1976bd3#egg=ckanext_datagovcatalog +-e git+https://github.com/gsa/ckanext-datagovtheme.git@eb310589aa3007516070fc1cdbfdcf7e3dea116d#egg=ckanext_datagovtheme +-e git+https://github.com/gsa/ckanext-datajson.git@4d6d09b79d8f932303056747706a31aacd195cf6#egg=ckanext_datajson +ckanext-dcat @ git+https://github.com/ckan/ckanext-dcat@14396e6d03aad3118ee5a232e04052a61edd0893 ckanext-envvars==0.0.3 -ckanext-geodatagov==0.1.37 -ckanext-googleanalyticsbasic==0.2.0 --e git+https://github.com/ckan/ckanext-harvest.git@89a98d7ff5aa3445d8158921669b8d0b04fa41c3#egg=ckanext_harvest -ckanext-metrics-dashboard==0.1.5 +-e git+https://github.com/gsa/ckanext-geodatagov.git@cbbcf599b6273fca56ec60f7b5d8ea2301dae415#egg=ckanext_geodatagov +-e git+https://github.com/gsa/ckanext-googleanalyticsbasic.git@0de6406608a3156ac368ba3ac14ca21be519a2dd#egg=ckanext_googleanalyticsbasic +-e git+https://github.com/ckan/ckanext-harvest.git@9fb44f79809a1c04dfeb0e1ca2540c5ff3cacef4#egg=ckanext_harvest +-e git+https://github.com/gsa/ckanext-metrics-dashboard.git@358ad6dbc4ec31ae4e0946cef6609c38920ad5d3#egg=ckanext_metrics_dashboard -e git+https://github.com/ckan/ckanext-qa.git@1731b59d2bf82b06f7866c204b26eb7c6c9ea1f9#egg=ckanext_qa -e git+https://github.com/ckan/ckanext-report.git@3588577f46d17e5f6ef163bb984d0e7016daef71#egg=ckanext_report ckanext-saml2auth @ git+https://github.com/GSA/ckanext-saml2auth.git@c2b12a94430034c522b25d282323a064e2d6a03a -e git+https://github.com/ckan/ckanext-spatial.git@17d5a341cf8f40b35b25df91a18ce72c31195ba3#egg=ckanext_spatial ckantoolkit==0.0.7 -click==7.1.2 +click==8.1.3 cryptography==41.0.0 defusedxml==0.7.1 -dominate==2.4.0 +dominate==2.7.0 elementpath==4.1.2 -fanstatic==1.1 feedgen==0.9.0 -Flask==2.0.0 +Flask==2.0.3 Flask-Babel==1.0.0 +Flask-Login==0.6.1 flask-multistatic==1.0 +Flask-WTF==1.0.1 future==0.18.3 GeoAlchemy2==0.5.0 +geojson==3.0.1 geomet==1.0.0 gevent==22.10.2 greenlet==2.0.2 gunicorn==20.1.0 html5lib==1.1 -idna==2.10 +idna==3.4 +importlib-metadata==6.6.0 importlib-resources==5.12.0 isodate==0.6.1 itsdangerous==2.1.2 -Jinja2==3.0.0 +Jinja2==3.1.2 jmespath==1.0.1 json-table-schema==0.2.1 jsonschema==2.4.0 -lxml==4.9.1 +lxml==4.9.2 Mako==1.2.4 -Markdown==3.1.1 +Markdown==3.4.1 MarkupSafe==2.1.2 messytables==0.15.2 +mypy==1.3.0 +mypy-extensions==1.0.0 newrelic==8.8.0 nose==1.3.7 numpy==1.24.3 -OWSLib==0.29.1 +OWSLib==0.29.2 packaging==23.1 -passlib==1.7.3 +passlib==1.7.4 PasteDeploy==2.0.1 pathtools==0.1.2 -pika==1.3.1 +pika==1.2.1 pip==23.1.2 ply==3.11 -polib==1.0.7 +polib==1.1.1 progressbar==2.5 progressbar2==3.53.3 -psycopg2==2.8.6 +psycopg2==2.9.3 pycparser==2.21 PyJWT==2.4.0 pyOpenSSL==23.2.0 pyparsing==3.0.9 -pyproj==2.6.1 +pyproj==3.4.1 pysaml2==7.0.1 -pysolr==3.6.0 +pysolr==3.9.0 python-dateutil==2.8.2 -python-editor==1.0.4 -python-magic==0.4.15 +python-magic==0.4.27 python-utils==3.5.2 -pytz==2016.7 -PyUtilib==5.7.1 -PyYAML==5.4 +pytz==2023.3 +pytz-deprecation-shim==0.1.0.post0 +PyUtilib==6.0.0 +PyYAML==6.0 PyZ3950 @ git+https://github.com/danizen/PyZ3950@6d44a4ab85c8bda3a7542c2c9efdfad46c830219 -rdflib==4.2.2 -redis==4.5.4 -repoze.lru==0.7 -repoze.who==2.3 +rdflib==6.1.1 +redis==4.5.5 requests==2.31.0 rfc3987==1.3.8 -Routes==1.13 -rq==1.0 -s3transfer==0.6.0 +rq==1.11.0 +s3transfer==0.6.1 setuptools==67.1.0 shapely==2.0.1 -shutilwhich==1.1.0 simplejson==3.18.0 six==1.16.0 -SQLAlchemy==1.3.5 +SQLAlchemy==1.4.41 +sqlalchemy2-stubs==0.0.2a34 sqlparse==0.4.4 -tzlocal==1.3 -unicodecsv==0.14.1 +tomli==2.0.1 +typing_extensions==4.5.0 +tzdata==2023.3 +tzlocal==4.2 urllib3==1.26.15 -webassets==0.12.1 +webassets==2.0 webencodings==0.5.1 -WebOb==1.8.7 Werkzeug==2.0.0 wheel==0.40.0 +WTForms==3.0.1 xlrd==2.0.1 -xmlschema==2.2.3 +xmlschema==2.3.0 zipp==3.15.0 zope.event==4.6 zope.interface==6.0 diff --git a/ckan/setup/ckan.ini b/ckan/setup/ckan.ini index fe6ec665..0091d09c 100644 --- a/ckan/setup/ckan.ini +++ b/ckan/setup/ckan.ini @@ -62,6 +62,8 @@ who.log_file = %(cache_dir)s/who_log.ini who.timeout = 900 who.secure = True +ckan.csrf_protection.ignore_extensions=False + ## Database Settings # sqlalchemy.url = postgresql://ckan_default:pass@localhost/ckan_default # Possible tweaks to production sizing @@ -226,7 +228,7 @@ ckan.jobs.timeout = 180 ckan.tracking_enabled = True ## Spatial settings -ckanext.spatial.search_backend = solr +ckanext.spatial.search_backend = solr-bbox ## Harvest settings # ckanext-harvest will use ckan.redis.url if redis configuration diff --git a/e2e/cypress/integration/000_harvest.cy.js b/e2e/cypress/integration/000_harvest.cy.js index 68d9d471..7c3f7183 100644 --- a/e2e/cypress/integration/000_harvest.cy.js +++ b/e2e/cypress/integration/000_harvest.cy.js @@ -14,7 +14,9 @@ describe('Harvest', { testIsolation: false }, () => { // Make sure organization does not exist before creating cy.delete_organization(harvestOrg); // Create the organization - cy.create_organization(harvestOrg, 'cypress harvest org description', false); + cy.visit('/organization'); + cy.get('a[class="btn btn-primary"]').click(); + cy.create_organization_ui(harvestOrg, 'cypress harvest org description'); }); after(() => { diff --git a/e2e/cypress/integration/ckan_extensions.cy.js b/e2e/cypress/integration/ckan_extensions.cy.js index 7cd0d61e..701c7a74 100644 --- a/e2e/cypress/integration/ckan_extensions.cy.js +++ b/e2e/cypress/integration/ckan_extensions.cy.js @@ -2,7 +2,7 @@ describe('CKAN Extensions', () => { it('Uses CKAN 2.9', () => { cy.request('/api/action/status_show').should((response) => { expect(response.body).to.have.property('success', true); - expect(response.body.result).to.have.property('ckan_version', '2.9.8'); + expect(response.body.result).to.have.property('ckan_version', '2.10.1'); }); }); diff --git a/e2e/cypress/integration/facets.cy.js b/e2e/cypress/integration/facets.cy.js index 26dfc0e8..2d527447 100644 --- a/e2e/cypress/integration/facets.cy.js +++ b/e2e/cypress/integration/facets.cy.js @@ -17,12 +17,14 @@ describe('Facets', { testIsolation: false }, () => { }); it('Show datagov facet list on organization page', () => { - cy.create_organization('org-tags', ''); + cy.visit('/organization'); + cy.get('a[class="btn btn-primary"]').click(); + cy.create_organization_ui('org-tags', 'tags for org test'); cy.visit('/organization/org-tags'); - cy.get('.filters h2').its('length').should('be.equal', 10); - cy.get('.filters h2').first().contains('Topics'); - cy.get('.filters h2').contains('Harvest Source'); - cy.get('.filters h2').last().contains('Bureaus'); + cy.get('.module-shallow').its('length').should('be.equal', 11); + cy.get('.module-shallow').contains('Topics'); + cy.get('.module-shallow').contains('Harvest Source'); + cy.get('.module-shallow').last().contains('Bureaus'); }); it('Show datagov facet list on group page', () => { diff --git a/e2e/cypress/integration/login.cy.js b/e2e/cypress/integration/login.cy.js index b7c362a4..807b7b82 100644 --- a/e2e/cypress/integration/login.cy.js +++ b/e2e/cypress/integration/login.cy.js @@ -4,8 +4,6 @@ describe('Login', () => { cy.get('a[href="/user/login"]').click(); cy.login('not-user', 'not-password', true); cy.get('.flash-messages .alert').should('contain', 'Login failed. Bad username or password.'); - // Validate cookie is not set - cy.getCookie('auth_tkt').should('not.exist'); }); it('Valid login attempt', () => { @@ -13,7 +11,5 @@ describe('Login', () => { cy.get('a[href="/user/login"]').click(); cy.login(); cy.get('.nav-tabs>li>a').should('contain', 'My Organizations'); - // Validate cookie is set, in development secure is set to false - cy.getCookie('auth_tkt').should('have.property', 'secure', false); }); }); diff --git a/e2e/cypress/support/command.js b/e2e/cypress/support/command.js index 7b2d579d..3a258379 100644 --- a/e2e/cypress/support/command.js +++ b/e2e/cypress/support/command.js @@ -101,9 +101,8 @@ Cypress.Commands.add('create_organization', (orgName, orgDesc) => { body: { description: orgDesc, title: orgName, - approval_status: 'approved', - state: 'active', name: orgName, + save: null }, }); }); @@ -215,14 +214,14 @@ Cypress.Commands.add( }); cy.get('#field-notes').type(harvestDesc); - cy.get('[type="radio"]').check(harvestType); + cy.get('[type="radio"]').check(harvestType, { force: true }); // Validate private_datasets defaults to Private cy.get('#field-private_datasets').find(':selected').contains('Private'); - cy.get('#field-private_datasets').select(harvestPrivate); + cy.get('#field-private_datasets').select(harvestPrivate, { force: true }); - cy.get('input[name=save]').click(); + cy.get('input[name=save]').click({ force: true }); } ); diff --git a/proxy/nginx-authy.conf b/proxy/nginx-authy.conf index 27044318..fa934b7f 100644 --- a/proxy/nginx-authy.conf +++ b/proxy/nginx-authy.conf @@ -1,7 +1,41 @@ +################################## +# CKAN used to add an auth_tkt cookie for logged-in user +# It is gone after 2.10. Let us generate our own auth_tkt here. +# If the request comes from login.gov and goes to /user/me, +# we know the user is logged in. +set $check ""; +set $cookiecontent ""; +set $goodauth "auth_tkt=1; PATH=/"; +set $badauth "auth_tkt=0; PATH=/"; + +if ($uri = "/user/me") { + set $check "endpointcheck"; +} +if ($http_referer ~* \.(login|identitysandbox).gov/$ ) { + set $check "${check}+referercheck"; +} +if ($check = "endpointcheck+referercheck") { + set $cookiecontent $goodauth; +} +if ($uri = "/user/logged_out_redirect") { + set $cookiecontent $badauth; +} + +add_header Set-Cookie $cookiecontent; + +if ($uri = "/user/logged_out_redirect") { + return 302 https://{{env "PUBLIC_ROUTE"}}$uri; +} + +########################################## +# If cookie auth_tkt does not have a good value, +# we dont serve the request on the admin site. +# We 302 redirect it to the public site, +# except some special uri listed below: set $authy ""; -# Determine if auth cookie is set -if ($cookie_auth_tkt) { +# Determine if auth cookie is set with good value +if ($cookie_auth_tkt = 1) { set $authy C; } if ($uri = "/user/saml2login") { @@ -10,6 +44,9 @@ if ($uri = "/user/saml2login") { if ($uri = "/acs") { set $authy "${authy}S"; } +if ($uri = "/user/me") { + set $authy "${authy}M"; +} if ($uri = "/api/action/status_show") { set $authy "${authy}H"; } diff --git a/runtime.txt b/runtime.txt index 5592f898..305091ca 100644 --- a/runtime.txt +++ b/runtime.txt @@ -1 +1 @@ -python-3.8.x +python-3.9.x