From b90327f946391ef631c3f774832a1ad12e337119 Mon Sep 17 00:00:00 2001 From: Michael Polidori Date: Mon, 23 Sep 2024 15:32:54 -0400 Subject: [PATCH] Ckan 2.10 upgrade (#88) --- .docker-compose-db.yaml | 33 ++- .docker-compose.vital-strategies-theme.yaml | 23 +- DEPLOYING.md | 10 +- Makefile | 33 ++- README.md | 164 +++++++++++--- TESTING.md | 4 +- cca-operator/update-instance.sh | 2 +- ckan/Dockerfile | 99 +++++++-- ckan/entrypoint.sh | 49 ++++- .../vital-strategies/filesystem/.init | 0 ...l-strategies.cve-2023-32321_security.patch | 102 --------- .../vital-strategies.datapusher_timeout.patch | 17 -- ...ital-strategies.psycopg2_upgrade_fix.patch | 18 -- ckan/post_install_functions.sh | 74 ++++++- ckan/requirements.txt | 2 +- ckan/setup/supervisord.conf | 23 ++ ckan/setup/uwsgi.conf | 5 + ckan/templater.sh | 1 + configs_diff.sh | 116 ++++++++++ datapusher-plus/Dockerfile | 100 +++++++++ datapusher-plus/datapusher-settings.py | 4 + datapusher-plus/entrypoint/startup.sh | 23 ++ datapusher-plus/example.env | 161 ++++++++++++++ db/Dockerfile | 8 +- db/datastore-permissions-update.sh | 16 +- db/migration/ckan-permissions.sql | 7 + db/migration/datastore-permissions.sql | 76 +++++++ db/migration/upgrade_databases.sh | 168 +++++++++++++++ docker-compose.yaml | 147 ++++++------- ... vital-strategies-theme-ckan.ini.template} | 58 ++--- docker-compose/ckan-secrets.dat | 20 +- solr/schemas/schema210.xml | 203 ++++++++++++++++++ traefik/entrypoint.sh | 42 ++++ traefik/traefik.toml.template | 49 +++++ 34 files changed, 1490 insertions(+), 367 deletions(-) create mode 100644 ckan/overrides/vital-strategies/filesystem/.init delete mode 100644 ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.cve-2023-32321_security.patch delete mode 100644 ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.datapusher_timeout.patch delete mode 100644 ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.psycopg2_upgrade_fix.patch create mode 100644 ckan/setup/supervisord.conf create mode 100644 ckan/setup/uwsgi.conf mode change 100644 => 100755 ckan/templater.sh create mode 100755 configs_diff.sh create mode 100644 datapusher-plus/Dockerfile create mode 100644 datapusher-plus/datapusher-settings.py create mode 100644 datapusher-plus/entrypoint/startup.sh create mode 100644 datapusher-plus/example.env create mode 100644 db/migration/ckan-permissions.sql create mode 100644 db/migration/datastore-permissions.sql create mode 100755 db/migration/upgrade_databases.sh rename docker-compose/ckan-conf-templates/{vital-strategies-theme-production.ini.template => vital-strategies-theme-ckan.ini.template} (80%) create mode 100644 solr/schemas/schema210.xml create mode 100755 traefik/entrypoint.sh create mode 100644 traefik/traefik.toml.template diff --git a/.docker-compose-db.yaml b/.docker-compose-db.yaml index 5170909..e3887f0 100644 --- a/.docker-compose-db.yaml +++ b/.docker-compose-db.yaml @@ -1,50 +1,47 @@ -version: '3.2' - services: - db: image: viderum/ckan-cloud-docker:db-latest build: context: db restart: always expose: - - "5432" + - "5432" env_file: - - docker-compose/db-secrets.sh + - docker-compose/db-secrets.sh volumes: - - db:/var/lib/postgresql/data + - db:/var/lib/postgresql/data networks: - - ckan-multi + - ckan-multi jobs-db: image: postgres restart: always expose: - - "5432" + - "5432" env_file: - - docker-compose/db-secrets.sh + - docker-compose/db-secrets.sh volumes: - - jobs-db:/var/lib/postgresql/data + - 
jobs-db:/var/lib/postgresql/data networks: - - ckan-multi + - ckan-multi datastore-db: image: viderum/ckan-cloud-docker:db-latest restart: always expose: - - "5432" + - "5432" env_file: - - docker-compose/datastore-db-secrets.sh + - docker-compose/datastore-db-secrets.sh volumes: - - datastore-db:/var/lib/postgresql/data + - datastore-db:/var/lib/postgresql/data networks: - - ckan-multi + - ckan-multi ckan: depends_on: - - db - - jobs-db - - datastore-db + - db + - jobs-db + - datastore-db volumes: db: diff --git a/.docker-compose.vital-strategies-theme.yaml b/.docker-compose.vital-strategies-theme.yaml index 9fd04c9..45c9848 100644 --- a/.docker-compose.vital-strategies-theme.yaml +++ b/.docker-compose.vital-strategies-theme.yaml @@ -1,5 +1,3 @@ -version: '3.2' - services: proxy: @@ -21,34 +19,35 @@ services: build: context: ckan args: - CKAN_BRANCH: ckan-2.7.3 + CKAN_BRANCH: ckan-2.10.4 EXTRA_PACKAGES: cron EXTRA_FILESYSTEM: "./overrides/vital-strategies/filesystem/" - PRE_INSTALL: "sed -i -e 's/psycopg2==2.4.5/psycopg2==2.7.7/g' ~/venv/src/ckan/requirements.txt" POST_INSTALL: | - install_standard_ckan_extension_github -r ViderumGlobal/ckanext-querytool -b v2.1.2 &&\ + install_standard_ckan_extension_github -r datopian/ckanext-querytool -b cc6c8e6f19f59e6842d370bf7ac87d94e37a2831 &&\ install_standard_ckan_extension_github -r ckan/ckanext-geoview && \ - install_standard_ckan_extension_github -r okfn/ckanext-sentry && \ - install_standard_ckan_extension_github -r ckan/ckanext-googleanalytics -b v2.0.2 && \ - install_standard_ckan_extension_github -r datopian/ckanext-s3filestore -b fix-null-content-type && \ + install_standard_ckan_extension_github -r datopian/ckanext-sentry -b 2.10 && \ + install_standard_ckan_extension_github -r datopian/ckanext-gtm && \ + install_standard_ckan_extension_github -r datopian/ckanext-s3filestore -b ckan-2.10 && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l en -f && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l es -f && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l fr -f && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l km -f && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l pt_BR -f && \ - cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l zh_CN -f + cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l zh_Hans_CN -f environment: - CKAN_CONFIG_TEMPLATE_PREFIX=vital-strategies-theme- + #ports: # Uncomment to expose CKAN on localhost for development + # - 5000:5000 jobs: image: viderum/ckan-cloud-docker:ckan-latest-vital-strategies-theme build: context: ckan args: - CKAN_BRANCH: ckan-2.7.3 + CKAN_BRANCH: ckan-2.10.4 POST_INSTALL: | - install_standard_ckan_extension_github -r keitaroinc/ckanext-s3filestore -b main &&\ - install_standard_ckan_extension_github -r datopian/ckanext-querytool &&\ + install_standard_ckan_extension_github -r datopian/ckanext-s3filestore -b ckan-2.10 &&\ + install_standard_ckan_extension_github -r datopian/ckanext-querytool -b cc6c8e6f19f59e6842d370bf7ac87d94e37a2831 &&\ install_standard_ckan_extension_github -r ckan/ckanext-geoview environment: - CKAN_CONFIG_TEMPLATE_PREFIX=vital-strategies-theme- diff --git a/DEPLOYING.md b/DEPLOYING.md index cfc7ad2..2b0bf88 100644 --- a/DEPLOYING.md +++ b/DEPLOYING.md @@ -115,7 +115,7 @@ In addition to SSL specific configuration, there is one more line you need 
to ad This should be enough for the basic installation. In case you need to tweak versions or other initialization parameters for CKAN, you need these two files: -* `docker-compose/ckan-conf-templates/{instance-id}-theme-production.ini` +* `docker-compose/ckan-conf-templates/{instance-id}-theme-ckan.ini` This is the file used to generate the CKAN main configuration file. * `.docker-compose.{instance-id}-theme.yaml` @@ -196,7 +196,7 @@ bash migrate_filestorage.sh $HOST $ACCESS_KEY $SECRET_KEY $BUCKET $STORAGE_PATH After migration rebuild the SOLR search index. ``` -sudo make shell O=<> S=ckan C='/usr/local/bin/ckan-paster --plugin=ckan search-index rebuild -c /etc/ckan/production.ini' +sudo make shell O=<> S=ckan C='/usr/local/bin/ckan-paster --plugin=ckan search-index rebuild -c /etc/ckan/ckan.ini' ``` ## Debugging @@ -255,10 +255,10 @@ POST_INSTALL: | install_standard_ckan_extension_github -r datopian/ckanext-s3filestore &&\ ``` -And add extension to the list of plugins in `docker-compose/ckan-conf-templates/{instance-id}-theme-production.ini.template` +And add extension to the list of plugins in `docker-compose/ckan-conf-templates/{instance-id}-theme-ckan.ini.template` ``` -# in docker-compose/ckan-conf-templates/{instance-id}-theme-production.ini.template +# in docker-compose/ckan-conf-templates/{instance-id}-theme-ckan.ini.template ckan.plugins = image_view ... stats @@ -268,7 +268,7 @@ ckan.plugins = image_view Note: depending on extension you might also need to update extensions related configurations in the same file. If needed this type of information is ussually included in extension REAMDE. ``` -# in docker-compose/ckan-conf-templates/{instance-id}-theme-production.ini.template +# in docker-compose/ckan-conf-templates/{instance-id}-theme-ckan.ini.template ckanext.s3filestore.aws_access_key_id = Your-Access-Key-ID ckanext.s3filestore.aws_secret_access_key = Your-Secret-Access-Key ckanext.s3filestore.aws_bucket_name = a-bucket-to-store-your-stuff diff --git a/Makefile b/Makefile index 22adcb5..078e5d7 100644 --- a/Makefile +++ b/Makefile @@ -2,20 +2,29 @@ COMPOSE_FILES = -f docker-compose.yaml -f .docker-compose-db.yaml -f .docker-compose.$O-theme.yaml +DATAPUSHER_TYPE ?= datapusher-plus +CKAN_DB_NAME ?= ckan +CKAN_DB_USERNAME ?= ckan +DB_USERNAME ?= postgres +DATASTORE_DB_NAME ?= datastore +DATASTORE_DB_USERNAME ?= postgres + start: + @export DATAPUSHER_DIRECTORY=$(DATAPUSHER_TYPE) && \ docker-compose $(COMPOSE_FILES) up -d --build nginx && make cron stop: docker-compose $(COMPOSE_FILES) stop build: + @export DATAPUSHER_DIRECTORY=$(DATAPUSHER_TYPE) && \ docker-compose $(COMPOSE_FILES) build pull: docker-compose $(COMPOSE_FILES) pull shell: - docker-compose $(COMPOSE_FILES) exec $S $C + docker-compose $(COMPOSE_FILES) exec -it $S sh -c 'if command -v bash > /dev/null 2>&1; then exec bash; else exec sh; fi' down: docker-compose $(COMPOSE_FILES) down @@ -36,10 +45,10 @@ exec: docker-compose $(COMPOSE_FILES) exec $S $C user: - docker-compose $(COMPOSE_FILES) exec ckan /usr/local/bin/ckan-paster --plugin=ckan user add $U password=$P email=$E -c /etc/ckan/production.ini + docker-compose $(COMPOSE_FILES) exec ckan ckan -c /etc/ckan/ckan.ini user add $U password=$P email=$E sysadmin: - docker-compose $(COMPOSE_FILES) exec ckan /usr/local/bin/ckan-paster --plugin=ckan sysadmin add $U -c /etc/ckan/production.ini + docker-compose $(COMPOSE_FILES) exec ckan ckan -c /etc/ckan/ckan.ini sysadmin add $U secret: python create_secrets.py @@ -49,6 +58,20 @@ cron: clean-rebuild: docker-compose 
$(COMPOSE_FILES) down -v - docker images -a | grep "ckan-cloud-docker" | awk '{print $$3}' | xargs docker rmi -f + docker images -a | grep "ckan-cloud-docker" | awk '{print $$3}' | xargs -r docker rmi -f + @export DATAPUSHER_DIRECTORY=$(DATAPUSHER_TYPE) && \ docker-compose $(COMPOSE_FILES) build --no-cache - docker-compose $(COMPOSE_FILES) up -d --build nginx && make cron + @export DATAPUSHER_DIRECTORY=$(DATAPUSHER_TYPE) && \ + docker-compose $(COMPOSE_FILES) up -d nginx && make cron + +backup-db: + docker-compose $(COMPOSE_FILES) exec -T db pg_dump -U postgres --format=custom -d ckan > ckan.dump + docker-compose ${COMPOSE_FILES} exec -T ckan sh -c "cd /var/lib/ckan && tar -czf /tmp/ckan_data.tar.gz data" + docker cp $$(docker-compose ${COMPOSE_FILES} ps -q ckan):/tmp/ckan_data.tar.gz ckan_data.tar.gz + docker-compose $(COMPOSE_FILES) exec -T datastore-db pg_dump -U postgres --format=custom -d datastore > datastore.dump + +upgrade-db: + ./db/migration/upgrade_databases.sh "$(COMPOSE_FILES)" "$(CKAN_DB_NAME)" "$(CKAN_DB_USERNAME)" "$(DB_USERNAME)" "$(DATASTORE_DB_NAME)" "$(DATASTORE_DB_USERNAME)" + +config-upgrade: + ./configs_diff.sh diff --git a/README.md b/README.md index 3552d35..df5dbe3 100644 --- a/README.md +++ b/README.md @@ -48,10 +48,15 @@ By default, `traefik` will attempt to generate a certificate and use https. This image: traefik:1.7.2-alpine restart: always volumes: - - ./traefik/traefik.dev.toml:/traefik.toml # <-- Replace ./traefik/traefik.toml with ./traefik/traefik.dev.toml as shown here + - ./traefik/traefik.toml.template:/traefik.toml.template + #- ./traefik/traefik.dev.toml:/traefik.dev.toml # Uncomment this line to bypass certificates for local development - ./traefik/acme.json:/acme.json + - ./cca-operator/templater.sh:/templater.sh + - ./docker-compose/traefik-secrets.sh:/traefik-secrets.sh + - ./traefik/entrypoint.sh:/entrypoint.sh networks: - - ckan-multi + - ckan-multi + entrypoint: ["/bin/sh", "-c", "/entrypoint.sh"] ``` ### Expose port 5000 for CKAN @@ -68,45 +73,25 @@ In your project specific `docker-compose` file, you must expose port 5000 for CK build: context: ckan args: - CKAN_BRANCH: ckan-2.7.3 + CKAN_BRANCH: ckan-2.10.4 EXTRA_PACKAGES: cron EXTRA_FILESYSTEM: "./overrides/vital-strategies/filesystem/" - PRE_INSTALL: "sed -i -e 's/psycopg2==2.4.5/psycopg2==2.7.7/g' ~/venv/src/ckan/requirements.txt" POST_INSTALL: | - install_standard_ckan_extension_github -r ViderumGlobal/ckanext-querytool -b v2.1.2 &&\ + install_standard_ckan_extension_github -r datopian/ckanext-querytool -b cc6c8e6f19f59e6842d370bf7ac87d94e37a2831 &&\ install_standard_ckan_extension_github -r ckan/ckanext-geoview && \ - install_standard_ckan_extension_github -r okfn/ckanext-sentry && \ - install_standard_ckan_extension_github -r ckan/ckanext-googleanalytics -b v2.0.2 && \ - install_standard_ckan_extension_github -r datopian/ckanext-s3filestore -b fix-null-content-type && \ + install_standard_ckan_extension_github -r datopian/ckanext-sentry -b 2.10 && \ + install_standard_ckan_extension_github -r datopian/ckanext-gtm && \ + install_standard_ckan_extension_github -r datopian/ckanext-s3filestore -b ckan-2.10 && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l en -f && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l es -f && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l fr -f && \ cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l km -f && \ cd 
~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l pt_BR -f && \ - cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l zh_CN -f + cd ~/venv/src/ckanext-querytool && ~/venv/bin/python setup.py compile_catalog -l zh_Hans_CN -f environment: - CKAN_CONFIG_TEMPLATE_PREFIX=vital-strategies-theme- - ports: # <-- Add this section to expose port 5000 - - 5000:5000 -``` - -### Remove unused plugins from CKAN - -Before building and starting the environment, make sure you only have the required plugins enabled. If you're using a pre-defined project template for local testing, you might not need some of the included extensions, such as `ckanext-googleanalytics` or `ckanext-sentry`. For example, if you want to use the `vital-strategies` project template, you should remove the following plugins from the `.ini` file (found in `docker-compose/ckan-conf-templates/vital-strategies-theme-production.ini`) to avoid issues (unless you want to properly configure them): - -``` -ckan.plugins = image_view - text_view - recline_view - datastore - datapusher - resource_proxy - geojson_view - querytool - stats - sentry # <-- Remove this line - s3filestore # <-- Remove this line - googleanalytics # <-- Remove this line + # ports: # Uncomment these lines expose CKAN on localhost for local development + # - 5000:5000 ``` ### Hosts file entries @@ -154,7 +139,7 @@ Create a CKAN admin user ``` docker-compose exec ckan ckan-paster --plugin=ckan \ - sysadmin add -c /etc/ckan/production.ini admin password=12345678 email=admin@localhost + sysadmin add -c /etc/ckan/ckan.ini admin password=12345678 email=admin@localhost ``` Login to CKAN at http://nginx:8080 with username `admin` and password `12345678` @@ -187,6 +172,12 @@ Stop the environment: make stop O=vital-strategies ``` +Enter a container: + +``` +make shell O=vital-strategies S=SERVICE_NAME +``` + Make a user: ``` @@ -250,8 +241,8 @@ This allows to test different CKAN configurations and extension combinations Duplicate the CKAN default configuration: ``` -cp docker-compose/ckan-conf-templates/production.ini.template \ - docker-compose/ckan-conf-templates/my-ckan-production.ini.template +cp docker-compose/ckan-conf-templates/ckan.ini.template \ + docker-compose/ckan-conf-templates/my-ckan-ckan.ini.template ``` Edit the duplicated file and modify the settings, e.g. add the extensions to the `plugins` configuration and any additional required extension configurations. @@ -398,3 +389,110 @@ You might need to reload the solr collection after recreate: ``` curl "http://localhost:8983/solr/admin/collections?action=RELOAD&name=${INSTANCE_ID}&wt=json" ``` + +## Migrating to CKAN 2.10 and Python 3 + +>**Note**: As of January 1, 2020, Python 2 is no longer officially supported. If you're running CKAN 2.7 with Python 2, it's highly recommended to migrate to CKAN 2.10 with Python 3. The latest version of this repo also no longer supports CKAN < 2.10 and Python < 3. If you must stick with those versions for now, you will need to maintain your local copy of this repo yourself. + +All of the following commands should be run in `ckan-cloud-docker` (unless stated otherwise). In the examples below, I'm using the `vital-strategies` project template as an example. Replace `vital-strategies` with the name of your project template. 
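+ +For example, if your project template were named `my-portal` (a hypothetical name, used here purely for illustration), the same make targets shown in the steps below would be invoked as: + +``` +make start O=my-portal +make backup-db O=my-portal +make stop O=my-portal +```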
+ +>**Note**: Depending on any custom configurations you have, you might need to adjust the variables in `db/migration/upgrade_databases.sh` (and others, such as your custom `docker-compose` file, or your custom `.ini` file) to match your setup. + +>**Important**: While following the migration steps, you will create backups of the DBs to migrate to the new upgraded CKAN instance. A more robust backup is recommended in case you need to revert to the old CKAN 2.7 instance. It's recommended to either take a snapshot (or similar) of your server before beginning the migration, or to make a full copy of `/var/lib/docker` (or wherever your Docker data is stored) to ensure you can revert if needed. If your cloud server doesn't have space to store the copy (the copy will likely require at least 50GB of free space), you will need to copy it to another server or storage location (e.g., S3, Google Cloud Storage, or locally using `scp`, `rsync`, etc.). For steps on how to revert to the old CKAN 2.7 instance _without_ a full system or Docker data backup, see the [Reverting to the old CKAN 2.7 instance](#reverting-to-the-old-ckan-27-instance) section below. + +1. Start up your _current_ instance (if it's not running already, **don't pull the latest changes yet**): `make start O=vital-strategies` +2. Reset any repo changes that might have accidentally been committed: `git reset --mixed HEAD` +3. Create a diff file with any changes in the current branch (for example, values manually added to `.ini` files, etc.—this file will be read later in a script): `git diff > configs.diff` +4. Stash all local changes: `git stash` +5. Pull the latest changes: `git pull` (**Important**: Don't stop your instance yet—make sure it's still running when you pull this, as you need to run the next few commands on your _current_ instance, and the commands only exist in the latest codebase) +6. Run the config updater script: `make config-upgrade` (this will output any variables that have changed—you will need to enter these values when you run `make secret` later) +7. Backup the DBs: `make backup-db O=vital-strategies` (confirm that you have `ckan.dump`, `datastore.dump` and `ckan_data.tar.gz` in the current directory after running this command—you can use `ls *.dump` and `ls *.tar.gz` to confirm that the files exist) +8. Stop the containers: `make stop O=vital-strategies` +9. (optional and not recommended) If you don't want to use [DataPusher+](https://github.com/dathere/datapusher-plus), you will need to export the following variable every time you start, stop, or build CKAN: `export DATAPUSHER_TYPE=datapusher` +10. Create secrets: `make secret` (follow the prompts and make sure to add any values that were output from the config updater script in step 6) +11. Clean and rebuild: `make clean-rebuild O=vital-strategies` +12. Run the upgrade script: `make upgrade-db O=vital-strategies` + - If you have set custom DB names and users, you will need to pass in these options as needed: `make upgrade-db O=vital-strategies CKAN_DB_NAME= DB_USERNAME= CKAN_DB_USERNAME= DATASTORE_DB_NAME= DATASTORE_DB_USERNAME=`— the default values are: `CKAN_DB_NAME=ckan`, `DB_USERNAME=postgres`, `CKAN_DB_USERNAME=ckan`, `DATASTORE_DB_NAME=datastore`, `DATASTORE_DB_USERNAME=postgres` + - Copy the API token that's output at the end (you will paste it in step 14) +13. Stop the containers: `make stop O=vital-strategies` +14. Run `make secret` again and paste the token when prompted (step 13—"Enter Datapusher API token") +15. 
(optional) If you use extensions like Sentry, S3filestore, or Google Analytics, you will need to manually re-enable them in your `.ini` file (for example, `docker-compose/ckan-conf-templates/vital-strategies-theme-ckan.ini.template`). This is because these plugins cannot be enabled on the first run of the new CKAN instance, as the DB will not initialize properly. You can enable them by adding the following lines to your `.ini` file. If you have a custom theme extension, e.g., `querytool`, it must be the last item in the list. For example, if you want to add all 3 of the examples I mentioned, you would update the following line: + ``` + ckan.plugins = image_view text_view recline_view datastore datapusher resource_proxy geojson_view querytool + ``` + to: + ``` + ckan.plugins = image_view text_view recline_view datastore datapusher resource_proxy geojson_view sentry s3filestore googleanalytics querytool + ``` + **Note**: To edit the file, you will need to use `nano`, `vi` or another command line text editor. Both `nano` and `vi` should be available on most modern Linux operating systems by default. `nano` is recommended for less experienced users, as it's more user-friendly. + + To open and edit the file with `nano`, run `nano docker-compose/ckan-conf-templates/vital-strategies-theme-ckan.ini.template`. Make your changes, and then, to save and exit, press `ctrl` + `x`, then `y`, then `enter`. If you make a mistake, press `ctrl` + `x`, then `n` to exit without saving. + + To open and edit the file with `vi`, run `vi docker-compose/ckan-conf-templates/vital-strategies-theme-ckan.ini.template`. To edit the file, press `i` to enter insert mode. To save and exit, press `esc` to exit insert mode, then type `:wq` and press `enter`. If you make a mistake, press `esc` to exit insert mode, then type `:q!` and press `enter` to exit without saving. +16. Start the containers: `make start O=vital-strategies` +17. Test and confirm that the migration was successful + +>**Note**: After the migration, the first time you visit the DataStore tab for any pre-existing resources, you might see "Error: cannot connect to datapusher". If you click "Upload to DataStore", this error should go away and the process will complete as expected. It's not necessary to go through the resources and remove this error message, as there's actually no issue with DataStore/DataPusher and your old data (it's there and should be working fine)—it's just a UI bug due to switching DBs, which confuses DataPusher. It will work as expected for both existing and new resources. + +### Reverting to the old CKAN 2.7 instance + +>**Important**: It's recommended to make copies of `ckan.dump`, `datastore.dump` and `ckan_data.tar.gz` and move them off of the server, if possible. If anything goes wrong, and you must revert to the old CKAN 2.7 instance, you can restore it by following the steps below: + +1. Stop the containers: `make stop O=vital-strategies` +2. Checkout the last CKAN 2.7 commit: `git checkout d3bdc178a1726ada331b47157b92123cdec82b12` +3. Create secrets (you probably don't need to do this, but go through the process and make sure your previously entered values are correct): `make secret` (follow the prompts) +4. Clean and rebuild: `make clean-rebuild O=vital-strategies` +5. Restore the DBs (_note_: the prior version of this repo doesn't have a command for this—you must do it manually): + 1. 
Restore the CKAN DB: `docker-compose -f docker-compose.yaml -f .docker-compose-db.yaml -f .docker-compose.-theme.yaml exec -T db pg_restore -U postgres --verbose --create --clean --if-exists -d postgres < ckan.dump` + 2. Restore the DataStore DB: `docker-compose -f docker-compose.yaml -f .docker-compose-db.yaml -f .docker-compose.-theme.yaml exec -T datastore-db pg_restore -U postgres --verbose --create --clean --if-exists -d postgres < datastore.dump` + 3. Restore the CKAN data: + 1. `docker cp ckan_data.tar.gz $(docker-compose -f docker-compose.yaml -f .docker-compose-db.yaml -f .docker-compose.-theme.yaml ps -q ckan):/tmp/ckan_data.tar.gz` + 2. `docker-compose -f docker-compose.yaml -f .docker-compose-db.yaml -f .docker-compose.-theme.yaml exec -T ckan bash -c "tar -xzf /tmp/ckan_data.tar.gz -C /tmp/ && cp -r /tmp/data/* /var/lib/ckan/data/ && chown -R ckan:ckan /var/lib/ckan/data"` + 4. Set datastore permissions: + 1. Enter your `ckan` container: `docker-compose -f docker-compose.yaml -f .docker-compose-db.yaml -f .docker-compose.-theme.yaml exec ckan bash` + 2. Create a new file in your `ckan` container, `ckan.sql`, with the following contents: + ``` + \connect "datastore" + + -- revoke permissions for the read-only user + REVOKE CREATE ON SCHEMA public FROM PUBLIC; + REVOKE USAGE ON SCHEMA public FROM PUBLIC; + + GRANT CREATE ON SCHEMA public TO "postgres"; + GRANT USAGE ON SCHEMA public TO "postgres"; + + -- grant select permissions for read-only user + GRANT CONNECT ON DATABASE "datastore" TO "readonly"; + GRANT USAGE ON SCHEMA public TO "readonly"; + + -- grant access to current tables and views to read-only user + GRANT SELECT ON ALL TABLES IN SCHEMA public TO "readonly"; + + -- grant access to new tables and views by default + ALTER DEFAULT PRIVILEGES FOR USER "postgres" IN SCHEMA public + GRANT SELECT ON TABLES TO "readonly"; + + -- a view for listing valid table (resource id) and view names + CREATE OR REPLACE VIEW "_table_metadata" AS + SELECT DISTINCT + substr(md5(dependee.relname || COALESCE(dependent.relname, '')), 0, 17) AS "_id", + dependee.relname AS name, + dependee.oid AS oid, + dependent.relname AS alias_of + -- dependent.oid AS oid + FROM + pg_class AS dependee + LEFT OUTER JOIN pg_rewrite AS r ON r.ev_class = dependee.oid + LEFT OUTER JOIN pg_depend AS d ON d.objid = r.oid + LEFT OUTER JOIN pg_class AS dependent ON d.refobjid = dependent.oid + WHERE + (dependee.oid != dependent.oid OR dependent.oid IS NULL) AND + (dependee.relname IN (SELECT tablename FROM pg_catalog.pg_tables) + OR dependee.relname IN (SELECT viewname FROM pg_catalog.pg_views)) AND + dependee.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname='public') + ORDER BY dependee.oid DESC; + ALTER VIEW "_table_metadata" OWNER TO "postgres"; + GRANT SELECT ON "_table_metadata" TO "readonly"; + ``` + 3. While still in your `ckan` container, get your `sqlalchemy.url`: `cat /etc/ckan/production.ini | grep sqlalchemy.url` (for example, `postgresql://ckan:123456@db/ckan`) + 4. 
Set the permissions by running: `cat ckan.sql | psql ` (for example, `cat ckan.sql | psql postgresql://ckan:123456@db/ckan`) \ No newline at end of file diff --git a/TESTING.md b/TESTING.md index 3fa21cf..5e7289d 100644 --- a/TESTING.md +++ b/TESTING.md @@ -56,7 +56,7 @@ Once you see a successful response, create a CKAN admin user: ``` $ docker-compose -f docker-compose.yaml -f .docker-compose-db.yaml -f .docker-compose.datagov-theme.yaml \ exec ckan ckan-paster --plugin=ckan \ - sysadmin add -c /etc/ckan/production.ini admin password=12345678 email=admin@localhost + sysadmin add -c /etc/ckan/ckan.ini admin password=12345678 email=admin@localhost ``` You should see the following prompt: @@ -153,7 +153,7 @@ $ paster datastore set-permissions -c test-core.ini | psql -h datastore-db -U po Solr is already configured as 'multi-core'. To verify it, you may run the following command inside the `ckan` container: ``` -$ grep solr_url /etc/ckan/production.ini +$ grep solr_url /etc/ckan/ckan.ini # Possible outputs: # single-core: solr_url = http://solr:8983/solr # multi-core: solr_url = http://solr:8983/solr/ckan diff --git a/cca-operator/update-instance.sh b/cca-operator/update-instance.sh index 4d20782..17a8555 100755 --- a/cca-operator/update-instance.sh +++ b/cca-operator/update-instance.sh @@ -213,7 +213,7 @@ else CKAN_ADMIN_PASSWORD=$(python3 -c "import binascii,os;print(binascii.hexlify(os.urandom(12)).decode())") echo y \ | kubectl $KUBECTL_GLOBAL_ARGS -n ${INSTANCE_NAMESPACE} exec -it ${CKAN_POD_NAME} -- bash -c \ - "ckan-paster --plugin=ckan sysadmin -c /etc/ckan/production.ini add admin password=${CKAN_ADMIN_PASSWORD} email=${CKAN_ADMIN_EMAIL}" \ + "ckan -c /etc/ckan/ckan.ini sysadmin add admin password=${CKAN_ADMIN_PASSWORD} email=${CKAN_ADMIN_EMAIL}" \ > /dev/stderr &&\ kubectl $KUBECTL_GLOBAL_ARGS -n "${INSTANCE_NAMESPACE}" \ create secret generic ckan-admin-password "--from-literal=CKAN_ADMIN_PASSWORD=${CKAN_ADMIN_PASSWORD}" diff --git a/ckan/Dockerfile b/ckan/Dockerfile index 2acd5ef..56f4e19 100644 --- a/ckan/Dockerfile +++ b/ckan/Dockerfile @@ -1,20 +1,67 @@ -# Based on CKAN 2.8 Dockerfile with minor modifications for deployment on multi-tenant CKAN cluster -FROM debian:buster +# Based on CKAN 2.8 Dockerfile with minor modifications for deployment on multi-tenant CKAN cluster and support for CKAN 2.10+ and Python 3 +FROM debian:trixie ARG EXTRA_PACKAGES ARG PIP_INDEX_URL ENV PIP_INDEX_URL=$PIP_INDEX_URL ARG GITHUB_URL ENV GITHUB_URL=$GITHUB_URL +ENV APP_DIR=/srv/app +ENV CKAN_CONFIG_PATH=/etc/ckan + +RUN apt update +RUN apt install -y locales + +RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen && \ + locale-gen +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 + +# Python 3.9 install adapted from https://techkamar.medium.com/how-to-deploy-specific-version-of-python-using-docker-96d387c16779 +# Set the working directory inside the container +WORKDIR ${APP_DIR} + +# Create the directory that Python 3.9 will be installed into +RUN mkdir /opt/python3.9 + +# To avoid .pyc files and save space +ENV PYTHONDONTWRITEBYTECODE 1 +ENV PYTHONUNBUFFERED 1 + +# Install all dependencies needed to compile Python 3.9 +RUN apt update +RUN apt install -y \ + wget \ + libffi-dev \ + gcc \ + build-essential \ + curl \ + tcl-dev \ + tk-dev \ + uuid-dev \ + lzma-dev \ + liblzma-dev \ + libssl-dev \ + libsqlite3-dev + +# Download, build, and install Python 3.9 +RUN wget https://www.python.org/ftp/python/3.9.18/Python-3.9.18.tgz && \ + tar -zxvf Python-3.9.18.tgz && \ + cd Python-3.9.18 && 
./configure --prefix=/opt/python3.9 && make && make install && \ + ln -s /opt/python3.9/bin/python3.9 /usr/bin/python3.9 + +# Verify Python installation +RUN /usr/bin/python3.9 --version + +WORKDIR / + +# Python 3.9 install --------- END # Install required system packages -RUN apt-get -q -y --force-yes update \ - && DEBIAN_FRONTEND=noninteractive apt-get -q -y --force-yes upgrade \ - && apt-get -q -y --force-yes install \ - python-dev \ - python-pip \ - python-virtualenv \ - python-wheel \ +RUN apt-get -q -y update \ + && DEBIAN_FRONTEND=noninteractive apt-get -q -y upgrade \ + && apt-get -q -y install \ libpq-dev \ libxml2-dev \ libxslt-dev \ @@ -22,12 +69,12 @@ RUN apt-get -q -y --force-yes update \ libssl-dev \ libffi-dev \ postgresql-client \ - build-essential \ git-core \ vim \ - wget \ redis-tools \ gettext \ + libmagic-dev \ + musl-dev \ ${EXTRA_PACKAGES} \ && apt-get -q clean \ && rm -rf /var/lib/apt/lists/* @@ -44,12 +91,21 @@ RUN useradd -r -u 900 -m -c "ckan account" -d $CKAN_HOME -s /bin/false ckan # Setup virtual environment for CKAN RUN mkdir -p $CKAN_VENV $CKAN_CONFIG $CKAN_STORAGE_PATH $CKAN_LOGS_PATH && \ - virtualenv $CKAN_VENV && \ + /usr/bin/python3.9 -m venv $CKAN_VENV && \ ln -s $CKAN_VENV/bin/pip /usr/local/bin/ckan-pip &&\ - ln -s $CKAN_VENV/bin/paster /usr/local/bin/ckan-paster + ln -s $CKAN_VENV/bin/ckan /usr/local/bin/ckan -# Pip dropped support of python 2.7. We need older version -RUN ckan-pip install --upgrade "pip < 21.0" +RUN ckan-pip install --upgrade pip &&\ + ckan-pip install --upgrade setuptools &&\ + ckan-pip install --upgrade wheel + +RUN ckan-pip install supervisor && \ + ckan-pip install uwsgi==2.0.19.1 && \ + ckan-pip install gevent==24.2.1 + +RUN apt-get update && apt-get install -y \ + uwsgi-plugins-all \ + uwsgi-plugin-python3 # Setup CKAN RUN ckan-pip install --index-url ${PIP_INDEX_URL:-https://pypi.org/simple/} -U pip &&\ @@ -60,7 +116,7 @@ USER ckan ARG CKAN_BRANCH ARG CKAN_REPO -RUN CKAN_BRANCH="${CKAN_BRANCH:-ckan-2.8.1}" && CKAN_REPO="${CKAN_REPO:-ckan/ckan}" &&\ +RUN CKAN_BRANCH="${CKAN_BRANCH:-ckan-2.10.4}" && CKAN_REPO="${CKAN_REPO:-ckan/ckan}" &&\ mkdir -p $CKAN_VENV/src &&\ wget --no-verbose -O $CKAN_VENV/src/${CKAN_BRANCH}.tar.gz https://github.com/${CKAN_REPO}/archive/${CKAN_BRANCH}.tar.gz &&\ cd $CKAN_VENV/src && tar -xzf ${CKAN_BRANCH}.tar.gz && mv ckan-${CKAN_BRANCH} ckan &&\ @@ -69,15 +125,14 @@ RUN CKAN_BRANCH="${CKAN_BRANCH:-ckan-2.8.1}" && CKAN_REPO="${CKAN_REPO:-ckan/cka ARG PRE_INSTALL RUN eval "${PRE_INSTALL}" -RUN sed -i 's/psycopg2==2.4.5/psycopg2==2.7.3.2/g' $CKAN_VENV/src/ckan/requirements.txt - RUN touch $CKAN_VENV/src/ckan/requirement-setuptools.txt && ckan-pip install --index-url ${PIP_INDEX_URL:-https://pypi.org/simple/} --upgrade --no-cache-dir -r $CKAN_VENV/src/ckan/requirement-setuptools.txt RUN touch $CKAN_VENV/src/ckan/requirements.txt && ckan-pip install --index-url ${PIP_INDEX_URL:-https://pypi.org/simple/} --upgrade --no-cache-dir -r $CKAN_VENV/src/ckan/requirements.txt RUN ckan-pip install --index-url ${PIP_INDEX_URL:-https://pypi.org/simple/} -e $CKAN_VENV/src/ckan/ COPY requirements.txt /tmp/ -RUN ckan-pip install --index-url ${PIP_INDEX_URL:-https://pypi.org/simple/} -r /tmp/requirements.txt &&\ + +RUN ckan-pip install --index-url ${PIP_INDEX_URL:-https://pypi.org/simple/} -r /tmp/requirements.txt && \ ckan-pip install --index-url ${PIP_INDEX_URL:-https://pypi.org/simple/} -U requests[security] COPY post_install_functions.sh /post_install_functions.sh @@ -105,6 +160,12 @@ ARG ROOT_INIT RUN eval 
"${ROOT_INIT}" RUN . /post_install_functions.sh && patch_ckan +ADD https://raw.githubusercontent.com/ckan/ckan/${CKAN_BRANCH}/wsgi.py ${CKAN_VENV}/wsgi.py +RUN chmod 644 ${CKAN_VENV}/wsgi.py + +RUN mkdir /etc/supervisord.d +COPY setup/supervisord.conf /etc + USER ckan ENTRYPOINT ["/ckan-entrypoint.sh"] diff --git a/ckan/entrypoint.sh b/ckan/entrypoint.sh index cff5909..4bdfa39 100755 --- a/ckan/entrypoint.sh +++ b/ckan/entrypoint.sh @@ -3,11 +3,13 @@ source $CKAN_K8S_SECRETS &&\ rm -f $CKAN_CONFIG/*.ini &&\ cp -f $CKAN_K8S_TEMPLATES/${CKAN_WHO_TEMPLATE_PREFIX}who.ini $CKAN_CONFIG/who.ini &&\ -bash /templater.sh $CKAN_K8S_TEMPLATES/${CKAN_CONFIG_TEMPLATE_PREFIX}production.ini.template > $CKAN_CONFIG/production.ini &&\ -echo 'production.ini:' && cat $CKAN_CONFIG/production.ini &&\ +bash /templater.sh $CKAN_K8S_TEMPLATES/${CKAN_CONFIG_TEMPLATE_PREFIX}ckan.ini.template > $CKAN_CONFIG/ckan.ini &&\ +echo 'ckan.ini:' && cat $CKAN_CONFIG/ckan.ini &&\ bash /templater.sh $CKAN_K8S_TEMPLATES/${CKAN_INIT_TEMPLATE_PREFIX}ckan_init.sh.template > $CKAN_CONFIG/ckan_init.sh &&\ echo 'ckan_init.sh:' && cat $CKAN_CONFIG/ckan_init.sh &&\ bash $CKAN_CONFIG/ckan_init.sh +CKAN_CONFIG_PATH="$CKAN_CONFIG/ckan.ini" + [ "$?" != "0" ] && echo ERROR: CKAN Initialization failed: $? && exit 1 echo '--START_CKAN_CLOUD_LOG--{"event":"ckan-entrypoint-initialized"}--END_CKAN_CLOUD_LOG--' >/dev/stderr @@ -17,8 +19,8 @@ if [ "$DEBUG_MODE" == "TRUE" ]; then fi if [ "$*" == "" ]; then - echo running ckan-paster db init &&\ - ckan-paster --plugin=ckan db init -c "${CKAN_CONFIG}/production.ini" &&\ + echo running ckan db init &&\ + ckan -c ${CKAN_CONFIG_PATH} db init &&\ echo db initialization complete [ "$?" != "0" ] && echo ERROR: DB Initialization failed && exit 1 @@ -31,7 +33,44 @@ if [ "$*" == "" ]; then echo '--START_CKAN_CLOUD_LOG--{"event":"ckan-entrypoint-extra-init-success"}--END_CKAN_CLOUD_LOG--' >/dev/stderr - exec ${CKAN_VENV}/bin/gunicorn --paste ${CKAN_CONFIG}/production.ini --workers ${GUNICORN_WORKERS} --timeout ${GUNICORN_TIMEOUT} + ## Generate a random password + #RANDOM_PASSWORD=$(< /dev/urandom tr -dc A-Za-z0-9 | head -c 12) + + #echo "Creating system admin user 'ckan_admin'" + #yes y | ckan -c $CKAN_CONFIG_PATH sysadmin add ckan_admin email=ckan_admin@localhost password=$RANDOM_PASSWORD + #echo "Setting up ckan.datapusher.api_token in the CKAN config file $CKAN_CONFIG_PATH" + #CKAN_API_KEY=$(ckan -c $CKAN_CONFIG_PATH user token add ckan_admin datapusher | tail -n 1 | tr -d '\t') + #echo "CKAN_API_KEY: $CKAN_API_KEY" + #ckan config-tool $CKAN_CONFIG_PATH "ckan.datapusher.api_token=$CKAN_API_KEY" + #cat $CKAN_CONFIG_PATH | grep ckan.datapusher.api_token + + #ckan config-tool $CKAN_CONFIG_PATH -e "ckan.plugins = image_view text_view recline_view datastore datapusher resource_proxy geojson_view querytool stats" + + source /usr/lib/ckan/venv/bin/activate + + export CKAN_INI=$CKAN_CONFIG_PATH + export PYTHONPATH=/usr/lib/ckan/venv:$PYTHONPATH + + # Set the common uwsgi options + UWSGI_OPTS="--plugins-dir /usr/lib/uwsgi/plugins \ + --plugins http \ + --socket /tmp/uwsgi.sock \ + --wsgi-file /usr/lib/ckan/venv/wsgi.py \ + --module wsgi:application \ + --callable application \ + --virtualenv /usr/lib/ckan/venv \ + --uid 900 --gid 900 \ + --http [::]:5000 \ + --master --enable-threads \ + --lazy-apps \ + -p 2 -L -b 32768 --vacuum \ + --harakiri 300" + + # Start supervisord + supervisord --configuration /etc/supervisord.conf & + # Start uwsgi + uwsgi $UWSGI_OPTS + else sleep 180 exec "$@" diff --git 
a/ckan/overrides/vital-strategies/filesystem/.init b/ckan/overrides/vital-strategies/filesystem/.init new file mode 100644 index 0000000..e69de29 diff --git a/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.cve-2023-32321_security.patch b/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.cve-2023-32321_security.patch deleted file mode 100644 index 5e0c870..0000000 --- a/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.cve-2023-32321_security.patch +++ /dev/null @@ -1,102 +0,0 @@ -diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py -index f83817c4c..59add955b 100644 ---- a/ckan/lib/uploader.py -+++ b/ckan/lib/uploader.py -@@ -238,13 +238,19 @@ class ResourceUpload(object): - resource['url_type'] = '' - - def get_directory(self, id): -- directory = os.path.join(self.storage_path, -- id[0:3], id[3:6]) -+ real_storage = os.path.realpath(self.storage_path) -+ directory = os.path.join(real_storage, id[0:3], id[3:6]) -+ if directory != os.path.realpath(directory): -+ raise logic.ValidationError({'upload': ['Invalid storage directory']}) - return directory -- -+ - def get_path(self, id): - directory = self.get_directory(id) - filepath = os.path.join(directory, id[6:]) -+ -+ if filepath != os.path.realpath(filepath): -+ raise logic.ValidationError({'upload': ['Invalid storage path']}) -+ - return filepath - - def upload(self, id, max_size=10): -diff --git a/ckan/logic/schema.py b/ckan/logic/schema.py -index fe8e05c38..f33ba8976 100644 ---- a/ckan/logic/schema.py -+++ b/ckan/logic/schema.py -@@ -70,13 +70,15 @@ from ckan.logic.validators import ( - extra_key_not_in_root_schema, - empty_if_not_sysadmin, - package_id_does_not_exist, -- email_validator -+ email_validator, -+ resource_id_validator, -+ resource_id_does_not_exist - ) - - - def default_resource_schema(): - schema = { -- 'id': [ignore_empty, unicode], -+ 'id': [ignore_empty, resource_id_validator, resource_id_does_not_exist, unicode], - 'revision_id': [ignore_missing, unicode], - 'package_id': [ignore], - 'url': [ignore_missing, unicode, remove_whitespace], -diff --git a/ckan/logic/validators.py b/ckan/logic/validators.py -index 57c2b267e..ad4274c17 100644 ---- a/ckan/logic/validators.py -+++ b/ckan/logic/validators.py -@@ -5,6 +5,7 @@ import datetime - from itertools import count - import re - import mimetypes -+from sqlalchemy.orm.exc import NoResultFound - - import ckan.lib.navl.dictization_functions as df - import ckan.logic as logic -@@ -193,6 +194,26 @@ def package_id_or_name_exists(package_id_or_name, context): - return package_id_or_name - - -+def resource_id_does_not_exist(key, data, errors, context): -+ session = context['session'] -+ model = context['model'] -+ -+ if data[key] is missing: -+ return -+ resource_id = data[key] -+ assert key[0] == 'resources', ('validator depends on resource schema ' -+ 'validating as part of package schema') -+ package_id = data.get(('id',)) -+ query = session.query(model.Resource.package_id).filter( -+ model.Resource.id == resource_id, -+ ) -+ try: -+ [parent_id] = query.one() -+ except NoResultFound: -+ return -+ if parent_id != package_id: -+ errors[key].append(_('Resource id already exists.')) -+ - def resource_id_exists(value, context): - model = context['model'] - session = context['session'] -@@ -200,6 +221,13 @@ def resource_id_exists(value, context): - raise Invalid('%s: %s' % (_('Not found'), _('Resource'))) - return value - -+def resource_id_validator(value): -+ pattern = re.compile("[^0-9a-zA-Z 
_-]") -+ if pattern.search(value): -+ raise Invalid(_('Invalid characters in resource id')) -+ if len(value) < 7 or len(value) > 100: -+ raise Invalid(_('Invalid length for resource id')) -+ return value - - def user_id_exists(user_id, context): - '''Raises Invalid if the given user_id does not exist in the model given diff --git a/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.datapusher_timeout.patch b/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.datapusher_timeout.patch deleted file mode 100644 index fff4400..0000000 --- a/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.datapusher_timeout.patch +++ /dev/null @@ -1,17 +0,0 @@ -diff --git a/ckanext/datapusher/logic/action.py b/ckanext/datapusher/logic/action.py -index 7d34b7f..c7e80eb 100644 ---- a/ckanext/datapusher/logic/action.py -+++ b/ckanext/datapusher/logic/action.py -@@ -279,9 +279,10 @@ def datapusher_status(context, data_dict): - job_id = value.get('job_id') - url = None - job_detail = None -- -+ log_limit = config.get('ckan.datapusher.log_limit', 100) - if job_id: -- url = urlparse.urljoin(datapusher_url, 'job' + '/' + job_id) -+ url = urlparse.urljoin( -+ datapusher_url, 'job' + '/' + job_id + '?limit=%s'%log_limit) - try: - r = requests.get(url, headers={'Content-Type': 'application/json', - 'Authorization': job_key}) diff --git a/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.psycopg2_upgrade_fix.patch b/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.psycopg2_upgrade_fix.patch deleted file mode 100644 index 8dbd5cf..0000000 --- a/ckan/overrides/vital-strategies/filesystem/etc/patches/ckan/vital-strategies.psycopg2_upgrade_fix.patch +++ /dev/null @@ -1,18 +0,0 @@ -diff --git a/ckanext/datastore/helpers.py b/ckanext/datastore/helpers.py -index b616f0f..b58cb1a 100644 ---- a/ckanext/datastore/helpers.py -+++ b/ckanext/datastore/helpers.py -@@ -105,7 +105,12 @@ def get_table_names_from_sql(context, sql): - table_names = [] - - try: -- query_plan = json.loads(result['QUERY PLAN']) -+ if isinstance(result['QUERY PLAN'], list): -+ result_query_plan = json.dumps(result['QUERY PLAN']) -+ query_plan = json.loads(result_query_plan) -+ else: -+ query_plan = json.loads(result['QUERY PLAN']) -+ - plan = query_plan[0]['Plan'] - - table_names.extend(_get_table_names_from_plan(plan)) diff --git a/ckan/post_install_functions.sh b/ckan/post_install_functions.sh index cf84673..9cd2c7a 100644 --- a/ckan/post_install_functions.sh +++ b/ckan/post_install_functions.sh @@ -22,9 +22,37 @@ install_standard_ckan_extension_github() { e) EGG=${OPTARG};; esac done -# echo "#### REPO: $REPO_NAME ####" -# echo "#### BRANCH: $BRANCH ####" -# echo "#### EGG: $EGG ####" + + echo "#### REPO: $REPO_NAME ####" + echo "#### BRANCH: $BRANCH ####" + echo "#### REPO URL: $GITHUB_URL/$REPO_NAME.git ####" + + # Check if the branch exists by examining the output directly + BRANCH_EXISTS=$(git ls-remote --heads ${GITHUB_URL}/${REPO_NAME}.git ${BRANCH}) + + if [ ! -n "$BRANCH_EXISTS" ]; then + # It might be a commit instead of a branch + BRANCH_EXISTS=$(git ls-remote --heads ${GITHUB_URL}/${REPO_NAME}.git | grep -o $BRANCH) + fi + + if [ -z "$BRANCH_EXISTS" ]; then + echo "#### BRANCH EXISTS: $BRANCH_EXISTS ####" + + if [ "$BRANCH" = "master" ]; then + BRANCH_EXISTS=$(git ls-remote --heads ${GITHUB_URL}/${REPO_NAME}.git main) + if [ -n "$BRANCH_EXISTS" ]; then + echo "Branch 'master' not found, switching to 'main'." 
+ BRANCH="main" + else + echo "Branch 'master' not found, and 'main' also does not exist." + exit 1 + fi + else + echo "Branch '$BRANCH' not found. Please check the branch name." + exit 1 + fi + fi + if [ $PIP_INDEX_URL != https://pypi.org/simple/ ]; then TMPDIR=${CKAN_VENV}/src/${EGG} git clone -b $BRANCH ${GITHUB_URL}/${REPO_NAME}.git ${TMPDIR} @@ -37,14 +65,40 @@ install_standard_ckan_extension_github() { done &&\ ckan-pip install --no-use-pep517 --index-url ${PIP_INDEX_URL} -e ${TMPDIR} else - TEMPFILE=`mktemp` - for REQUIREMENTS_FILE_NAME in requirements pip-requirements - do - if wget -O $TEMPFILE https://raw.githubusercontent.com/${REPO_NAME}/$BRANCH/$REQUIREMENTS_FILE_NAME.txt - then ckan-pip install --index-url ${PIP_INDEX_URL} -r $TEMPFILE && break; + # Remove poetry files: ckan-cloud-docker currently has issues with poetry dependencies + if [ "${REPO_NAME}" = "datopian/ckanext-sentry" ]; then + TMPDIR=${CKAN_VENV}/src/${EGG} + git clone -b $BRANCH ${GITHUB_URL}/${REPO_NAME}.git ${TMPDIR} + + CURRENT_DIR=$(pwd) + + cd ${TMPDIR} + + if [ -f "poetry.lock" ] && [ -f "pyproject.toml" ]; then + rm -f "poetry.lock" "pyproject.toml" fi - done &&\ - ckan-pip install --no-use-pep517 --index-url ${PIP_INDEX_URL} -e git+${GITHUB_URL}/${REPO_NAME}.git@$BRANCH#egg=${EGG} + + for REQUIREMENTS_FILE_NAME in requirements pip-requirements + do + if [ -f ${TMPDIR}/$REQUIREMENTS_FILE_NAME.txt ]; then + ckan-pip install --index-url ${PIP_INDEX_URL} -r ${TMPDIR}/$REQUIREMENTS_FILE_NAME.txt && break; + fi + done + + ckan-pip install --no-use-pep517 --index-url ${PIP_INDEX_URL} -e ${TMPDIR} + + cd ${CURRENT_DIR} + + else + TEMPFILE=`mktemp` + for REQUIREMENTS_FILE_NAME in requirements pip-requirements + do + if wget -O $TEMPFILE https://raw.githubusercontent.com/${REPO_NAME}/$BRANCH/$REQUIREMENTS_FILE_NAME.txt + then ckan-pip install --index-url ${PIP_INDEX_URL} -r $TEMPFILE && break; + fi + done &&\ + ckan-pip install --no-use-pep517 --index-url ${PIP_INDEX_URL} -e git+${GITHUB_URL}/${REPO_NAME}.git@$BRANCH#egg=${EGG} + fi fi } diff --git a/ckan/requirements.txt b/ckan/requirements.txt index 22d90b1..0cd7e1f 100644 --- a/ckan/requirements.txt +++ b/ckan/requirements.txt @@ -1,5 +1,5 @@ gunicorn ckanext-xloader messytables -pdftables +#pdftables Unidecode diff --git a/ckan/setup/supervisord.conf b/ckan/setup/supervisord.conf new file mode 100644 index 0000000..8aca4eb --- /dev/null +++ b/ckan/setup/supervisord.conf @@ -0,0 +1,23 @@ +[unix_http_server] +file = /tmp/supervisor.sock +chmod = 0777 +chown = ckan:ckan + +[supervisord] +logfile = /tmp/supervisord.log +logfile_maxbytes = 50MB +logfile_backups=10 +loglevel = info +pidfile = /tmp/supervisord.pid +nodaemon = true +umask = 022 +identifier = supervisor + +[supervisorctl] +serverurl = unix:///tmp/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[include] +files = /etc/supervisord.d/*.conf diff --git a/ckan/setup/uwsgi.conf b/ckan/setup/uwsgi.conf new file mode 100644 index 0000000..ec93bc4 --- /dev/null +++ b/ckan/setup/uwsgi.conf @@ -0,0 +1,5 @@ +[uwsgi] +route = ^(?!/api).*$ basicauth:Restricted,/srv/app/.htpasswd + +virtualenv = /usr/lib/ckan/venv +module = wsgi:application \ No newline at end of file diff --git a/ckan/templater.sh b/ckan/templater.sh old mode 100644 new mode 100755 index 80f92b5..e59e314 --- a/ckan/templater.sh +++ b/ckan/templater.sh @@ -79,5 +79,6 @@ for var in $vars; do value=$(echo "$value" | sed 's/\//\\\//g'); replaces="-e 
's/{{$var}}/${value}/g' $replaces" done + escaped_template_path=$(echo $template | sed 's/ /\\ /g') eval sed $replaces "$escaped_template_path" diff --git a/configs_diff.sh b/configs_diff.sh new file mode 100755 index 0000000..327b420 --- /dev/null +++ b/configs_diff.sh @@ -0,0 +1,116 @@ +#!/bin/bash + +# This script will output .ini and traefik.toml variable changes from a 'git diff > configs.diff'. +# Use this before stashing changes and upgrading CKAN so you can run `make secret` again and input the variable values. +# Note: this script only looks for changes in .ini and traefik.toml (specifically email, main, and rule). + +file_path="configs.diff" +output_file="config_changes.txt" + +if [ ! -f "$file_path" ]; then + echo "File $file_path not found. Please run 'git diff > configs.diff' first." + exit 1 +fi + +rm -f "$output_file" + +trim() { + local var="$1" + var="${var#"${var%%[![:space:]]*}"}" + var="${var%"${var##*[![:space:]]}"}" + var="${var%\"}" + var="${var#\"}" + echo -n "$var" +} + +echo "The following variables have changed in the .ini and traefik.toml files and need to be updated in the secrets:" +echo "" + +in_target_file=0 +current_file="" + +# Parse the diff file +while IFS= read -r line; do + if [[ "$line" =~ ^diff\ --git\ a/.*\.ini\.template ]]; then + in_target_file=1 + current_file="ini" + elif [[ "$line" =~ ^diff\ --git\ a/.*traefik.toml ]]; then + in_target_file=1 + current_file="toml" + elif [[ "$line" =~ ^diff\ --git ]]; then + in_target_file=0 + fi + + # Output the variable changes + if [[ "$in_target_file" -eq 1 ]]; then + if [[ "$line" == +* ]] && [[ ! "$line" == "+++"* ]]; then + line_content="${line:1}" + + if [[ "$current_file" == "toml" && "$line_content" == *email* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Let's Encrypt Email: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "toml" && "$line_content" == *main* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Let's Encrypt Domain: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *ckanext.gtm.gtm_id* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Google Tag Manager ID: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *googleanalytics.id* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Google Analytics ID: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *googleanalytics.account* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Google Analytics Account: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *googleanalytics.username* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Google Analytics Username: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *googleanalytics.password* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Google Analytics Password: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *ckan.sentry.dsn* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "Sentry DSN: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *smtp.server* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "SMTP Server Address (include port, e.g., 
'my.smtp.server:587'): $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *smtp.user* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "SMTP Username: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *smtp.password* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "SMTP Password: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *smtp.mail_from* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "SMTP Mail From: $(trim $value)" | tee -a "$output_file" + elif [[ "$current_file" == "ini" && "$line_content" == *=* ]]; then + key="${line_content%%=*}" + value="${line_content#*=}" + echo "$(trim "$key"): $(trim $value)" | tee -a "$output_file" + fi + fi + fi +done <"$file_path" + +if [ ! -s "$output_file" ]; then + echo "" + echo "No changes found in .ini or traefik.toml files." +else + echo "" + echo "Note: A list of these changes can also be found in '$output_file'. Make sure to run 'git pull' before running 'make secret'." +fi + +echo "" +echo "You are now ready to run 'git pull' and continue with the upgrade." +echo "" diff --git a/datapusher-plus/Dockerfile b/datapusher-plus/Dockerfile new file mode 100644 index 0000000..e8f0989 --- /dev/null +++ b/datapusher-plus/Dockerfile @@ -0,0 +1,100 @@ +############################# +### Build DataPusher Plus ### +############################# +FROM ubuntu:jammy + + +LABEL maintainer="Minhaj" + + +# Set timezone +ENV TZ=UTC +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +# Set Locale +ENV LC_ALL=en_US.UTF-8 + +ENV SRC_DIR=/srv/app/src +ENV VENV=/usr/lib/ckan/dpplus_venv +ENV CFG_DIR=/etc/ckan/datapusher + +WORKDIR ${SRC_DIR} + +# Set the locale +RUN apt-get update +RUN apt-get install --no-install-recommends -y locales +RUN sed -i "/$LC_ALL/s/^# //g" /etc/locale.gen +RUN dpkg-reconfigure --frontend=noninteractive locales +RUN update-locale LANG=${LC_ALL} +RUN apt-get install -y software-properties-common +RUN add-apt-repository ppa:deadsnakes/ppa + +# Install apt-utils and other dependencies +RUN apt-get install --no-install-recommends -y \ + apt-utils \ + build-essential \ + libxslt1-dev \ + libxml2-dev \ + libffi-dev \ + wget \ + curl \ + unzip \ + git \ + libpq-dev \ + file \ + vim + +# Install Python 3.9 (check if it's available in the default repo first) +# If not available, use the previously added PPA +RUN apt-get install -y python3.9 python3.9-dev python3.9-venv + +# Set Python 3.9 as the default Python version +RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 +RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 + +# Install pip for Python 3.9 +RUN apt-get install -y python3-pip + +# Clean up APT when done +RUN apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + + +#install qsv; +RUN wget https://github.com/jqnatividad/qsv/releases/download/0.108.0/qsv-0.108.0-x86_64-unknown-linux-gnu.zip +RUN unzip qsv-0.108.0-x86_64-unknown-linux-gnu.zip +RUN rm qsv-0.108.0-x86_64-unknown-linux-gnu.zip +RUN mv qsv* /usr/local/bin + + +#python env setup; link python3 to python cmd; make venv; install uwsgi; +RUN python3 -m venv ${VENV} +RUN ${VENV}/bin/pip install uwsgi + + +#INSTALL DATAPUSHER-PLUS FROM SOURCE REPO +RUN git clone --branch 0.15.0 https://github.com/datHere/datapusher-plus +RUN cd ${SRC_DIR}/datapusher-plus && \ + ${VENV}/bin/pip 
install -r requirements-dev.txt && \ + ${VENV}/bin/pip install -e . + + +RUN ${VENV}/bin/pip install Flask==2.3.3 +RUN ${VENV}/bin/pip install Werkzeug==2.3.0 + + +#SETUP CONFIG/SETTINGS.PY +RUN mkdir -p ${CFG_DIR} + +RUN curl https://raw.githubusercontent.com/dathere/datapusher-plus/0.15.0/deployment/datapusher-uwsgi.ini -o ${CFG_DIR}/uwsgi.ini + +COPY datapusher-plus/example.env ${SRC_DIR}/datapusher-plus/datapusher/.env +ENV JOB_CONFIG=${SRC_DIR}/datapusher-plus/datapusher/.env + +COPY datapusher-plus/entrypoint/startup.sh /startup.sh +RUN chmod +x /startup.sh + + +ENTRYPOINT [ "bash", "-c", "/startup.sh" ] + + +EXPOSE 8800 diff --git a/datapusher-plus/datapusher-settings.py b/datapusher-plus/datapusher-settings.py new file mode 100644 index 0000000..d9ba19b --- /dev/null +++ b/datapusher-plus/datapusher-settings.py @@ -0,0 +1,4 @@ +import os + +WRITE_ENGINE_URL = os.environ.get("WRITE_ENGINE_URL") +SQLALCHEMY_DATABASE_URI = os.environ.get("SQLALCHEMY_DATABASE_URI") \ No newline at end of file diff --git a/datapusher-plus/entrypoint/startup.sh b/datapusher-plus/entrypoint/startup.sh new file mode 100644 index 0000000..e342952 --- /dev/null +++ b/datapusher-plus/entrypoint/startup.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# turn on bash's job control +set -m + +check_db_ready() { + (echo > /dev/tcp/datastore-db/5432) >/dev/null 2>&1 +} + +until check_db_ready; do + echo "Waiting for datastore-db to be ready..." + sleep 2 +done + +echo "datastore-db is ready. Starting datapusher..." + +# Start the primary process and put it in the background +${VENV}/bin/uwsgi --socket=/tmp/uwsgi.sock --enable-threads -i ${CFG_DIR}/uwsgi.ini --wsgi-file=${SRC_DIR}/datapusher-plus/wsgi.py & + +# Start the test process +#cd ${SRC_DIR}/testing-datapusher-plus && ${VENV}/bin/python test.py + +fg %1 diff --git a/datapusher-plus/example.env b/datapusher-plus/example.env new file mode 100644 index 0000000..5b6b289 --- /dev/null +++ b/datapusher-plus/example.env @@ -0,0 +1,161 @@ +# To specify Datapusher+ settings, modify and copy this file to ".env" +# and put it in working directory from where DP+ is started. +# e.g. in development mode, in the datapusher-plus/datapusher directory +# in production mode, in the /etc/ckan/datapusher-plus directory +# +# Note that DP+ settings can also be passed using environment variables +# e.g. export PII_SCREENING=True + +WRITE_ENGINE_URL = 'postgresql://postgres:123456@datastore-db/datastore' + +# The connect string of the Datapusher+ Job database +SQLALCHEMY_DATABASE_URI = 'postgresql://postgres:123456@datastore-db/datapusher_jobs' + +DOWNLOAD_PREVIEW_ONLY=False + +# =============== DOWNLOAD SETTINGS ============== +# 25mb, this is ignored if either PREVIEW_ROWS > 0 or DOWNLOAD_PREVIEW_ONLY is True +MAX_CONTENT_LENGTH = 471859200 + +# A Datapusher+ job is triggered automatically everytime a resource is modified (even just its metadata) +# if its mimetype is one of the supported datapusher.formats. +# To ensure DP+ doesn't push an unchanged resource, it computes and stores the hash of the file +# If the hash has not changed (i.e. the file has not been modified), it refrains from "re-pushing" it +IGNORE_FILE_HASH = False + +# In bytes. The resource is downloaded on a streaming basis, 16K at a time +CHUNK_SIZE = 16384 + +# In seconds. How long before DP+ download times out +DOWNLOAD_TIMEOUT = 30 + +# If the SSL certificate is verified. 
This is set to False by default +# since externally hosted datasets may sometimes have expired/self-signed SSL certificates +SSL_VERIFY = False + +# If this is not zero, the number of preview rows to push into the datastore +# If zero, it pushes the entire file +PREVIEW_ROWS = 0 + +# If this is True, only the first n PREVIEW_ROWS are downloaded, and not the whole file +DOWNLOAD_PREVIEW_ONLY = False + +DOWNLOAD_PROXY = '' + +# =========== CKAN SERVICE PROVIDER SETTINGS ========== +HOST = "0.0.0.0" +PORT = 8800 + +# turns on logger at Debug level +DEBUG = True +# If False, configures the logger for production +# i.e. logs to STDERR and LOG_FILE (autorotates after 68mb, with 5 backups), +# and emails errors to admins. +# If True, only turns on Debug if DEBUG = True +TESTING = False + +FROM_EMAIL = 'dpplus-errors@domain.com' +# comma-delimited list of emails to send CKAN Service Provider errors to +ADMINS = '' + +# Error logging +LOG_FILE = '/tmp/ckan_service.log' +# Also show log on STDERR +STDERR = True + +# These settings are randomly generated by default +# only set these if you need to interface with the CKAN Service Provider API +# see https://ckan-service-provider.readthedocs.io/ +# SECRET_KEY = "please replace me" +# USERNAME = "admin" +# PASSWORD = "changeme" + +# number of days to keep job history +KEEP_JOBS_AGE = 60 + +# ============ QSV ANALYSIS SETTINGS ========== + +# ---------- BINARY PATHS ------------- +# qsv binary to use +# optionally, you can also use qsvdp_nightly. +# qsvdp is already very fast, but if you want even more speed +# qsvdp_nightly is compiled/linked in such a way that it's even faster/smaller +# see https://github.com/jqnatividad/qsv/blob/master/docs/PERFORMANCE.md#nightly-release-builds +QSV_BIN = '/usr/local/bin/qsvdp' + +# file binary to use. `file` is used to get file metadata to display on the log +# if qsv cannot open a spreadsheet file (probably, because its password-protected or corrupt) +FILE_BIN = '/usr/bin/file' + +# Dates are parsed with an MDY preference by default +# set PREFER_DMY = True if date-parsing should prefer DMY instead +PREFER_DMY = False + +# The zero-based index of the default sheet to export to CSV. 0 is the first sheet. +# Accepts negative numbers. -1 is the last sheet, -2 the 2nd to last sheet, etc. +DEFAULT_EXCEL_SHEET = 0 + +# Check if a file is sorted and has duplicates +SORT_AND_DUPE_CHECK = True + +# Should CSVs be deduped? Note that deduping also +# sorts the CSV. +DEDUP = False + +# -------- SUMMARY STATS SETTINGS ----------- +# Create a resource for calculated summary stats? +ADD_SUMMARY_STATS_RESOURCE = False + +# Summary Stats don't make sense if PREVIEW_ROWS > 0 +# because, you're only summarizing the preview, not the whole file +# Set to True if Summary Stats should also be done for previews +SUMMARY_STATS_WITH_PREVIEW = False + +# additional command line options to pass to qsv stats when creating +# summary stats. 
Set to `--everything` if you want to include all the stats, +# particularly, when ADD_SUMMARY_STATS_RESOURCE is True +SUMMARY_STATS_OPTIONS = '' + +# -------- AUTO INDEX SETTINGS ---------- +# if AUTO_INDEX_THRESHOLD > 0 or AUTO_INDEX_DATES is true +# create indices automatically based on as column's cardinality (number of unique values) +# - if a column's cardinality <= AUTO_INDEX_THRESHOLD, create an index for that column +# - if AUTO_INDEX_THRESHOLD = -1, index all columns regardless of its cardinality +AUTO_INDEX_THRESHOLD = 3 + +# for columns w/ cardinality equal to record_count, it's all unique values, create a unique index +AUTO_UNIQUE_INDEX = True + +# always index date fields? +AUTO_INDEX_DATES = True + +# ------ AUTO ALIAS SETTINGS ---------- +# Should an alias be automatically created? +# Aliases are easier to use than resource_ids, and can be used with the CKAN API where +# resource_ids are used. Aliases are also SQL views that are easier to use when querying +# the CKAN Datastore database. +# Aliases are created by concatenating "{resource_name}-{package_name}-{owner_org_name}" +# truncated at 55-characters. +AUTO_ALIAS = False + +# Should aliases should always be unique? In case of an alias name collision, a three-digit +# sequence number is appended. +AUTO_ALIAS_UNIQUE = False + +# -------- PII SETTINGS ----------- +PII_SCREENING = False + +# Stop scanning on first PII found +PII_QUICK_SCREEN = False + +# Abort Datapusher+ job if PII is found +PII_FOUND_ABORT = True + +# Create a resource where PII candidates are stored? +PII_SHOW_CANDIDATES = True + +# The resource ID/alias of a Text file that has the +# regex patterns to use for PII scanning. +# If this is not specified, the default PII scanning rules in +# default_pii_regexes.txt are used +PII_REGEX_RESOURCE_ID_OR_ALIAS = '' diff --git a/db/Dockerfile b/db/Dockerfile index e0d49a8..175ee43 100644 --- a/db/Dockerfile +++ b/db/Dockerfile @@ -1,9 +1,15 @@ -FROM mdillon/postgis:9.6-alpine +FROM postgis/postgis:12-3.1-alpine ARG APK_REPOSITORY RUN apk --update add supervisor --update-cache --repository ${APK_REPOSITORY} --allow-untrusted +#RUN apk add --no-cache postgis +# +#RUN mkdir -p /usr/local/share/postgresql/extension +# +#RUN cp /usr/share/postgresql15/extension/postgis.control /usr/local/share/postgresql/extension/ +# COPY init_ckan_db.sh /docker-entrypoint-initdb.d/ COPY *.sh /db-scripts/ COPY datastore-permissions.sql.template /db-scripts/ diff --git a/db/datastore-permissions-update.sh b/db/datastore-permissions-update.sh index fe2fb4e..1dd2c2b 100755 --- a/db/datastore-permissions-update.sh +++ b/db/datastore-permissions-update.sh @@ -1,19 +1,19 @@ cd / while ! su postgres -c "pg_isready"; do echo waiting for DB..; sleep 1; done -[ `su postgres -c "psql -c \"select count(1) from pg_roles where rolname='publicreadonly'\" -tA"` == "0" ] &&\ - echo creating role publicreadonly &&\ - ! su postgres -c "psql -c \"create role publicreadonly with login password '${DATASTORE_PUBLIC_RO_PASSWORD}';\"" \ - && echo failed to create publicreadonly role && exit 1 +[ `su postgres -c "psql -c \"select count(1) from pg_roles where rolname='readonly'\" -tA"` == "0" ] &&\ + echo creating role readonly &&\ + ! su postgres -c "psql -c \"create role readonly with login password '${DATASTORE_PUBLIC_RO_PASSWORD}';\"" \ + && echo failed to create readonly role && exit 1 echo getting all datastore resource ids ! 
DATASTORE_RESOURCES=`su postgres -c 'psql datastore -c "select tablename from pg_tables where schemaname='"'public'"';" -tA'` \ && echo failed to get datastore tables && exit 1 echo updating datastore table permissions for RESOURCE in $DATASTORE_RESOURCES; do if wget -qO /dev/null http://ckan:5000/api/3/action/resource_show?id=${RESOURCE} 2>/dev/null; then - ! su postgres -c "psql datastore -c 'grant select on \"${RESOURCE}\" to publicreadonly;'" >/dev/null &&\ - echo failed to grant select permissions for publicreadonly on ${RESOURCE} + ! su postgres -c "psql datastore -c 'grant select on \"${RESOURCE}\" to readonly;'" >/dev/null &&\ + echo failed to grant select permissions for readonly on ${RESOURCE} else - ! su postgres -c "psql datastore -c 'revoke select on \"${RESOURCE}\" from publicreadonly;'" >/dev/null &&\ - echo failed to revoke select permission for publicreadonly on ${RESOURCE} + ! su postgres -c "psql datastore -c 'revoke select on \"${RESOURCE}\" from readonly;'" >/dev/null &&\ + echo failed to revoke select permission for readonly on ${RESOURCE} fi done diff --git a/db/migration/ckan-permissions.sql b/db/migration/ckan-permissions.sql new file mode 100644 index 0000000..2477bfe --- /dev/null +++ b/db/migration/ckan-permissions.sql @@ -0,0 +1,7 @@ +\connect "ckan" + +GRANT CREATE ON SCHEMA public TO "ckan"; +GRANT USAGE ON SCHEMA public TO "ckan"; + +-- take connect permissions from main db +REVOKE CONNECT ON DATABASE "ckan" FROM "readonly"; \ No newline at end of file diff --git a/db/migration/datastore-permissions.sql b/db/migration/datastore-permissions.sql new file mode 100644 index 0000000..4aa6833 --- /dev/null +++ b/db/migration/datastore-permissions.sql @@ -0,0 +1,76 @@ +\connect "datastore" + +-- revoke permissions for the read-only user +REVOKE CREATE ON SCHEMA public FROM PUBLIC; +REVOKE USAGE ON SCHEMA public FROM PUBLIC; + +GRANT CREATE ON SCHEMA public TO "postgres"; +GRANT USAGE ON SCHEMA public TO "postgres"; + +-- grant select permissions for read-only user +GRANT CONNECT ON DATABASE "datastore" TO "readonly"; +GRANT USAGE ON SCHEMA public TO "readonly"; + +-- grant access to current tables and views to read-only user +GRANT SELECT ON ALL TABLES IN SCHEMA public TO "readonly"; + +-- grant access to new tables and views by default +ALTER DEFAULT PRIVILEGES FOR USER "postgres" IN SCHEMA public + GRANT SELECT ON TABLES TO "readonly"; + +-- a view for listing valid table (resource id) and view names +CREATE OR REPLACE VIEW "_table_metadata" AS + SELECT DISTINCT + substr(md5(dependee.relname || COALESCE(dependent.relname, '')), 0, 17) AS "_id", + dependee.relname AS name, + dependee.oid AS oid, + dependent.relname AS alias_of + FROM + pg_class AS dependee + LEFT OUTER JOIN pg_rewrite AS r ON r.ev_class = dependee.oid + LEFT OUTER JOIN pg_depend AS d ON d.objid = r.oid + LEFT OUTER JOIN pg_class AS dependent ON d.refobjid = dependent.oid + WHERE + (dependee.oid != dependent.oid OR dependent.oid IS NULL) AND + -- is a table (from pg_tables view definition) + -- or is a view (from pg_views view definition) + (dependee.relkind = 'r'::"char" OR dependee.relkind = 'v'::"char") + AND dependee.relnamespace = ( + SELECT oid FROM pg_namespace WHERE nspname='public') + ORDER BY dependee.oid DESC; +ALTER VIEW "_table_metadata" OWNER TO "postgres"; +GRANT SELECT ON "_table_metadata" TO "readonly"; + +-- _full_text fields are now updated by a trigger when set to NULL +CREATE OR REPLACE FUNCTION populate_full_text_trigger() RETURNS trigger +AS $body$ + BEGIN + IF NEW._full_text 
IS NOT NULL THEN + RETURN NEW; + END IF; + NEW._full_text := ( + SELECT to_tsvector(string_agg(value, ' ')) + FROM json_each_text(row_to_json(NEW.*)) + WHERE key NOT LIKE '\_%'); + RETURN NEW; + END; +$body$ LANGUAGE plpgsql; +ALTER FUNCTION populate_full_text_trigger() OWNER TO "postgres"; + +-- migrate existing tables that don't have full text trigger applied +DO $body$ + BEGIN + EXECUTE coalesce( + (SELECT string_agg( + 'CREATE TRIGGER zfulltext BEFORE INSERT OR UPDATE ON ' || + quote_ident(relname) || ' FOR EACH ROW EXECUTE PROCEDURE ' || + 'populate_full_text_trigger();', ' ') + FROM pg_class + LEFT OUTER JOIN pg_trigger AS t + ON t.tgrelid = relname::regclass AND t.tgname = 'zfulltext' + WHERE relkind = 'r'::"char" AND t.tgname IS NULL + AND relnamespace = ( + SELECT oid FROM pg_namespace WHERE nspname='public')), + 'SELECT 1;'); + END; +$body$; diff --git a/db/migration/upgrade_databases.sh b/db/migration/upgrade_databases.sh new file mode 100755 index 0000000..74b1d59 --- /dev/null +++ b/db/migration/upgrade_databases.sh @@ -0,0 +1,168 @@ +#!/bin/bash + +COMPOSE_FILES=$1 +CKAN_DB_NAME=$2 +CKAN_DB_USERNAME=$3 +DB_USERNAME=$4 +DATASTORE_DB_NAME=$5 +DATSTORE_DB_USERNAME=$6 + +CKAN_BACKUP_FILE="ckan.dump" +DATASTORE_BACKUP_FILE="datastore.dump" +CKAN_DATA_BACKUP_FILE="ckan_data.tar.gz" +CKAN_SERVICE="ckan" +DB_SERVICE="db" +DATASTORE_SERVICE="datastore-db" +CKAN_CONFIG_PATH="/etc/ckan/ckan.ini" + + + +if [ ! -f $CKAN_BACKUP_FILE ]; then + echo "" + echo "### CKAN backup file not found." + echo "" + exit 1 +fi + +if [ ! -f $DATASTORE_BACKUP_FILE ]; then + echo "" + echo "### Datastore backup file not found." + echo "" + exit 1 +fi + +if [ ! -f $CKAN_DATA_BACKUP_FILE ]; then + echo "" + echo "### CKAN data backup file not found." + echo "" + exit 1 +fi + +reset_database() { + local db_name=$1 + local service_name=$2 + local db_username=$3 + + echo "" + echo "### Disconnecting users from database ${db_name} on service ${service_name}..." + echo "" + docker-compose ${COMPOSE_FILES} exec -T ${service_name} psql -U ${db_username} -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname='${db_name}';" + +} + +reset_database ${CKAN_DB_NAME} ${DB_SERVICE} ${CKAN_DB_USERNAME} +reset_database ${DATASTORE_DB_NAME} ${DATASTORE_SERVICE} ${DATSTORE_DB_USERNAME} + +echo "" +echo "### ROLE and DATABASE for datapusher_jobs created in Datastore database." + +echo "" +echo "### Restoring the CKAN DB from backup..." +echo "" + +docker-compose ${COMPOSE_FILES} exec -T ${DB_SERVICE} pg_restore -U postgres --verbose --create --clean --if-exists -d postgres < ${CKAN_BACKUP_FILE} + +echo "" +echo "### Restoring CKAN DB from backup completed." + +echo "" +echo "### Restoring the Datastore DB from backup..." +echo "" + +docker-compose ${COMPOSE_FILES} exec -T ${DATASTORE_SERVICE} pg_restore -U postgres --verbose --create --clean --if-exists -d postgres < ${DATASTORE_BACKUP_FILE} + +echo "" +echo "### Restoring Datastore DB from backup completed." + +echo "" +echo "### Create ROLE and DATABASE for datapusher_jobs in Datastore database..." +echo "" + +docker-compose ${COMPOSE_FILES} exec -T ${DATASTORE_SERVICE} psql -U ${DB_USERNAME} -c "CREATE ROLE datapusher_jobs WITH LOGIN PASSWORD '123456';" +docker-compose ${COMPOSE_FILES} exec -T ${DATASTORE_SERVICE} psql -U ${DB_USERNAME} -c "CREATE DATABASE datapusher_jobs OWNER datapusher_jobs ENCODING 'utf8';" + +echo "" +echo "### Restoring data files to CKAN..." 
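+# The filestore backup (${CKAN_DATA_BACKUP_FILE}) is copied into the running ckan
+# container and unpacked into CKAN_STORAGE_PATH (/var/lib/ckan/data), then re-owned
+# by the ckan user so previously uploaded resources remain available after the upgrade.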
+echo "" + +docker cp ${CKAN_DATA_BACKUP_FILE} $(docker-compose ${COMPOSE_FILES} ps -q ckan):/tmp/ckan_data.tar.gz +docker-compose ${COMPOSE_FILES} exec -T ${CKAN_SERVICE} bash -c "tar -xzf /tmp/ckan_data.tar.gz -C /tmp/ && cp -r /tmp/data/* /var/lib/ckan/data/ && chown -R ckan:ckan /var/lib/ckan/data" + +echo "" +echo "### Data files restored to CKAN." + +echo "" +echo "### Running CKAN migration scripts..." +echo "" + +docker-compose ${COMPOSE_FILES} exec -T ${CKAN_SERVICE} ckan -c ${CKAN_CONFIG_PATH} db upgrade + +echo "" +echo "### CKAN migration scripts completed." + +echo "" +echo "### Rebuilding CKAN search index..." +echo "" + +docker-compose ${COMPOSE_FILES} exec -T ${CKAN_SERVICE} ckan -c ${CKAN_CONFIG_PATH} search-index rebuild + +echo "" +echo "### CKAN search index rebuilt." + +echo "" +echo "### Create a sysadmin datapusher user in CKAN..." +echo "" + +RANDOM_PASSWORD=$(tr '" +echo "2. Copy the API token below and run 'make secret' (paste it at step 13)" +echo " API key: $CKAN_API_KEY" +echo "3. Start the containers again: 'make start O='" +echo "" +echo "#################################################" +echo "" diff --git a/docker-compose.yaml b/docker-compose.yaml index e762d1f..d5b83ba 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,15 +1,17 @@ -version: '3.2' - services: - proxy: image: traefik:1.7.2-alpine restart: always volumes: - - ./traefik/traefik.toml:/traefik.toml + - ./traefik/traefik.toml.template:/traefik.toml.template + #- ./traefik/traefik.dev.toml:/traefik.dev.toml # Uncomment for development - ./traefik/acme.json:/acme.json + - ./cca-operator/templater.sh:/templater.sh + - ./docker-compose/traefik-secrets.sh:/traefik-secrets.sh + - ./traefik/entrypoint.sh:/entrypoint.sh networks: - - ckan-multi + - ckan-multi + entrypoint: ["/bin/sh", "-c", "/entrypoint.sh"] varnish: image: million12/varnish @@ -20,27 +22,27 @@ services: expose: - "80" networks: - - ckan-multi + - ckan-multi redis: image: redis:alpine restart: always expose: - - "6379" + - "6379" networks: - - ckan-multi + - ckan-multi nginx: depends_on: - - ckan + - ckan image: viderum/ckan-cloud-docker:nginx-latest build: context: nginx restart: always expose: - - "8080" + - "8080" networks: - - ckan-multi + - ckan-multi adminer: image: adminer @@ -48,78 +50,77 @@ services: expose: - "8080" networks: - - ckan-multi + - ckan-multi jobs: depends_on: - - ckan - - nginx + - ckan + - nginx image: viderum/ckan-cloud-docker:ckan-latest - command: [/ckan-entrypoint.sh, ckan-paster, --plugin=ckan, jobs, -c, /etc/ckan/production.ini, worker] + command: [/ckan-entrypoint.sh, ckan, -c, /etc/ckan/ckan.ini, jobs, worker] restart: always volumes: - - ./docker-compose/ckan-secrets.sh:/etc/ckan-conf/secrets/secrets.sh - - ./docker-compose/ckan-conf-templates:/etc/ckan-conf/templates - - ckan-data:/var/lib/ckan + - ./docker-compose/ckan-secrets.sh:/etc/ckan-conf/secrets/secrets.sh + - ./docker-compose/ckan-conf-templates:/etc/ckan-conf/templates + - ckan-data:/var/lib/ckan environment: - - CKAN_STORAGE_PATH=/var/lib/ckan/data - - CKAN_K8S_SECRETS=/etc/ckan-conf/secrets/secrets.sh - - CKAN_K8S_TEMPLATES=/etc/ckan-conf/templates + - CKAN_STORAGE_PATH=/var/lib/ckan/data + - CKAN_K8S_SECRETS=/etc/ckan-conf/secrets/secrets.sh + - CKAN_K8S_TEMPLATES=/etc/ckan-conf/templates networks: - - ckan-multi + - ckan-multi solr: - image: viderum/ckan-cloud-docker:solr-latest - build: - context: solr - args: - SCHEMA_XML: ${SCHEMA_XML:-schemas/schema28.xml} + image: ckan/ckan-solr:${SOLR_IMAGE:-2.10-solr9} restart: 
always expose: - - "8983" + - "8983" volumes: - - solr:/opt/solr/server + - solr:/var/solr networks: - - ckan-multi + - ckan-multi datapusher: build: context: . - dockerfile: datapusher/Dockerfile + dockerfile: ${DATAPUSHER_DIRECTORY:-datapusher}/Dockerfile args: PIP_INDEX_URL: ${PIP_INDEX_URL:-https://pypi.org/simple/} expose: - - "8800" + - "8800:8800" networks: - - ckan-multi + - ckan-multi + environment: + - WRITE_ENGINE_URL=postgresql://postgres:123456@datastore-db/datastore + - SQLALCHEMY_DATABASE_URI=postgresql://postgres:123456@datastore-db/datapusher_jobs ckan: depends_on: - - redis - - solr + - redis + - solr image: viderum/ckan-cloud-docker:ckan-latest build: context: ckan args: - CKAN_BRANCH: ${CKAN_BRANCH:-ckan-2.8.1} + CKAN_BRANCH: ${CKAN_BRANCH:-ckan-2.10.4} CKAN_REPO: ${CKAN_REPO:-ckan/ckan} PIP_INDEX_URL: ${PIP_INDEX_URL:-https://pypi.org/simple/} restart: always volumes: - - ./docker-compose/ckan-secrets.sh:/etc/ckan-conf/secrets/secrets.sh - - ./docker-compose/ckan-conf-templates:/etc/ckan-conf/templates - - ckan-data:/var/lib/ckan - - ./migrate_databases.sh:/usr/lib/ckan/migrate_databases.sh - - ./migrate_filestorage.sh:/usr/lib/ckan/migrate_filestorage.sh + - ./docker-compose/ckan-secrets.sh:/etc/ckan-conf/secrets/secrets.sh + - ./docker-compose/ckan-conf-templates:/etc/ckan-conf/templates + - ckan-data:/var/lib/ckan + - ./migrate_databases.sh:/usr/lib/ckan/migrate_databases.sh + - ./migrate_filestorage.sh:/usr/lib/ckan/migrate_filestorage.sh environment: - - CKAN_STORAGE_PATH=/var/lib/ckan/data - - CKAN_K8S_SECRETS=/etc/ckan-conf/secrets/secrets.sh - - CKAN_K8S_TEMPLATES=/etc/ckan-conf/templates - - GUNICORN_WORKERS=2 + - CKAN_STORAGE_PATH=/var/lib/ckan/data + - CKAN_K8S_SECRETS=/etc/ckan-conf/secrets/secrets.sh + - CKAN_K8S_TEMPLATES=/etc/ckan-conf/templates + - GUNICORN_WORKERS=2 expose: - - "5000" + - "5000" networks: - - ckan-multi + - ckan-multi jenkins: image: viderum/ckan-cloud-docker:jenkins-latest @@ -127,14 +128,14 @@ services: context: jenkins restart: always volumes: - - ./jenkins/jobs:/var/jenkins_home/jobs - - .:/etc/ckan-cloud/ckan-cloud-docker - - /var/run/docker.sock:/var/run/docker.sock - - ./jenkins/scripts/docker_compose_cca_operator.sh:/etc/ckan-cloud/cca_operator.sh + - ./jenkins/jobs:/var/jenkins_home/jobs + - .:/etc/ckan-cloud/ckan-cloud-docker + - /var/run/docker.sock:/var/run/docker.sock + - ./jenkins/scripts/docker_compose_cca_operator.sh:/etc/ckan-cloud/cca_operator.sh ports: - - "8089:8080" + - "8089:8080" networks: - - cloud-management + - cloud-management cca-operator: image: viderum/ckan-cloud-docker:cca-operator-latest @@ -143,45 +144,45 @@ services: command: ./server.sh restart: always volumes: - - /etc/ckan-cloud:/etc/ckan-cloud + - /etc/ckan-cloud:/etc/ckan-cloud ports: - - "8022:22" + - "8022:22" networks: - - cloud-management - - ckan-multi + - cloud-management + - ckan-multi provisioning-api-db: image: postgres restart: always ports: - - "5439:5432" + - "5439:5432" env_file: - - docker-compose/provisioning-api-db-secrets.sh + - docker-compose/provisioning-api-db-secrets.sh volumes: - - provisioning-api-db:/var/lib/postgresql/data + - provisioning-api-db:/var/lib/postgresql/data networks: - - cloud-management + - cloud-management provisioning-api: depends_on: - - provisioning-api-db - - cca-operator + - provisioning-api-db + - cca-operator image: viderum/ckan-cloud-provisioning-api:latest restart: always env_file: - - docker-compose/provisioning-api-secrets.sh + - docker-compose/provisioning-api-secrets.sh environment: - - 
INSTANCE_MANAGER=root@cca-operator - - PRIVATE_SSH_KEY - - PRIVATE_KEY - - PUBLIC_KEY - - GITHUB_KEY - - GITHUB_SECRET - - EXTERNAL_ADDRESS=http://localhost:8092 + - INSTANCE_MANAGER=root@cca-operator + - PRIVATE_SSH_KEY + - PRIVATE_KEY + - PUBLIC_KEY + - GITHUB_KEY + - GITHUB_SECRET + - EXTERNAL_ADDRESS=http://localhost:8092 ports: - - "8092:8000" + - "8092:8000" networks: - - cloud-management + - cloud-management volumes: ckan-data: diff --git a/docker-compose/ckan-conf-templates/vital-strategies-theme-production.ini.template b/docker-compose/ckan-conf-templates/vital-strategies-theme-ckan.ini.template similarity index 80% rename from docker-compose/ckan-conf-templates/vital-strategies-theme-production.ini.template rename to docker-compose/ckan-conf-templates/vital-strategies-theme-ckan.ini.template index ecda824..ddd70cf 100644 --- a/docker-compose/ckan-conf-templates/vital-strategies-theme-production.ini.template +++ b/docker-compose/ckan-conf-templates/vital-strategies-theme-ckan.ini.template @@ -22,6 +22,9 @@ who.log_file = %(cache_dir)s/who_log.ini # Inactive by default, so the session doesn't expire. # who.timeout = 86400 +ckan.datapusher.api_token = {{CKAN_DATAPUSHER_API_TOKEN}} +ckan.datastore.sqlsearch.enabled = true + ## Database Settings sqlalchemy.url = {{SQLALCHEMY_URL}} @@ -38,7 +41,7 @@ ckan.datastore.default_fts_index_method = gist ckan.site_id = vital-strategies ckan.site_url = {{CKAN_SITE_URL}} #ckan.use_pylons_response_cleanup_middleware = true -cache_dir = /tmp/%(ckan.site_id)s/ +cache_dir = /tmp/%(ckan.site_id)s ## Authorization Settings @@ -79,18 +82,11 @@ ckan.redis.url = {{CKAN_REDIS_URL}} # Add ``resource_proxy`` to enable resorce proxying and get around the # same origin policy -ckan.plugins = image_view - text_view - recline_view - datastore - datapusher - resource_proxy - geojson_view - querytool - stats - sentry - s3filestore - googleanalytics +ckan.plugins = image_view text_view recline_view datastore datapusher resource_proxy geojson_view querytool + +# sentry +# s3filestore +# googleanalytics # Define which views should be created by default # (plugins must be loaded in ckan.plugins) @@ -107,8 +103,8 @@ ckan.views.default_views = image_view recline_view geojson_view ## Front-End Settings # Uncomment following configuration to enable using of Bootstrap 2 -ckan.base_public_folder = public-bs2 -ckan.base_templates_folder = templates-bs2 +#ckan.base_public_folder = public-bs2 +#ckan.base_templates_folder = templates-bs2 ckan.site_title = Data Platform ckan.site_logo = /base/images/ckan-logo.png @@ -121,10 +117,10 @@ ckan.display_timezone = Etc/UTC ## Feeds Settings -ckan.feeds.authority_name = -ckan.feeds.date = -ckan.feeds.author_name = -ckan.feeds.author_link = +#ckan.feeds.authority_name = +#ckan.feeds.date = +#ckan.feeds.author_name = +#ckan.feeds.author_link = ## Storage Settings @@ -137,9 +133,9 @@ ckan.datapusher.url = {{CKAN_DATAPUSHER_URL}} ## AWS S3 settings -# ckanext.cloudstorage.driver = S3_US_EAST2 -# ckanext.cloudstorage.container_name = vital-strategies -# ckanext.cloudstorage.driver_options = {"key": "{{AWS_ACCESS_KEY_ID}}", "secret": "{{AWS_SECRET_ACCESS_KEY}}"} +ckanext.cloudstorage.driver = S3_US_EAST2 +ckanext.cloudstorage.container_name = vital-strategies +ckanext.cloudstorage.driver_options = {"key": "{{AWS_ACCESS_KEY_ID}}", "secret": "{{AWS_SECRET_ACCESS_KEY}}"} ckanext.s3filestore.host_name = https://vital-strategies-ckan.s3.us-east-2.amazonaws.com ckanext.s3filestore.aws_storage_path = demo ckanext.s3filestore.aws_access_key_id = 
{{AWS_ACCESS_KEY_ID}} @@ -150,13 +146,17 @@ ckanext.s3filestore.signature_version = s3v4 ## Query Tool Settings -ckanext.querytool.groups = mortality:Mortality,riskfactors:Risk Factors,birth:Births,cancer:Cancer,demo:General -ckanext.querytool.map_osm_url = https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_nolabels/{z}/{x}/{y}{r}.png -ckanext.querytool.map_osm_attribute = © OpenStreetMap © CartoDB +# ckanext.querytool.groups = mortality:Mortality,riskfactors:Risk Factors,birth:Births,cancer:Cancer,demo:General +# ckanext.querytool.map_osm_url = https://cartodb-basemaps-{s}.global.ssl.fastly.net/light_nolabels/{z}/{x}/{y}{r}.png +# ckanext.querytool.map_osm_attribute = © OpenStreetMap © CartoDB ckanext.querytool.allow_nav_bar = True ## Google Analytics +ckanext.gtm.gtm_id = {{GTM_ID}} + +# OR + googleanalytics.id = {{GA_ID}} googleanalytics.account = {{GA_ACCOUNT}} googleanalytics.username = {{GA_USERNAME}} @@ -165,7 +165,7 @@ googleanalytics.password = {{GA_PASSWORD}} ## Language settings # VITALS -ckan.locales_offered=en es zh_CN fr km pt_BR +ckan.locales_offered=en es zh_Hans_CN fr km pt_BR # Shanghai default ckan.locale_default=en @@ -186,7 +186,7 @@ ckanext.xloader.jobs_db.uri = postgresql://postgres:123456@jobs-db/postgres #ckan.activity_list_limit = 31 #ckan.activity_streams_email_notifications = true #ckan.email_notifications_since = 2 days -ckan.hide_activity_from_users = %(ckan.site_id)s +#ckan.hide_activity_from_users = %(ckan.site_id)s ## Email settings @@ -197,7 +197,7 @@ smtp.server = {{SMTP_SERVER}} smtp.starttls = True smtp.user = {{SMTP_USER}} smtp.password = {{SMTP_PASSWORD}} -smtp.mail_from = vitalplatform@vitalstrategies.org +smtp.mail_from = {{SMTP_MAIL_FROM}} ### @@ -211,7 +211,7 @@ ckan.harvest.mq.hostname = redis ckan.harvest.mq.redis_db = 9 ## Sentry settings -sentry_dsn = {{SENTRY_DSN}} +ckan.sentry.dsn = {{SENTRY_DSN}} ckan.sentry.configure_logging = True ckan.sentry.log_level = ERROR diff --git a/docker-compose/ckan-secrets.dat b/docker-compose/ckan-secrets.dat index 1df40d3..18b76a3 100644 --- a/docker-compose/ckan-secrets.dat +++ b/docker-compose/ckan-secrets.dat @@ -10,16 +10,20 @@ ckan required APP_INSTANCE_UUID 1b05eb54-743a-40a7-8e31-f2c5ff69c0cb Enter Appli ckan required SOLR_URL http://solr:8983/solr/ckan Enter SOLR connection string ckan required CKAN_REDIS_URL redis://redis:6379/1 Enter Redis URL ckan required CKAN_DATAPUSHER_URL http://datapusher:8800/ Enter Datapusher URL +ckan required CKAN_DATAPUSHER_API_TOKEN xxxxxxxx Enter Datapusher API token ckan required SMTP_SERVER mail.example.com Enter SMTP server address ckan required SMTP_USER info Enter SMTP server username ckan optional SMTP_PASSWORD empty Enter SMTP server password ckan optional SMTP_MAIL_FROM empty Enter SMTP mail from ckan required CKAN_SITE_URL http://ckan:5000 Enter Website URL (including https:// or http://) -ckan optional AWS_ACCESS_KEY_ID empty Enter AWS secret key (if any) -ckan optional AWS_SECRET_ACCESS_KEY empty AWS secret access key (if any) -ckan optional SENTRY_DSN https://@sentry.io/ Enter Sentry DSN URL with token and ID -ckan optional GA_ID empty Enter Google Analytics ID -ckan optional GA_ACCOUNT empty Enter Google Analytics account name -ckan optional GA_PASSWORD empty Enter Google Analytics password -ckan optional GA_USERNAME empty Enter Google Analytics username -datapusher optional TIMEZONE UTC Enter Datapusher timezone +ckan optional AWS_ACCESS_KEY_ID empty Enter AWS secret key [Skip if not using AWS] +ckan optional AWS_SECRET_ACCESS_KEY empty AWS secret 
access key [Skip if not using AWS]
+ckan optional SENTRY_DSN https://@sentry.io/ Enter Sentry DSN URL with token and ID [Skip if not using Sentry]
+ckan optional GTM_ID empty Enter Google Tag Manager ID [Skip if not using the new Google Analytics (GTM) or not using Google Analytics at all]
+ckan optional GA_ID empty Enter Google Analytics ID [Skip if using GTM or not using Google Analytics at all]
+ckan optional GA_ACCOUNT empty Enter Google Analytics account name [Skip if using GTM or not using Google Analytics at all]
+ckan optional GA_PASSWORD empty Enter Google Analytics password [Skip if using GTM or not using Google Analytics at all]
+ckan optional GA_USERNAME empty Enter Google Analytics username [Skip if using GTM or not using Google Analytics at all]
+datapusher optional TIMEZONE UTC Enter Datapusher timezone
+traefik optional CERTIFICATE_EMAIL empty Enter email address for Let's Encrypt certificate [Skip if using self-signed certificate]
+traefik optional CERTIFICATE_DOMAIN empty Enter domain for Let's Encrypt certificate [Skip if using self-signed certificate]
diff --git a/solr/schemas/schema210.xml b/solr/schemas/schema210.xml
new file mode 100644
index 0000000..9b90496
--- /dev/null
+++ b/solr/schemas/schema210.xml
@@ -0,0 +1,203 @@
+[Solr 9 schema for CKAN 2.10 (schema210.xml, 203 lines); XML markup omitted here; uniqueKey field: index_id]
diff --git a/traefik/entrypoint.sh b/traefik/entrypoint.sh
new file mode 100755
index 0000000..b34e2cb
--- /dev/null
+++ b/traefik/entrypoint.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+set -e
+
+# Update the Traefik configuration file with secrets if not development mode
+if [ -f /traefik.dev.toml ]; then
+  echo "Using development configuration"
+  cp /traefik.dev.toml /traefik.toml
+else
+  if [ ! -f /traefik.toml.template ]; then
+    echo "Traefik template file does not exist, exiting"
+    exit 1
+  fi
+  if [ ! -f /traefik-secrets.sh ]; then
+    echo "Traefik secrets file does not exist. Please run 'make secret' to generate it before starting the container"
+    exit 1
+  fi
+  if [ !
-f /templater.sh ]; then + echo "Templater script does not exist, exiting" + exit 1 + fi + + echo "Traefik configuration file does not exist, templating" + + chmod +x /templater.sh + ./templater.sh /traefik.toml.template -f /traefik-secrets.sh > traefik.toml +fi + +# Fix acme.json file permissions: set to 600 +chmod 600 /acme.json + +# first arg is `-f` or `--some-option` +if [ "${1#-}" != "$1" ]; then + set -- traefik "$@" +fi + +# if our command is a valid Traefik subcommand, let's invoke it through Traefik instead +# (this allows for "docker run traefik version", etc) +if traefik "$1" --help | grep -s -q "help"; then + set -- traefik "$@" +fi + +exec "$@" diff --git a/traefik/traefik.toml.template b/traefik/traefik.toml.template new file mode 100644 index 0000000..3d36de4 --- /dev/null +++ b/traefik/traefik.toml.template @@ -0,0 +1,49 @@ +debug = false +defaultEntryPoints = ["http", "https"] + +[entryPoints] + [entryPoints.http] + address = ":80" + + [entryPoints.https] + address = ":443" + [entryPoints.https.tls] + [entryPoints.api] + address = ":8081" + + [api] + entryPoint = "api" + + [ping] + entryPoint = "http" + + [acme] + email = "{{CERTIFICATE_EMAIL}}" + storage = "/acme.json" + entryPoint = "https" + onHostRule = true + + [[acme.domains]] + main = "{{CERTIFICATE_DOMAIN}}" + + [acme.httpChallenge] + entryPoint = "http" + + [accessLog] + + [file] + watch = true + + [backends] + [backends.ckan] + [backends.ckan.servers.server1] + url = "http://nginx:8080" + + [frontends] + [frontends.ckan] + backend="ckan" + passHostHeader = true + [frontends.ckan.headers] + SSLRedirect = true + [frontends.ckan.routes.route1] + rule = "Host:{{CERTIFICATE_DOMAIN}}"
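
The traefik entrypoint above renders traefik.toml by substituting the {{CERTIFICATE_EMAIL}} and {{CERTIFICATE_DOMAIN}} tokens from /traefik-secrets.sh, which is mounted from docker-compose/traefik-secrets.sh and normally produced by 'make secret' using the CERTIFICATE_EMAIL and CERTIFICATE_DOMAIN entries in ckan-secrets.dat. As a rough illustration only, and assuming templater.sh reads plain VAR=value shell assignments (the exact format is defined by templater.sh itself), a hand-written secrets file could look like the sketch below; the email and domain values are placeholders, not project defaults.

    #!/bin/sh
    # Hypothetical docker-compose/traefik-secrets.sh, consumed by templater.sh to fill
    # the {{CERTIFICATE_EMAIL}} and {{CERTIFICATE_DOMAIN}} tokens in traefik.toml.template.
    CERTIFICATE_EMAIL=ops@example.org       # Let's Encrypt registration/expiry notification address
    CERTIFICATE_DOMAIN=ckan.example.org     # public hostname Traefik requests a certificate for

On container start, these values end up in the [acme] section (email and the acme.domains main entry) and in the ckan frontend Host rule of the generated traefik.toml.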