From 9d411a5565d43e00aec9e14ba723b21ad9377fb2 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 2 May 2024 13:57:19 -0700 Subject: [PATCH 01/14] Add setup_process_afterscript method --- config/methods/common_methods.config | 56 ++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/config/methods/common_methods.config b/config/methods/common_methods.config index ce8770a..7fbb6c2 100644 --- a/config/methods/common_methods.config +++ b/config/methods/common_methods.config @@ -391,6 +391,62 @@ methods { process.containerOptions = {-> (task.containsKey('cpus')) ? "--cpu-shares ${default_cpu_shares} --cpus ${task.cpus}" : "--cpu-shares ${default_cpu_shares}"} } + /** + * Configure all processes to save their command files in the output + * directory. + * + * This add a custom process directive that, when used as the afterScript, + * will copy all of the process's .command.* files into the output + * directory. + + * Processes can customize the output directory by setting + * `process.ext.processlogdir_suffix` (can be a closure). + * + * Inspired by https://github.com/nextflow-io/nextflow/issues/1166#issuecomment-502467562 + */ + setup_process_afterscript = { + process.ext.commonAfterScript = { + process_log_dir = [ + "${params.log_output_dir}", + "process-log", + "${task.process.replace(':', '/')}" + ].join("/") + + // Handle relative paths + if (process_log_dir.substring(0, 1) != "/") { + process_log_dir = "${launchDir}/${process_log_dir}" + } + + // Allow each process to append a suffix + if (task.ext.containsKey("processlogdir_suffix")) { + process_log_dir += task.ext.processlogdir_suffix + } + + return [ + "mkdir -p \"${process_log_dir}\"", + "for filename in .command.*; do", + " cp \"\${filename}\" \"${process_log_dir}/log\${filename}\"", + "done" + ].join("\n") + } + + /* + Set the default afterScript. If individual processes override + afterScript, they can restore this functionality like so: + + afterScript { + [ + "echo 'Before the common'", + task.ext.commonAfterScript, + "echo 'After the common'" + ].join("\n") + } + */ + + process.afterScript = process.ext.commonAfterScript + } + + /** * Resolve the absolute path of a file relative to the current config file. */ From 0f3fc31179ecf620bad30d565e71517e604b0ff6 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 2 May 2024 14:00:26 -0700 Subject: [PATCH 02/14] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f5fdd2..250749b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Functional testing framework with concrete tests for `methods.set_env()` - Functional test for `bam_parser.parse_bam_header()`. - Dump parameters with `json_extractor.store_params_json()` +- Function to save process logs, even after failure ### Fixed - Fixed retry for potentially undefined variable `proc_name_keys`. #57 From 42d7aa7864d20a4e6ed502a037297b5e49c617d0 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 2 May 2024 15:44:08 -0700 Subject: [PATCH 03/14] Try using multistring approach --- config/methods/common_methods.config | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/config/methods/common_methods.config b/config/methods/common_methods.config index 7fbb6c2..76b57e5 100644 --- a/config/methods/common_methods.config +++ b/config/methods/common_methods.config @@ -422,12 +422,12 @@ methods { process_log_dir += task.ext.processlogdir_suffix } - return [ - "mkdir -p \"${process_log_dir}\"", - "for filename in .command.*; do", - " cp \"\${filename}\" \"${process_log_dir}/log\${filename}\"", - "done" - ].join("\n") + return """\ + mkdir -p "${process_log_dir}" + for filename in .command.*; do + cp "\${filename}" "${process_log_dir}/log\${filename}" + done + """.stripIndent() } /* From cb5618e46955b3e6ade63f585da19dbabefd4d56 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 2 May 2024 16:11:52 -0700 Subject: [PATCH 04/14] Simplify variables --- config/methods/common_methods.config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/config/methods/common_methods.config b/config/methods/common_methods.config index 76b57e5..5148202 100644 --- a/config/methods/common_methods.config +++ b/config/methods/common_methods.config @@ -400,16 +400,21 @@ methods { * directory. * Processes can customize the output directory by setting - * `process.ext.processlogdir_suffix` (can be a closure). + * `process.ext.log_dir` and `process.ext.log_dir_suffix`. Both may be + * closures. * * Inspired by https://github.com/nextflow-io/nextflow/issues/1166#issuecomment-502467562 */ setup_process_afterscript = { + process.ext.log_dir = { + "${task.process.replace(':', '/')}" + } + process.ext.commonAfterScript = { process_log_dir = [ "${params.log_output_dir}", "process-log", - "${task.process.replace(':', '/')}" + "${task.ext.log_dir}${task.ext.log_dir_suffix ?: ''}" ].join("/") // Handle relative paths @@ -417,11 +422,6 @@ methods { process_log_dir = "${launchDir}/${process_log_dir}" } - // Allow each process to append a suffix - if (task.ext.containsKey("processlogdir_suffix")) { - process_log_dir += task.ext.processlogdir_suffix - } - return """\ mkdir -p "${process_log_dir}" for filename in .command.*; do From 133c0be292421cc06f224d37d513ddaaf403c2ac Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Thu, 2 May 2024 14:48:03 -0700 Subject: [PATCH 05/14] Add documentation --- config/methods/README.md | 61 ++++++++++++++++++++++++++++ config/methods/common_methods.config | 5 ++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/config/methods/README.md b/config/methods/README.md index 918bced..6baf2a4 100644 --- a/config/methods/README.md +++ b/config/methods/README.md @@ -151,6 +151,67 @@ methods { } ``` +### Capture process outputs, even after failures +`methods.setup_process_afterscript` is a drop-in replacement for this method of capturing process log files: + +```Nextflow +// Old technique +process xxx { + publishDir path: "${params.log_output_dir}/process-log", + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.replace(':', '/')}-${sample_id}/log${file(it).getName()}" } + + output: + path(".command.*") +``` + +The above method does not produce any output files if the process fails. In order to always capture the log files, use the following: + +```Nextflow +includeConfig "/path/to/common_methods.config" +... +methods { + ... + methods.setup_process_afterscript() +} +``` + +> [!NOTE] +> `methods.setup_process_afterscript()` duplicates the effect of defining `publishDir` and `output: path(".command.*")` for every process. Although there is no harm in using both techniques simultaneously, for clarity it is recommended to only use one. + +The output path is controlled by the following two [custom process directives](https://www.nextflow.io/docs/latest/process.html#ext): + +| Name | Default Value | +| --- | --- | +| `process.ext.log_dir` | `task.process.replace(':', '/')` | +| `process.ext.log_dir_suffix` | `''` | + +Each of the above can be overridden by individual processes. + +The final directory path is `${params.log_output_dir}/process-log/${task.ext.log_dir}${task.ext.log_dir_suffix}`. + +#### Combining with `afterScript` + +> [!IMPORTANT] +> This method sets `process.afterScript`. If you define another `afterScript` (either globally or for a specific process), you must use this technique to combine both scripts. + +The `afterScript` closure established by `methods.setup_process_afterscript()` is also stored as `process.ext.commonAfterScript`. If a process requires another `afterScript`, it should be combined with the one defined here like so: + +```Nextflow +process xxx { + afterScript { + [ + "echo 'Before the common script'", + task.ext.commonAfterScript ?: "", + "echo 'After the common script'" + ].join("\n") + } +``` + +Due to the [Elvis operator](https://groovy-lang.org/operators.html#_elvis_operator), the above snippet is safe to use even if `methods.setup_process_afterscript()` is not used. + + ## References 1. `nf-core` - https://nf-co.re/ 2. `nf-code modules` - https://github.com/nf-core/sarek/blob/ad2b34f39fead34d7a09051e67506229e827e892/conf/modules.config diff --git a/config/methods/common_methods.config b/config/methods/common_methods.config index 5148202..6449883 100644 --- a/config/methods/common_methods.config +++ b/config/methods/common_methods.config @@ -432,12 +432,13 @@ methods { /* Set the default afterScript. If individual processes override - afterScript, they can restore this functionality like so: + afterScript, they can restore this functionality like so (this is safe + to include even if setup_process_afterscript is not called): afterScript { [ "echo 'Before the common'", - task.ext.commonAfterScript, + task.ext.commonAfterScript ?: "", "echo 'After the common'" ].join("\n") } From 6d1a3bd782e80016f751e8292d56e7e5920afc03 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 3 May 2024 09:02:32 -0700 Subject: [PATCH 06/14] Handle edge case when no files match --- config/methods/common_methods.config | 1 + 1 file changed, 1 insertion(+) diff --git a/config/methods/common_methods.config b/config/methods/common_methods.config index 6449883..6a7b0d8 100644 --- a/config/methods/common_methods.config +++ b/config/methods/common_methods.config @@ -425,6 +425,7 @@ methods { return """\ mkdir -p "${process_log_dir}" for filename in .command.*; do + [ -e "\${filename}" ] || continue cp "\${filename}" "${process_log_dir}/log\${filename}" done """.stripIndent() From ef3fe9e52b16aea7f3541089e531b15200ec42e1 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 3 May 2024 09:11:50 -0700 Subject: [PATCH 07/14] Use a bash variable for readability --- config/methods/common_methods.config | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/config/methods/common_methods.config b/config/methods/common_methods.config index 6a7b0d8..8c4b11d 100644 --- a/config/methods/common_methods.config +++ b/config/methods/common_methods.config @@ -423,10 +423,11 @@ methods { } return """\ - mkdir -p "${process_log_dir}" + readonly LOG_DIR="${process_log_dir}" + mkdir -p "\${LOG_DIR}" for filename in .command.*; do [ -e "\${filename}" ] || continue - cp "\${filename}" "${process_log_dir}/log\${filename}" + cp "\${filename}" "\${LOG_DIR}/log\${filename}" done """.stripIndent() } From 4c40847be95e2ee342c40dc1c2172359b2a00671 Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Fri, 3 May 2024 10:13:35 -0700 Subject: [PATCH 08/14] Add ext.capture_logs to disable per-process --- config/methods/README.md | 8 ++++++++ config/methods/common_methods.config | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/config/methods/README.md b/config/methods/README.md index 6baf2a4..f136d7d 100644 --- a/config/methods/README.md +++ b/config/methods/README.md @@ -191,6 +191,14 @@ Each of the above can be overridden by individual processes. The final directory path is `${params.log_output_dir}/process-log/${task.ext.log_dir}${task.ext.log_dir_suffix}`. +#### Disabling for individual processes + +```Nextflow +process xxx { + + ext capture_logs: false +``` + #### Combining with `afterScript` > [!IMPORTANT] diff --git a/config/methods/common_methods.config b/config/methods/common_methods.config index 8c4b11d..f3758db 100644 --- a/config/methods/common_methods.config +++ b/config/methods/common_methods.config @@ -410,7 +410,13 @@ methods { "${task.process.replace(':', '/')}" } + process.ext.capture_logs = true + process.ext.commonAfterScript = { + if (!task.ext.capture_logs) { + return "" + } + process_log_dir = [ "${params.log_output_dir}", "process-log", From 89e5ce24b3930820b6e5c9c204fcb72f1916032f Mon Sep 17 00:00:00 2001 From: Nicholas Wiltsie Date: Mon, 6 May 2024 15:20:34 -0700 Subject: [PATCH 09/14] Upgrade to latest static analysis Action --- .github/workflows/cicd-base.yaml | 24 ------------------------ .github/workflows/static-analysis.yml | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 24 deletions(-) delete mode 100644 .github/workflows/cicd-base.yaml create mode 100644 .github/workflows/static-analysis.yml diff --git a/.github/workflows/cicd-base.yaml b/.github/workflows/cicd-base.yaml deleted file mode 100644 index 910e0b2..0000000 --- a/.github/workflows/cicd-base.yaml +++ /dev/null @@ -1,24 +0,0 @@ ---- -name: CICD-base - -on: - push: - branches: - - main - pull_request: - branches: - - main -jobs: - CICD-base: - runs-on: ubuntu-latest - - timeout-minutes: 15 - - steps: - # Checkout codebase - - name: Checkout - uses: actions/checkout@v2 - - # Run CICD-base - - name: CICD-base - uses: docker://ghcr.io/uclahs-cds/cicd-base:latest diff --git a/.github/workflows/static-analysis.yml b/.github/workflows/static-analysis.yml new file mode 100644 index 0000000..608753d --- /dev/null +++ b/.github/workflows/static-analysis.yml @@ -0,0 +1,17 @@ +--- +name: CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + CICD-base: + runs-on: ubuntu-latest + + steps: + - uses: uclahs-cds/tool-static-analysis@v1 From cdd319468ce4238f7c11c219f78ccb9f404833fd Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Wed, 15 May 2024 17:55:43 -0700 Subject: [PATCH 10/14] 1. convert path to absolute in check_path 2. try catch to print out parameters failed the validation --- config/schema/schema.config | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/config/schema/schema.config b/config/schema/schema.config index a1a4f88..3e76ec0 100644 --- a/config/schema/schema.config +++ b/config/schema/schema.config @@ -56,6 +56,7 @@ schema { */ check_path = { String p, String mode -> def File file = new File(p) + file = file.absolutePath() if (mode == 'w') { schema.check_write_permission(file) return @@ -92,7 +93,7 @@ schema { /** * Check whether the required properties are defined for the corresponding parameter from the * schema yaml file. - * + * * @throws IllegalArgumentException when config file is missing required properties. */ check_required_properties = { String name, Map properties -> @@ -247,7 +248,12 @@ schema { validate = { String file_path="${projectDir}/config/schema.yaml" -> def params_schema = schema.load_schema(file_path) params_schema.each { key, val -> - schema.validate_parameter(params, key, val) + try { + schema.validate_parameter(params, key, val) + } catch (Exception ex) { + System.out.println "Failed to validate parameter key: ${val}" + throw ex + } } } From d667569f498958fd6b978b2789ac92048e55de7e Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Thu, 16 May 2024 10:44:40 -0700 Subject: [PATCH 11/14] fix absolutePath -> getAbsolutePath --- config/schema/schema.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/schema/schema.config b/config/schema/schema.config index 3e76ec0..c6fc4e5 100644 --- a/config/schema/schema.config +++ b/config/schema/schema.config @@ -56,7 +56,7 @@ schema { */ check_path = { String p, String mode -> def File file = new File(p) - file = file.absolutePath() + file = file.getAbsolutePath() if (mode == 'w') { schema.check_write_permission(file) return From fae7c33642f935c8305ee187f2cfe9634dc14423 Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Thu, 16 May 2024 11:12:51 -0700 Subject: [PATCH 12/14] File.getAbsolutePath returns a string.. --- config/schema/schema.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/schema/schema.config b/config/schema/schema.config index c6fc4e5..4536e5a 100644 --- a/config/schema/schema.config +++ b/config/schema/schema.config @@ -56,7 +56,7 @@ schema { */ check_path = { String p, String mode -> def File file = new File(p) - file = file.getAbsolutePath() + file = new File(file.getAbsolutePath()) if (mode == 'w') { schema.check_write_permission(file) return From 5227dc15ec787fa424b4895cc0b3a1926744a3fe Mon Sep 17 00:00:00 2001 From: Chenghao Zhu Date: Fri, 17 May 2024 14:34:28 -0700 Subject: [PATCH 13/14] Update config/schema/schema.config Co-authored-by: Nick Wiltsie --- config/schema/schema.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/schema/schema.config b/config/schema/schema.config index 4536e5a..a4c20b8 100644 --- a/config/schema/schema.config +++ b/config/schema/schema.config @@ -56,7 +56,7 @@ schema { */ check_path = { String p, String mode -> def File file = new File(p) - file = new File(file.getAbsolutePath()) + file = file.getAbsoluteFile() if (mode == 'w') { schema.check_write_permission(file) return From 7069230ed8c98725d54f0a9392d53ed0af1b66dd Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Fri, 17 May 2024 14:40:08 -0700 Subject: [PATCH 14/14] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3f5fdd2..1fe110a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,11 +16,13 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Fixed - Fixed retry for potentially undefined variable `proc_name_keys`. #57 +- Fix `schema.check_write_permission` when `null` is returned by `getParentFile` #59 ### Changed - Allow `set_resources_allocation` to use maximum allocation values defined in params - Add `def`s to multiple variables that should not be globally defined - Allow specific schema validation function to accept pre-loaded schema as input +- Add try-catch to schema validation to log the parameter being failed to validate. ---