Skip to content

Commit

Permalink
fix: nf workflows with mets server
Browse files (browse the repository at this point in the history)
  • Loading branch information
MehmedGIT committed Dec 4, 2024
1 parent 07d276b commit 52d718e
Show file tree
Hide file tree
Showing 21 changed files with 272 additions and 510 deletions.
48 changes: 16 additions & 32 deletions src/utils/operandi_utils/hpc/nextflow_workflows/default_workflow.nf
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,18 +75,16 @@ process ocrd_cis_ocropy_binarize_0 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
"""
}

Expand All @@ -99,18 +97,16 @@ process ocrd_anybaseocr_crop_1 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
"""
}

Expand All @@ -123,18 +119,16 @@ process ocrd_skimage_binarize_2 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}'
${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}'
"""
}

Expand All @@ -147,18 +141,16 @@ process ocrd_skimage_denoise_3 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
"""
}

Expand All @@ -171,18 +163,16 @@ process ocrd_tesserocr_deskew_4 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}'
${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}'
"""
}

Expand All @@ -195,18 +185,16 @@ process ocrd_cis_ocropy_segment_5 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
"""
}

Expand All @@ -219,18 +207,16 @@ process ocrd_cis_ocropy_dewarp_6 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
"""
}

Expand All @@ -243,18 +229,16 @@ process ocrd_calamari_recognize_7 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}'
${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}'
"""
}

Expand All @@ -279,13 +263,13 @@ workflow {
main:
ch_range_multipliers = Channel.of(0..params.forks.intValue()-1)
split_page_ranges(ch_range_multipliers)
ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN")
ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP")
ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2")
ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP")
ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN")
ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP")
ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2")
ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP")
ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
merging_mets(ocrd_calamari_recognize_7.out[0], ocrd_calamari_recognize_7.out[1])
}
Original file line number | Diff line number | Diff line change
Expand Up @@ -75,18 +75,16 @@ process ocrd_cis_ocropy_binarize_0 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
${params.env_wrapper_cmd_step0} ocrd-cis-ocropy-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
"""
}

Expand All @@ -99,18 +97,16 @@ process ocrd_anybaseocr_crop_1 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
${params.env_wrapper_cmd_step1} ocrd-anybaseocr-crop -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
"""
}

Expand All @@ -123,18 +119,16 @@ process ocrd_skimage_binarize_2 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}'
${params.env_wrapper_cmd_step2} ocrd-skimage-binarize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"method": "li"}'
"""
}

Expand All @@ -147,18 +141,16 @@ process ocrd_skimage_denoise_3 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
${params.env_wrapper_cmd_step3} ocrd-skimage-denoise -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
"""
}

Expand All @@ -171,18 +163,16 @@ process ocrd_tesserocr_deskew_4 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}'
${params.env_wrapper_cmd_step4} ocrd-tesserocr-deskew -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"operation_level": "page"}'
"""
}

Expand All @@ -195,18 +185,16 @@ process ocrd_cis_ocropy_segment_5 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
${params.env_wrapper_cmd_step5} ocrd-cis-ocropy-segment -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"level-of-operation": "page"}'
"""
}

Expand All @@ -219,18 +207,16 @@ process ocrd_cis_ocropy_dewarp_6 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
${params.env_wrapper_cmd_step6} ocrd-cis-ocropy-dewarp -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group}
"""
}

Expand All @@ -243,31 +229,29 @@ process ocrd_calamari_recognize_7 {
input:
val mets_path
val page_range
val workspace_dir
val input_group
val output_group

output:
val mets_path
val page_range
val workspace_dir

script:
"""
${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${mets_socket_path} -w ${workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}'
${params.env_wrapper_cmd_step7} ocrd-calamari-recognize -U ${params.mets_socket_path} -w ${params.workspace_dir} -m ${mets_path} --page-id ${page_range} -I ${input_group} -O ${output_group} -p '{"checkpoint_dir": "qurator-gt4histocr-1.0"}'
"""
}

// Entry workflow: builds a channel of fork indices, hands it to split_page_ranges
// (defined outside this view), then chains processor steps 0..7 so that each step
// consumes the previous step's out[0] and out[1].
// NOTE(review): this listing is a rendered diff whose +/- markers were lost, so every
// step call appears twice below. The first set threads workspace_dir through as a third
// channel argument; the second set omits it. Presumably the first set are the removed
// lines and the second set the added ones; only one set can exist in the real file.
// Confirm against the repository before reusing this text as code.
workflow {
main:
// One channel item per fork: 0 .. params.forks - 1.
ch_range_multipliers = Channel.of(0..params.forks.intValue()-1)
split_page_ranges(ch_range_multipliers)
// Variant passing workspace_dir explicitly (params.workspace_dir first, then each step's out[2]).
ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.workspace_dir, params.input_file_group, "OCR-D-BIN")
ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], ocrd_cis_ocropy_binarize_0.out[2], "OCR-D-BIN", "OCR-D-CROP")
ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], ocrd_anybaseocr_crop_1.out[2], "OCR-D-CROP", "OCR-D-BIN2")
ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], ocrd_skimage_binarize_2.out[2], "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], ocrd_skimage_denoise_3.out[2], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], ocrd_tesserocr_deskew_4.out[2], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], ocrd_cis_ocropy_segment_5.out[2], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP")
ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], ocrd_cis_ocropy_dewarp_6.out[2], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
// Variant without the workspace_dir argument: only out[0] and out[1] are forwarded,
// followed by the input and output file group names.
ocrd_cis_ocropy_binarize_0(split_page_ranges.out[0], split_page_ranges.out[1], params.input_file_group, "OCR-D-BIN")
ocrd_anybaseocr_crop_1(ocrd_cis_ocropy_binarize_0.out[0], ocrd_cis_ocropy_binarize_0.out[1], "OCR-D-BIN", "OCR-D-CROP")
ocrd_skimage_binarize_2(ocrd_anybaseocr_crop_1.out[0], ocrd_anybaseocr_crop_1.out[1], "OCR-D-CROP", "OCR-D-BIN2")
ocrd_skimage_denoise_3(ocrd_skimage_binarize_2.out[0], ocrd_skimage_binarize_2.out[1], "OCR-D-BIN2", "OCR-D-BIN-DENOISE")
ocrd_tesserocr_deskew_4(ocrd_skimage_denoise_3.out[0], ocrd_skimage_denoise_3.out[1], "OCR-D-BIN-DENOISE", "OCR-D-BIN-DENOISE-DESKEW")
ocrd_cis_ocropy_segment_5(ocrd_tesserocr_deskew_4.out[0], ocrd_tesserocr_deskew_4.out[1], "OCR-D-BIN-DENOISE-DESKEW", "OCR-D-SEG")
ocrd_cis_ocropy_dewarp_6(ocrd_cis_ocropy_segment_5.out[0], ocrd_cis_ocropy_segment_5.out[1], "OCR-D-SEG", "OCR-D-SEG-LINE-RESEG-DEWARP")
ocrd_calamari_recognize_7(ocrd_cis_ocropy_dewarp_6.out[0], ocrd_cis_ocropy_dewarp_6.out[1], "OCR-D-SEG-LINE-RESEG-DEWARP", "OCR-D-OCR")
}
Loading

0 comments on commit 52d718e

Please sign in to comment.