
Commit

better logging, fix issue with xf files
brisvag committed Apr 24, 2024
1 parent 46dd38e commit 30b7e25
Showing 7 changed files with 85 additions and 73 deletions.
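
The diff below replaces ad-hoc rich.print calls and per-function "verbose" flags with the standard logging module, using one named logger shared across the package. A minimal sketch of that pattern (handler and level configuration is assumed to happen once at CLI startup, which is not part of this diff):

    import logging

    # every module asks for the same named logger instead of printing directly
    log = logging.getLogger("waretomo")

    def do_something(dry_run=False):
        # verbosity is decided by the logging level configured at startup,
        # so the verbose=False parameters can be dropped from the functions below
        log.info("about to do something")
        if not dry_run:
            ...
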
23 changes: 12 additions & 11 deletions src/waretomo/_aretomo.py
@@ -1,3 +1,4 @@
+import logging
import os
import shutil
import subprocess
@@ -7,7 +8,6 @@
from time import sleep

import GPUtil
-from rich import print

from ._threaded import run_threaded

@@ -44,9 +44,9 @@ def _aretomo(
reconstruct=False,
gpu_queue=None,
dry_run=False,
-verbose=False,
overwrite=False,
):
+log = logging.getLogger("waretomo")
# cwd dance is necessary cause aretomo messes up paths otherwise
# need to use os.path.relpath cause pathlib cannot handle non-subpath relative paths
# https://stackoverflow.com/questions/38083555/using-pathlibs-relative-to-for-directories-on-the-same-level
@@ -59,7 +59,7 @@
if not reconstruct:
output = output.with_stem(output.stem + "_aligned").with_suffix(".st")
# LogFile is broken, so we do it ourselves
-log = output.with_suffix(".aretomolog")
+aretomolog = output.with_suffix(".aretomolog")
with _cd(cwd):
if not overwrite and output.exists():
raise FileExistsError(output)
@@ -83,13 +83,15 @@
"VolZ": thickness_recon,
"PixSize": px_size,
"Kv": kv,
"ImgDose": dose,
"Cs": cs,
"Defoc": defocus,
"FlipVol": 1,
"WBP": 1,
}
)
+if dose:
+options["ImgDose"] = dose

else:
options.update(
{
@@ -109,10 +111,9 @@
# run aretomo with basic settings
aretomo_cmd = f"{cmd} {' '.join(f'-{k} {v}' for k, v in options.items())}"

-if verbose:
-print(aretomo_cmd)
-if not reconstruct:
-print(f'mv {xf} {warp_mdoc_basename + ".xf"}')
+log.info(aretomo_cmd)
+if not reconstruct:
+log.info(f'mv {xf} {warp_mdoc_basename + ".xf"}')

if not dry_run:
with _cd(cwd):
@@ -121,7 +122,7 @@
aretomo_cmd.split(), capture_output=True, check=False, cwd=cwd
)
finally:
-log.write_bytes(proc.stdout + proc.stderr)
+aretomolog.write_bytes(proc.stdout + proc.stderr)
if gpu_queue is not None:
gpu_queue.put(gpu)
proc.check_returncode()
@@ -137,14 +138,14 @@ def _aretomo(
def aretomo_batch(
progress, tilt_series, suffix="", label="", cmd="AreTomo", gpus=None, **kwargs
):
+log = logging.getLogger("waretomo")
if not shutil.which(cmd):
raise FileNotFoundError(f"{cmd} is not available on the system")
if gpus is None:
gpus = [gpu.id for gpu in GPUtil.getGPUs()]
if not gpus:
raise RuntimeError("you need at least one GPU to run AreTomo")
-if kwargs.get("verbose"):
-print(f"[yellow]Running AreTomo in parallel on {len(gpus)} GPUs.")
+log.info(f"Running AreTomo in parallel on {len(gpus)} GPUs.")

# use a queue to hold gpu ids to ensure we only run one job per gpu
gpu_queue = Queue()
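For context on the "cwd dance" comment near the top of _aretomo: pathlib's Path.relative_to only works when one path is nested inside the other, while os.path.relpath can express sibling paths via "..". A small illustration with hypothetical paths (not taken from this diff):

    import os
    from pathlib import Path

    output = Path("/data/out/ts_01_aligned.st")   # hypothetical output stack
    cwd = Path("/data/warp/imod/ts_01")           # hypothetical working directory

    # output.relative_to(cwd) raises ValueError: the paths are not nested
    rel = os.path.relpath(output, cwd)
    # -> '../../../out/ts_01_aligned.st', usable from inside cwd
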
14 changes: 7 additions & 7 deletions src/waretomo/_fix_mdoc.py
@@ -1,21 +1,21 @@
+import logging

import pandas as pd
from mdocfile.data_models import Mdoc

from ._threaded import run_threaded


-def _tilt_mdoc(
-mdoc_file, tlt_file, skipped_tilts, verbose=False, dry_run=False, overwrite=False
-):
+def _tilt_mdoc(mdoc_file, tlt_file, skipped_tilts, dry_run=False, overwrite=False):
output = mdoc_file.parent / "mdoc_tilted" / mdoc_file.name

if not overwrite and output.exists():
raise FileExistsError(output)

-if verbose:
-print(f"Tilting mdoc: {mdoc_file}")
-print(f"using: {tlt_file}")
-print(f"saving to {output}")
+log = logging.getLogger("waretomo")
+log.info(f"Tilting mdoc: {mdoc_file}")
+log.info(f"using: {tlt_file}")
+log.info(f"saving to {output}")

if not dry_run:
mdoc = Mdoc.from_file(mdoc_file)
33 changes: 21 additions & 12 deletions src/waretomo/_parse.py
@@ -1,13 +1,15 @@
+import logging
from pathlib import Path, PureWindowsPath
from xml.etree import ElementTree

from mdocfile.data_models import Mdoc
-from rich import print


def parse_data(
progress, warp_dir, mdoc_dir, output_dir, roi_dir, just=(), exclude=(), train=False
):
+log = logging.getLogger("waretomo")

imod_dir = warp_dir / "imod"
if not imod_dir.exists():
raise FileNotFoundError("warp directory does not have an `imod` subdirectory")
@@ -58,12 +60,10 @@ def parse_data(
valid_xml = None
for i, tilt in enumerate(tilts):
if not tilt.exists():
-print(
-f"[red]WARN: {tilt.name} is listed in an mdoc file, "
-"but the file does not exists."
-)
-print(
-"[red] The tilt will be skipped, "
+log.warning(
+f"{tilt.name} is listed in an mdoc file, "
+"but the file does not exists. "
+"The tilt will be skipped, "
"but you may want to check your data."
)
skipped_tilts.append(i)
@@ -110,17 +110,26 @@
else:
roi_file = None

+dose = mdoc.section_data[0].ExposureDose
+if not dose:
+log.error("Exposure dose not present in mdoc! Setting to 0.")
+px_size_raw = mdoc.section_data[0].PixelSpacing
+if not px_size_raw:
+log.error("Pixel spacing not present in mdoc! Setting to 1.")

# aretomo being weird about paths and names splitting needs extra care...
ts_stripped = ts_name.split(".")[0]
-alignment_result_dir = output_dir / (ts_stripped + "_Imod")
+ts_aligned = ts_stripped + "_aligned"
+alignment_result_dir = output_dir / (ts_aligned + ".st_Imod")

tilt_series.append(
{
"name": ts_name,
"stack": stack,
"rawtlt": stack.with_suffix(".rawtlt"),
"aln": output_dir / (ts_stripped + ".aln"),
"xf": alignment_result_dir / (ts_stripped + ".xf"),
"tlt": alignment_result_dir / (ts_stripped + ".tlt"),
"xf": alignment_result_dir / (ts_aligned + ".xf"),
"tlt": alignment_result_dir / (ts_aligned + ".tlt"),
"skipped_tilts": skipped_tilts,
"mdoc": mdoc_file,
"roi": roi_file,
@@ -132,8 +141,8 @@
"recon_even": output_dir / "even" / (ts_name + ".mrc"),
"recon": output_dir / (ts_name + ".mrc"),
"aretomo_kwargs": {
"dose": mdoc.section_data[0].ExposureDose,
"px_size": mdoc.section_data[0].PixelSpacing * 2**binning,
"dose": dose,
"px_size": px_size_raw * 2**binning,
"cs": cs,
"kv": kv,
"defocus": defocus,
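The xf-file fix mentioned in the commit message appears to be the renaming above: the parser now looks for AreTomo's Imod-style results under the name of the aligned stack ("<stem>_aligned.st_Imod") rather than the raw series name. A hypothetical example of the resulting paths (series name and output directory are made up):

    from pathlib import Path

    output_dir = Path("aretomo_output")      # hypothetical
    ts_name = "TS_01.mrc"                    # hypothetical series name
    ts_stripped = ts_name.split(".")[0]      # "TS_01"
    ts_aligned = ts_stripped + "_aligned"    # "TS_01_aligned"

    alignment_result_dir = output_dir / (ts_aligned + ".st_Imod")
    xf = alignment_result_dir / (ts_aligned + ".xf")
    # -> aretomo_output/TS_01_aligned.st_Imod/TS_01_aligned.xf
    tlt = alignment_result_dir / (ts_aligned + ".tlt")
    # -> aretomo_output/TS_01_aligned.st_Imod/TS_01_aligned.tlt
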
13 changes: 5 additions & 8 deletions src/waretomo/_stack.py
@@ -1,22 +1,19 @@
+import logging
import shutil
import subprocess
from time import sleep

-from rich import print

from ._threaded import run_threaded


-def _stack(
-images, output, cmd="newstack", dry_run=False, verbose=False, overwrite=False
-):
+def _stack(images, output, cmd="newstack", dry_run=False, overwrite=False):
if not overwrite and output.exists():
raise FileExistsError(output)
stack_cmd = f'{cmd} {" ".join(str(img) for img in images)} {output}'

-if verbose:
-short_cmd = f"{cmd} {images[0]} [...] {images[-1]} {output}"
-print(short_cmd)
+log = logging.getLogger("waretomo")
+short_cmd = f"{cmd} {images[0]} [...] {images[-1]} {output}"
+log.info(short_cmd)

if not dry_run:
subprocess.run(stack_cmd.split(), capture_output=True, check=True)
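The shortened command is logged because a full newstack invocation can list dozens of image paths. A small illustration with hypothetical file names:

    # hypothetical file names, only to show why the command is abbreviated in the log
    images = [f"img_{i:03}.mrc" for i in range(60)]
    short_cmd = f"newstack {images[0]} [...] {images[-1]} stack.st"
    # -> "newstack img_000.mrc [...] img_059.mrc stack.st"
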
18 changes: 8 additions & 10 deletions src/waretomo/_threaded.py
@@ -1,21 +1,21 @@
+import logging
import os
import subprocess
from concurrent import futures

-from rich import print


def run_threaded(
progress,
partials,
label="",
max_workers=None,
dry_run=False,
-verbose=False,
**kwargs,
):
max_workers = max_workers or min(32, os.cpu_count() + 4) # see concurrent docs

+log = logging.getLogger("waretomo")

with futures.ThreadPoolExecutor(max_workers) as executor:
main_task = progress.add_task(f"{label}...", total=len(partials))

@@ -37,20 +37,18 @@ def run_threaded(
exist += 1
except subprocess.CalledProcessError as e:
errors.append(e)
-if verbose:
-print(e)
+log.warning("Subprocess failed with:")
progress.update(main_task, advance=1)

for t in tasks:
progress.update(t, total=1, completed=1, visible=False)

if exist:
print(f"[red]{label}: {exist} files already exist and were not overwritten")
log.warn(f"{label}: {exist} files already exist and were not overwritten")

if errors:
print(f"[red]{label}: {len(errors)} commands have failed:")
log.error(f"{label}: {len(errors)} commands have failed:")
for err in errors:
-print(
-f'[yellow]{" ".join(err.cmd)}[/yellow] '
-f"failed with:\n[red]{err.stderr.decode()}"
+log.error(
+f'{" ".join(err.cmd)} ' f"failed with:\n{err.stderr.decode()}"
)
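
_threaded.py now reports failures through the logger as well. A condensed, self-contained sketch of the collect-then-report pattern used above (names and structure simplified relative to the real run_threaded):

    import logging
    import subprocess
    from concurrent import futures

    log = logging.getLogger("waretomo")

    def run_all(partials):
        # run each job in a thread, collect CalledProcessError instead of aborting,
        # and report all failures through the logger at the end
        errors = []
        with futures.ThreadPoolExecutor() as executor:
            jobs = [executor.submit(p) for p in partials]
            for job in futures.as_completed(jobs):
                try:
                    job.result()
                except subprocess.CalledProcessError as e:
                    errors.append(e)
        if errors:
            log.error(f"{len(errors)} commands have failed:")
            for err in errors:
                log.error(f'{" ".join(err.cmd)} failed with:\n{err.stderr.decode()}')
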
19 changes: 9 additions & 10 deletions src/waretomo/_topaz.py
@@ -1,11 +1,11 @@
import contextlib
import io
+import logging
import multiprocessing
import re
import time
from concurrent import futures

-from rich import print
from topaz.commands.denoise3d import denoise, load_model, set_device, train_model
from topaz.torch import set_num_threads

@@ -52,19 +52,18 @@ def topaz_batch(
patch_size=32,
gpus=None,
dry_run=False,
-verbose=False,
overwrite=False,
):
inputs = [ts["recon"] for ts in tilt_series]

-if verbose:
-if train:
-print(f"training model: '{model_name}' with inputs '{even}' and '{odd}'")
-if len(inputs) > 2:
-print(f"denoising: [{inputs[0]} [...] {inputs[-1]}]")
-else:
-print(f"denoising: {inputs}")
-print(f"output: {outdir}")
+log = logging.getLogger("waretomo")
+if train:
+log.info(f"training model: '{model_name}' with inputs '{even}' and '{odd}'")
+if len(inputs) > 2:
+log.info(f"denoising: [{inputs[0]} [...] {inputs[-1]}]")
+else:
+log.info(f"denoising: {inputs}")
+log.info(f"output: {outdir}")

if not dry_run:
set_num_threads(0)