Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collate and SingleCells to accept less/different compartments #301

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions pycytominer/cyto_utils/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class SingleCells(object):
compartments : list of str, default ["cells", "cytoplasm", "nuclei"]
List of compartments to process.
compartment_linking_cols : dict, default noted below
Dictionary identifying how to merge columns across tables.
Dictionary identifying how to merge columns across tables. For examples see note below.
merge_cols : list of str, default ["TableNumber", "ImageNumber"]
Columns indicating how to merge image and compartment data.
image_cols : list of str, default ["TableNumber", "ImageNumber", "Metadata_Site"]
Expand Down Expand Up @@ -89,7 +89,14 @@ class SingleCells(object):
},
"cells": {"cytoplasm": "ObjectNumber"},
"nuclei": {"cytoplasm": "ObjectNumber"},
}
}.

The compartment_linking_cols dictionary template is: {
"child":
{"parent":"child_Parent_parent"},
"parent":
{"child":"ObjectNumber"}
}
"""

def __init__(
Expand Down
25 changes: 23 additions & 2 deletions pycytominer/cyto_utils/collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ def collate(
add_image_features=True,
image_feature_categories=["Granularity", "Texture", "ImageQuality", "Threshold"],
printtoscreen=True,
no_nuclei=True,
no_cells=True,
no_cytoplasm=True
):
"""Collate the CellProfiler-created CSVs into a single SQLite file by calling cytominer-database

Expand Down Expand Up @@ -65,9 +68,21 @@ def collate(
The list of image feature groups to be used by add_image_features during aggregation
printtoscreen: bool, optional, default True
Whether or not to print output to the terminal
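no_nuclei: bool, optional, default True
Whether or not the nuclei object is available. Despite the parameter name, True (the default) means the nuclei compartment is included; set to False if there is no nuclei object.
no_cells: bool, optional, default True
Whether or not the cells object is available. Despite the parameter name, True (the default) means the cells compartment is included; set to False if there is no cells object.
no_cytoplasm: bool, optional, default True
Whether or not the cytoplasm object is available. Despite the parameter name, True (the default) means the cytoplasm compartment is included; set to False if there is no cytoplasm object.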
"""

from pycytominer.cyto_utils.cells import SingleCells


# Set up compartments based on the flags. If all are True, all compartments will be used
filter_compartments = [no_nuclei, no_cells, no_cytoplasm]
to_filter = ["Nuclei", "Cells", "Cytoplasm"]
compartments = [to_filter[i] for i in [j for j in range(len(filter_compartments)) if filter_compartments[j]]]

# Set up directories (these need to be abspaths to keep from confusing makedirs later)
input_dir = pathlib.Path(f"{base_directory}/analysis/{batch}/{plate}/{csv_dir}")
Expand Down Expand Up @@ -97,8 +112,13 @@ def collate(
remote_backend_file = f"{aws_remote}/backend/{batch}/{plate}/{plate}.sqlite"

remote_aggregated_file = f"{aws_remote}/backend/{batch}/{plate}/{plate}.csv"

include_list = []
for eachcompartment in compartments:
include = "--include */" + eachcompartment + ".csv"
include_list.append(include)
sync_cmd = f"aws s3 sync --exclude * {(' '.join(include_list))} --include */Image.csv {remote_input_dir} {input_dir}"

sync_cmd = f"aws s3 sync --exclude * --include */Cells.csv --include */Nuclei.csv --include */Cytoplasm.csv --include */Image.csv {remote_input_dir} {input_dir}"
if printtoscreen:
print(f"Downloading CSVs from {remote_input_dir} to {input_dir}")
run_check_errors(sync_cmd)
Expand Down Expand Up @@ -142,7 +162,7 @@ def collate(
"CREATE INDEX IF NOT EXISTS table_image_idx ON Image(TableNumber, ImageNumber);",
]
run_check_errors(index_cmd_img)
for eachcompartment in ["Cells", "Cytoplasm", "Nuclei"]:
for eachcompartment in compartments:
index_cmd_compartment = [
"sqlite3",
cache_backend_file,
Expand Down Expand Up @@ -202,6 +222,7 @@ def collate(
aggregation_operation="mean",
add_image_features=add_image_features,
image_feature_categories=image_feature_categories,
compartments=[cmp.lower() for cmp in compartments]
)
database.aggregate_profiles(output_file=aggregated_file)

Expand Down
24 changes: 24 additions & 0 deletions pycytominer/cyto_utils/collate_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@
default=True,
help="Whether to print status updates",
)
parser.add_argument(
"--no-nuclei",
dest="no_nuclei",
action="store_false",
default=True,
help="Whether or not to use nuclei objects",
)
parser.add_argument(
"--no-cells",
dest="no_cells",
action="store_false",
default=True,
help="Whether or not to use cells objects",
)
parser.add_argument(
"--no-cytoplasm",
dest="no_cytoplasm",
action="store_false",
default=True,
help="Whether or not to use cytoplasm objects",
)

args = parser.parse_args()

Expand All @@ -94,4 +115,7 @@
add_image_features=args.add_image_features,
image_feature_categories=args.image_feature_categories,
printtoscreen=args.printtoscreen,
no_nuclei=args.no_nuclei,
no_cells=args.no_cells,
no_cytoplasm=args.no_cytoplasm
)
51 changes: 26 additions & 25 deletions pycytominer/cyto_utils/single_cell_ingest_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,31 +51,32 @@ def assert_linking_cols_complete(linking_cols="default", compartments="default")

comp_err = "compartment not found. Check the specified compartments"

linking_check = []
unique_linking_cols = []
for x in linking_cols:
unique_linking_cols.append(x)
assert x in compartments, "{com} {err}".format(com=x, err=comp_err)
for y in linking_cols[x]:
unique_linking_cols.append(y)
assert y in compartments, "{com} {err}".format(com=y, err=comp_err)
linking_check.append("-".join(sorted([x, y])))

# Make sure that each combination has been specified exactly twice
linking_counter = Counter(linking_check)
for combo in linking_counter:
assert (
linking_counter[combo] == 2
), "Missing column identifier between {combo}".format(combo=combo)

# Confirm that every compartment has been specified in the linking_cols
unique_linking_cols = sorted(list(set(unique_linking_cols)))
diff_column = set(compartments).difference(unique_linking_cols)
assert unique_linking_cols == sorted(
compartments
), "All compartments must be specified in the linking_cols, {miss} is missing".format(
miss=diff_column
)
if not len(compartments) == 1:
linking_check = []
unique_linking_cols = []
for x in linking_cols:
unique_linking_cols.append(x)
assert x in compartments, "{com} {err}".format(com=x, err=comp_err)
for y in linking_cols[x]:
unique_linking_cols.append(y)
assert y in compartments, "{com} {err}".format(com=y, err=comp_err)
linking_check.append("-".join(sorted([x, y])))

# Make sure that each combination has been specified exactly twice
linking_counter = Counter(linking_check)
for combo in linking_counter:
assert (
linking_counter[combo] == 2
), "Missing column identifier between {combo}".format(combo=combo)

# Confirm that every compartment has been specified in the linking_cols
unique_linking_cols = sorted(list(set(unique_linking_cols)))
diff_column = set(compartments).difference(unique_linking_cols)
assert unique_linking_cols == sorted(
compartments
), "All compartments must be specified in the linking_cols, {miss} is missing".format(
miss=diff_column
)


def provide_linking_cols_feature_name_update(linking_cols="default"):
Expand Down