diff --git a/pixano/analytics/__init__.py b/pixano/analytics/__init__.py
index 765d60e82..214720777 100644
--- a/pixano/analytics/__init__.py
+++ b/pixano/analytics/__init__.py
@@ -12,8 +12,10 @@
 # http://www.cecill.info
 
 from pixano.analytics.feature_statistics import compute_additional_data, compute_stats
+from pixano.analytics.image_statistics import compute_image_stats
 
 __all__ = [
     "compute_additional_data",
     "compute_stats",
+    "compute_image_stats",
 ]
diff --git a/pixano/analytics/image_statistics.py b/pixano/analytics/image_statistics.py
new file mode 100644
index 000000000..84082dc03
--- /dev/null
+++ b/pixano/analytics/image_statistics.py
@@ -0,0 +1,60 @@
+# @Copyright: CEA-LIST/DIASI/SIALV/LVA (2023)
+# @Author: CEA-LIST/DIASI/SIALV/LVA <pixano@cea.fr>
+# @License: CECILL-C
+#
+# This software is a collaborative computer program whose purpose is to
+# generate and explore labeled data for computer vision applications.
+# This software is governed by the CeCILL-C license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL-C
+# license as circulated by CEA, CNRS and INRIA at the following URL
+#
+# http://www.cecill.info
+
+from fractions import Fraction
+
+import pyarrow as pa
+from PIL import Image as PILImage
+
+from pixano.data import Dataset
+
+
+def compute_image_stats(ds: Dataset):
+    """Compute image statistics (width, height, resolution, aspect ratio)
+
+    Args:
+        ds (Dataset): Dataset to compute statistics for
+    """
+
+    tables = ds.open_tables()
+
+    for view in tables["media"]:
+        # NOTE: the media table is expected to hold a single view, so this loop runs only once
+        # tt = tables["media"][view].to_lance()
+        # print(duckdb.sql("select * from tt"))
+        data_table = tables["media"][view].to_arrow()
+
+        # Take a subset of the table without image columns (which can't be converted to pandas)
+        if not all(p in data_table.column_names for p in ["width", "height"]):
+            print(
+                "INFO: 'width' and 'height' not found in media table, reading them from the images"
+            )
+            images = data_table.select([view]).to_pylist()
+            sizes = []
+            for image in images:
+                # im = image[view].as_pillow() no longer works because uri_prefix is empty (issue with Image.get_uri())
+                im = PILImage.open(ds.media_dir / image[view].uri)
+                sizes.append({"width": im.width, "height": im.height})
+            data = pa.Table.from_pylist(sizes).to_pandas()
+        else:
+            print("INFO: 'width' and 'height' found in media table, using them")
+            data = data_table.select(["width", "height"]).to_pandas()
+
+        # Compute additional data
+        data["resolution"] = data.apply(
+            lambda x: str(x["width"]) + "x" + str(x["height"]), axis=1
+        )
+        data["aspect_ratio"] = data.apply(
+            lambda x: str(Fraction(x["width"], x["height"])).replace("/", ":"), axis=1
+        )
+    return data
diff --git a/pixano/app/api/datasets.py b/pixano/app/api/datasets.py
index 98f3ed8f8..f83d591a7 100644
--- a/pixano/app/api/datasets.py
+++ b/pixano/app/api/datasets.py
@@ -34,10 +34,7 @@ async def get_datasets(
     """
 
     # Load datasets
-    infos = DatasetInfo.load_directory(
-        directory=settings.data_dir,
-        load_thumbnail=True,
-    )
+    infos = DatasetInfo.load_directory(directory=settings.data_dir, load_thumbnail=True)
 
     # Return datasets
     if infos:
diff --git a/pixano/data/dataset/dataset_stat.py b/pixano/data/dataset/dataset_stat.py
index d1b19a1a9..b3094244e 100644
--- a/pixano/data/dataset/dataset_stat.py
+++ b/pixano/data/dataset/dataset_stat.py
@@ -53,3 +53,33 @@ def from_json(json_fp: Path | S3Path) -> list["DatasetStat"]:
                 stats_json = json.load(json_file)
 
         return [DatasetStat.model_validate(stat) for stat in stats_json]
+
+    def save(self, save_dir: Path | S3Path):
+        """Save DatasetStat to stats.json in the given directory,
+        replacing any existing histogram with the same name
+
+        Args:
+            save_dir (Path | S3Path): Save directory
+        """
+
+        try:
+            if isinstance(save_dir, S3Path):
+                with (save_dir / "stats.json").open(encoding="utf-8") as json_file:
+                    json_stats = json.load(json_file)
+            else:
+                with open(save_dir / "stats.json", "r", encoding="utf-8") as json_file:
+                    json_stats = json.load(json_file)
+        except FileNotFoundError:
+            json_stats = []
+        # Keep all stats except the one with the same name, which is replaced if it exists
+        json_stats = [stat for stat in json_stats if stat["name"] != self.name]
+        json_stats.append(
+            {"name": self.name, "type": self.type, "histogram": self.histogram}
+        )
+
+        if isinstance(save_dir, S3Path):
+            with (save_dir / "stats.json").open("w", encoding="utf-8") as f:
+                json.dump(json_stats, f, indent="\t")
+        else:
+            with open(save_dir / "stats.json", "w", encoding="utf-8") as f:
+                json.dump(json_stats, f, indent="\t")
diff --git a/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte b/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte
index cf116434d..a1990d41c 100644
--- a/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte
+++ b/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte
@@ -2,10 +2,11 @@
   import { page } from "$app/stores";
 
   import type { DatasetInfo } from "@pixano/core/src";
-
+  import { api } from "@pixano/core/src";
   import Dashboard from "../../../components/dashboard/Dashboard.svelte";
   import { datasetsStore } from "../../../lib/stores/datasetStores";
+  import { afterUpdate } from "svelte";
 
   let selectedDataset: DatasetInfo;
 
@@ -19,6 +20,19 @@
       }
     });
   }
+
+  // Fetch stats if they are not loaded yet (also covers a page refresh)
+  afterUpdate(async () => {
+    if (selectedDataset && selectedDataset.stats === undefined) {
+      const completedDatasetWithStats = await api.getDataset(selectedDataset.id);
+      if (
+        completedDatasetWithStats.stats !== undefined &&
+        completedDatasetWithStats.stats.length > 0
+      ) {
+        selectedDataset.stats = completedDatasetWithStats.stats;
+      }
+    }
+  });
 </script>
 
 {#if selectedDataset?.page}
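Usage note (not part of the diff): a minimal sketch of how the new pieces could fit together. compute_image_stats returns a pandas DataFrame with "resolution" and "aspect_ratio" columns, and DatasetStat.save persists one named histogram into stats.json. The save_image_stats helper, the type="categorical" value, and the exact histogram payload format are assumptions for illustration, not existing Pixano API.

# Hypothetical glue code: turn the per-image statistics into categorical
# histograms and persist them with the new DatasetStat.save().
from pathlib import Path

from pixano.analytics import compute_image_stats
from pixano.data import Dataset
from pixano.data.dataset.dataset_stat import DatasetStat


def save_image_stats(ds: Dataset, save_dir: Path) -> None:
    """Write resolution and aspect ratio histograms for ds to save_dir / stats.json."""
    data = compute_image_stats(ds)  # DataFrame with "resolution" and "aspect_ratio" columns

    for column in ("resolution", "aspect_ratio"):
        # Count how many images fall into each category, e.g. {"1920x1080": 120, ...}
        counts = data[column].value_counts()
        # Assumed histogram payload: one dict per category with its count
        histogram = [
            {column: str(value), "counts": int(count)} for value, count in counts.items()
        ]
        # Each stat is appended to stats.json, replacing any previous entry with the same name
        DatasetStat(name=column, type="categorical", histogram=histogram).save(save_dir)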