From 6e8c821dd16032d840daffbffd2cda58b17466d6 Mon Sep 17 00:00:00 2001 From: brenault Date: Thu, 18 Jan 2024 16:47:09 +0100 Subject: [PATCH 1/5] feat(back): write datasetStat --- pixano/analytics/__init__.py | 2 + pixano/analytics/image_statistics.py | 60 ++++++++++++++++++++++++++++ pixano/data/dataset/dataset_stat.py | 29 ++++++++++++++ 3 files changed, 91 insertions(+) create mode 100644 pixano/analytics/image_statistics.py diff --git a/pixano/analytics/__init__.py b/pixano/analytics/__init__.py index 765d60e82..214720777 100644 --- a/pixano/analytics/__init__.py +++ b/pixano/analytics/__init__.py @@ -12,8 +12,10 @@ # http://www.cecill.info from pixano.analytics.feature_statistics import compute_additional_data, compute_stats +from pixano.analytics.image_statistics import compute_image_stats __all__ = [ "compute_additional_data", "compute_stats", + "compute_image_stats", ] diff --git a/pixano/analytics/image_statistics.py b/pixano/analytics/image_statistics.py new file mode 100644 index 000000000..84082dc03 --- /dev/null +++ b/pixano/analytics/image_statistics.py @@ -0,0 +1,60 @@ +# @Copyright: CEA-LIST/DIASI/SIALV/LVA (2023) +# @Author: CEA-LIST/DIASI/SIALV/LVA +# @License: CECILL-C +# +# This software is a collaborative computer program whose purpose is to +# generate and explore labeled data for computer vision applications. +# This software is governed by the CeCILL-C license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL-C +# license as circulated by CEA, CNRS and INRIA at the following URL +# +# http://www.cecill.info + +from fractions import Fraction + +import pyarrow as pa +from PIL import Image as PILImage + +from pixano.data import Dataset + + +def compute_image_stats(ds: Dataset): + """Compute image stats, save them to stats.json + + Args: + ds (Dataset): Dataset + """ + + tables = ds.open_tables() + + for view in tables["media"]: + # will be flattened, so don't treat it as a real loop (only one elem) + # tt = tables["media"][view].to_lance() + # print(duckdb.sql("select * from tt")) + data_table = tables["media"][view].to_arrow() + + # Take a subset of table without image columns (which can't be converted to pandas) + if not all(p in data_table.column_names for p in ["width", "height"]): + print( + "INFO: 'width' and 'height' not found in media table, get it from image" + ) + images = data_table.select([view]).to_pylist() + sizes = [] + for image in images: + # im = image[view].as_pillow() ne marche plus car uri_prefix vide (pb avec Image.get_uri()) + im = PILImage.open(ds.media_dir / image[view].uri) + sizes.append({"width": im.width, "height": im.height}) + data = pa.Table.from_pylist(sizes).to_pandas() + else: + print("INFO: 'width' and 'height' found in media table, use it") + data = data_table.select(["width", "height"]).to_pandas() + + # Compute additional data + data["resolution"] = data.apply( + lambda x: str(x["width"]) + "x" + str(x["height"]), axis=1 + ) + data["aspect_ratio"] = data.apply( + lambda x: str(Fraction(x["width"], x["height"])).replace("/", ":"), axis=1 + ) + return data diff --git a/pixano/data/dataset/dataset_stat.py b/pixano/data/dataset/dataset_stat.py index d1b19a1a9..a544644d0 100644 --- a/pixano/data/dataset/dataset_stat.py +++ b/pixano/data/dataset/dataset_stat.py @@ -53,3 +53,32 @@ def from_json(json_fp: Path | S3Path) -> list["DatasetStat"]: stats_json = json.load(json_file) return [DatasetStat.model_validate(stat) for stat in stats_json] + + def to_json(self, json_fp: Path | S3Path): + """Write DatasetStat to json_fp + replace existing histogram with same name in json_fp + + Args: + json_fp (Path | S3Path): Path to "stats.json" file + """ + try: + if isinstance(json_fp, S3Path): + with json_fp.open(encoding="utf-8") as json_file: + json_stats = json.load(json_file) + else: + with open(json_fp, "r", encoding="utf-8") as json_file: + json_stats = json.load(json_file) + except FileNotFoundError: + json_stats = [] + # keep all stats except the one with same name, we replace it if exist + json_stats = [stat for stat in json_stats if stat["name"] != self.name] + json_stats.append( + {"name": self.name, "type": self.type, "histogram": self.histogram} + ) + + if isinstance(json_fp, S3Path): + with json_fp.open("w", encoding="utf-8") as f: + json.dump(json_stats, f, indent="\t") + else: + with open(json_fp, "w", encoding="utf-8") as f: + json.dump(json_stats, f, indent="\t") From 7e18d28f73a96262ccc5f235cc6c5618c31f51fd Mon Sep 17 00:00:00 2001 From: brenault Date: Thu, 18 Jan 2024 18:05:10 +0100 Subject: [PATCH 2/5] feat(back): allow stats loading --- pixano/app/api/datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pixano/app/api/datasets.py b/pixano/app/api/datasets.py index 98f3ed8f8..a34b9ec98 100644 --- a/pixano/app/api/datasets.py +++ b/pixano/app/api/datasets.py @@ -37,6 +37,7 @@ async def get_datasets( infos = DatasetInfo.load_directory( directory=settings.data_dir, load_thumbnail=True, + load_stats=True ) # Return datasets From e486b7ff735b73eadce6187572b0d2ef233a8b9b Mon Sep 17 00:00:00 2001 From: brenault Date: Thu, 18 Jan 2024 18:11:58 +0100 Subject: [PATCH 3/5] chore(back): adapt save function signature for consistency --- pixano/data/dataset/dataset_stat.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pixano/data/dataset/dataset_stat.py b/pixano/data/dataset/dataset_stat.py index a544644d0..b3094244e 100644 --- a/pixano/data/dataset/dataset_stat.py +++ b/pixano/data/dataset/dataset_stat.py @@ -54,19 +54,20 @@ def from_json(json_fp: Path | S3Path) -> list["DatasetStat"]: return [DatasetStat.model_validate(stat) for stat in stats_json] - def to_json(self, json_fp: Path | S3Path): - """Write DatasetStat to json_fp + def save(self, save_dir: Path | S3Path): + """Save DatasetInfo to json file replace existing histogram with same name in json_fp Args: - json_fp (Path | S3Path): Path to "stats.json" file + save_dir (Path | S3Path): Save directory """ + try: - if isinstance(json_fp, S3Path): - with json_fp.open(encoding="utf-8") as json_file: + if isinstance(save_dir, S3Path): + with (save_dir / "stats.json").open(encoding="utf-8") as json_file: json_stats = json.load(json_file) else: - with open(json_fp, "r", encoding="utf-8") as json_file: + with open(save_dir / "stats.json", "r", encoding="utf-8") as json_file: json_stats = json.load(json_file) except FileNotFoundError: json_stats = [] @@ -76,9 +77,9 @@ def to_json(self, json_fp: Path | S3Path): {"name": self.name, "type": self.type, "histogram": self.histogram} ) - if isinstance(json_fp, S3Path): - with json_fp.open("w", encoding="utf-8") as f: + if isinstance(save_dir, S3Path): + with (save_dir / "stats.json").open("w", encoding="utf-8") as f: json.dump(json_stats, f, indent="\t") else: - with open(json_fp, "w", encoding="utf-8") as f: + with open(save_dir / "stats.json", "w", encoding="utf-8") as f: json.dump(json_stats, f, indent="\t") From 27f2829dad1654f86651a6a7564a2688191d4e08 Mon Sep 17 00:00:00 2001 From: brenault Date: Thu, 18 Jan 2024 18:36:22 +0100 Subject: [PATCH 4/5] chore(back): black formating --- pixano/app/api/datasets.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pixano/app/api/datasets.py b/pixano/app/api/datasets.py index a34b9ec98..95a797752 100644 --- a/pixano/app/api/datasets.py +++ b/pixano/app/api/datasets.py @@ -35,9 +35,7 @@ async def get_datasets( # Load datasets infos = DatasetInfo.load_directory( - directory=settings.data_dir, - load_thumbnail=True, - load_stats=True + directory=settings.data_dir, load_thumbnail=True, load_stats=True ) # Return datasets From 7c3c5e704e3d204d6406630b5b7339f42b3e07fb Mon Sep 17 00:00:00 2001 From: brenault Date: Fri, 19 Jan 2024 11:08:30 +0100 Subject: [PATCH 5/5] fix(back): load stats only when needed --- pixano/app/api/datasets.py | 4 +--- .../src/routes/[dataset]/dashboard/+page.svelte | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pixano/app/api/datasets.py b/pixano/app/api/datasets.py index 95a797752..f83d591a7 100644 --- a/pixano/app/api/datasets.py +++ b/pixano/app/api/datasets.py @@ -34,9 +34,7 @@ async def get_datasets( """ # Load datasets - infos = DatasetInfo.load_directory( - directory=settings.data_dir, load_thumbnail=True, load_stats=True - ) + infos = DatasetInfo.load_directory(directory=settings.data_dir, load_thumbnail=True) # Return datasets if infos: diff --git a/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte b/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte index caa2c42d9..1d2a75397 100644 --- a/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte +++ b/ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte @@ -2,10 +2,11 @@ import { page } from "$app/stores"; import type { DatasetInfo } from "@pixano/core/src"; - + import { api } from "@pixano/core/src"; import Dashboard from "../../../components/dashboard/Dashboard.svelte"; import { datasetsStore } from "../../../lib/stores/datasetStores"; + import { afterUpdate } from "svelte"; let selectedDataset: DatasetInfo; @@ -19,6 +20,19 @@ } }); } + + // get stats if not already loaded, and allow stats on page refresh + afterUpdate(async () => { + if (selectedDataset && selectedDataset.stats == undefined) { + const completedDatasetwithStats = await api.getDataset(selectedDataset.id); + if ( + completedDatasetwithStats.stats !== undefined && + completedDatasetwithStats.stats.length > 0 + ) { + selectedDataset.stats = completedDatasetwithStats.stats; + } + } + }); {#if selectedDataset?.page}