Skip to content

Commit

Permalink
Merge pull request pixano#46 from BertrandRenault/feat/stats
Browse files Browse the repository at this point in the history
feat(back): allow dataset stats loading
  • Loading branch information
cpvannier authored Jan 19, 2024
2 parents e1c243a + 7c3c5e7 commit e834519
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pixano/analytics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
# http://www.cecill.info

from pixano.analytics.feature_statistics import compute_additional_data, compute_stats
from pixano.analytics.image_statistics import compute_image_stats

# Names exported by `from pixano.analytics import *`; each one is imported above.
__all__ = [
"compute_additional_data",
"compute_stats",
"compute_image_stats",
]
60 changes: 60 additions & 0 deletions pixano/analytics/image_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# @Copyright: CEA-LIST/DIASI/SIALV/LVA (2023)
# @Author: CEA-LIST/DIASI/SIALV/LVA <[email protected]>
# @License: CECILL-C
#
# This software is a collaborative computer program whose purpose is to
# generate and explore labeled data for computer vision applications.
# This software is governed by the CeCILL-C license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL-C
# license as circulated by CEA, CNRS and INRIA at the following URL
#
# http://www.cecill.info

from fractions import Fraction

import pyarrow as pa
from PIL import Image as PILImage

from pixano.data import Dataset


def compute_image_stats(ds: Dataset):
    """Compute image size statistics for the first media view of a dataset

    Width and height are taken from the media table when both columns exist,
    otherwise every image file is opened from ``ds.media_dir`` to read them.
    Nothing is written to disk: the statistics are only returned.

    Args:
        ds (Dataset): Dataset

    Returns:
        pandas.DataFrame | None: One row per image with the columns "width",
        "height", "resolution" ("<width>x<height>") and "aspect_ratio"
        (reduced fraction written "<w>:<h>"; whole-number ratios have no
        denominator, e.g. "2"), or None when the dataset has no media view.
    """

    tables = ds.open_tables()

    # Only the first media view is processed (the original loop returned on
    # its first iteration); make that, and the empty case, explicit.
    view = next(iter(tables["media"]), None)
    if view is None:
        return None

    data_table = tables["media"][view].to_arrow()

    # Work on a subset of the table without the image column, which can't be
    # converted to pandas.
    if all(col in data_table.column_names for col in ("width", "height")):
        print("INFO: 'width' and 'height' found in media table, use it")
        data = data_table.select(["width", "height"]).to_pandas()
    else:
        print(
            "INFO: 'width' and 'height' not found in media table, get it from image"
        )
        sizes = []
        for image in data_table.select([view]).to_pylist():
            # image[view].as_pillow() no longer works because uri_prefix is
            # empty (issue with Image.get_uri()), so open the file directly.
            # The context manager closes the file handle once the size is read.
            with PILImage.open(ds.media_dir / image[view].uri) as im:
                sizes.append({"width": im.width, "height": im.height})
        # Explicit schema so that an empty list still yields both columns
        size_schema = pa.schema([("width", pa.int64()), ("height", pa.int64())])
        data = pa.Table.from_pylist(sizes, schema=size_schema).to_pandas()

    # Compute additional data (built column-wise so empty tables work too)
    data["resolution"] = [
        f"{width}x{height}" for width, height in zip(data["width"], data["height"])
    ]
    data["aspect_ratio"] = [
        str(Fraction(width, height)).replace("/", ":")
        for width, height in zip(data["width"], data["height"])
    ]
    return data
5 changes: 1 addition & 4 deletions pixano/app/api/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,7 @@ async def get_datasets(
"""

# Load datasets
infos = DatasetInfo.load_directory(
directory=settings.data_dir,
load_thumbnail=True,
)
infos = DatasetInfo.load_directory(directory=settings.data_dir, load_thumbnail=True)

# Return datasets
if infos:
Expand Down
30 changes: 30 additions & 0 deletions pixano/data/dataset/dataset_stat.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,33 @@ def from_json(json_fp: Path | S3Path) -> list["DatasetStat"]:
stats_json = json.load(json_file)

return [DatasetStat.model_validate(stat) for stat in stats_json]

def save(self, save_dir: Path | S3Path):
    """Save this stat into the stats.json file of a directory

    An entry already stored under the same name is replaced, entries with
    other names are kept. A missing stats.json is treated as an empty list.

    Args:
        save_dir (Path | S3Path): Save directory
    """

    # Path and S3Path both provide open(), so one code path serves both
    stats_fp = save_dir / "stats.json"

    try:
        with stats_fp.open("r", encoding="utf-8") as json_file:
            stored_stats = json.load(json_file)
    except FileNotFoundError:
        stored_stats = []

    # Drop any previous entry with this name, then append the current one last
    updated_stats = [entry for entry in stored_stats if entry["name"] != self.name]
    updated_stats.append(
        {"name": self.name, "type": self.type, "histogram": self.histogram}
    )

    with stats_fp.open("w", encoding="utf-8") as json_file:
        json.dump(updated_stats, json_file, indent="\t")
16 changes: 15 additions & 1 deletion ui/apps/pixano/src/routes/[dataset]/dashboard/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import { page } from "$app/stores";
import type { DatasetInfo } from "@pixano/core/src";
import { api } from "@pixano/core/src";
import Dashboard from "../../../components/dashboard/Dashboard.svelte";
import { datasetsStore } from "../../../lib/stores/datasetStores";
import { afterUpdate } from "svelte";
let selectedDataset: DatasetInfo;
Expand All @@ -19,6 +20,19 @@
}
});
}
// Fetch the stats lazily: after each component update, if a dataset is
// selected but carries no stats yet (e.g. after a page refresh), request the
// dataset from the API and copy its stats over when it has any.
afterUpdate(async () => {
  if (!selectedDataset || selectedDataset.stats != undefined) {
    return;
  }

  const fetchedDataset = await api.getDataset(selectedDataset.id);
  const fetchedStats = fetchedDataset.stats;
  if (fetchedStats !== undefined && fetchedStats.length > 0) {
    selectedDataset.stats = fetchedStats;
  }
});
</script>

{#if selectedDataset?.page}
Expand Down

0 comments on commit e834519

Please sign in to comment.