From 9dc84bdbee19a10b183ad26430141a01ff734f1b Mon Sep 17 00:00:00 2001
From: Runbo Jiang
Date: Fri, 10 Nov 2023 09:58:44 -0800
Subject: [PATCH] UMAP can read user uploaded zip file

---
NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed into single spaces. Indentation and the position of blank context
lines are inferred from the hunk line counts and must be confirmed against the
repository before applying (`git apply --ignore-whitespace` may help).
load_images_from_directory was revised during review (sorted directory
listing, context-managed Image.open, docstring, trailing newline), so the
commit id above and the post-image blob id of utils.py below still refer to
the change as originally submitted.

 umap_run.py |  4 +++-
 utils.py    | 35 ++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/umap_run.py b/umap_run.py
index cdc472f..c9e8058 100644
--- a/umap_run.py
+++ b/umap_run.py
@@ -5,7 +5,7 @@ import json
 
 import pandas as pd
 
-from utils import UMAPParameters
+from utils import UMAPParameters, load_images_from_directory
 """
 Compute UMAP
 Input: 1d data (N, M) or 2d data (N, H, W)
@@ -47,6 +47,8 @@ def computeUMAP(data,
     if images_dir == "data/example_latentrepresentation/f_vectors.parquet":
         df = pd.read_parquet(images_dir)
         images = df.values
+    else:  # user uploaded zip file
+        images = load_images_from_directory(images_dir)
     print(images.shape)
 
     # Load dimension reduction parameter
diff --git a/utils.py b/utils.py
index 7b3c8aa..5e5f51e 100644
--- a/utils.py
+++ b/utils.py
@@ -1,4 +1,7 @@
 from pydantic import BaseModel, Field
+import os
+from PIL import Image
+import numpy as np
 
 class PCAParameters(BaseModel):
     n_components: int = Field(description='number of components to keep')
@@ -8,4 +11,34 @@ class UMAPParameters(BaseModel):
     min_dist: float = Field(description='min distance between points')
     n_neighbors: int = Field(description='number of nearest neighbors')
 
-
+def load_images_from_directory(directory_path):
+    """Load every ``.png`` file in a directory into one NumPy array.
+
+    Files are visited in sorted filename order so that the row order of the
+    result is reproducible; ``os.listdir`` alone returns entries in arbitrary
+    order. Files that cannot be opened or decoded are reported on stdout and
+    skipped.
+
+    Args:
+        directory_path: Path of the directory to scan (not recursive).
+
+    Returns:
+        ``np.array`` built from the list of per-image arrays, in sorted
+        filename order.
+    """
+    image_data = []
+    for filename in sorted(os.listdir(directory_path)):
+        if not filename.endswith(".png"):
+            continue
+        file_path = os.path.join(directory_path, filename)
+        try:
+            # The context manager releases the file handle even if decoding fails.
+            with Image.open(file_path) as img:
+                img_array = np.array(img)
+        except Exception as e:
+            # Best effort: one unreadable image must not abort the whole load.
+            print(f"Error processing {file_path}: {e}")
+        else:
+            image_data.append(img_array)
+
+    return np.array(image_data)