Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add alignments to Python PCA API #2285

Open
akenmorris opened this issue Jul 14, 2024 · 0 comments
Open

Add alignments to Python PCA API #2285

akenmorris opened this issue Jul 14, 2024 · 0 comments

Comments

@akenmorris
Copy link
Contributor

akenmorris commented Jul 14, 2024

The Python API for performing PCA does not accept parameters for specifying the alignment strategy when there are multiple domains. This should be added.

As an example, the following script performs PCA for each alignment strategy outside of the ShapeWorks API:

#!/usr/bin/env python3

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
import fire
import os
import sys


def read_xlsx_file(filepath):
    """Announce *filepath* on stdout, then load it with ``pd.read_excel``.

    Returns whatever ``pd.read_excel`` produces for the given path; any
    exception it raises propagates to the caller.
    """
    print(f"Reading file: {filepath}")
    return pd.read_excel(filepath)


def save_to_csv(data, filename):
    """Write a 2-D array to *filename* as CSV, one ``PC<k>`` column per axis-1 entry.

    Column headers are numbered from 1 (``PC1``, ``PC2``, ...) and the row
    index is not written.
    """
    component_count = data.shape[1]
    headers = [f'PC{number}' for number in range(1, component_count + 1)]
    pd.DataFrame(data, columns=headers).to_csv(filename, index=False)


def _parse_matrix(text):
    """Turn 16 whitespace-separated numbers into a 4x4 float matrix."""
    return np.array(text.split()).reshape(4, 4).astype(float)


def _axis_scaling(matrix):
    """Return the norm of each column of the upper-left 3x3 block of *matrix*."""
    return np.sqrt(np.sum(matrix[:3, :3] ** 2, axis=0))


def _load_homogeneous_particles(filename):
    """Read one particle per line from *filename* and append a column of ones."""
    with open(filename, 'r') as f:
        # Skip blank lines (e.g. a trailing newline); they would otherwise
        # produce empty rows and a ragged array.
        particles = np.array(
            [[float(value) for value in line.split()]
             for line in f if line.strip()])

    return np.hstack((particles, np.ones((particles.shape[0], 1))))


def python_pca(file, output):
    """Run PCA once per alignment strategy found in a spreadsheet.

    The spreadsheet is loaded with ``read_xlsx_file``. The following columns
    are used, where ``<name>`` is everything after the prefix:

    * ``world_particles_<name>`` / ``local_particles_<name>``: paths to text
      files holding one whitespace-separated particle per line.
    * ``alignment_<strategy>``: a 4x4 matrix stored as 16 whitespace-separated
      numbers. Each such column defines one alignment strategy.
    * ``procrustes_<name>``: a 4x4 matrix in the same textual form.

    For every ``alignment_<strategy>`` column, plus the extra strategy
    ``"local"``, one feature row is built per spreadsheet row:

    * ``"local"``: the world particle files are used untransformed.
    * ``"global"``: the local particles are transformed by the row's
      ``alignment_global`` matrix, then scaled by the per-axis scaling
      extracted from that particle domain's own procrustes matrix.
    * any other strategy: as for ``"global"`` (using ``alignment_<strategy>``),
      followed by the strategy domain's procrustes matrix with its scaling
      removed.

    The rows are mean-centred, reduced with PCA, and the scores are written
    with ``save_to_csv`` to ``<output>_<strategy>.csv``.

    Args:
        file: Path of the spreadsheet to read.
        output: Filename prefix for the generated CSV files.
    """
    data = read_xlsx_file(file)

    world_prefix = 'world_particles_'
    alignment_prefix = 'alignment_'

    # Strip the prefix instead of taking the text after the last underscore,
    # so that names which themselves contain underscores stay intact.
    shape_domains = [
        col[len(world_prefix):]
        for col in data.columns if col.startswith(world_prefix)]

    alignment_domains = [
        col[len(alignment_prefix):]
        for col in data.columns if col.startswith(alignment_prefix)]
    # "local" is always evaluated in addition to the alignment columns
    alignment_domains.append("local")

    num_samples = len(data)

    for domain in alignment_domains:

        values = []
        for _, row in data.iterrows():

            # Per-row matrices are independent of the particle column, so
            # they are parsed once here rather than inside the inner loop.
            transform = None
            target = None
            if domain != "local":
                # alignment matrix of the selected strategy
                transform = _parse_matrix(row[f'alignment_{domain}'])

                if domain != "global":
                    # procrustes of the target alignment domain, with its
                    # scaling removed
                    target = _parse_matrix(row[f'procrustes_{domain}'])
                    target[:3, :3] /= _axis_scaling(target)

            row_values = []
            for name in shape_domains:

                if domain == "local":
                    # world particles are used as they are
                    particles = _load_homogeneous_particles(
                        row[f'world_particles_{name}'])
                else:
                    particles = _load_homogeneous_particles(
                        row[f'local_particles_{name}'])

                    # transform each x,y,z by alignment
                    particles = np.dot(particles, transform.T)

                    # apply scaling from this domain's own procrustes
                    particles[:, :3] *= _axis_scaling(
                        _parse_matrix(row[f'procrustes_{name}']))

                    if target is not None:
                        particles = np.dot(particles, target.T)

                # NOTE: the homogeneous column of ones is part of the
                # flattened features; it is constant and vanishes on centring.
                row_values.extend(particles.flatten())

            values.append(row_values)

        # reconfigure values as a NxM array
        values = np.array(values)

        # recenter the data
        values -= np.mean(values, axis=0)

        # PCA cannot produce more components than min(samples, features)
        n_components = min(num_samples, values.shape[1])
        pca = PCA(n_components=n_components)
        pca_scores = pca.fit_transform(values)

        # write values to csv
        save_to_csv(pca_scores, output + f"_{domain}.csv")


# Script entry point: hand python_pca to fire.Fire so the function can be
# driven from the command line (presumably with `file` and `output` supplied
# as arguments; see the Python Fire documentation for the exact syntax).
if __name__ == "__main__":
    fire.Fire(python_pca)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant