From fde31a0c0f94ac2bf7ee462f38a7dcb377391af8 Mon Sep 17 00:00:00 2001 From: knikolaou <> Date: Fri, 5 Nov 2021 15:18:14 +0100 Subject: [PATCH] Create a new parent class, SimilarityMeasures, with two functions. The "calculate" function computes the per-point similarity measure that is needed for the point selection. For the implementation of the loss functions in the keras compiler, the per-point values must be reduced to a single number, which is done by the "__call__" function (it takes their mean). Each child class defines one similarity measurement by overriding the calculate function and inherits the __call__ function. --- LICENSE | 4 +- examples/scripts/simple_rnd.py | 9 +- pyrnd/core/models/dense_model.py | 6 +- pyrnd/core/rnd/rnd.py | 12 +- pyrnd/core/similarity_measures/__init__.py | 16 ++ .../similarity_measures.py | 167 ++++++++++++++++-- 6 files changed, 186 insertions(+), 28 deletions(-) diff --git a/LICENSE b/LICENSE index e55f3446..25f08251 100644 --- a/LICENSE +++ b/LICENSE @@ -162,12 +162,12 @@ liability for other Contributors. Therefore, if a Contributor includes the Program in a commercial product offering, such Contributor ("Commercial Contributor") hereby agrees to defend and indemnify every other Contributor ("Indemnified Contributor") against any losses, -damages and costs (collectively "Losses") arising from claims, lawsuits +damages and costs (collectively "Losses") arising from claims, lawsuits and other legal actions brought by a third party against the Indemnified Contributor to the extent caused by the acts or omissions of such Commercial Contributor in connection with its distribution of the Program in a commercial product offering. The obligations in this section do not -apply to any claims or Losses relating to any actual or alleged +apply to any claims or Losses relating to any actual or alleged intellectual property infringement. 
In order to qualify, an Indemnified Contributor must: a) promptly notify the Commercial Contributor in writing of such claim, and b) allow the Commercial Contributor to control, diff --git a/examples/scripts/simple_rnd.py b/examples/scripts/simple_rnd.py index cc0bd9c0..765b25fa 100644 --- a/examples/scripts/simple_rnd.py +++ b/examples/scripts/simple_rnd.py @@ -13,6 +13,7 @@ from pyrnd.core.point_selection.greedy_selection import GreedySelection import numpy as np import matplotlib.pyplot as plt +import pyrnd.core.similarity_measures.similarity_measures as losses if __name__ == "__main__": @@ -24,16 +25,16 @@ data_generator.build_pool(100) target = pyrnd.DenseModel( - units=12, layers=4, in_d=2, out_d=12, tolerance=1e-5, loss="cosine_similarity" + units=12, layers=4, in_d=2, out_d=12, tolerance=1e-2, loss=losses.MSE() ) predictor = pyrnd.DenseModel( - units=12, layers=4, in_d=2, out_d=12, tolerance=1e-5, loss="cosine_similarity" + units=12, layers=4, in_d=2, out_d=12, tolerance=1e-2, loss=losses.MSE() ) # print(target.summary()) agent = pyrnd.RND( - point_selector=GreedySelection(threshold=0.1), - # distance_metric=euclidean_distance, + point_selector=GreedySelection(threshold=0.02), + distance_metric=losses.MSE(), data_generator=data_generator, target_network=target, predictor_network=predictor, diff --git a/pyrnd/core/models/dense_model.py b/pyrnd/core/models/dense_model.py index 44a831dc..9e608900 100644 --- a/pyrnd/core/models/dense_model.py +++ b/pyrnd/core/models/dense_model.py @@ -12,6 +12,8 @@ from tensorflow.keras.layers import InputLayer from tensorflow.keras.layers import Dense from pyrnd.core.models.model import Model +from pyrnd.core.similarity_measures import SimilarityMeasures +from pyrnd.core.similarity_measures import CosineSim class DenseModel(Model): @@ -49,7 +51,7 @@ def __init__( activation: str = "relu", learning_rate: float = 1e-2, tolerance: float = 1e-5, - loss="mean_squared_error", + loss: SimilarityMeasures = CosineSim(), ): """ 
Constructor for the Feed forward network module. @@ -71,7 +73,7 @@ def __init__( tolerance : float Minimum value of the loss before the model is considered trained. - loss : str + loss : SimilarityMeasures Loss to use during the training. """ super().__init__() # update parent. diff --git a/pyrnd/core/rnd/rnd.py b/pyrnd/core/rnd/rnd.py index c2535130..f97a1ab7 100644 --- a/pyrnd/core/rnd/rnd.py +++ b/pyrnd/core/rnd/rnd.py @@ -12,7 +12,7 @@ from pyrnd.core.models.model import Model from pyrnd.core.point_selection.point_selection import PointSelection from pyrnd.core.data.data_generator import DataGenerator -from typing import Callable +from pyrnd.core.similarity_measures import SimilarityMeasures import tensorflow as tf import numpy as np @@ -36,7 +36,7 @@ def __init__( data_generator: DataGenerator, target_network: Model = None, predictor_network: Model = None, - distance_metric: Callable = None, + distance_metric: SimilarityMeasures = None, point_selector: PointSelection = None, optimizers: list = None, target_size: int = None, @@ -51,7 +51,7 @@ def __init__( Model class for the target network predictor_network : Model Model class for the predictor. 
- distance_metric : object + distance_metric : SimilarityMeasures Metric to use in the representation comparison data_generator : objector Class to generate or select new points from the point cloud @@ -105,7 +105,7 @@ def _set_defaults(self): self.point_selector = pyrnd.GreedySelection(self) # Update the metric if self.metric is None: - self.metric = pyrnd.similarity_measures.cosine_similarity + self.metric = pyrnd.similarity_measures.CosineSim() # Update the target if self.target is None: self.target = pyrnd.DenseModel() @@ -130,7 +130,9 @@ def compute_distance(self, points: tf.Tensor): predictor_predictions = self.predictor.predict(points) target_predictions = self.target.predict(points) - self.metric_results = self.metric(target_predictions, predictor_predictions) + self.metric_results = self.metric.calculate( + target_predictions, predictor_predictions + ) return self.metric_results # return self.metric(target_predictions, predictor_predictions) diff --git a/pyrnd/core/similarity_measures/__init__.py b/pyrnd/core/similarity_measures/__init__.py index e69de29b..0998c1d4 100644 --- a/pyrnd/core/similarity_measures/__init__.py +++ b/pyrnd/core/similarity_measures/__init__.py @@ -0,0 +1,16 @@ +from .similarity_measures import \ + SimilarityMeasures, \ + CosineSim, \ + MSE, \ + EuclideanDist, \ + MahalanobisDist, \ + AngleSim + +__all__ = [ + "SimilarityMeasures", + "CosineSim", + "MSE", + "EuclideanDist", + "MahalanobisDist", + "AngleSim" + ] diff --git a/pyrnd/core/similarity_measures/similarity_measures.py b/pyrnd/core/similarity_measures/similarity_measures.py index 81701a5b..47545560 100644 --- a/pyrnd/core/similarity_measures/similarity_measures.py +++ b/pyrnd/core/similarity_measures/similarity_measures.py @@ -14,24 +14,161 @@ a quasi-distance for the comparison to occur. 
""" import tensorflow as tf +import numpy as np +import tensorflow_probability as tfp +import tensorflow.keras -def cosine_similarity(point_1: tf.Tensor, point_2: tf.Tensor): +class SimilarityMeasures: """ - Parameters - ---------- - point_1 : tf.Tensor - First point in the comparison. - point_2 : tf.Tensor - Second point in the comparison. + Parent Class for the similarity measurements module """ - numerator = tf.cast(tf.einsum("ij, ij -> i", point_1, point_2), tf.float32) - denominator = tf.sqrt( - tf.cast( - tf.einsum("ij, ij -> i", point_1, point_1) - * tf.einsum("ij, ij -> i", point_2, point_2), - tf.float32, + + def calculate(self, point_1, point_2) -> tf.Tensor: + """ + Calculate the similarity of the given points + + Parameters + ---------- + point_1 : tf.Tensor + first neural network representation of the considered points + point_2 : tf.Tensor + second neural network representation of the considered points + + Returns + ------- + Similarity measurement : tf.Tensor + Similarity measurement of each point individually + """ + raise NotImplementedError + + def __call__(self, point_1: tf.Tensor, point_2: tf.Tensor) -> float: + """ + Mean over the tensor of the respective similarity measurement + Parameters + ---------- + point_1 : tf.Tensor + first neural network representation of the considered points + point_2 : tf.Tensor + second neural network representation of the considered points + + Returns + ------- + loss : float + mean loss over all points based on the similarity measurement + """ + return tf.reduce_mean(self.calculate(point_1, point_2)) + + +class CosineSim(SimilarityMeasures): + """ + Cosine similarity between two representations + """ + + def calculate(self, point_1, point_2): + """ + Parameters + ---------- + point_1 : tf.Tensor + First point in the comparison. + point_2 : tf.Tensor + Second point in the comparison. 
+ TODO: include factor sqrt2 that rescales on a real distance metric (look up) + """ + numerator = tf.cast(tf.einsum("ij, ij -> i", point_1, point_2), tf.float32) + denominator = tf.sqrt( + tf.cast( + + # tf.einsum("ij, ij, ij, ij -> i", point_1, point_1, point_2, point_2) + + tf.einsum("ij, ij -> i", point_1, point_1) + * tf.einsum("ij, ij -> i", point_2, point_2), + tf.float32, + ) + ) + return 1 - abs(tf.divide(numerator, denominator)) + + +class AngleSim(SimilarityMeasures): + """ + Angle between two representations normalized by pi + """ + + def calculate(self, point_1, point_2): + """ + Parameters + ---------- + point_1 : tf.Tensor + First point in the comparison. + point_2 : tf.Tensor + Second point in the comparison. + """ + numerator = tf.cast(tf.einsum("ij, ij -> i", point_1, point_2), tf.float32) + denominator = tf.sqrt( + tf.cast( + tf.einsum("ij, ij -> i", point_1, point_1) + * tf.einsum("ij, ij -> i", point_2, point_2), + tf.float32, + ) ) - ) + return tf.acos(abs(tf.divide(numerator, denominator)))/np.pi + + +class MSE(SimilarityMeasures): + """ + Mean square error between two representations + """ + + def calculate(self, point_1, point_2): + """ + Parameters + ---------- + point_1 : tf.Tensor + First point in the comparison. + point_2 : tf.Tensor + Second point in the comparison. + """ + + diff = point_1 - point_2 + return tf.cast(tf.einsum("ij, ij -> i", diff, diff), tf.float32) + + +class EuclideanDist(SimilarityMeasures): + """ + Compute the Euclidean distance metric between two representations + """ + + def calculate(self, point_1, point_2) -> tf.Tensor: + """ + Parameters + ---------- + point_1 : tf.Tensor + First point in the comparison. + point_2 : tf.Tensor + Second point in the comparison. 
+ """ + return tf.cast(tf.norm(point_1 - point_2, axis=1), tf.float32) + + +class MahalanobisDist(SimilarityMeasures): + """ + Compute Mahalanobis Distance metric between two representations + """ + + def calculate(self, point_1, point_2) -> tf.Tensor: + """ + Parameters + ---------- + point_1 : tf.Tensor + First point in the comparison. + point_2 : tf.Tensor + Second point in the comparison. + TODO: find the cause of the error; this does not work yet (the inverse covariance is probably needed here - confirm) + """ - return 1 - abs(tf.divide(numerator, denominator)) + covariance = tfp.stats.covariance(point_1) + covariance_half = tf.linalg.cholesky(covariance) + print(covariance_half) + point_1_maha = tf.matmul(point_1, covariance_half) + point_2_maha = tf.matmul(point_2, covariance_half) + return EuclideanDist().calculate(point_1_maha, point_2_maha)