From 59639bbe6e82d62c615a508d988176e48ff668fe Mon Sep 17 00:00:00 2001 From: d61h6k4 Date: Mon, 19 Jun 2023 15:37:01 +0200 Subject: [PATCH] Remove dependency on tensorflow-addons (#12514) Remove dependency on tensorflow-addons Copy tests for crf and metrics --- changelog/12514.improvement.md | 1 + poetry.lock | 96 ++------- pyproject.toml | 8 - rasa/utils/tensorflow/crf.py | 279 +++++++++++++++++++++++- rasa/utils/tensorflow/layers.py | 9 +- rasa/utils/tensorflow/metrics.py | 282 +++++++++++++++++++++++++ tests/utils/tensorflow/test_crf.py | 233 ++++++++++++++++++++ tests/utils/tensorflow/test_metrics.py | 205 ++++++++++++++++++ 8 files changed, 1019 insertions(+), 94 deletions(-) create mode 100644 changelog/12514.improvement.md create mode 100644 rasa/utils/tensorflow/metrics.py create mode 100644 tests/utils/tensorflow/test_crf.py create mode 100644 tests/utils/tensorflow/test_metrics.py diff --git a/changelog/12514.improvement.md b/changelog/12514.improvement.md new file mode 100644 index 000000000000..262b5161b53a --- /dev/null +++ b/changelog/12514.improvement.md @@ -0,0 +1 @@ +Remove tensorflow-addons from dependencies as it is now deprecated. \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index da0b875f2fa4..65fbe9dec30b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -588,18 +588,18 @@ numpy = ">=1.15.0" [[package]] name = "boto3" -version = "1.26.154" +version = "1.26.155" description = "The AWS SDK for Python" category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "boto3-1.26.154-py3-none-any.whl", hash = "sha256:ee2b3733f40f935da78bf76bc8e82af6e90841406e04605e3b2d765b50cad05e"}, - {file = "boto3-1.26.154.tar.gz", hash = "sha256:cf1067d101be538f399b685bbe6beb4bfed01095da8497d0c7fa8b8788a65c6b"}, + {file = "boto3-1.26.155-py3-none-any.whl", hash = "sha256:dd15823e8c0554d98c18584d9a6a0342c67611c1114ef61495934c2e560f632c"}, + {file = "boto3-1.26.155.tar.gz", hash = "sha256:2d4095e2029ce5ceccb25591f13e55aa5b8ba17794de09963654bd9ced45158f"}, ] [package.dependencies] -botocore = ">=1.29.154,<1.30.0" +botocore = ">=1.29.155,<1.30.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.6.0,<0.7.0" @@ -608,14 +608,14 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.29.154" +version = "1.29.155" description = "Low-level, data-driven core of boto 3." 
category = "main" optional = false python-versions = ">= 3.7" files = [ - {file = "botocore-1.29.154-py3-none-any.whl", hash = "sha256:b9853f72a3c93f1aa8c9a1636911cdbec3662bca2e04e4ee00437c4f8c9fa2d4"}, - {file = "botocore-1.29.154.tar.gz", hash = "sha256:a9c7da497ac5f7d4f3e932b4442e7c32cc2936f3a4658165f1528336fc429c3d"}, + {file = "botocore-1.29.155-py3-none-any.whl", hash = "sha256:32d5da68212e10c060fd484f41df4f7048fc7731ccd16fd00e37b11b6e841142"}, + {file = "botocore-1.29.155.tar.gz", hash = "sha256:7fbb7ebba5f645c9750fe557b1ea789d40017a028cdaa2c22fcbf06d4a4d3c1d"}, ] [package.dependencies] @@ -2374,14 +2374,14 @@ files = [ [[package]] name = "importlib-metadata" -version = "6.6.0" +version = "6.7.0" description = "Read metadata from Python packages" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "importlib_metadata-6.6.0-py3-none-any.whl", hash = "sha256:43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed"}, - {file = "importlib_metadata-6.6.0.tar.gz", hash = "sha256:92501cdf9cc66ebd3e612f1b4f0c0765dfa42f0fa38ffb319b6bd84dd675d705"}, + {file = "importlib_metadata-6.7.0-py3-none-any.whl", hash = "sha256:cb52082e659e97afc5dac71e79de97d8681de3aa07ff18578330904a9d18e5b5"}, + {file = "importlib_metadata-6.7.0.tar.gz", hash = "sha256:1aaf550d4f73e5d6783e7acb77aec43d49da8017410afae93822cc9cca98c4d4"}, ] [package.dependencies] @@ -2390,7 +2390,7 @@ zipp = ">=0.5" [package.extras] docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] [[package]] name = "importlib-resources" @@ -3630,14 +3630,14 @@ files = [ [[package]] name = "platformdirs" -version = "3.5.3" +version = "3.6.0" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 
category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.5.3-py3-none-any.whl", hash = "sha256:0ade98a4895e87dc51d47151f7d2ec290365a585151d97b4d8d6312ed6132fed"}, - {file = "platformdirs-3.5.3.tar.gz", hash = "sha256:e48fabd87db8f3a7df7150a4a5ea22c546ee8bc39bc2473244730d4b56d2cc4e"}, + {file = "platformdirs-3.6.0-py3-none-any.whl", hash = "sha256:ffa199e3fbab8365778c4a10e1fbf1b9cd50707de826eb304b50e57ec0cc8d38"}, + {file = "platformdirs-3.6.0.tar.gz", hash = "sha256:57e28820ca8094678b807ff529196506d7a21e17156cb1cddb3e74cebce54640"}, ] [package.extras] @@ -4152,14 +4152,14 @@ zstd = ["zstandard"] [[package]] name = "pyparsing" -version = "3.0.9" +version = "3.1.0" description = "pyparsing module - Classes and methods to define and execute parsing grammars" category = "main" optional = false python-versions = ">=3.6.8" files = [ - {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, - {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, + {file = "pyparsing-3.1.0-py3-none-any.whl", hash = "sha256:d554a96d1a7d3ddaf7183104485bc19fd80543ad6ac5bdb6426719d766fb06c1"}, + {file = "pyparsing-3.1.0.tar.gz", hash = "sha256:edb662d6fe322d6e990b1594b5feaeadf806803359e3d4d42f11e295e588f0ea"}, ] [package.extras] @@ -5810,40 +5810,6 @@ termcolor = ">=1.1.0" typing-extensions = ">=3.6.6" wrapt = ">=1.11.0,<1.15" -[[package]] -name = "tensorflow-addons" -version = "0.19.0" -description = "TensorFlow Addons." -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "tensorflow_addons-0.19.0-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:ca3764beba54c4ee4bb01a4294f8c2fef5c3814fd0f521dbe8beb4522545cb2d"}, - {file = "tensorflow_addons-0.19.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f74646fe83fd6f0d84ae5e0186c85cae3dd7e6c2329c8a5db4574c144706f39"}, - {file = "tensorflow_addons-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a46016fe9a1705043e39b7dacee3b089303ecdedbf1b12eb607aa35b7d2471e3"}, - {file = "tensorflow_addons-0.19.0-cp310-cp310-win_amd64.whl", hash = "sha256:eefbdb4e0450b93fba6b393870784dad4c91189e5551e01b268aeb5fe5b04da6"}, - {file = "tensorflow_addons-0.19.0-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:a297db1af6e682277f593411d4d28b939646c2b67b8351ef0d31a30b9531fb93"}, - {file = "tensorflow_addons-0.19.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b673fe22c4113edabdc0dc1ef919ba0f1fb024ca39a5718ec146285c400e8f"}, - {file = "tensorflow_addons-0.19.0-cp37-cp37m-win_amd64.whl", hash = "sha256:eefb6bf6d7a31d60649d6f6e99aee172ed4f5e693a079acfb264297997de21d0"}, - {file = "tensorflow_addons-0.19.0-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:d447a3f7852810b7985c890852dbcb6454f3899100d439d5eba370a78d8bd281"}, - {file = "tensorflow_addons-0.19.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:51fefd5f496ada5dafb13c446853fa1ddeb5482a0b9074af14efe0b99903816e"}, - {file = "tensorflow_addons-0.19.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101c3142149f16e81362cc1d0959686543cb69df79f38a3ea3c5205fbf57b28e"}, - {file = "tensorflow_addons-0.19.0-cp38-cp38-win_amd64.whl", hash = "sha256:c93602cf3b8a7bbe1fbf973b7b9f986892be34ba8b943923f09ae6cd79f0a241"}, - {file = "tensorflow_addons-0.19.0-cp39-cp39-macosx_10_14_x86_64.whl", hash = 
"sha256:fc058876dce711009227c47559b05295a5fb480748d6ec5c49386b1dc2c00167"}, - {file = "tensorflow_addons-0.19.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9334910bb6b599dd627e632a59f35ae9256bda2312b06929066a437076bf4789"}, - {file = "tensorflow_addons-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f25b029a917b635162b1f14df0263b2f79deadcd71daecd3161f69ccb1fbcea4"}, - {file = "tensorflow_addons-0.19.0-cp39-cp39-win_amd64.whl", hash = "sha256:b8f4c3a88b381bd28bba3189a0216749f9e799ae3dc4959651728e01ae20d738"}, -] - -[package.dependencies] -packaging = "*" -typeguard = ">=2.7" - -[package.extras] -tensorflow = ["tensorflow (>=2.9.0,<2.12.0)"] -tensorflow-cpu = ["tensorflow-cpu (>=2.9.0,<2.12.0)"] -tensorflow-gpu = ["tensorflow-gpu (>=2.9.0,<2.12.0)"] - [[package]] name = "tensorflow-cpu-aws" version = "2.12.0" @@ -6424,26 +6390,6 @@ PyJWT = ">=2.0.0,<3.0.0" pytz = "*" requests = ">=2.0.0" -[[package]] -name = "typeguard" -version = "4.0.0" -description = "Run-time type checker for Python" -category = "main" -optional = false -python-versions = ">=3.7.4" -files = [ - {file = "typeguard-4.0.0-py3-none-any.whl", hash = "sha256:c4a40af0ba8a41077221271b46d0a6d8d46045443e4d887887c69254ca861952"}, - {file = "typeguard-4.0.0.tar.gz", hash = "sha256:194fb3dbcb06ea9caf7088f3befee014de57961689f9c859ac5239b1ef61d987"}, -] - -[package.dependencies] -importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""} -typing-extensions = {version = ">=4.4.0", markers = "python_version < \"3.11\""} - -[package.extras] -doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["mypy (>=1.2.0)", "pytest (>=7)"] - [[package]] name = "typer" version = "0.7.0" @@ -6870,14 +6816,14 @@ requests-toolbelt = "*" [[package]] name = "websocket-client" -version = "1.5.3" +version = "1.6.0" description = "WebSocket client for Python with low level API options" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "websocket-client-1.5.3.tar.gz", hash = "sha256:b96f3bce3e54e3486ebe6504bc22bd4c140392bd2eb71764db29be8f2639aa65"}, - {file = "websocket_client-1.5.3-py3-none-any.whl", hash = "sha256:3566f8467cd350874c4913816355642a4942f6c1ed1e9406e3d42fae6d6c072a"}, + {file = "websocket-client-1.6.0.tar.gz", hash = "sha256:e84c7eafc66aade6d1967a51dfd219aabdf81d15b9705196e11fd81f48666b78"}, + {file = "websocket_client-1.6.0-py3-none-any.whl", hash = "sha256:72d7802608745b0a212f79b478642473bd825777d8637b6c8c421bf167790d4f"}, ] [package.extras] @@ -7215,4 +7161,4 @@ transformers = ["sentencepiece", "transformers"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.11" -content-hash = "96a65a2de5328f7b4a7517a772b6292a9b2541613bfa2a30d384718e6ed43acf" +content-hash = "9c2b5b76db4e055b464d9ba532645a4c996326cdc2f85343527d03343f7531ab" diff --git a/pyproject.toml b/pyproject.toml index 675e4e00ea28..3630622b121c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -204,14 +204,6 @@ optional = true version = "<1.10.10" optional = true -[[tool.poetry.dependencies.tensorflow-addons]] -version = ">=0.18,<0.20" -markers = "sys_platform != 'linux' or (platform_machine != 'arm64' and platform_machine != 'aarch64')" - -[[tool.poetry.dependencies.tensorflow-addons]] -version = "0.19.0" -markers = "sys_platform == 'linux' and (platform_machine == 'arm64' or platform_machine == 'aarch64')" - [tool.poetry.extras] spacy = [ "spacy",] jieba = [ "jieba",] diff --git a/rasa/utils/tensorflow/crf.py b/rasa/utils/tensorflow/crf.py index 
68a1e5bc3298..1318eedd9c3b 100644 --- a/rasa/utils/tensorflow/crf.py +++ b/rasa/utils/tensorflow/crf.py @@ -1,20 +1,17 @@ import tensorflow as tf from tensorflow import TensorShape - -from tensorflow_addons.utils.types import TensorLike -from typeguard import typechecked -from typing import Tuple, Any, List, Union +from tensorflow.types.experimental import TensorLike +from typing import Tuple, Any, List, Union, Optional # original code taken from -# https://github.com/tensorflow/addons/blob/master/tensorflow_addons/text/crf.py +# https://github.com/tensorflow/addons/blob/b8cab7fd61af4f697a1cdae4f51c37c346b9c6f0/tensorflow_addons/text/crf.py # (modified to our neeeds) class CrfDecodeForwardRnnCell(tf.keras.layers.AbstractRNNCell): """Computes the forward decoding in a linear-chain CRF.""" - @typechecked def __init__(self, transition_params: TensorLike, **kwargs: Any) -> None: """Initialize the CrfDecodeForwardRnnCell. @@ -218,3 +215,273 @@ def _multi_seq_fn() -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: return _multi_seq_fn() return tf.cond(tf.equal(tf.shape(potentials)[1], 1), _single_seq_fn, _multi_seq_fn) + + +def crf_unary_score( + tag_indices: TensorLike, sequence_lengths: TensorLike, inputs: TensorLike +) -> tf.Tensor: + """Computes the unary scores of tag sequences. + + Args: + tag_indices: A [batch_size, max_seq_len] matrix of tag indices. + sequence_lengths: A [batch_size] vector of true sequence lengths. + inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials. + Returns: + unary_scores: A [batch_size] vector of unary scores. + """ + tag_indices = tf.cast(tag_indices, dtype=tf.int32) + sequence_lengths = tf.cast(sequence_lengths, dtype=tf.int32) + + batch_size = tf.shape(inputs)[0] + max_seq_len = tf.shape(inputs)[1] + num_tags = tf.shape(inputs)[2] + + flattened_inputs = tf.reshape(inputs, [-1]) + + offsets = tf.expand_dims(tf.range(batch_size) * max_seq_len * num_tags, 1) + offsets += tf.expand_dims(tf.range(max_seq_len) * num_tags, 0) + # Use int32 or int64 based on tag_indices' dtype. + if tag_indices.dtype == tf.int64: + offsets = tf.cast(offsets, tf.int64) + flattened_tag_indices = tf.reshape(offsets + tag_indices, [-1]) + + unary_scores = tf.reshape( + tf.gather(flattened_inputs, flattened_tag_indices), [batch_size, max_seq_len] + ) + + masks = tf.sequence_mask( + sequence_lengths, maxlen=tf.shape(tag_indices)[1], dtype=unary_scores.dtype + ) + + unary_scores = tf.reduce_sum(unary_scores * masks, 1) + return unary_scores + + +def crf_binary_score( + tag_indices: TensorLike, sequence_lengths: TensorLike, transition_params: TensorLike +) -> tf.Tensor: + """Computes the binary scores of tag sequences. + + Args: + tag_indices: A [batch_size, max_seq_len] matrix of tag indices. + sequence_lengths: A [batch_size] vector of true sequence lengths. + transition_params: A [num_tags, num_tags] matrix of binary potentials. + Returns: + binary_scores: A [batch_size] vector of binary scores. + """ + tag_indices = tf.cast(tag_indices, dtype=tf.int32) + sequence_lengths = tf.cast(sequence_lengths, dtype=tf.int32) + + num_tags = tf.shape(transition_params)[0] + num_transitions = tf.shape(tag_indices)[1] - 1 + + # Truncate by one on each side of the sequence to get the start and end + # indices of each transition. + start_tag_indices = tf.slice(tag_indices, [0, 0], [-1, num_transitions]) + end_tag_indices = tf.slice(tag_indices, [0, 1], [-1, num_transitions]) + + # Encode the indices in a flattened representation. 
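+ # For example, with num_tags = 3 a transition from tag 1 to tag 2 is looked
+ # up at flattened index 1 * 3 + 2 = 5, i.e. the row-major position of
+ # transition_params[1, 2] once the matrix is reshaped below.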
+ flattened_transition_indices = start_tag_indices * num_tags + end_tag_indices + flattened_transition_params = tf.reshape(transition_params, [-1]) + + # Get the binary scores based on the flattened representation. + binary_scores = tf.gather(flattened_transition_params, flattened_transition_indices) + + masks = tf.sequence_mask( + sequence_lengths, maxlen=tf.shape(tag_indices)[1], dtype=binary_scores.dtype + ) + truncated_masks = tf.slice(masks, [0, 1], [-1, -1]) + binary_scores = tf.reduce_sum(binary_scores * truncated_masks, 1) + return binary_scores + + +def crf_sequence_score( + inputs: TensorLike, + tag_indices: TensorLike, + sequence_lengths: TensorLike, + transition_params: TensorLike, +) -> tf.Tensor: + """Computes the unnormalized score for a tag sequence. + + Args: + inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials + to use as input to the CRF layer. + tag_indices: A [batch_size, max_seq_len] matrix of tag indices for which + we compute the unnormalized score. + sequence_lengths: A [batch_size] vector of true sequence lengths. + transition_params: A [num_tags, num_tags] transition matrix. + Returns: + sequence_scores: A [batch_size] vector of unnormalized sequence scores. + """ + tag_indices = tf.cast(tag_indices, dtype=tf.int32) + sequence_lengths = tf.cast(sequence_lengths, dtype=tf.int32) + + # If max_seq_len is 1, we skip the score calculation and simply gather the + # unary potentials of the single tag. + def _single_seq_fn() -> TensorLike: + batch_size = tf.shape(inputs, out_type=tf.int32)[0] + batch_inds = tf.reshape(tf.range(batch_size), [-1, 1]) + indices = tf.concat([batch_inds, tf.zeros_like(batch_inds)], axis=1) + + tag_inds = tf.gather_nd(tag_indices, indices) + tag_inds = tf.reshape(tag_inds, [-1, 1]) + indices = tf.concat([indices, tag_inds], axis=1) + + sequence_scores = tf.gather_nd(inputs, indices) + + sequence_scores = tf.where( + tf.less_equal(sequence_lengths, 0), + tf.zeros_like(sequence_scores), + sequence_scores, + ) + return sequence_scores + + def _multi_seq_fn() -> TensorLike: + # Compute the scores of the given tag sequence. + unary_scores = crf_unary_score(tag_indices, sequence_lengths, inputs) + binary_scores = crf_binary_score( + tag_indices, sequence_lengths, transition_params + ) + sequence_scores = unary_scores + binary_scores + return sequence_scores + + return tf.cond(tf.equal(tf.shape(inputs)[1], 1), _single_seq_fn, _multi_seq_fn) + + +def crf_forward( + inputs: TensorLike, + state: TensorLike, + transition_params: TensorLike, + sequence_lengths: TensorLike, +) -> tf.Tensor: + """Computes the alpha values in a linear-chain CRF. + + See http://www.cs.columbia.edu/~mcollins/fb.pdf for reference. + + Args: + inputs: A [batch_size, num_tags] matrix of unary potentials. + state: A [batch_size, num_tags] matrix containing the previous alpha + values. + transition_params: A [num_tags, num_tags] matrix of binary potentials. + This matrix is expanded into a [1, num_tags, num_tags] in preparation + for the broadcast summation occurring within the cell. + sequence_lengths: A [batch_size] vector of true sequence lengths. + + Returns: + new_alphas: A [batch_size, num_tags] matrix containing the + new alpha values. 
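+
+ The recursion evaluated by the scan below is, in log space,
+ alpha_t(j) = inputs_t(j) + logsumexp_i(alpha_{t-1}(i) + transition_params[i, j]),
+ the standard forward-algorithm update for a linear-chain CRF.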
+ """ + sequence_lengths = tf.cast(sequence_lengths, dtype=tf.int32) + + last_index = tf.maximum( + tf.constant(0, dtype=sequence_lengths.dtype), sequence_lengths - 1 + ) + inputs = tf.transpose(inputs, [1, 0, 2]) + transition_params = tf.expand_dims(transition_params, 0) + + def _scan_fn(_state: TensorLike, _inputs: TensorLike) -> TensorLike: + _state = tf.expand_dims(_state, 2) + transition_scores = _state + transition_params + new_alphas = _inputs + tf.reduce_logsumexp(transition_scores, [1]) + return new_alphas + + all_alphas = tf.transpose(tf.scan(_scan_fn, inputs, state), [1, 0, 2]) + # add first state for sequences of length 1 + all_alphas = tf.concat([tf.expand_dims(state, 1), all_alphas], 1) + + idxs = tf.stack([tf.range(tf.shape(last_index)[0]), last_index], axis=1) + return tf.gather_nd(all_alphas, idxs) + + +def crf_log_norm( + inputs: TensorLike, sequence_lengths: TensorLike, transition_params: TensorLike +) -> tf.Tensor: + """Computes the normalization for a CRF. + + Args: + inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials + to use as input to the CRF layer. + sequence_lengths: A [batch_size] vector of true sequence lengths. + transition_params: A [num_tags, num_tags] transition matrix. + Returns: + log_norm: A [batch_size] vector of normalizers for a CRF. + """ + sequence_lengths = tf.cast(sequence_lengths, dtype=tf.int32) + # Split up the first and rest of the inputs in preparation for the forward + # algorithm. + first_input = tf.slice(inputs, [0, 0, 0], [-1, 1, -1]) + first_input = tf.squeeze(first_input, [1]) + + # If max_seq_len is 1, we skip the algorithm and simply reduce_logsumexp + # over the "initial state" (the unary potentials). + def _single_seq_fn() -> TensorLike: + log_norm = tf.reduce_logsumexp(first_input, [1]) + # Mask `log_norm` of the sequences with length <= zero. + log_norm = tf.where( + tf.less_equal(sequence_lengths, 0), tf.zeros_like(log_norm), log_norm + ) + return log_norm + + def _multi_seq_fn() -> TensorLike: + """Forward computation of alpha values.""" + rest_of_input = tf.slice(inputs, [0, 1, 0], [-1, -1, -1]) + # Compute the alpha values in the forward algorithm in order to get the + # partition function. + + alphas = crf_forward( + rest_of_input, first_input, transition_params, sequence_lengths + ) + log_norm = tf.reduce_logsumexp(alphas, [1]) + # Mask `log_norm` of the sequences with length <= zero. + log_norm = tf.where( + tf.less_equal(sequence_lengths, 0), tf.zeros_like(log_norm), log_norm + ) + return log_norm + + return tf.cond(tf.equal(tf.shape(inputs)[1], 1), _single_seq_fn, _multi_seq_fn) + + +def crf_log_likelihood( + inputs: TensorLike, + tag_indices: TensorLike, + sequence_lengths: TensorLike, + transition_params: Optional[TensorLike] = None, +) -> Tuple[tf.Tensor, tf.Tensor]: + """Computes the log-likelihood of tag sequences in a CRF. + + Args: + inputs: A [batch_size, max_seq_len, num_tags] tensor of unary potentials + to use as input to the CRF layer. + tag_indices: A [batch_size, max_seq_len] matrix of tag indices for which + we compute the log-likelihood. + sequence_lengths: A [batch_size] vector of true sequence lengths. + transition_params: A [num_tags, num_tags] transition matrix, + if available. + Returns: + log_likelihood: A [batch_size] `Tensor` containing the log-likelihood of + each example, given the sequence of tag indices. + transition_params: A [num_tags, num_tags] transition matrix. This is + either provided by the caller or created in this function. 
+ """ + inputs = tf.convert_to_tensor(inputs) + + num_tags = inputs.shape[2] + + # cast type to handle different types + tag_indices = tf.cast(tag_indices, dtype=tf.int32) + sequence_lengths = tf.cast(sequence_lengths, dtype=tf.int32) + + if transition_params is None: + initializer = tf.keras.initializers.GlorotUniform() + transition_params = tf.Variable( + initializer([num_tags, num_tags]), "transitions" + ) + transition_params = tf.cast(transition_params, inputs.dtype) + sequence_scores = crf_sequence_score( + inputs, tag_indices, sequence_lengths, transition_params + ) + log_norm = crf_log_norm(inputs, sequence_lengths, transition_params) + + # Normalize the scores to get the log-likelihood per example. + log_likelihood = sequence_scores - log_norm + return log_likelihood, transition_params diff --git a/rasa/utils/tensorflow/layers.py b/rasa/utils/tensorflow/layers.py index 78501aebe4dc..6ba29ec2a32f 100644 --- a/rasa/utils/tensorflow/layers.py +++ b/rasa/utils/tensorflow/layers.py @@ -1,7 +1,6 @@ import logging from typing import List, Optional, Text, Tuple, Callable, Union, Any import tensorflow as tf -import tensorflow_addons as tfa # TODO: The following is not (yet) available via tf.keras from keras.utils.control_flow_util import smart_cond @@ -21,13 +20,13 @@ from rasa.shared.nlu.constants import FEATURE_TYPE_SENTENCE, FEATURE_TYPE_SEQUENCE from rasa.shared.nlu.constants import TEXT, INTENT, ACTION_NAME, ACTION_TEXT +from rasa.utils.tensorflow.metrics import F1Score from rasa.utils.tensorflow.exceptions import TFLayerConfigException import rasa.utils.tensorflow.layers_utils as layers_utils +from rasa.utils.tensorflow.crf import crf_log_likelihood logger = logging.getLogger(__name__) -# https://github.com/tensorflow/addons#gpu-and-cpu-custom-ops-1 -tfa.options.TF_ADDONS_PY_OPS = True POSSIBLE_ATTRIBUTES = [ TEXT, @@ -590,7 +589,7 @@ def __init__( self.num_tags = num_tags self.scale_loss = scale_loss self.transition_regularizer = tf.keras.regularizers.l2(reg_lambda) - self.f1_score_metric = tfa.metrics.F1Score( + self.f1_score_metric = F1Score( num_classes=num_tags - 1, # `0` prediction is not a prediction average="micro", ) @@ -653,7 +652,7 @@ def loss( given the sequence of tag indices. """ - log_likelihood, _ = tfa.text.crf.crf_log_likelihood( + log_likelihood, _ = crf_log_likelihood( logits, tag_indices, sequence_lengths, self.transition_params ) loss = -log_likelihood diff --git a/rasa/utils/tensorflow/metrics.py b/rasa/utils/tensorflow/metrics.py new file mode 100644 index 000000000000..7face21ff2b2 --- /dev/null +++ b/rasa/utils/tensorflow/metrics.py @@ -0,0 +1,282 @@ +import tensorflow as tf +from tensorflow.keras import backend as K +from tensorflow.types.experimental import TensorLike +from typing import Any, Dict, Optional + + +# original code taken from +# https://github.com/tensorflow/addons/blob/f30df4322b5580b3e5946530a60f7126035dd73b/tensorflow_addons/metrics/f_scores.py +# (modified to our neeeds) + + +class FBetaScore(tf.keras.metrics.Metric): + r"""Computes F-Beta score. + + It is the weighted harmonic mean of precision + and recall. Output range is `[0, 1]`. Works for + both multi-class and multi-label classification. + + $$ + F_{\beta} = (1 + \beta^2) * \frac{\textrm{precision} * \textrm{recall}} + {(\beta^2 \cdot \textrm{precision}) + \textrm{recall}} + $$ + + Args: + num_classes: Number of unique classes in the dataset. + average: Type of averaging to be performed on data. + Acceptable values are `None`, `micro`, `macro` and + `weighted`. Default value is None. 
+ beta: Determines the weight of precision and recall + in harmonic mean. Determines the weight given to the + precision and recall. Default value is 1. + threshold: Elements of `y_pred` greater than threshold are + converted to be 1, and the rest 0. If threshold is + None, the argmax is converted to 1, and the rest 0. + name: (Optional) String name of the metric instance. + dtype: (Optional) Data type of the metric result. + + Returns: + F-Beta Score: float. + + Raises: + ValueError: If the `average` has values other than + `[None, 'micro', 'macro', 'weighted']`. + + ValueError: If the `beta` value is less than or equal + to 0. + + `average` parameter behavior: + + None: Scores for each class are returned. + + micro: True positivies, false positives and + false negatives are computed globally. + + macro: True positivies, false positives and + false negatives are computed for each class + and their unweighted mean is returned. + + weighted: Metrics are computed for each class + and returns the mean weighted by the + number of true instances in each class. + + Usage: + + >>> metric = tfa.metrics.FBetaScore(num_classes=3, beta=2.0, threshold=0.5) + >>> y_true = np.array([[1, 1, 1], + ... [1, 0, 0], + ... [1, 1, 0]], np.int32) + >>> y_pred = np.array([[0.2, 0.6, 0.7], + ... [0.2, 0.6, 0.6], + ... [0.6, 0.8, 0.0]], np.float32) + >>> metric.update_state(y_true, y_pred) + >>> result = metric.result() + >>> result.numpy() + array([0.3846154 , 0.90909094, 0.8333334 ], dtype=float32) + """ + + def __init__( + self, + num_classes: TensorLike, + average: Optional[str] = None, + beta: TensorLike = 1.0, + threshold: Optional[TensorLike] = None, + name: str = "fbeta_score", + dtype: Any = None, + **kwargs: Any, + ) -> None: + super().__init__(name=name, dtype=dtype) + + if average not in (None, "micro", "macro", "weighted"): + raise ValueError( + "Unknown average type. 
Acceptable values " + "are: [None, 'micro', 'macro', 'weighted']" + ) + + if not isinstance(beta, float): + raise TypeError("The value of beta should be a python float") + + if beta <= 0.0: + raise ValueError("beta value should be greater than zero") + + if threshold is not None: + if not isinstance(threshold, float): + raise TypeError("The value of threshold should be a python float") + if threshold > 1.0 or threshold <= 0.0: + raise ValueError("threshold should be between 0 and 1") + + self.num_classes = num_classes + self.average = average + self.beta = beta + self.threshold = threshold + self.axis = None + self.init_shape = [] + + if self.average != "micro": + self.axis = 0 + self.init_shape = [self.num_classes] + + def _zero_wt_init(name: Any) -> Any: + return self.add_weight( + name, shape=self.init_shape, initializer="zeros", dtype=self.dtype + ) + + self.true_positives = _zero_wt_init("true_positives") + self.false_positives = _zero_wt_init("false_positives") + self.false_negatives = _zero_wt_init("false_negatives") + self.weights_intermediate = _zero_wt_init("weights_intermediate") + + def update_state( + self, + y_true: TensorLike, + y_pred: TensorLike, + sample_weight: Optional[TensorLike] = None, + ) -> None: + if self.threshold is None: + threshold = tf.reduce_max(y_pred, axis=-1, keepdims=True) + # make sure [0, 0, 0] doesn't become [1, 1, 1] + # Use abs(x) > eps, instead of x != 0 to check for zero + y_pred = tf.logical_and(y_pred >= threshold, tf.abs(y_pred) > 1e-12) + else: + y_pred = y_pred > self.threshold + + y_true = tf.cast(y_true, self.dtype) + y_pred = tf.cast(y_pred, self.dtype) + + def _weighted_sum( + val: TensorLike, sample_weight: Optional[TensorLike] + ) -> TensorLike: + if sample_weight is not None: + val = tf.math.multiply(val, tf.expand_dims(sample_weight, 1)) + return tf.reduce_sum(val, axis=self.axis) + + self.true_positives.assign_add(_weighted_sum(y_pred * y_true, sample_weight)) + self.false_positives.assign_add( + _weighted_sum(y_pred * (1 - y_true), sample_weight) + ) + self.false_negatives.assign_add( + _weighted_sum((1 - y_pred) * y_true, sample_weight) + ) + self.weights_intermediate.assign_add(_weighted_sum(y_true, sample_weight)) + + def result(self) -> TensorLike: + precision = tf.math.divide_no_nan( + self.true_positives, self.true_positives + self.false_positives + ) + recall = tf.math.divide_no_nan( + self.true_positives, self.true_positives + self.false_negatives + ) + + mul_value = precision * recall + add_value = (tf.math.square(self.beta) * precision) + recall + mean = tf.math.divide_no_nan(mul_value, add_value) + f1_score = mean * (1 + tf.math.square(self.beta)) + + if self.average == "weighted": + weights = tf.math.divide_no_nan( + self.weights_intermediate, tf.reduce_sum(self.weights_intermediate) + ) + f1_score = tf.reduce_sum(f1_score * weights) + + elif self.average is not None: # [micro, macro] + f1_score = tf.reduce_mean(f1_score) + + return f1_score + + def get_config(self) -> Dict[str, Any]: + """Returns the serializable config of the metric.""" + + config = { + "num_classes": self.num_classes, + "average": self.average, + "beta": self.beta, + "threshold": self.threshold, + } + + base_config = super().get_config() + return {**base_config, **config} + + def reset_state(self) -> None: + reset_value = tf.zeros(self.init_shape, dtype=self.dtype) + K.batch_set_value([(v, reset_value) for v in self.variables]) + + def reset_states(self) -> None: + # Backwards compatibility alias of `reset_state`. 
New classes should + # only implement `reset_state`. + # Required in Tensorflow < 2.5.0 + return self.reset_state() + + +class F1Score(FBetaScore): + r"""Computes F-1 Score. + + It is the harmonic mean of precision and recall. + Output range is `[0, 1]`. Works for both multi-class + and multi-label classification. + + $$ + F_1 = 2 \cdot \frac{\textrm{precision} \cdot \textrm{recall}}{\textrm{precision} + + \textrm{recall}} + $$ + + Args: + num_classes: Number of unique classes in the dataset. + average: Type of averaging to be performed on data. + Acceptable values are `None`, `micro`, `macro` + and `weighted`. Default value is None. + threshold: Elements of `y_pred` above threshold are + considered to be 1, and the rest 0. If threshold is + None, the argmax is converted to 1, and the rest 0. + name: (Optional) String name of the metric instance. + dtype: (Optional) Data type of the metric result. + + Returns: + F-1 Score: float. + + Raises: + ValueError: If the `average` has values other than + [None, 'micro', 'macro', 'weighted']. + + `average` parameter behavior: + None: Scores for each class are returned + + micro: True positivies, false positives and + false negatives are computed globally. + + macro: True positivies, false positives and + false negatives are computed for each class + and their unweighted mean is returned. + + weighted: Metrics are computed for each class + and returns the mean weighted by the + number of true instances in each class. + + Usage: + + >>> metric = tfa.metrics.F1Score(num_classes=3, threshold=0.5) + >>> y_true = np.array([[1, 1, 1], + ... [1, 0, 0], + ... [1, 1, 0]], np.int32) + >>> y_pred = np.array([[0.2, 0.6, 0.7], + ... [0.2, 0.6, 0.6], + ... [0.6, 0.8, 0.0]], np.float32) + >>> metric.update_state(y_true, y_pred) + >>> result = metric.result() + >>> result.numpy() + array([0.5 , 0.8 , 0.6666667], dtype=float32) + """ + + def __init__( + self, + num_classes: TensorLike, + average: str = None, + threshold: Optional[TensorLike] = None, + name: str = "f1_score", + dtype: Any = None, + ): + super().__init__(num_classes, average, 1.0, threshold, name=name, dtype=dtype) + + def get_config(self) -> Dict[str, Any]: + base_config = super().get_config() + del base_config["beta"] + return base_config diff --git a/tests/utils/tensorflow/test_crf.py b/tests/utils/tensorflow/test_crf.py new file mode 100644 index 000000000000..593327f2bc5d --- /dev/null +++ b/tests/utils/tensorflow/test_crf.py @@ -0,0 +1,233 @@ +"""Tests for CRF.""" + +# original code taken from +# https://github.com/tensorflow/addons/blob/master/tensorflow_addons/text/tests/crf_test.py +# (modified to our neeeds) + +import itertools + +import pytest +import numpy as np +import tensorflow as tf + +from rasa.utils.tensorflow.crf import ( + crf_sequence_score, + crf_unary_score, + crf_binary_score, + crf_log_norm, + crf_log_likelihood, +) + + +def calculate_sequence_score(inputs, transition_params, tag_indices, sequence_lengths): + expected_unary_score = sum( + inputs[i][tag_indices[i]] for i in range(sequence_lengths) + ) + expected_binary_score = sum( + transition_params[tag_indices[i], tag_indices[i + 1]] + for i in range(sequence_lengths - 1) + ) + return expected_unary_score + expected_binary_score + + +def brute_force_decode(sequence_lengths, inputs, transition_params): + num_words = inputs.shape[0] + num_tags = inputs.shape[1] + + all_sequence_scores = [] + all_sequences = [] + + tag_indices_iterator = itertools.product(range(num_tags), repeat=sequence_lengths) + inputs = 
tf.expand_dims(inputs, 0) + sequence_lengths = tf.expand_dims(sequence_lengths, 0) + transition_params = tf.constant(transition_params) + + # Compare the dynamic program with brute force computation. + for tag_indices in tag_indices_iterator: + tag_indices = list(tag_indices) + tag_indices.extend([0] * (num_words - sequence_lengths)) + all_sequences.append(tag_indices) + sequence_score = crf_sequence_score( + inputs=inputs, + tag_indices=tf.expand_dims(tag_indices, 0), + sequence_lengths=sequence_lengths, + transition_params=transition_params, + ) + sequence_score = tf.squeeze(sequence_score, [0]) + all_sequence_scores.append(sequence_score) + + expected_max_sequence_index = np.argmax(all_sequence_scores) + expected_max_sequence = all_sequences[expected_max_sequence_index] + expected_max_score = all_sequence_scores[expected_max_sequence_index] + return expected_max_sequence, expected_max_score + + +@pytest.mark.parametrize("dtype", [np.float16, np.float32]) +def test_crf_sequence_score(dtype): + transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype) + # Test both the length-1 and regular cases. + sequence_lengths_list = [ + np.array(3, dtype=np.int32), + np.array(1, dtype=np.int32), + ] + inputs_list = [ + np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype), + np.array([[4, 5, -3]], dtype=dtype), + ] + tag_indices_list = [ + np.array([1, 2, 1, 0], dtype=np.int32), + np.array([1], dtype=np.int32), + ] + for sequence_lengths, inputs, tag_indices in zip( + sequence_lengths_list, inputs_list, tag_indices_list + ): + sequence_score = crf_sequence_score( + inputs=tf.expand_dims(inputs, 0), + tag_indices=tf.expand_dims(tag_indices, 0), + sequence_lengths=tf.expand_dims(sequence_lengths, 0), + transition_params=tf.constant(transition_params), + ) + sequence_score = tf.squeeze(sequence_score, [0]) + + expected_sequence_score = calculate_sequence_score( + inputs, transition_params, tag_indices, sequence_lengths + ) + np.testing.assert_allclose(sequence_score, expected_sequence_score) + + +@pytest.mark.parametrize("dtype", [np.float16, np.float32]) +def test_crf_unary_score(dtype): + inputs = np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype) + for dtype in (np.int32, np.int64): + tag_indices = np.array([1, 2, 1, 0], dtype=dtype) + sequence_lengths = np.array(3, dtype=np.int32) + unary_score = crf_unary_score( + tag_indices=tf.expand_dims(tag_indices, 0), + sequence_lengths=tf.expand_dims(sequence_lengths, 0), + inputs=tf.expand_dims(inputs, 0), + ) + unary_score = tf.squeeze(unary_score, [0]) + expected_unary_score = sum( + inputs[i][tag_indices[i]] for i in range(sequence_lengths) + ) + np.testing.assert_allclose(unary_score, expected_unary_score) + + +@pytest.mark.parametrize("dtype", [np.float16, np.float32]) +def test_crf_binary_score(dtype): + tag_indices = np.array([1, 2, 1, 0], dtype=np.int32) + transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype) + sequence_lengths = np.array(3, dtype=np.int32) + binary_score = crf_binary_score( + tag_indices=tf.expand_dims(tag_indices, 0), + sequence_lengths=tf.expand_dims(sequence_lengths, 0), + transition_params=tf.constant(transition_params), + ) + binary_score = tf.squeeze(binary_score, [0]) + expected_binary_score = sum( + transition_params[tag_indices[i], tag_indices[i + 1]] + for i in range(sequence_lengths - 1) + ) + np.testing.assert_allclose(binary_score, expected_binary_score) + + +@pytest.mark.parametrize("dtype", [np.float16, np.float32]) +def 
test_crf_log_norm(dtype): + transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype) + # Test both the length-1 and regular cases. + sequence_lengths_list = [ + np.array(3, dtype=np.int32), + np.array(1, dtype=np.int64), + ] + inputs_list = [ + np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype), + np.array([[3, -1, 3]], dtype=dtype), + ] + tag_indices_list = [ + np.array([1, 2, 1, 0], dtype=np.int32), + np.array([2], dtype=np.int32), + ] + + for sequence_lengths, inputs, tag_indices in zip( + sequence_lengths_list, inputs_list, tag_indices_list + ): + num_words = inputs.shape[0] + num_tags = inputs.shape[1] + all_sequence_scores = [] + + # Compare the dynamic program with brute force computation. + for tag_indices in itertools.product(range(num_tags), repeat=sequence_lengths): + tag_indices = list(tag_indices) + tag_indices.extend([0] * (num_words - sequence_lengths)) + all_sequence_scores.append( + crf_sequence_score( + inputs=tf.expand_dims(inputs, 0), + tag_indices=tf.expand_dims(tag_indices, 0), + sequence_lengths=tf.expand_dims(sequence_lengths, 0), + transition_params=tf.constant(transition_params), + ) + ) + + brute_force_log_norm = tf.reduce_logsumexp(all_sequence_scores) + log_norm = crf_log_norm( + inputs=tf.expand_dims(inputs, 0), + sequence_lengths=tf.expand_dims(sequence_lengths, 0), + transition_params=tf.constant(transition_params), + ) + log_norm = tf.squeeze(log_norm, [0]) + + np.testing.assert_allclose(log_norm, brute_force_log_norm) + + +@pytest.mark.parametrize("dtype", [np.float16, np.float32]) +def test_crf_log_norm_zero_seq_length(dtype): + """Test `crf_log_norm` when `sequence_lengths` contains one or more + zeros.""" + inputs = tf.constant(np.ones([2, 10, 5], dtype=dtype)) + transition_params = tf.constant(np.ones([5, 5], dtype=dtype)) + sequence_lengths = tf.constant(np.zeros([2], dtype=np.int32)) + expected_log_norm = np.zeros([2], dtype=dtype) + log_norm = crf_log_norm(inputs, sequence_lengths, transition_params) + np.testing.assert_allclose(log_norm, expected_log_norm) + + +@pytest.mark.parametrize("dtype", [np.float32]) +def test_crf_log_likelihood(dtype): + inputs = np.array([[4, 5, -3], [3, -1, 3], [-1, 2, 1], [0, 0, 0]], dtype=dtype) + transition_params = np.array([[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=dtype) + sequence_lengths = np.array(3, dtype=np.int32) + + num_words = inputs.shape[0] + num_tags = inputs.shape[1] + all_sequence_log_likelihoods = [] + + # Make sure all probabilities sum to 1. 
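+ # Since exp(log_likelihood) summed over every possible tag sequence must be 1,
+ # the logsumexp of all sequence log-likelihoods below should be ~0.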
+ for tag_indices in itertools.product(range(num_tags), repeat=sequence_lengths): + tag_indices = list(tag_indices) + tag_indices.extend([0] * (num_words - sequence_lengths)) + sequence_log_likelihood, _ = crf_log_likelihood( + inputs=tf.expand_dims(inputs, 0), + tag_indices=tf.expand_dims(tag_indices, 0), + sequence_lengths=tf.expand_dims(sequence_lengths, 0), + transition_params=tf.constant(transition_params), + ) + all_sequence_log_likelihoods.append(sequence_log_likelihood) + total_log_likelihood = tf.reduce_logsumexp(all_sequence_log_likelihoods) + np.testing.assert_allclose(total_log_likelihood, 0.0, rtol=1e-6, atol=1e-6) + + # check if `transition_params = None` raises an error + crf_log_likelihood( + inputs=tf.expand_dims(inputs, 0), + tag_indices=tf.expand_dims(tag_indices, 0), + sequence_lengths=tf.expand_dims(sequence_lengths, 0), + ) + + +def test_different_dtype(): + inputs = np.ones([16, 20, 5], dtype=np.float32) + tags = tf.convert_to_tensor(np.ones([16, 20], dtype=np.int64)) + seq_lens = np.ones([16], dtype=np.int64) * 20 + + loss, _ = crf_log_likelihood( + inputs=inputs, tag_indices=tags, sequence_lengths=seq_lens + ) diff --git a/tests/utils/tensorflow/test_metrics.py b/tests/utils/tensorflow/test_metrics.py new file mode 100644 index 000000000000..9d6ffb5c2e4a --- /dev/null +++ b/tests/utils/tensorflow/test_metrics.py @@ -0,0 +1,205 @@ +"""Tests F beta metrics.""" + +# original code taken from +# https://github.com/tensorflow/addons/blob/master/tensorflow_addons/metrics/tests/f_scores_test.py +# (modified to our neeeds) + +import numpy as np +import pytest +import tensorflow as tf +from rasa.utils.tensorflow.metrics import FBetaScore, F1Score + + +def test_config_fbeta(): + fbeta_obj = FBetaScore(num_classes=3, beta=0.5, threshold=0.3, average=None) + assert fbeta_obj.beta == 0.5 + assert fbeta_obj.average is None + assert fbeta_obj.threshold == 0.3 + assert fbeta_obj.num_classes == 3 + assert fbeta_obj.dtype == tf.float32 + + # Check save and restore config + fbeta_obj2 = FBetaScore.from_config(fbeta_obj.get_config()) + assert fbeta_obj2.beta == 0.5 + assert fbeta_obj2.average is None + assert fbeta_obj2.threshold == 0.3 + assert fbeta_obj2.num_classes == 3 + assert fbeta_obj2.dtype == tf.float32 + + +def _test_tf(avg, beta, act, pred, sample_weights, threshold): + act = tf.constant(act, tf.float32) + pred = tf.constant(pred, tf.float32) + + fbeta = FBetaScore(3, avg, beta, threshold) + fbeta.update_state(act, pred, sample_weights) + return fbeta.result().numpy() + + +def _test_fbeta_score(actuals, preds, sample_weights, avg, beta_val, result, threshold): + tf_score = _test_tf(avg, beta_val, actuals, preds, sample_weights, threshold) + np.testing.assert_allclose(tf_score, result, atol=1e-7, rtol=1e-6) + + +def test_fbeta_perfect_score(): + preds = [[0.7, 0.7, 0.7], [1, 0, 0], [0.9, 0.8, 0]] + actuals = [[1, 1, 1], [1, 0, 0], [1, 1, 0]] + + for avg_val in ["micro", "macro", "weighted"]: + for beta in [0.5, 1.0, 2.0]: + _test_fbeta_score(actuals, preds, None, avg_val, beta, 1.0, 0.66) + + +def test_fbeta_worst_score(): + preds = [[0.7, 0.7, 0.7], [1, 0, 0], [0.9, 0.8, 0]] + actuals = [[0, 0, 0], [0, 1, 0], [0, 0, 1]] + + for avg_val in ["micro", "macro", "weighted"]: + for beta in [0.5, 1.0, 2.0]: + _test_fbeta_score(actuals, preds, None, avg_val, beta, 0.0, 0.66) + + +@pytest.mark.parametrize( + "avg_val, beta, result", + [ + (None, 0.5, [0.71428573, 0.5, 0.833334]), + (None, 1.0, [0.8, 0.5, 0.6666667]), + (None, 2.0, [0.9090904, 0.5, 0.555556]), + ("micro", 0.5, 
0.6666667), + ("micro", 1.0, 0.6666667), + ("micro", 2.0, 0.6666667), + ("macro", 0.5, 0.6825397), + ("macro", 1.0, 0.6555555), + ("macro", 2.0, 0.6548822), + ("weighted", 0.5, 0.6825397), + ("weighted", 1.0, 0.6555555), + ("weighted", 2.0, 0.6548822), + ], +) +def test_fbeta_random_score(avg_val, beta, result): + preds = [[0.7, 0.7, 0.7], [1, 0, 0], [0.9, 0.8, 0]] + actuals = [[0, 0, 1], [1, 1, 0], [1, 1, 1]] + _test_fbeta_score(actuals, preds, None, avg_val, beta, result, 0.66) + + +@pytest.mark.parametrize( + "avg_val, beta, result", + [ + (None, 0.5, [0.9090904, 0.555556, 1.0]), + (None, 1.0, [0.8, 0.6666667, 1.0]), + (None, 2.0, [0.71428573, 0.833334, 1.0]), + ("micro", 0.5, 0.833334), + ("micro", 1.0, 0.833334), + ("micro", 2.0, 0.833334), + ("macro", 0.5, 0.821549), + ("macro", 1.0, 0.822222), + ("macro", 2.0, 0.849206), + ("weighted", 0.5, 0.880471), + ("weighted", 1.0, 0.844445), + ("weighted", 2.0, 0.829365), + ], +) +def test_fbeta_random_score_none(avg_val, beta, result): + preds = [ + [0.9, 0.1, 0], + [0.2, 0.6, 0.2], + [0, 0, 1], + [0.4, 0.3, 0.3], + [0, 0.9, 0.1], + [0, 0, 1], + ] + actuals = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1]] + _test_fbeta_score(actuals, preds, None, avg_val, beta, result, None) + + +@pytest.mark.parametrize( + "avg_val, beta, sample_weights, result", + [ + (None, 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.909091, 0.555556, 1.0]), + (None, 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], [1.0, 0.0, 1.0]), + (None, 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], [0.9375, 0.714286, 1.0]), + (None, 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.8, 0.666667, 1.0]), + (None, 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], [1.0, 0.0, 1.0]), + (None, 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], [0.857143, 0.8, 1.0]), + (None, 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], [0.714286, 0.833333, 1.0]), + (None, 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], [1.0, 0.0, 1.0]), + (None, 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], [0.789474, 0.909091, 1.0]), + ("micro", 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.833333), + ("micro", 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("micro", 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.9), + ("micro", 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.833333), + ("micro", 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("micro", 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.9), + ("micro", 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.833333), + ("micro", 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("micro", 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.9), + ("macro", 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.821549), + ("macro", 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 0.666667), + ("macro", 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.883929), + ("macro", 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.822222), + ("macro", 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 0.666667), + ("macro", 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.885714), + ("macro", 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.849206), + ("macro", 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 0.666667), + ("macro", 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.899522), + ("weighted", 0.5, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.880471), + ("weighted", 0.5, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("weighted", 0.5, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.917857), + ("weighted", 1.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.844444), + ("weighted", 1.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("weighted", 1.0, [0.5, 1.0, 1.0, 1.0, 0.5, 1.0], 0.902857), + ("weighted", 2.0, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], 0.829365), + ("weighted", 2.0, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0], 1.0), + ("weighted", 2.0, [0.5, 1.0, 1.0, 1.0, 0.5, 
1.0], 0.897608), + ], +) +def test_fbeta_weighted_random_score_none(avg_val, beta, sample_weights, result): + preds = [ + [0.9, 0.1, 0], + [0.2, 0.6, 0.2], + [0, 0, 1], + [0.4, 0.3, 0.3], + [0, 0.9, 0.1], + [0, 0, 1], + ] + actuals = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1]] + _test_fbeta_score(actuals, preds, sample_weights, avg_val, beta, result, None) + + +def test_eq(): + f1 = F1Score(3) + fbeta = FBetaScore(3, beta=1.0) + + preds = [ + [0.9, 0.1, 0], + [0.2, 0.6, 0.2], + [0, 0, 1], + [0.4, 0.3, 0.3], + [0, 0.9, 0.1], + [0, 0, 1], + ] + actuals = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1]] + + fbeta.update_state(actuals, preds) + f1.update_state(actuals, preds) + np.testing.assert_allclose(fbeta.result().numpy(), f1.result().numpy()) + + +def test_sample_eq(): + f1 = F1Score(3) + f1_weighted = F1Score(3) + + preds = [ + [0.9, 0.1, 0], + [0.2, 0.6, 0.2], + [0, 0, 1], + [0.4, 0.3, 0.3], + [0, 0.9, 0.1], + [0, 0, 1], + ] + actuals = [[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 0, 0], [0, 0, 1]] + sample_weights = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] + + f1.update_state(actuals, preds) + f1_weighted(actuals, preds, sample_weights) + np.testing.assert_allclose(f1.result().numpy(), f1_weighted.result().numpy())
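
A minimal usage sketch of the in-tree replacements for the removed
tensorflow-addons APIs (not part of the patch itself; the values are taken from
the F1Score docstring example and the test_different_dtype shapes above):

    import numpy as np

    from rasa.utils.tensorflow.crf import crf_log_likelihood
    from rasa.utils.tensorflow.metrics import F1Score

    # Replacement for tfa.metrics.F1Score.
    metric = F1Score(num_classes=3, threshold=0.5)
    y_true = np.array([[1, 1, 1], [1, 0, 0], [1, 1, 0]], np.int32)
    y_pred = np.array([[0.2, 0.6, 0.7], [0.2, 0.6, 0.6], [0.6, 0.8, 0.0]], np.float32)
    metric.update_state(y_true, y_pred)
    print(metric.result().numpy())  # ~[0.5, 0.8, 0.6666667]

    # Replacement for tfa.text.crf.crf_log_likelihood.
    inputs = np.ones([16, 20, 5], dtype=np.float32)  # [batch, max_seq_len, num_tags]
    tags = np.ones([16, 20], dtype=np.int64)         # [batch, max_seq_len]
    seq_lens = np.ones([16], dtype=np.int64) * 20    # [batch] true lengths
    log_likelihood, transition_params = crf_log_likelihood(inputs, tags, seq_lens)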