diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..fc9e9c3
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,41 @@
+# This workflow will install Python dependencies, run tests and lint with multiple versions of Python
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: build
+
+on: [push]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ${{ matrix.os }}  # one job per OS; a label list here would demand a single runner carrying all labels
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.9", "3.10", "3.11"]
+    defaults:
+      run:
+        # bash on every OS: the install step uses POSIX test syntax that PowerShell (Windows default) cannot run
+        shell: bash
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    - name: Lint with flake8
+      run: |
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    - name: Test with pytest
+      run: |
+        pytest
diff --git a/README.md b/README.md
index 29ba344..096ac71 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,7 @@ Preprint available at bioRxiv: https://doi.org/10.1101/2022.06.07.495081.
 # PyPEF: Pythonic Protein Engineering Framework
 [![PyPI version](https://img.shields.io/pypi/v/PyPEF?color=blue)](https://pypi.org/project/pypef/)
 [![Python version](https://img.shields.io/pypi/pyversions/PyPEF)](https://www.python.org/downloads/)
+![Build](https://github.com/Protein-Engineering-Framework/PyPEF/actions/workflows/build.yml/badge.svg)
 
 a framework written in Python 3 for performing sequence-based machine learning-assisted protein engineering to predict a protein's fitness from its sequence using different forms of sequence encoding:
 - One-hot encoding
diff --git a/pypef/__init__.py b/pypef/__init__.py
index 1fe65ba..16d8b1f 100644
--- a/pypef/__init__.py
+++ b/pypef/__init__.py
@@ -17,4 +17,4 @@
 # §Equal contribution
 
 
-__version__ = '0.3.1-alpha'
+__version__ = '0.3.2-alpha'
diff --git a/pypef/dca/gremlin_inference.py b/pypef/dca/gremlin_inference.py
index bf13576..cc2cbc3 100644
--- a/pypef/dca/gremlin_inference.py
+++ b/pypef/dca/gremlin_inference.py
@@ -79,7 +79,6 @@ def __init__(
         self.eff_cutoff = eff_cutoff
         self.opt_iter = opt_iter
         self.states = len(self.char_alphabet)
-        self.a2n = self.a2n_dict()
         self.seqs, _, _ = get_sequences_from_file(alignment)
         self.msa_ori = self.get_msa_ori()
         self.n_col_ori = self.msa_ori.shape[1]
@@ -97,7 +96,7 @@ def __init__(
         self.n_eff = np.sum(self.msa_weights)
         self.n_row = self.msa_trimmed.shape[0]
         self.n_col = self.msa_trimmed.shape[1]
-        self.v_ini, self.w_ini = self.initialize_v_w(remove_gap_entries=False)
+        self.v_ini, self.w_ini, self.aa_counts = self.initialize_v_w(remove_gap_entries=False)
         self.optimize = optimize
         if self.optimize:
             self.v_opt, self.w_opt = self.run_opt_tf()
@@ -110,27 +109,30 @@ def a2n_dict(self):
         return a2n
 
     def aa2int(self, aa):
-        """convert single aa into numerical integer value, e.g.
+        """convert single aa into numerical integer value, e.g.:
         "A" -> 0 or "-" to 21 dependent on char_alphabet"""
-        if aa in self.a2n:
-            return self.a2n[aa]
+        a2n = self.a2n_dict()
+        if aa in a2n:
+            return a2n[aa]
         else:  # for unknown characters insert Gap character
-            return self.a2n['-']
+            return a2n['-']
 
-    def str2int(self, x):
+    def seq2int(self, aa_seqs):
         """
-        convert a list of strings into list of integers
-        Example: ["ACD","EFG"] -> [[0,4,3], [6,13,7]]
+        convert a single sequence or a list of sequences into a list of integer sequences, e.g.:
+        ["ACD","EFG"] -> [[0,4,3], [6,13,7]]
         """
-        if type(x) == list:
-            x = np.array(x)
-        if x.dtype.type is np.str_:
-            if x.ndim == 0:  # single seq
-                return np.array([self.aa2int(aa) for aa in str(x)])
+        if type(aa_seqs) == str:
+            aa_seqs = np.array(aa_seqs)
+        if type(aa_seqs) == list:
+            aa_seqs = np.array(aa_seqs)
+        if aa_seqs.dtype.type is np.str_:
+            if aa_seqs.ndim == 0:  # single seq
+                return np.array([self.aa2int(aa) for aa in str(aa_seqs)])
             else:  # list of seqs
-                return np.array([[self.aa2int(aa) for aa in seq] for seq in x])
+                return np.array([[self.aa2int(aa) for aa in seq] for seq in aa_seqs])
         else:
-            return x
+            return aa_seqs
 
     @property
     def get_v_idx_w_idx(self):
@@ -150,7 +152,7 @@ def filt_gaps(self, msa_ori):
         non_gaps = np.where(np.sum(tmp.T, -1).T / msa_ori.shape[0] < self.gap_cutoff)[0]
 
         gaps = np.where(np.sum(tmp.T, -1).T / msa_ori.shape[0] >= self.gap_cutoff)[0]
-        logger.info(f'Gap positions (removed from msa):\n{gaps}')
+        logger.info(f'Gap positions (removed from MSA; 0-indexed):\n{gaps}')
         ncol_trimmed = len(non_gaps)
         v_idx = non_gaps
         w_idx = v_idx[np.stack(np.triu_indices(ncol_trimmed, 1), -1)]
@@ -362,17 +364,19 @@ def initialize_v_w(self, remove_gap_entries=True):
         """
         w_ini = np.zeros((self.n_col, self.states, self.n_col, self.states))
         onehot_cat_msa = np.eye(self.states)[self.msa_trimmed]
+        aa_counts = np.sum(onehot_cat_msa, axis=0)
         pseudo_count = 0.01 * np.log(self.n_eff)
         v_ini = np.log(np.sum(onehot_cat_msa.T * self.msa_weights, -1).T + pseudo_count)
         v_ini = v_ini - np.mean(v_ini, -1, keepdims=True)
-        # loss_score_ini = self.objective(v_ini, w_ini, flattened=False)  # * self.n_eff
+        # loss_score_ini = self.objective(v_ini, w_ini, flattened=False)
 
         if remove_gap_entries:
             no_gap_states = self.states - 1
             v_ini = v_ini[:, :no_gap_states]
             w_ini = w_ini[:, :no_gap_states, :, :no_gap_states]
+            aa_counts = aa_counts[:, :no_gap_states]
 
-        return v_ini, w_ini
+        return v_ini, w_ini, aa_counts
 
     @property
     def get_v_w_opt(self):
@@ -390,10 +394,10 @@ def get_score(self, seqs, v=None, w=None, v_idx=None, encode=False, h_wt_seq=0.0
             if self.optimize:
                 v, w = self.v_opt, self.w_opt
             else:
-                v, w = self.v_ini, self.w_ini
+                v, w, _ = self.initialize_v_w(remove_gap_entries=True)
         if v_idx is None:
             v_idx = self.v_idx
-        seqs_int = self.str2int(seqs)
+        seqs_int = self.seq2int(seqs)
         # if length of sequence != length of model use only
         # valid positions (v_idx) from the trimmed alignment
         try:
@@ -439,7 +443,7 @@ def get_score(self, seqs, v=None, w=None, v_idx=None, encode=False, h_wt_seq=0.0
         else:
             return np.sum(h, axis=-1) - h_wt_seq
 
-    def get_wt_score(self, wt_seq=None, v=None, w=None):
+    def get_wt_score(self, wt_seq=None, v=None, w=None, encode=False):
         if wt_seq is None:
             wt_seq = self.wt_seq
         if v is None or w is None:
@@ -448,7 +452,7 @@ def get_wt_score(self, wt_seq=None, v=None, w=None):
             else:
                 v, w = self.v_ini, self.w_ini
         wt_seq = np.array(wt_seq, dtype=str)
-        return self.get_score(wt_seq, v, w)
+        return self.get_score(wt_seq, v, w, encode=encode)
 
     def collect_encoded_sequences(self, seqs, v=None, w=None, v_idx=None):
         """
@@ -541,17 +545,22 @@ def plot_correlation_matrix(self, matrix_type: str = 'apc', set_diag_zero=True):
         else:
             ax.imshow(matrix, cmap='Blues')
         tick_pos = ax.get_xticks()
+        tick_pos = np.array([int(t) for t in tick_pos])
         tick_pos[-1] = matrix.shape[0]
-        tick_pos[2:] -= 1
+        if tick_pos[2] > 1:
+            tick_pos[2:] -= 1
         ax.set_xticks(tick_pos)
         ax.set_yticks(tick_pos)
         labels = [item.get_text() for item in ax.get_xticklabels()]
-        labels = [labels[0]] + [str(int(label) + 1) for label in labels[1:]]
+        try:
+            labels = [labels[0]] + [str(int(label) + 1) for label in labels[1:]]
+        except ValueError:
+            pass
         ax.set_xticklabels(labels)
         ax.set_yticklabels(labels)
         ax.set_xlim(-1, matrix.shape[0])
         ax.set_ylim(-1, matrix.shape[0])
-        plt.title(matrix_type)
+        plt.title(matrix_type.upper())
         plt.savefig(f'{matrix_type}.png', dpi=500)
         plt.close('all')
 
diff --git a/scripts/GREMLIN_numba/README.md b/scripts/GREMLIN_numba/README.md
new file mode 100644
index 0000000..e52a68b
--- /dev/null
+++ b/scripts/GREMLIN_numba/README.md
@@ -0,0 +1,15 @@
+## GREMLIN in Python using numba
+
+GREMLIN_CPP-like (L-BFGS and CG-based MSA-DCA optimization) port to Python using numba.
+
+
+GREMLIN_CPP (https://github.com/sokrypton/GREMLIN_CPP) is licensed under  
+
+----------------------------------------------------------------------------
+
+"THE BEER-WARE LICENSE" (Revision 42):
+<so@fas.harvard.edu> wrote this file.  As long as you retain this notice you
+can do whatever you want with this stuff. If we meet some day, and you think
+this stuff is worth it, you can buy me a beer in return.  Sergey Ovchinnikov
+
+----------------------------------------------------------------------------
diff --git a/scripts/GREMLIN_numba/gremlin_inference_numba.py b/scripts/GREMLIN_numba/gremlin_inference_numba.py
new file mode 100644
index 0000000..fc09c82
--- /dev/null
+++ b/scripts/GREMLIN_numba/gremlin_inference_numba.py
@@ -0,0 +1,799 @@
+# Gremlin in Python
+
+"""
+------------------------------------------------------------
+"THE BEERWARE LICENSE" (Revision 42):
+ <so@g.harvard.edu>  wrote this code. As long as you retain this
+ notice, you can do whatever you want with this stuff. If we meet
+ someday, and you think this stuff is worth it, you can buy me a
+ beer in return. --Sergey Ovchinnikov
+ ------------------------------------------------------------
+ If you use this code, please cite the following papers:
+
+ Balakrishnan, Sivaraman, Hetunandan Kamisetty, Jaime G. Carbonell,
+ Su-In Lee, and Christopher James Langmead.
+ "Learning generative models for protein fold families."
+ Proteins: Structure, Function, and Bioinformatics 79, no. 4 (2011): 1061-1078.
+
+ Kamisetty, Hetunandan, Sergey Ovchinnikov, and David Baker.
+ "Assessing the utility of coevolution-based residue–residue
+ contact predictions in a sequence-and structure-rich era."
+ Proceedings of the National Academy of Sciences (2013): 201314045.
+"""
+
+# Using numba for getting closer to C/C++ performance:
+# https://numba.pydata.org/numba-doc/latest/user/performance-tips.html
+# https://numba.pydata.org/numba-doc/latest/user/parallel.html#numba-parallel-supported
+
+import numpy as np
+from numba import njit, prange
+from typing import List
+
+
+def get_sequences_from_file(file: str) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """
+    Read a FASTA/A2M or FASTA-like (".fasl") file and extract the sequences,
+    the names ('>' lines) and, if present, the target values (';' lines).
+
+    Residue lines are concatenated until the next '>' or ';' marker line;
+    lines starting with '#' are skipped as comments.
+    See example directory for required fasta file format.
+
+    file: str
+        Path of the file in FASTA or A2M format
+
+    Returns the arrays (sequences, names_of_mutations, values); values is
+    empty when the file contains no ';' lines.
+
+    Raises SystemError when target values are given but their number
+    differs from the number of sequences.
+    """
+    sequences = []
+    values = []
+    names_of_mutations = []
+    words = ""  # residues collected for the record currently being read
+
+    with open(file, 'r') as f:
+        for line in f:
+            if line.startswith('#'):
+                continue  # comment line
+            if line.startswith('>') or line.startswith(';'):
+                # a marker line closes the sequence collected so far
+                if words != "":
+                    sequences.append(words)
+                    words = ""
+                if line.startswith('>'):
+                    names_of_mutations.append(line.split('>')[1].strip())
+                else:
+                    values.append(float(line.split(';')[1].strip()))
+            else:
+                # concatenating stripped text never raises, so no exception
+                # handling is required here
+                words += line.strip()
+    # the last record has no following marker line to close it
+    if words != "":
+        sequences.append(words)
+
+    # Target values are optional, but if given there must be one per sequence
+    if len(values) != 0 and len(sequences) != len(values):
+        raise SystemError(
+            f'Error: Number of sequences does not fit with number of target values! '
+            f'Number of sequences: {str(len(sequences))}, Number of target values: {str(len(values))}.'
+        )
+
+    return np.array(sequences), np.array(names_of_mutations), np.array(values)
+
+
+def get_seqs_from_var_name(
+        wt_seq,
+        substitutions,
+        fitness_values
+) -> tuple[list, list, list]:
+    """
+    Apply substitutions to the wild-type sequence; returns (variant names, values, sequences)
+
+    wt_seq: str
+        Wild-type sequence as string
+    substitutions: list
+        List of substitutions of a single variant of the format:
+            - Single substitution variant, e.g. variant A123C: ['A123C']
+            - Higher variants, e.g. variant A123C/D234E/F345G: ['A123C', 'D234E', 'F345G']
+            --> Full substitutions list, e.g.: [['A123C'], ['A123C', 'D234E', 'F345G']]
+    fitness_values: list
+        List of ints/floats of the variant fitness values, e.g. for two variants: [1.4, 0.8]
+    """
+    variant, values, sequences = [], [], []
+    for i, var in enumerate(substitutions):  # var are lists of (single or multiple) substitutions
+        temp = list(wt_seq)
+        name = ''
+        separation = 0
+        if var == ['WT']:
+            name = 'WT'
+        else:
+            for single_var in var:  # single entries of substitution list
+                position_index = int(str(single_var)[1:-1]) - 1
+                new_amino_acid = str(single_var)[-1]
+                temp[position_index] = new_amino_acid
+                # checking if multiple entries are inside list
+                if separation == 0:
+                    name += single_var
+                else:
+                    name += '/' + single_var
+                separation += 1
+        variant.append(name)
+        values.append(fitness_values[i])
+        sequences.append(''.join(temp))
+
+    return variant, values, sequences
+
+
+def a2n_dict(char_alphabet="ARNDCQEGHILKMFPSTWYV-"):
+    """
+    Map each character of the alphabet to its position, e.g. "A" -> 0;
+    with the default alphabet the gap character "-" maps to 20.
+    """
+    return {letter: index for index, letter in enumerate(char_alphabet)}
+
+
+def aa2int(aa):
+    """
+    Convert a single amino acid character into its integer index in the
+    default alphabet, e.g. "A" -> 0 and "-" -> 20. Unknown characters
+    (anything not in the alphabet) are mapped to the gap index.
+    """
+    a2n = a2n_dict()
+    return a2n.get(aa, a2n['-'])
+
+
+def seqs2int(msa_seqs):
+    """Convert sequences to an integer array with one row per sequence;
+    characters are upper-cased and unknown ones become the gap index."""
+    a2n = a2n_dict()  # built once instead of once per residue via aa2int()
+    gap = a2n['-']
+    msa_ori = [[a2n.get(aa.upper(), gap) for aa in seq] for seq in msa_seqs]
+    return np.array(msa_ori)
+
+
+def filt_gaps(msa_ori, gap_cutoff=0.5):
+    """Remove gappy columns; returns (MSA without columns of gap fraction >= gap_cutoff, removed column indices)"""
+    gap_fraction = np.sum(msa_ori == 21 - 1, axis=0) / msa_ori.shape[0]  # 20 is the index of '-'; computed once
+    non_gaps = np.where(gap_fraction < gap_cutoff)[0]
+    gaps = np.where(gap_fraction >= gap_cutoff)[0]
+    return msa_ori[:, non_gaps], gaps
+
+
+def aa_counts_to_v_ini():
+    pass
+
+
+@njit
+def get_1d_position(i_pos: int, aa_pos: int, na: int = 21) -> int:
+    """
+    Flat index of the 1-body term for state aa_pos at column i_pos (na states per column)
+    """
+    return i_pos * na + aa_pos
+
+
+@njit
+def get_2d_position(w: int, a: int, b: int, nc: int, na: int = 21) -> int:
+    """
+    Flat index of the 2-body term of pair w for the state combination (a, b)
+
+    w: int from range 0, 1, ..., pair_size-1
+    a: int from range 0, 1, ..., na-1 (=20)
+    b: int from range 0, 1, ..., na-1 (=20)
+    nc: Number of trimmed MSA columns; shifts the index past the N1 = nc * na 1-body terms
+    """
+    return nc * na + w * na * na + a * na + b
+
+
+@njit
+def get_h_entropy(msa_trimmed, weights, na: int = 21):
+    """
+    Weighted state counts per column: entry i * na + a sums the
+    weights of all sequences carrying state a at trimmed column i.
+    """
+    nr = msa_trimmed.shape[0]
+    nc = msa_trimmed.shape[1]
+    freq = np.zeros(nc * na)
+    for i in range(nc):
+        for n in range(nr):
+            freq[get_1d_position(i, msa_trimmed[n][i], na)] += weights[n]  # na forwarded to match len(freq)
+    return freq
+
+
+@njit
+def get_all_pairs(nc):
+    """
+    Coupling pairs of two, resulting in shape (int(nc * (nc - 1) / 2), 2)
+    """
+    pairs = []
+    for i in range(nc):
+        for j in range(i + 1, nc):
+            pairs.append([i, j])
+    return pairs
+
+
+@njit
+def get_eff_weights(msa_trimmed: np.ndarray, eff_cutoff: float = 0.8):
+    """
+    Weights alignment sequences according to sequence similarity:
+    If sequences are too identical, they get down-weighted.
+
+    :return: (neff, eff_weights): sum of the weights and the weight of each sequence
+    """
+    nr = msa_trimmed.shape[0]
+    nc = msa_trimmed.shape[1]
+    chk = nc * eff_cutoff
+    N_ = np.ones(nr)
+    eff_weights = np.ones(nr)
+    for n in range(nr):  # must stay sequential: N_[m] written here is read by later n
+        w = N_[n]
+        for m in range(n+1, nr):
+            hm = 0
+            for i in range(nc):
+                if msa_trimmed[n][i] == msa_trimmed[m][i]:
+                    hm += 1
+            if hm > chk:
+                N_[m] += 1
+                w += 1
+        eff_weights[n] = 1.0 / w
+    neff = np.sum(eff_weights)
+    return neff, eff_weights
+
+
+@njit
+def eval_v(
+        x: np.ndarray,
+        msa_trimmed: np.ndarray,
+        na: int = 21,
+        lam_v=0.01
+) -> tuple[float, np.ndarray]:
+    """
+    1-body (V) objective for the flat parameter vector x (length nc * na).
+    :return: (negated regularized objective value, gradient array g)
+    """
+    # na is forwarded to every helper so that indices match the nc * na layout
+    nc = msa_trimmed.shape[1]
+    g = np.zeros(nc * na)  # Reset g
+    neff, weights = get_eff_weights(msa_trimmed)
+    f = get_h_entropy(msa_trimmed, weights, na)
+
+    fx = 0.0
+    reg = 0.0
+    PC = np.zeros(shape=(nc, na))  # (P)robability of (C)olumn entries
+
+    for i in range(nc):
+        d = get_1d_position(i, 0, na)
+        for a in range(na):
+            PC[i][a] += x[d]
+            d += 1
+
+    for i in range(nc):
+        # compute Z for each column
+        Z = 0.0
+        for a in range(na):
+            Z += np.exp(PC[i][a])
+        Z = np.log(Z)  # The natural logarithm is logarithm in base e
+        for a in range(na):
+            fx += f[get_1d_position(i, a, na)] * (PC[i][a] - Z)
+            PC[i][a] = np.exp(PC[i][a] - Z)
+    for i in range(nc):
+        d = get_1d_position(i, 0, na)
+        for a in range(na):
+            g[d] += (PC[i][a] * neff) - f[get_1d_position(i, a, na)] + (lam_v * 2 * x[d])  # Changing of gradient
+            reg += lam_v * np.power(x[d], 2)
+            d += 1
+
+    return -1.0 * (fx - reg), g
+
+
+@njit(parallel=True)
+def eval_vw(
+        x: np.ndarray,
+        msa_trimmed: np.ndarray,
+        na: int = 21,
+        lam_v: float = 0.01
+) -> tuple[float, np.ndarray]:
+    """
+    Full (1-body + 2-body) objective.
+    Computes fx (float) and the (g)radient (array) for the
+    given flat parameter vector x (array) of length N2, where
+    N1 = nc * na                    (1-body terms) and
+    N2 = N1 + pair_size * na * na   (1-body + 2-body terms),
+    pair_size = nc * (nc - 1) / 2.
+    na is forwarded to the index helpers to keep this layout consistent.
+    :return: (negated regularized objective value, gradient array g)
+    """
+    nr = msa_trimmed.shape[0]
+    nc = msa_trimmed.shape[1]
+    pairs = get_all_pairs(nc)
+    pair_size = int(nc * (nc - 1) / 2)
+    N1 = nc * na
+    N2 = N1 + pair_size * na * na
+    g = np.zeros(N2)  # Reset g
+    neff, weights = get_eff_weights(msa_trimmed)
+    lam_w = lam_v * (nc - 1.0) * (na - 1.0)
+
+    fx = 0.0
+    reg = 0.0
+    PCN = np.zeros(shape=(nr, nc, na))  # (P)robability of (C)olumn entries?!
+
+    for n in prange(nr):
+        # precompute sum(V+W) for each position "i" and amino acids "a"
+        # assuming all other positions are fixed
+        PC = np.zeros(shape=(nc, na))
+        # for each position i
+        for i in range(nc):
+            # for each amino acid
+            for a in range(na):
+                # 1-body-term
+                PC[i][a] += x[get_1d_position(i, a, na)]
+        if True:  # if (N == msa.N2)
+            for w in range(pair_size):
+                i = pairs[w][0]
+                j = pairs[w][1]
+                xni = msa_trimmed[n][i]
+                xnj = msa_trimmed[n][j]
+                for a in range(na):
+                    PC[i][a] += x[get_2d_position(w, a, xnj, nc, na)]
+                    PC[j][a] += x[get_2d_position(w, xni, a, nc, na)]
+        for i in range(nc):
+            # compute local Z
+            Z = 0.0
+            for a in range(na):
+                Z += np.exp(PC[i][a])
+            Z = np.log(Z)
+
+            # compute fx
+            xni = msa_trimmed[n][i]
+            fx += (PC[i][xni] - Z) * weights[n]
+
+            # needed for (g)radient calculation
+            for a in range(na):
+                PCN[n][i][a] = np.exp(PC[i][a] - Z) * weights[n]
+    # compute (g)radient for 1bd
+    if True:  # if (N >= msa.N1)
+        for i in prange(nc):
+            for n in range(nr):
+                xni = msa_trimmed[n][i]
+                g[get_1d_position(i, xni, na)] -= weights[n]
+                for a in range(na):
+                    g[get_1d_position(i, a, na)] += PCN[n][i][a]
+    #  compute (g)radient for 2bd
+    if True:  # if (N == msa.N2)
+        for w in prange(pair_size):
+            i = pairs[w][0]
+            j = pairs[w][1]
+            for n in range(nr):
+                xni = msa_trimmed[n][i]
+                xnj = msa_trimmed[n][j]
+                g[get_2d_position(w, xni, xnj, nc, na)] -= 2.0 * weights[n]
+                for a in range(na):
+                    g[get_2d_position(w, a, xnj, nc, na)] += PCN[n][i][a]
+                    g[get_2d_position(w, xni, a, nc, na)] += PCN[n][j][a]
+    # compute (reg)ularization and (g)radient for 1bd
+    if True:  # if (N >= msa.N1)
+        for d in prange(N1):
+            reg += lam_v * np.power(x[d], 2)
+            g[d] += lam_v * 2.0 * x[d]
+    # compute (reg)ularization and (g)radient for 2bd
+    if True:  # if (N == msa.N2)
+        for d in prange(N1, N2):
+            reg += lam_w * np.power(x[d], 2)
+            g[d] += lam_w * 2.0 * x[d]
+    # flip direction, since we are passing function to a minimizer
+    return -1.0 * (fx - reg), g
+
+
+@njit
+def deep_copy(arr: "list | np.ndarray"):  # quoted: a bare "list | np.ndarray" raises TypeError on Python 3.9
+    """
+    Element-wise copy of a 1D sequence into a new numpy array;
+    used to copy 1D arrays inside numba-jitted code.
+    """
+    ret: List[float] = list()
+    for value in arr:
+        ret.append(value)
+    return np.array(ret)
+
+
+@njit(parallel=True)
+def lbfgs(msa_trimmed, func, mode: str = 'vw', max_iter: int = 100, na: int = 21):
+    """
+    Limited-memory Broyden–Fletcher–Goldfarb–Shanno (LBFGS)
+    ---------------------------------------------------------------------------
+    Adopted from: GREMLIN_CPP, https://github.com/sokrypton/GREMLIN_CPP, that is
+    Adopted from: https://github.com/js850/lbfgs_cpp
+    "THE BEER-WARE LICENSE" (Revision 42):
+     <js850@camsa.ac.uk> wrote this function. As long as you retain this notice
+     you can do whatever you want with this stuff. If we meet some day, and you
+     think this stuff is worth it, you can buy me a beer in return Jacob Stevenson
+     ---------------------------------------------------------------------------
+    modified to remove convergence criteria, will continue until max_iter
+    modified to remove maxstep check
+     ---------------------------------------------------------------------------
+
+    func: Target function returning a scalar value to be minimized
+    :return:
+    """
+    # nr = msa_trimmed.shape[0]
+    nc = msa_trimmed.shape[1]
+
+    max_f_rise = 1e-4
+    H0 = 0.1
+    # maxstep = 0.2
+    # pairs = get_all_pairs(nc)
+    pair_size = int(nc * (nc - 1) / 2)
+    N1 = nc * na
+    N2 = N1 + pair_size * na * na
+
+    if mode == 'v':
+        N = nc * na
+    elif mode == 'vw':
+        N = N2
+    else:
+        raise SystemError("Unknown mode.")
+
+    M = 5
+    y = np.zeros(shape=(M, N))
+    s = np.zeros(shape=(M, N))
+    x_new = np.zeros(N)
+    rho = np.zeros(M)
+    step = np.zeros(N)
+
+    # Compute target (f)unction value
+    fx_new, g_new = func(x_new, msa_trimmed)
+
+    print('lbfgs::iter S_S fx: ', fx_new, 'gnorm:', np.linalg.norm(g_new))
+
+    for iteration in range(max_iter):
+        g_old = deep_copy(g_new)
+        x_old = deep_copy(x_new)
+        fx_old = fx_new
+        if iteration == 0:
+            gnorm = np.linalg.norm(g_old)
+            if gnorm > 1.0:
+                gnorm = 1.0 / gnorm
+            for n in range(N):
+                step[n] = -gnorm * H0 * g_old[n]
+        else:  # iteration > 0
+            step = deep_copy(g_old)
+            jmin = iteration - M
+            if jmin < 0:
+                jmin = 0
+            jmax = iteration
+
+            # i = 0
+            # beta = 0.0
+            alpha = np.zeros(M)
+
+            for j in range(jmax - 1, jmin - 1, -1):
+                i = j % M
+                alpha[i] = rho[i] * np.dot(s[i], step)
+                for n in range(N):
+                    step[n] -= alpha[i] * y[i][n]
+            # Scale the step size by H0
+            for n in range(N):
+                step[n] *= H0
+            for j in range(jmin, jmax):
+                i = j % M
+                beta = rho[i] * np.dot(y[i], step)
+                for n in range(N):
+                    step[n] += s[i][n] * (alpha[i] - beta)
+            # invert the step to point downhill
+            for n in range(N):
+                step[n] *= -1
+        ###############################################
+        # backtracking_linesearch
+        ###############################################
+        # fnew = 0.0
+        # if the step is pointing uphill, invert it
+        if np.dot(step, g_old) > 0.0:
+            for n in range(N):
+                step[n] *= -1
+        attempt = 0
+        factor = 1.0
+        # stepsize = np.linalg.norm(step)
+
+        step_sum = 0.0
+        for step_i in step:
+            step_sum += step_i
+        # make sure the step is no larger than maxstep
+        # if (factor * stepsize > maxstep){factor = maxstep/stepsize;}
+        for nred in range(10):
+            attempt += 1
+            for n in range(N):
+                x_new[n] = x_old[n] + factor * step[n]
+            fx_new, g_new = func(x_new, msa_trimmed)  # new func calculation
+            df = fx_new - fx_old
+            if df < max_f_rise:
+                break
+            else:
+                factor /= 10.0
+        # stepsize = stepsize * factor
+        ###############################################
+        # update_memory
+        ###############################################
+        klocal = iteration % M
+        for n in range(N):
+            y[klocal][n] = g_new[n] - g_old[n]
+            s[klocal][n] = x_new[n] - x_old[n]
+        ys = np.dot(y[klocal], s[klocal])
+        if ys == 0.0:
+            ys = 1.0
+        rho[klocal] = 1.0 / ys
+
+        yy = np.dot(y[klocal], y[klocal])
+        if yy == 0.0:
+            yy = 1.0
+        H0 = ys / yy
+        try:
+            if (iteration + 1) % int(max_iter / 10) == 0:
+                print(iteration + 1, '/', max_iter, ' : ', fx_new)
+        except:  # bare 'except' used for numba functionality
+            print(iteration + 1, '/', max_iter, ' : ', fx_new)
+
+    return x_new
+
+
+@njit
+def cg(msa_trimmed, func, mode: str = 'vw', max_iter: int = 100, na: int = 21):
+    """
+    Nonlinear Conjugate Gradient (CG)
+    ---------------------------------------------------------------------------
+    Adopted from: GREMLIN_CPP, https://github.com/sokrypton/GREMLIN_CPP, that is
+    Adopted from: https://bitbucket.org/soedinglab/libconjugrad
+    CCMpred is released under the GNU Affero General Public License v3 or later.
+    ---------------------------------------------------------------------------
+    modified to remove convergence criteria, will continue until max_iter
+    ---------------------------------------------------------------------------
+
+    :return:
+    """
+    # nr = msa_trimmed.shape[0]
+    nc = msa_trimmed.shape[1]
+    pair_size = int(nc * (nc - 1) / 2)
+    N1 = nc * na
+    N2 = N1 + pair_size * na * na
+
+    if mode == 'v':
+        N = N1
+    elif mode == 'vw':
+        N = N2
+    else:
+        raise SystemError("Unknown mode.")
+
+    # epsilon = 1e-5
+    ftol = 1e-4
+    wolfe = 0.1
+    alpha_mul = 0.5
+    max_line = 10
+
+    s = np.zeros(N)
+
+    gnorm_old = 0.0
+    alpha_old = 0.0
+    dg_old = 0.0
+
+    x = np.zeros(N)
+
+    fx, g = func(x, msa_trimmed)
+
+    gnorm = np.dot(g, g)
+
+    # dg = 0.0
+    alpha = 1.0 / np.sqrt(gnorm)
+
+    print("# cg::iter S_S fx: ", fx, " gnorm: ", np.sqrt(gnorm))
+
+    for iteration in range(max_iter):
+        if iteration == 0:
+            for n in range(N):
+                s[n] = -g[n]
+            dg = np.dot(s, g)
+        else:  # iteration > 0
+            # Fletcher-Reeves
+            beta = gnorm / gnorm_old
+            for n in range(N):
+                s[n] = s[n] * beta - g[n]
+            dg = np.dot(s, g)
+            alpha = alpha_old * dg_old / dg
+        #########################################
+        # Linesearch
+        #########################################
+        attempts = 0
+        dg_ini = dg
+        dg_test = dg_ini * ftol
+        fx_ini = fx
+        old_alpha = 0.0
+        for line in range(max_line):
+            attempts += 1
+            step = alpha - old_alpha
+            for n in range(N):
+                x[n] += s[n] * step
+            fx_step, g = func(x, msa_trimmed)
+            if fx_step <= fx_ini + alpha * dg_test:
+                if np.dot(s, g) < wolfe * dg_ini:
+                    fx = fx_step
+                    break
+            old_alpha = alpha
+            alpha *= alpha_mul
+        #########################################
+        gnorm_old = gnorm
+        gnorm = np.dot(g, g)
+        alpha_old = alpha
+        dg_old = dg
+        try:
+            if (iteration + 1) % int(max_iter / 10) == 0:
+                print(iteration + 1, '/', max_iter, ' : ', fx)
+        except:  # bare 'except' used for numba functionality
+            print(iteration + 1, '/', max_iter, ' : ', fx)
+    return x
+
+
+def get_seqs_onehot_1bd(seqs_int, na: int = 21):
+    """One-hot encode integer sequences into flat 1-body vectors of length len(seq) * na."""
+    new_seqs_oh = []
+    for arr in seqs_int:
+        new_seq_oh = np.zeros(len(arr) * na, dtype=int)
+        for i, a in enumerate(arr):
+            new_seq_oh[get_1d_position(i, a, na)] += 1
+        new_seqs_oh.append(new_seq_oh)
+    return np.array(new_seqs_oh)
+
+
+def get_pair_size(seqs_int):
+    """Number of position pairs (i < j) of the first sequence, i.e. L * (L - 1) / 2;
+    returns 0 for empty input instead of failing on an unbound counter."""
+    for arr in seqs_int[:1]:
+        n_pos = len(arr)
+        return n_pos * (n_pos - 1) // 2  # closed form replaces the O(L^2) counting loop
+    return 0
+
+
+@njit
+def oh_1bd_predict(seqs_int, x_opt, na: int = 21):
+    """Score each integer sequence as the sum of its 1-body parameters in x_opt."""
+    nc = seqs_int.shape[1]
+    N1 = nc * na
+    y_pred_all: List[float] = list()
+    for arr in seqs_int:
+        new_seq_oh = np.zeros(N1)
+        for i, a in enumerate(arr):
+            new_seq_oh[get_1d_position(i, a, na)] += 1  # na forwarded to match N1
+        # Predict here directly to save memory instead
+        # of storing whole one-hot vector
+        y_pred = np.sum(new_seq_oh * x_opt)
+        y_pred_all.append(y_pred)
+    return y_pred_all
+
+
+@njit(parallel=True)
+def oh_2bd_predict(seqs_int, x_opt, na: int = 21):
+    """
+    Predict one score per sequence from 1-body and 2-body (pair) terms.
+
+    For each row of ``seqs_int`` a temporary count vector of length
+    ``nc * na + pair_size * na * na`` is filled: first the per-position
+    entries via ``get_1d_position``, then one entry per position pair via
+    ``get_2d_position``. The vector is reduced to a single number by
+    summing its element-wise product with ``x_opt``.
+
+    :param seqs_int: 2D array of integer-encoded sequences
+        (``seqs_int.shape[1]`` is the number of sequence positions ``nc``).
+    :param x_opt: Parameter vector multiplied element-wise with the
+        count vector; presumably the optimized 1-body + 2-body parameters
+        (TODO confirm against the optimizer output).
+    :param na: Number of states per position used for sizing the count
+        vector (default 21).
+        NOTE(review): the position helpers are called without ``na``, so
+        a non-default value presumably has to agree with what they
+        assume -- confirm.
+    :return: List with one predicted value per input sequence.
+    """
+    nc = seqs_int.shape[1]
+    # Number of unordered position pairs (i < j)
+    pair_size = int(nc * (nc - 1) / 2)
+    N2 = nc * na + pair_size * na * na
+    y_pred_all: List[float] = list()
+    pairs = get_all_pairs(nc)
+    for arr in seqs_int:
+        new_seq_oh = np.zeros(N2)
+        for i, a, in enumerate(arr):
+            d = get_1d_position(i, a)
+            new_seq_oh[d] += 1
+        # NOTE(review): the parallel writes below are only free of data
+        # races if get_2d_position returns a different index for every
+        # w -- confirm against its implementation.
+        for w in prange(pair_size):
+            i = pairs[w][0]
+            j = pairs[w][1]
+            xni = arr[i]
+            xnj = arr[j]
+            new_seq_oh[get_2d_position(w, xni, xnj, nc)] += 1
+        # Predict here directly to save memory instead
+        # of storing whole one-hot vector
+        y_pred = np.sum(new_seq_oh * x_opt)
+        y_pred_all.append(y_pred)
+    return y_pred_all
+
+
+@njit
+def oh_1bd_1d_encode(seqs_int, x_opt):
+    """
+    Encode integer-transformed (trimmed) sequences as one value per
+    sequence position.
+
+    Instead of storing the full 1-body (V) count vector, which requires
+    much memory, each sequence is encoded as a vector of length
+    ``seqs_int.shape[1]``: entry ``i`` is the parameter
+    ``x_opt[get_1d_position(i, a)]`` of the residue ``a`` found at
+    position ``i``.
+
+    :param seqs_int: 2D array of integer-encoded sequences.
+    :param x_opt: Parameter vector indexed by ``get_1d_position``;
+        presumably the optimized 1-body parameters (TODO confirm).
+    :return: List with one encoded vector (length = number of sequence
+        positions) per input sequence.
+    """
+    x_oh_pos_all: List[float] = list()
+    for arr in seqs_int:
+        x_oh_pos = np.zeros(np.shape(seqs_int)[1])
+        for i, a, in enumerate(arr):
+            d = get_1d_position(i, a)
+            x_oh_pos[i] += x_opt[d]
+        x_oh_pos_all.append(x_oh_pos)
+
+    return x_oh_pos_all
+
+
+
+@njit
+def oh_2bd_1d_encode(seqs_int, x_opt):
+    """
+    Encode integer-transformed (trimmed) sequences as one value per
+    sequence position using 1-body and 2-body (pair) parameters.
+
+    Instead of storing the full V+W count vector, which requires much
+    memory, each sequence is encoded as a vector of length
+    ``seqs_int.shape[1]``. Entry ``i`` accumulates the 1-body parameter
+    ``x_opt[get_1d_position(i, a)]`` of the residue at position ``i``
+    plus, for every pair ``w`` whose first member is ``i``, the pair
+    parameter ``x_opt[get_2d_position(w, arr[i], arr[j], nc)]``.
+
+    The pair loop runs serially on purpose: many pairs share the same
+    first position ``i``, so accumulating into ``x_oh_pos[i]`` from a
+    parallel ``prange`` loop would let several threads update the same
+    array element at once and yield non-deterministic sums.
+
+    :param seqs_int: 2D array of integer-encoded sequences.
+    :param x_opt: Parameter vector indexed by ``get_1d_position`` and
+        ``get_2d_position``; presumably the optimized 1-body + 2-body
+        parameters (TODO confirm).
+    :return: List with one encoded vector (length = number of sequence
+        positions) per input sequence.
+    """
+    nc = seqs_int.shape[1]
+    pairs = get_all_pairs(nc)
+    # Number of unordered position pairs (i < j)
+    pair_size = int(nc * (nc - 1) / 2)
+
+    x_oh_pos_all: List[float] = list()
+    for arr in seqs_int:
+        x_oh_pos = np.zeros(nc)
+        for i, a, in enumerate(arr):
+            d = get_1d_position(i, a)
+            x_oh_pos[i] += x_opt[d]
+        # Serial loop (not prange): different w map to the same i, see
+        # docstring
+        for w in range(pair_size):
+            i = pairs[w][0]
+            j = pairs[w][1]
+            xni = arr[i]
+            xnj = arr[j]
+            d = get_2d_position(w, xni, xnj, nc)
+            x_oh_pos[i] += x_opt[d]
+        x_oh_pos_all.append(x_oh_pos)
+
+    return x_oh_pos_all
+
+
+def aa_count_predict(msa_trimmed, sequences_int_trimmed):
+    """
+    Score sequences by the MSA column-wise counts of their residues.
+
+    The trimmed MSA is one-hot encoded (21 states) and summed over all
+    MSA sequences, giving for every column the count of each state. Each
+    input sequence is one-hot encoded the same way, and its score is the
+    sum of the counts of the states it carries at each position.
+
+    :param msa_trimmed: Multiple sequence alignment (MSA) in trimmed,
+        integer-encoded form (values index the 21 states).
+    :param sequences_int_trimmed: Int-converted input sequences to be
+        used for fitness prediction.
+
+    :return: 1D array of predicted fitness values, one per input sequence.
+    """
+    identity = np.eye(21)
+    # Per-column state counts of the MSA
+    column_counts = identity[msa_trimmed].sum(axis=0)
+    # One flattened one-hot vector per input sequence
+    flat_seqs = np.array(
+        [one_hot.flatten() for one_hot in identity[sequences_int_trimmed]]
+    )
+    weighted = flat_seqs * column_counts.flatten()
+    return np.sum(weighted, axis=1)
+
+
+# def sum_encoded_seqs(x_seqs):
+#     return np.sum(x_seqs, axis=1)
+#
+#
+# def oh_2bd_pred(seqs_int, x_opt):
+#     nc = seqs_int.shape[1]
+#     pairs = get_all_pairs(nc)
+#     pair_size = int(nc * (nc - 1) / 2)
+#     x_oh_pos_all: List[float] = list(list())
+#     for arr in seqs_int:
+#         sum_x = 0.0
+#         for i, a, in enumerate(arr):
+#             d = get_1d_position(i, a)
+#             sum_x += x_opt[d]
+#         for w in prange(pair_size):
+#             i = pairs[w][0]
+#             j = pairs[w][1]
+#             xni = arr[i]
+#             xnj = arr[j]
+#             d = get_2d_position(w, xni, xnj, nc)
+#             sum_x += x_opt[d]
+#         x_oh_pos_all.append(sum_x)
+#     return x_oh_pos_all
diff --git a/scripts/GREMLIN_numba/using_gremlin_functionalities.ipynb b/scripts/GREMLIN_numba/using_gremlin_functionalities.ipynb
new file mode 100644
index 0000000..1087f58
--- /dev/null
+++ b/scripts/GREMLIN_numba/using_gremlin_functionalities.ipynb
@@ -0,0 +1,304 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "64ae8419",
+   "metadata": {},
+   "source": [
+    "### Maximization of MSA sequence probability using L-BFGS (and CG) \n",
+    "Port of GREMLIN_CPP (https://github.com/sokrypton/GREMLIN_CPP) using Python and numba."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "4274632d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy.stats import spearmanr"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "cd9f9db9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Local .py file imports\n",
+    "from gremlin_inference_numba import (\n",
+    "    get_sequences_from_file, seqs2int, filt_gaps, lbfgs, cg, get_seqs_from_var_name,\n",
+    "    eval_v, eval_vw, oh_1bd_predict, oh_2bd_predict, aa_count_predict\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3df7d2d4",
+   "metadata": {},
+   "source": [
+    "Loading MSA sequences, converting alphabetical amino acid sequences to sequences of integer values, and trimming MSA."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "3cb55570",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['mskgeELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLSYGVQCFSRYPDHMKqhDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMadkqKNGIKVNFKIRHNIEDGSVQLADHyqQNTPIGDGPVLLPDNHYLSTQSALSKDpNEKRDHMVLLEFVTAAGithgmdelyk'\n",
+      " 'metgrALFSKPMTCQTEIDGEINGNKFKVVGNGDS-PGGGDFSIHAYCTTGELPMSWVVLGSPLQYGFHMFSGYPDDII..HYFQECFPEGYILTRSLRFEYDGTLTTTHHYSLEGNCVKAKVTLKGEGFDPNGPTMTKEEEQHPSQVQIFPH....GSGIRLLSNVVFKKKDGTTQLALQdcSVKPLGSRDVPLPNVHFLRTQIIQKKDdSDKRDHVVQREIAIAEH..........'\n",
+      " '..rgrALFSNSMTSKTEIDGEINGKKFKVVGEGDS-PGGGDFTIRAYCTTGELPMSWVVMGSPLQYGFHMLSHYPDDIV..HYFQECFPEGYTLTRKLRFEGDGTLTTHHRYELAGTCVKAKVSLTGESFDPSGPTMTKTVEQLPNQVQVFPH....ADGIRLLSDVVFVKNDGTTQIAHQdcSVKPLATRKITLPRFHFLHTQISQWKDrSDKRDHVVQREVSKAE-..........'\n",
+      " ...\n",
+      " 'mersaSFFTGTAKSKVIAEIMVDDTEYKVSGEGFACPLEGHQTLELHCSGKAMSINWSILGTIIQSNFKLFTQYTGSCV.yDFFKTSFPGGLKVETTASFSDGAVIRGNSSLTYVKDTVICRCNIQCEGFCEESPARARDLGQTLPCYEVIEG..ykADEVTCTMDLEWNDSDKQKYLCRLesSFVSGGTGNF-APPRHFIGHHFKITDK.SPNNLHFAQRCKSRANRi.........'\n",
+      " 'mersiSLFTGTAKSKVIATVKIDDMEYTITGEGFACPTEGQQNLELHCSGSALPINWCILGTIIQCNFKLFTQYKGSNE.yDFFKTSFPGGLKVEYVGSFVDGAEVSGNSTMSYVKDTVICRCSIQCKGFSEESPARAHDLGQTLTCYEVVEG..lrADEVTSQVSLEWLDGYNQKYACRLnsSVKSSGSQGNFSPSRHFIGHHFKVTDK.SPNNLHFAQRLKSRACSi.........'\n",
+      " 'mersiSLFTGTAKSKVIATVKIDDMEYTITGEGFACPTEGNQNLELHCSGTALPINWCILGTIIQCNFKLFTQYKGSNE.yDFFKTSFPGGLKVEYVGSFVDGAEVSGNSTMSYVKDTVICRCSIQCKGFSEESPARAHDLGQTLTCYEVVEG..lrADEVTSQVTLEWLDGYNQKYACRLnsSVKSSGSQGNFSPSRHFIGHHFKVTDK.SPNNLHFAQRLKSRACSi.........']\n",
+      "[[12 15 11 ... 10 18 11]\n",
+      " [12  6 16 ... 20 20 20]\n",
+      " [20 20  1 ... 20 20 20]\n",
+      " ...\n",
+      " [12  6  1 ... 20 20 20]\n",
+      " [12  6  1 ... 20 20 20]\n",
+      " [12  6  1 ... 20 20 20]]\n",
+      "(1046, 238)\n",
+      "[[11  7  6 ...  0  0  7]\n",
+      " [16  7  1 ...  0  6  8]\n",
+      " [ 1  7  1 ...  0  6 20]\n",
+      " ...\n",
+      " [ 1 15  0 ...  0  2  1]\n",
+      " [ 1 15  9 ...  0  4 15]\n",
+      " [ 1 15  9 ...  0  4 15]]\n",
+      "(1046, 220)\n"
+     ]
+    }
+   ],
+   "source": [
+    "alignment = '../../workflow/test_dataset_avgfp/uref100_avgfp_jhmmer_119.a2m'\n",
+    "msa, *_ = get_sequences_from_file(alignment)\n",
+    "print(msa)\n",
+    "msa_int = seqs2int(msa)\n",
+    "print(msa_int)\n",
+    "print(msa_int.shape)\n",
+    "msa_trimmed, gaps = filt_gaps(msa_int)\n",
+    "print(msa_trimmed)\n",
+    "print(msa_trimmed.shape)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7f7083fa",
+   "metadata": {},
+   "source": [
+    "Optimization of MSA probability using L-BFGS and CG."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "8cf645de",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "lbfgs::iter S_S fx:  107388.27079431039 gnorm: 1225.1515293096882\n",
+      "10 / 100  :  60609.63108277181\n",
+      "20 / 100  :  59947.64281511534\n",
+      "30 / 100  :  59869.53178623354\n",
+      "40 / 100  :  59859.941731911356\n",
+      "50 / 100  :  59857.879720644436\n",
+      "60 / 100  :  59857.31594100573\n",
+      "70 / 100  :  59857.15028889863\n",
+      "80 / 100  :  59857.09504682756\n",
+      "90 / 100  :  59857.08003258002\n",
+      "100 / 100  :  59857.07454351859\n",
+      "lbfgs::iter S_S fx:  107388.27079430803 gnorm: 15388.644842127675\n",
+      "10 / 100  :  31533.55176383793\n",
+      "20 / 100  :  31060.84534749837\n",
+      "30 / 100  :  28888.573386249183\n",
+      "40 / 100  :  28396.431248611527\n",
+      "50 / 100  :  28342.800784800973\n",
+      "60 / 100  :  28254.50087272058\n",
+      "70 / 100  :  28227.888482528473\n",
+      "80 / 100  :  28188.92690343361\n",
+      "90 / 100  :  28171.676017789905\n",
+      "100 / 100  :  28158.183075984813\n",
+      "# cg::iter S_S fx:  107388.27079431039  gnorm:  1225.15152930969\n",
+      "10 / 100  :  95195.27951793215\n",
+      "20 / 100  :  83096.38618130801\n",
+      "30 / 100  :  71395.61723248461\n",
+      "40 / 100  :  67400.86027747851\n",
+      "50 / 100  :  66669.79690088452\n",
+      "60 / 100  :  65910.63369043467\n",
+      "70 / 100  :  65151.95901760452\n",
+      "80 / 100  :  64395.652024425486\n",
+      "90 / 100  :  63644.68778084792\n",
+      "100 / 100  :  62911.661334753415\n",
+      "# cg::iter S_S fx:  107388.27079430803  gnorm:  15388.644842126007\n",
+      "10 / 100  :  35864.91179731878\n",
+      "20 / 100  :  31408.292000635145\n",
+      "30 / 100  :  31130.30687731922\n",
+      "40 / 100  :  31071.685376873065\n",
+      "50 / 100  :  31003.20159049482\n",
+      "60 / 100  :  30932.300252526038\n",
+      "70 / 100  :  30859.601343470487\n",
+      "80 / 100  :  30785.998696509025\n",
+      "90 / 100  :  30712.083040509264\n",
+      "100 / 100  :  30637.811822276144\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Limited-memory Broyden–Fletcher–Goldfarb–Shanno (L-BFGS) algorithm\n",
+    "x_opt_1bd = lbfgs(msa_trimmed, eval_v, mode='v', max_iter=100)\n",
+    "x_opt_2bd = lbfgs(msa_trimmed, eval_vw, mode='vw', max_iter=100)\n",
+    "# Conjugate Gradient (CG) method\n",
+    "x_opt_1bd_cg = cg(msa_trimmed, eval_v, mode='v', max_iter=100)\n",
+    "x_opt_2bd_cg = cg(msa_trimmed, eval_vw, mode='vw', max_iter=100)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7477db54",
+   "metadata": {},
+   "source": [
+    "Loading collected literature sequences, converting to integer sequences, and trimming sequences, to predict fitness and compare to true measured fitness later."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "54954ad2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wt_sequence = 'MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTL' \\\n",
+    "              'VTTLSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLV' \\\n",
+    "              'NRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLAD' \\\n",
+    "              'HYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK'\n",
+    "variant_fitness_data = pd.read_csv('../../workflow/test_dataset_avgfp/avgfp.csv', sep=';')\n",
+    "variants = variant_fitness_data.iloc[:2000, 0].values  # \"just\" using 2000 variants for faster processing\n",
+    "fitness_values = variant_fitness_data.iloc[:2000, 1].values\n",
+    "variants_split = []\n",
+    "for variant in variants:\n",
+    "    variants_split.append(variant.split('/'))\n",
+    "variants, fitness_values, sequences = get_seqs_from_var_name(wt_sequence, variants_split, fitness_values)\n",
+    "sequences_int = seqs2int(sequences)\n",
+    "sequences_int_trimmed = np.delete(sequences_int, gaps, axis=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "66dc4b88",
+   "metadata": {},
+   "source": [
+    "L-BFGS optimization: Rank correlation of measured and predicted sequence fitness."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "1119ebba",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AA counts:         SignificanceResult(statistic=0.5029247668771557, pvalue=1.0850574512849578e-128)\n",
+      "1-body term (V):   SignificanceResult(statistic=0.6585532465117758, pvalue=5.395952272751471e-249)\n",
+      "2-body term (VW):  SignificanceResult(statistic=0.718349826225515, pvalue=3.4027793e-317)\n"
+     ]
+    }
+   ],
+   "source": [
+    "y_pred_1bd = oh_1bd_predict(sequences_int_trimmed, x_opt_1bd)\n",
+    "y_pred_2bd = oh_2bd_predict(sequences_int_trimmed, x_opt_2bd)\n",
+    "y_pred_aac = aa_count_predict(msa_trimmed, sequences_int_trimmed)\n",
+    "\n",
+    "# 1D-sequence encodings for further machine learning tasks\n",
+    "#x_pred_1bd = oh_1bd_1d_encode(sequences_int_trimmed, x_opt_1bd)\n",
+    "#x_pred_2bd = oh_2bd_1d_encode(sequences_int_trimmed, x_opt_2bd)\n",
+    "\n",
+    "print(f\"{'AA counts:':<18}\", spearmanr(fitness_values, y_pred_aac))\n",
+    "print(f\"{'1-body term (V):':<18}\", spearmanr(fitness_values, y_pred_1bd))\n",
+    "print(f\"{'2-body term (VW):':<18}\", spearmanr(fitness_values, y_pred_2bd))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "61fab7bc",
+   "metadata": {},
+   "source": [
+    "CG optimization: Rank correlation of measured and predicted sequence fitness."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "3cb353d5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "AA counts:         SignificanceResult(statistic=0.5029247668771557, pvalue=1.0850574512849578e-128)\n",
+      "1-body term (V):   SignificanceResult(statistic=0.5459010681666135, pvalue=9.995612748011931e-156)\n",
+      "2-body term (VW):  SignificanceResult(statistic=0.6943000690360119, pvalue=9.57615754836693e-288)\n"
+     ]
+    }
+   ],
+   "source": [
+    "y_pred_1bd = oh_1bd_predict(sequences_int_trimmed, x_opt_1bd_cg)\n",
+    "y_pred_2bd = oh_2bd_predict(sequences_int_trimmed, x_opt_2bd_cg)\n",
+    "y_pred_aac = aa_count_predict(msa_trimmed, sequences_int_trimmed)\n",
+    "\n",
+    "# 1D-sequence encodings for further machine learning tasks\n",
+    "#x_pred_1bd = oh_1bd_1d_encode(sequences_int_trimmed, x_opt_1bd)\n",
+    "#x_pred_2bd = oh_2bd_1d_encode(sequences_int_trimmed, x_opt_2bd)\n",
+    "\n",
+    "print(f\"{'AA counts:':<18}\", spearmanr(fitness_values, y_pred_aac))\n",
+    "print(f\"{'1-body term (V):':<18}\", spearmanr(fitness_values, y_pred_1bd))\n",
+    "print(f\"{'2-body term (VW):':<18}\", spearmanr(fitness_values, y_pred_2bd))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a7249221",
+   "metadata": {},
+   "source": [
+    "Notebook end."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pypef_dev",
+   "language": "python",
+   "name": "pypef_dev"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/workflow/run_cli_tests_linux.sh b/workflow/run_cli_tests_linux.sh
index 2db1b9a..81dcbf5 100644
--- a/workflow/run_cli_tests_linux.sh
+++ b/workflow/run_cli_tests_linux.sh
@@ -1,476 +1,483 @@
-#!/bin/bash
-
-### Bash script for testing some PyPEF CLI commands 
-### based on the two datasets provided (ANEH and avGFP)
-printf 'For successful running, following files are required:\n\nin test_dataset_aneh/\n\tSequence_WT_ANEH.fasta\n\t37_ANEH_variants.csv
-\tANEH_jhmmer.a2m\n\tANEH_72.6.params (generated using PLMC or dowloaded from https://github.com/niklases/PyPEF/blob/main/workflow/test_dataset_aneh/ANEH_72.6.params)\n
-in test_dataset_avgfp/\n\tP42212_F64L.fasta\n\tavGFP.csv\n\turef100_avgfp_jhmmer_119.a2m
-\turef100_avgfp_jhmmer_119_plmc_42.6.params (generated using PLMC or dowloaded from https://github.com/niklases/PyPEF/blob/main/workflow/test_dataset_avgfp/uref100_avgfp_jhmmer_119_plmc_42.6.params)\n\n'
-
-set -x  # echo on
-set -e  # exit on (PyPEF) errors
-export PS4='+(Line ${LINENO}): '  # echo script line numbers 
-
-### RUN ME WITH
-### $ ./run_cli_tests_linux.sh                      # printing STDOUT and STDERR to terminal
-### $ ./run_cli_tests_linux.sh &> test_cli_run.log  # writing STDOUT and STDERR to log file
-
-### if using downloaded/locally stored pypef .py files:
-############### CHANGE THIS PATHS AND USED THREADS, REQUIRES PYTHON ENVIRONMENT WITH PRE-INSTALLED MODULES ###############
-export PYTHONPATH=${PYTHONPATH}:/path/to/pypef-main                                                                      #
-pypef='python3 /path/to/pypef-main/pypef/main.py'                                                                        #                                                                                                                   #
-##########################################################################################################################
-### else just use pip-installed pypef version (uncomment):                                                               #
-#pypef=pypef                                                                                                             #
-##########################################################################################################################
-threads=16                                                                                                               #
-##########################################################################################################################
-
-### threads=1 shows progress bar where possible
-### CV-based mlp and rf regression option optimization take a long time and related testing commands are commented out/not included herein
-
-### Pure ML (and some hybrid model) tests on ANEH dataset
-cd 'test_dataset_aneh'
-#######################################################################
-echo
-
-$pypef --version
-echo
-$pypef -h
-echo
-$pypef mklsts -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta
-echo
-
-$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor pls
-echo
-$pypef ml --show
-echo
-$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor pls_loocv
-echo
-$pypef ml --show
-echo
-$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor ridge
-echo
-$pypef ml --show
-echo
-$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor lasso
-echo
-$pypef ml --show
-echo
-$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor elasticnet
-echo
-$pypef ml --show
-echo
-#$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor mlp
-#$pypef ml --show
-#$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor rf
-#$pypef ml --show
-
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls_loocv --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor ridge --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor lasso --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor elasticnet --threads $threads
-echo
-$pypef ml --show
-echo
-
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls --nofft --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls_loocv --nofft --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor ridge --nofft --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor lasso --nofft --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor elasticnet --nofft --threads $threads
-echo
-$pypef ml --show
-echo
-
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params ANEH_72.6.params --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls_loocv --params ANEH_72.6.params --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor ridge --params ANEH_72.6.params --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor lasso --params ANEH_72.6.params --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor elasticnet --params ANEH_72.6.params --threads $threads
-echo
-$pypef ml --show
-echo
-
-$pypef param_inference --msa ANEH_jhmmer.a2m --opt_iter 100
-echo
-$pypef save_msa_info --msa ANEH_jhmmer.a2m -w Sequence_WT_ANEH.fasta --opt_iter 100
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params GREMLIN
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls_loocv --params GREMLIN
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor ridge --params GREMLIN
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor lasso --params GREMLIN
-echo
-$pypef ml --show
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor elasticnet --params GREMLIN
-echo
-$pypef ml --show
-echo
-
-$pypef ml -e aaidx -m FAUJ880104 -t TS.fasl
-echo
-$pypef ml -e onehot -m ONEHOT -t TS.fasl
-echo
-$pypef ml -e dca -m MLplmc -t TS.fasl --params ANEH_72.6.params --threads $threads 
-echo
-$pypef ml -e aaidx -m FAUJ880104 -t TS.fasl --label
-echo
-$pypef ml -e onehot -m ONEHOT -t TS.fasl --label
-echo
-$pypef ml -e dca -m MLplmc -t TS.fasl --label --params ANEH_72.6.params --threads $threads
-echo
-
-$pypef mkps -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta
-echo
-$pypef ml -e aaidx -m FAUJ880104 -p 37_ANEH_variants_prediction_set.fasta
-echo
-$pypef ml -e onehot -m ONEHOT -p 37_ANEH_variants_prediction_set.fasta
-echo
-$pypef ml -e dca -m MLplmc -p 37_ANEH_variants_prediction_set.fasta --params ANEH_72.6.params --threads $threads
-echo
-$pypef ml -e dca -m MLgremlin -p 37_ANEH_variants_prediction_set.fasta --params GREMLIN
-echo
-
-$pypef mkps -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta --drecomb --trecomb --qarecomb --qirecomb --ddiverse
-echo
-$pypef ml -e aaidx -m FAUJ880104 --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse
-echo
-$pypef ml -e onehot -m ONEHOT --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse
-echo
-$pypef ml -e dca -m MLplmc --params ANEH_72.6.params --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse --threads $threads
-echo
-$pypef ml -e dca -m MLgremlin --params GREMLIN --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse
-echo
-
-$pypef ml -e aaidx directevo -m FAUJ880104 -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative
-echo
-$pypef ml -e onehot directevo -m ONEHOT -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative
-echo
-$pypef ml -e dca directevo -m MLplmc -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative --params ANEH_72.6.params
-echo
-$pypef ml -e dca directevo -m MLgremlin -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative --params GREMLIN
-echo
-$pypef ml -e aaidx directevo -m FAUJ880104 -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative
-echo
-$pypef ml -e onehot directevo -m ONEHOT -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative
-echo
-$pypef ml -e dca directevo -m MLplmc -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative --params ANEH_72.6.params
-echo
-$pypef ml -e dca directevo -m MLgremlin -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative --params GREMLIN
-echo
-$pypef ml -e aaidx directevo -m FAUJ880104 -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative
-echo
-$pypef ml -e onehot directevo -m ONEHOT -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative
-echo
-$pypef ml -e dca directevo -m MLplmc -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative --params ANEH_72.6.params
-echo
-$pypef ml -e dca directevo -m MLgremlin -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative --params GREMLIN
-echo
-
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls --nofft
-echo
-$pypef ml --show
-echo
-$pypef ml -e aaidx directevo -m WEBA780101 -w Sequence_WT_ANEH.fasta -y -1.5 --negative --nofft
-echo
-
-$pypef encode -i 37_ANEH_variants.csv -e aaidx -m FAUJ880104 -w Sequence_WT_ANEH.fasta
-echo
-$pypef encode -i 37_ANEH_variants.csv -e onehot -w Sequence_WT_ANEH.fasta
-echo
-$pypef encode -i 37_ANEH_variants.csv -e dca -w Sequence_WT_ANEH.fasta --params ANEH_72.6.params --threads $threads
-echo
-mv 37_ANEH_variants_dca_encoded.csv 37_ANEH_variants_plmc_dca_encoded.csv
-echo
-$pypef encode -i 37_ANEH_variants.csv -e dca -w Sequence_WT_ANEH.fasta --params GREMLIN
-echo
-mv 37_ANEH_variants_dca_encoded.csv 37_ANEH_variants_gremlin_dca_encoded.csv
-echo
-
-$pypef ml low_n -i 37_ANEH_variants_aaidx_encoded.csv
-echo
-$pypef ml low_n -i 37_ANEH_variants_onehot_encoded.csv
-echo
-$pypef ml low_n -i 37_ANEH_variants_plmc_dca_encoded.csv
-echo
-$pypef ml low_n -i 37_ANEH_variants_gremlin_dca_encoded.csv
-echo
-
-$pypef ml extrapolation -i 37_ANEH_variants_aaidx_encoded.csv
-echo
-$pypef ml extrapolation -i 37_ANEH_variants_onehot_encoded.csv
-echo
-$pypef ml extrapolation -i 37_ANEH_variants_plmc_dca_encoded.csv
-echo
-$pypef ml extrapolation -i 37_ANEH_variants_gremlin_dca_encoded.csv
-echo
-
-$pypef ml extrapolation -i 37_ANEH_variants_aaidx_encoded.csv --conc
-echo
-$pypef ml extrapolation -i 37_ANEH_variants_onehot_encoded.csv --conc
-echo
-$pypef ml extrapolation -i 37_ANEH_variants_plmc_dca_encoded.csv --conc
-echo
-$pypef ml extrapolation -i 37_ANEH_variants_gremlin_dca_encoded.csv --conc
-echo
-
-$pypef hybrid train_and_save -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta --params ANEH_72.6.params --fit_size 0.66 --threads $threads
-echo
-$pypef hybrid -l LS.fasl -t TS.fasl --params ANEH_72.6.params --threads $threads
-echo
-$pypef hybrid -m HYBRIDplmc -t TS.fasl --params ANEH_72.6.params --threads $threads
-echo
-
-$pypef hybrid train_and_save -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta --params GREMLIN --fit_size 0.66
-echo
-$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
-echo
-$pypef hybrid -m HYBRIDgremlin -t TS.fasl --params GREMLIN
-echo
-
-$pypef mkps -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta
-echo
-$pypef hybrid -m HYBRIDplmc -p 37_ANEH_variants_prediction_set.fasta --params ANEH_72.6.params --threads $threads
-echo
-$pypef hybrid -m HYBRIDplmc --params ANEH_72.6.params --pmult --drecomb --threads $threads
-echo
-
-$pypef hybrid directevo -m HYBRIDplmc -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative --params ANEH_72.6.params
-echo
-$pypef hybrid directevo -m HYBRIDplmc -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative --params ANEH_72.6.params
-echo
-$pypef hybrid directevo -m HYBRIDplmc -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative --params ANEH_72.6.params
-echo
-
-$pypef encode -i 37_ANEH_variants.csv -e dca -w Sequence_WT_ANEH.fasta --params ANEH_72.6.params --threads $threads
-echo
-$pypef hybrid low_n -i 37_ANEH_variants_dca_encoded.csv
-echo
-$pypef hybrid extrapolation -i 37_ANEH_variants_dca_encoded.csv
-echo
-$pypef hybrid extrapolation -i 37_ANEH_variants_dca_encoded.csv --conc
-echo
-
-
-### Hybrid model (and some pure ML and pure DCA) tests on avGFP dataset 
-cd '../test_dataset_avgfp'
-#######################################################################
-echo
-
-$pypef mklsts -i avGFP.csv -w P42212_F64L.fasta
-echo
-$pypef param_inference --msa uref100_avgfp_jhmmer_119.a2m --opt_iter 100
-echo
-# Check MSA coevolution info
-$pypef save_msa_info --msa uref100_avgfp_jhmmer_119.a2m -w P42212_F64L.fasta --opt_iter 100
-###
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --params GREMLIN
-echo
-$pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
-echo
-# Similar to line above
-$pypef hybrid -t TS.fasl --params GREMLIN
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --params GREMLIN
-echo
-$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
-echo
-$pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
-echo
-# pure statistical
-$pypef hybrid -t TS.fasl --params GREMLIN
-echo
-
-
-# using .params file
-$pypef ml -e dca -l LS.fasl -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
-echo
-# ML LS/TS
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params GREMLIN
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params uref100_avgfp_jhmmer_119_plmc_42.6.params
-echo
-# Transforming .params file to DCAEncoding and using DCAEncoding Pickle; output file: Pickles/MLplmc.
-# That means using uref100_avgfp_jhmmer_119_plmc_42.6.params or PLMC as params file is identical.
-$pypef param_inference --params uref100_avgfp_jhmmer_119_plmc_42.6.params
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params PLMC --threads $threads
-echo
-# ml only TS
-$pypef ml -e dca -m MLplmc -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
-echo
-$pypef ml -e dca -m MLgremlin -t TS.fasl --params GREMLIN --threads $threads
-echo
-
-
-echo
-$pypef ml -e dca -l LS.fasl -t TS.fasl --params PLMC --threads $threads
-echo
-$pypef hybrid -l LS.fasl -t TS.fasl --params PLMC --threads $threads
-echo
-$pypef hybrid -m PLMC -t TS.fasl --params PLMC --threads $threads
-echo
-
-# pure statistical
-$pypef hybrid -t TS.fasl --params PLMC --threads $threads
-echo
-$pypef hybrid -t TS.fasl --params GREMLIN
-echo
-$pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
-echo
-$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
-echo
-$pypef save_msa_info --msa uref100_avgfp_jhmmer_119.a2m -w P42212_F64L.fasta --opt_iter 100
-# train and save only for hybrid
-$pypef hybrid train_and_save -i avGFP.csv --params GREMLIN --wt P42212_F64L.fasta
-echo
-# Encode CSV
-$pypef encode -e dca -i avGFP.csv --wt P42212_F64L.fasta --params GREMLIN
-echo
-$pypef encode --encoding dca -i avGFP.csv --wt P42212_F64L.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads 12
-
-#Extrapolation
-echo
-$pypef ml low_n -i avGFP_dca_encoded.csv --regressor ridge
-echo
-$pypef ml extrapolation -i avGFP_dca_encoded.csv --regressor ridge
-echo
-$pypef ml extrapolation -i avGFP_dca_encoded.csv --conc --regressor ridge
-echo
-
-# Direct Evo
-$pypef ml -e dca directevo -m MLgremlin --wt P42212_F64L.fasta --params GREMLIN
-echo
-$pypef ml -e dca directevo -m MLplmc --wt P42212_F64L.fasta --params PLMC
-echo
-$pypef hybrid directevo -m GREMLIN --wt P42212_F64L.fasta --params GREMLIN
-echo
-$pypef hybrid directevo -m PLMC --wt P42212_F64L.fasta --params PLMC
-echo
-$pypef hybrid directevo --wt P42212_F64L.fasta --params GREMLIN
-echo
-$pypef hybrid directevo --wt P42212_F64L.fasta --params PLMC
-echo
-
-$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
-echo 
-$pypef hybrid -m HYBRIDgremlin -t TS.fasl --params GREMLIN
-echo 
-
-### Similar to old CLI run test from here
-
-$pypef encode -i avGFP.csv -e dca -w P42212_F64L.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
-echo
-$pypef encode -i avGFP.csv -e onehot -w P42212_F64L.fasta
-echo
-$pypef ml -e aaidx -l LS.fasl -t TS.fasl --threads $threads
-echo
-$pypef ml --show
-echo
-$pypef encode -i avGFP.csv -e aaidx -m GEIM800103 -w P42212_F64L.fasta 
-echo
-
-$pypef hybrid train_and_save -i avGFP.csv --params uref100_avgfp_jhmmer_119_plmc_42.6.params --fit_size 0.66 -w P42212_F64L.fasta --threads $threads
-echo
-$pypef hybrid -l LS.fasl -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
-echo
-$pypef hybrid -m HYBRIDplmc -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
-echo
-
-# No training set given
-$pypef hybrid -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
-echo
-$pypef ml -e dca -m MLplmc -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --label --threads $threads
-echo
-
-$pypef mkps -i avGFP.csv -w P42212_F64L.fasta
-echo
-$pypef hybrid -m HYBRIDplmc -p avGFP_prediction_set.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
-echo
-$pypef mkps -i avGFP.csv -w P42212_F64L.fasta --drecomb
-#$pypef hybrid -m HYBRID --params uref100_avgfp_jhmmer_119_plmc_42.6.params --pmult --drecomb --threads $threads  # many single variants for recombination, takes too long
-echo
-
-$pypef hybrid directevo -m HYBRIDplmc -w P42212_F64L.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params
-echo
-$pypef hybrid directevo -m HYBRIDplmc -w P42212_F64L.fasta --numiter 10 --numtraj 8 --params uref100_avgfp_jhmmer_119_plmc_42.6.params
-echo
-$pypef hybrid directevo -m HYBRIDplmc -i avGFP.csv -w P42212_F64L.fasta --temp 0.1 --usecsv --csvaa --params uref100_avgfp_jhmmer_119_plmc_42.6.params
-
-$pypef hybrid low_n -i avGFP_dca_encoded.csv
-echo
-$pypef hybrid extrapolation -i avGFP_dca_encoded.csv
-echo
-$pypef hybrid extrapolation -i avGFP_dca_encoded.csv --conc
-echo
-
-$pypef ml low_n -i avGFP_dca_encoded.csv --regressor ridge
-echo
-$pypef ml extrapolation -i avGFP_dca_encoded.csv --regressor ridge
-echo
-$pypef ml extrapolation -i avGFP_dca_encoded.csv --conc --regressor ridge
-echo
-
-$pypef ml low_n -i avGFP_onehot_encoded.csv --regressor pls
-echo
-$pypef ml extrapolation -i avGFP_onehot_encoded.csv --regressor pls
-echo
-$pypef ml extrapolation -i avGFP_onehot_encoded.csv --conc --regressor pls
-echo
-
-$pypef ml low_n -i avGFP_aaidx_encoded.csv --regressor ridge
-echo
-$pypef ml extrapolation -i avGFP_aaidx_encoded.csv --regressor ridge
-echo
-$pypef ml extrapolation -i avGFP_aaidx_encoded.csv --conc --regressor ridge
-echo
-
+#!/bin/bash
+
+### Bash script for testing some PyPEF CLI commands 
+### based on the two datasets provided (ANEH and avGFP)
+### REQUIRES MINICONDA OR ANACONDA TO BE INSTALLED
+printf 'For successful running, following files are required:\n\nin test_dataset_aneh/\n\tSequence_WT_ANEH.fasta\n\t37_ANEH_variants.csv
+\tANEH_jhmmer.a2m\n\tANEH_72.6.params (generated using PLMC or dowloaded from https://github.com/niklases/PyPEF/blob/main/workflow/test_dataset_aneh/ANEH_72.6.params)\n
+in test_dataset_avgfp/\n\tP42212_F64L.fasta\n\tavGFP.csv\n\turef100_avgfp_jhmmer_119.a2m
+\turef100_avgfp_jhmmer_119_plmc_42.6.params (generated using PLMC or dowloaded from https://github.com/niklases/PyPEF/blob/main/workflow/test_dataset_avgfp/uref100_avgfp_jhmmer_119_plmc_42.6.params)\n\n'
+
+set -x  # echo on
+set -e  # exit on (PyPEF) errors
+export PS4='+(Line ${LINENO}): '  # echo script line numbers 
+
+### RUN ME WITH
+### $ ./run_cli_tests_linux.sh                      # printing STDOUT and STDERR to terminal
+### $ ./run_cli_tests_linux.sh &> test_cli_run.log  # writing STDOUT and STDERR to log file
+
+### if using downloaded/locally stored pypef .py files:
+##########################################################################################################################
+conda env remove -n pypef -y || true  # best effort: env may be absent; must not trip 'set -e'                           #
+conda create -n pypef python=3.10 -y  # fresh environment with a pinned Python version                                   #
+eval "$(conda shell.bash hook)"  # enable 'conda activate' inside this script's shell                                    #
+conda activate pypef                                                                                                     #
+python -m pip install -r ../requirements.txt                                                                             #
+path="${PWD%/*}"  # parent of the working directory (expected: repository root)                                          #
+export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}$path"  # append without creating an empty entry                         #
+pypef="python3 $path/pypef/main.py"  # used unquoted as $pypef, so $path must be free of spaces                          #
+##########################################################################################################################
+### else just use pip-installed pypef version (uncomment):                                                               #
+#pypef=pypef                                                                                                             #
+##########################################################################################################################
+threads=12                                                                                                               #
+##########################################################################################################################
+
+### threads=1 shows progress bar where possible
+### CV-based mlp and rf regression option optimization take a long time and related testing commands are commented out/not included herein
+
+### Pure ML (and some hybrid model) tests on ANEH dataset
+cd 'test_dataset_aneh'
+#######################################################################
+echo
+
+$pypef --version
+echo
+$pypef -h
+echo
+$pypef mklsts -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta
+echo
+
+$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor pls
+echo
+$pypef ml --show
+echo
+$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor pls_loocv
+echo
+$pypef ml --show
+echo
+$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor ridge
+echo
+$pypef ml --show
+echo
+$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor lasso
+echo
+$pypef ml --show
+echo
+$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor elasticnet
+echo
+$pypef ml --show
+echo
+#$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor mlp
+#$pypef ml --show
+#$pypef ml -e onehot -l LS.fasl -t TS.fasl --regressor rf
+#$pypef ml --show
+
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls_loocv --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor ridge --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor lasso --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor elasticnet --threads $threads
+echo
+$pypef ml --show
+echo
+
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls --nofft --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls_loocv --nofft --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor ridge --nofft --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor lasso --nofft --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor elasticnet --nofft --threads $threads
+echo
+$pypef ml --show
+echo
+
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params ANEH_72.6.params --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls_loocv --params ANEH_72.6.params --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor ridge --params ANEH_72.6.params --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor lasso --params ANEH_72.6.params --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor elasticnet --params ANEH_72.6.params --threads $threads
+echo
+$pypef ml --show
+echo
+
+$pypef param_inference --msa ANEH_jhmmer.a2m --opt_iter 100
+echo
+$pypef save_msa_info --msa ANEH_jhmmer.a2m -w Sequence_WT_ANEH.fasta --opt_iter 100
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params GREMLIN
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls_loocv --params GREMLIN
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor ridge --params GREMLIN
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor lasso --params GREMLIN
+echo
+$pypef ml --show
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor elasticnet --params GREMLIN
+echo
+$pypef ml --show
+echo
+
+$pypef ml -e aaidx -m FAUJ880104 -t TS.fasl
+echo
+$pypef ml -e onehot -m ONEHOT -t TS.fasl
+echo
+$pypef ml -e dca -m MLplmc -t TS.fasl --params ANEH_72.6.params --threads $threads 
+echo
+$pypef ml -e aaidx -m FAUJ880104 -t TS.fasl --label
+echo
+$pypef ml -e onehot -m ONEHOT -t TS.fasl --label
+echo
+$pypef ml -e dca -m MLplmc -t TS.fasl --label --params ANEH_72.6.params --threads $threads
+echo
+
+$pypef mkps -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta
+echo
+$pypef ml -e aaidx -m FAUJ880104 -p 37_ANEH_variants_prediction_set.fasta
+echo
+$pypef ml -e onehot -m ONEHOT -p 37_ANEH_variants_prediction_set.fasta
+echo
+$pypef ml -e dca -m MLplmc -p 37_ANEH_variants_prediction_set.fasta --params ANEH_72.6.params --threads $threads
+echo
+$pypef ml -e dca -m MLgremlin -p 37_ANEH_variants_prediction_set.fasta --params GREMLIN
+echo
+
+$pypef mkps -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta --drecomb --trecomb --qarecomb --qirecomb --ddiverse
+echo
+$pypef ml -e aaidx -m FAUJ880104 --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse
+echo
+$pypef ml -e onehot -m ONEHOT --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse
+echo
+$pypef ml -e dca -m MLplmc --params ANEH_72.6.params --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse --threads $threads
+echo
+$pypef ml -e dca -m MLgremlin --params GREMLIN --pmult --drecomb --trecomb --qarecomb --qirecomb --ddiverse
+echo
+
+$pypef ml -e aaidx directevo -m FAUJ880104 -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative
+echo
+$pypef ml -e onehot directevo -m ONEHOT -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative
+echo
+$pypef ml -e dca directevo -m MLplmc -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative --params ANEH_72.6.params
+echo
+$pypef ml -e dca directevo -m MLgremlin -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative --params GREMLIN
+echo
+$pypef ml -e aaidx directevo -m FAUJ880104 -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative
+echo
+$pypef ml -e onehot directevo -m ONEHOT -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative
+echo
+$pypef ml -e dca directevo -m MLplmc -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative --params ANEH_72.6.params
+echo
+$pypef ml -e dca directevo -m MLgremlin -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative --params GREMLIN
+echo
+$pypef ml -e aaidx directevo -m FAUJ880104 -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative
+echo
+$pypef ml -e onehot directevo -m ONEHOT -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative
+echo
+$pypef ml -e dca directevo -m MLplmc -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative --params ANEH_72.6.params
+echo
+$pypef ml -e dca directevo -m MLgremlin -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative --params GREMLIN
+echo
+
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --regressor pls --nofft
+echo
+$pypef ml --show
+echo
+$pypef ml -e aaidx directevo -m WEBA780101 -w Sequence_WT_ANEH.fasta -y -1.5 --negative --nofft
+echo
+
+$pypef encode -i 37_ANEH_variants.csv -e aaidx -m FAUJ880104 -w Sequence_WT_ANEH.fasta
+echo
+$pypef encode -i 37_ANEH_variants.csv -e onehot -w Sequence_WT_ANEH.fasta
+echo
+$pypef encode -i 37_ANEH_variants.csv -e dca -w Sequence_WT_ANEH.fasta --params ANEH_72.6.params --threads $threads
+echo
+mv 37_ANEH_variants_dca_encoded.csv 37_ANEH_variants_plmc_dca_encoded.csv  # keep the PLMC-based encoding; the GREMLIN run below presumably writes the same file name
+echo
+$pypef encode -i 37_ANEH_variants.csv -e dca -w Sequence_WT_ANEH.fasta --params GREMLIN
+echo
+mv 37_ANEH_variants_dca_encoded.csv 37_ANEH_variants_gremlin_dca_encoded.csv  # give the GREMLIN-based encoding its own name as well
+echo
+
+$pypef ml low_n -i 37_ANEH_variants_aaidx_encoded.csv
+echo
+$pypef ml low_n -i 37_ANEH_variants_onehot_encoded.csv
+echo
+$pypef ml low_n -i 37_ANEH_variants_plmc_dca_encoded.csv
+echo
+$pypef ml low_n -i 37_ANEH_variants_gremlin_dca_encoded.csv
+echo
+
+$pypef ml extrapolation -i 37_ANEH_variants_aaidx_encoded.csv
+echo
+$pypef ml extrapolation -i 37_ANEH_variants_onehot_encoded.csv
+echo
+$pypef ml extrapolation -i 37_ANEH_variants_plmc_dca_encoded.csv
+echo
+$pypef ml extrapolation -i 37_ANEH_variants_gremlin_dca_encoded.csv
+echo
+
+$pypef ml extrapolation -i 37_ANEH_variants_aaidx_encoded.csv --conc
+echo
+$pypef ml extrapolation -i 37_ANEH_variants_onehot_encoded.csv --conc
+echo
+$pypef ml extrapolation -i 37_ANEH_variants_plmc_dca_encoded.csv --conc
+echo
+$pypef ml extrapolation -i 37_ANEH_variants_gremlin_dca_encoded.csv --conc
+echo
+
+$pypef hybrid train_and_save -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta --params ANEH_72.6.params --fit_size 0.66 --threads $threads
+echo
+$pypef hybrid -l LS.fasl -t TS.fasl --params ANEH_72.6.params --threads $threads
+echo
+$pypef hybrid -m HYBRIDplmc -t TS.fasl --params ANEH_72.6.params --threads $threads
+echo
+
+$pypef hybrid train_and_save -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta --params GREMLIN --fit_size 0.66
+echo
+$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
+echo
+$pypef hybrid -m HYBRIDgremlin -t TS.fasl --params GREMLIN
+echo
+
+$pypef mkps -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta
+echo
+$pypef hybrid -m HYBRIDplmc -p 37_ANEH_variants_prediction_set.fasta --params ANEH_72.6.params --threads $threads
+echo
+$pypef hybrid -m HYBRIDplmc --params ANEH_72.6.params --pmult --drecomb --threads $threads
+echo
+
+$pypef hybrid directevo -m HYBRIDplmc -w Sequence_WT_ANEH.fasta --y_wt -1.5 --negative --params ANEH_72.6.params
+echo
+$pypef hybrid directevo -m HYBRIDplmc -w Sequence_WT_ANEH.fasta -y -1.5 --numiter 10 --numtraj 8 --negative --params ANEH_72.6.params
+echo
+$pypef hybrid directevo -m HYBRIDplmc -i 37_ANEH_variants.csv -w Sequence_WT_ANEH.fasta -y -1.5 --temp 0.1 --usecsv --csvaa --negative --params ANEH_72.6.params
+echo
+
+$pypef encode -i 37_ANEH_variants.csv -e dca -w Sequence_WT_ANEH.fasta --params ANEH_72.6.params --threads $threads  # presumably regenerates 37_ANEH_variants_dca_encoded.csv, which was renamed by the earlier mv commands
+echo
+$pypef hybrid low_n -i 37_ANEH_variants_dca_encoded.csv
+echo
+$pypef hybrid extrapolation -i 37_ANEH_variants_dca_encoded.csv
+echo
+$pypef hybrid extrapolation -i 37_ANEH_variants_dca_encoded.csv --conc
+echo
+
+
+### Hybrid model (and some pure ML and pure DCA) tests on avGFP dataset 
+cd '../test_dataset_avgfp'
+#######################################################################
+echo
+
+$pypef mklsts -i avGFP.csv -w P42212_F64L.fasta
+echo
+$pypef param_inference --msa uref100_avgfp_jhmmer_119.a2m --opt_iter 100
+echo
+# Check MSA coevolution info
+$pypef save_msa_info --msa uref100_avgfp_jhmmer_119.a2m -w P42212_F64L.fasta --opt_iter 100
+###
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --params GREMLIN
+echo
+$pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
+echo
+# Similar to line above
+$pypef hybrid -t TS.fasl --params GREMLIN
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --params GREMLIN
+echo
+$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
+echo
+$pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
+echo
+# pure statistical
+$pypef hybrid -t TS.fasl --params GREMLIN
+echo
+
+
+# using .params file
+$pypef ml -e dca -l LS.fasl -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+echo
+# ML LS/TS
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params GREMLIN
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params uref100_avgfp_jhmmer_119_plmc_42.6.params
+echo
+# Transforming .params file to DCAEncoding and using DCAEncoding Pickle; output file: Pickles/MLplmc.
+# That means using uref100_avgfp_jhmmer_119_plmc_42.6.params or PLMC as params file is identical.
+$pypef param_inference --params uref100_avgfp_jhmmer_119_plmc_42.6.params
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --regressor pls --params PLMC --threads $threads
+echo
+# ml only TS
+$pypef ml -e dca -m MLplmc -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+echo
+$pypef ml -e dca -m MLgremlin -t TS.fasl --params GREMLIN --threads $threads
+echo
+
+
+echo
+$pypef ml -e dca -l LS.fasl -t TS.fasl --params PLMC --threads $threads
+echo
+$pypef hybrid -l LS.fasl -t TS.fasl --params PLMC --threads $threads
+echo
+$pypef hybrid -m PLMC -t TS.fasl --params PLMC --threads $threads
+echo
+
+# pure statistical
+$pypef hybrid -t TS.fasl --params PLMC --threads $threads
+echo
+$pypef hybrid -t TS.fasl --params GREMLIN
+echo
+$pypef hybrid -m GREMLIN -t TS.fasl --params GREMLIN
+echo
+$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
+echo
+$pypef save_msa_info --msa uref100_avgfp_jhmmer_119.a2m -w P42212_F64L.fasta --opt_iter 100
+# train and save only for hybrid
+$pypef hybrid train_and_save -i avGFP.csv --params GREMLIN --wt P42212_F64L.fasta
+echo
+# Encode CSV with DCA-based features: first with GREMLIN parameters, then with the PLMC .params file
+$pypef encode -e dca -i avGFP.csv --wt P42212_F64L.fasta --params GREMLIN
+echo
+$pypef encode --encoding dca -i avGFP.csv --wt P42212_F64L.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+
+#Extrapolation
+echo
+$pypef ml low_n -i avGFP_dca_encoded.csv --regressor ridge
+echo
+$pypef ml extrapolation -i avGFP_dca_encoded.csv --regressor ridge
+echo
+$pypef ml extrapolation -i avGFP_dca_encoded.csv --conc --regressor ridge
+echo
+
+# Direct Evo
+$pypef ml -e dca directevo -m MLgremlin --wt P42212_F64L.fasta --params GREMLIN
+echo
+$pypef ml -e dca directevo -m MLplmc --wt P42212_F64L.fasta --params PLMC
+echo
+$pypef hybrid directevo -m GREMLIN --wt P42212_F64L.fasta --params GREMLIN
+echo
+$pypef hybrid directevo -m PLMC --wt P42212_F64L.fasta --params PLMC
+echo
+$pypef hybrid directevo --wt P42212_F64L.fasta --params GREMLIN
+echo
+$pypef hybrid directevo --wt P42212_F64L.fasta --params PLMC
+echo
+
+$pypef hybrid -l LS.fasl -t TS.fasl --params GREMLIN
+echo 
+$pypef hybrid -m HYBRIDgremlin -t TS.fasl --params GREMLIN
+echo 
+
+### Similar to old CLI run test from here
+
+$pypef encode -i avGFP.csv -e dca -w P42212_F64L.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+echo
+$pypef encode -i avGFP.csv -e onehot -w P42212_F64L.fasta
+echo
+$pypef ml -e aaidx -l LS.fasl -t TS.fasl --threads $threads
+echo
+$pypef ml --show
+echo
+$pypef encode -i avGFP.csv -e aaidx -m GEIM800103 -w P42212_F64L.fasta 
+echo
+
+$pypef hybrid train_and_save -i avGFP.csv --params uref100_avgfp_jhmmer_119_plmc_42.6.params --fit_size 0.66 -w P42212_F64L.fasta --threads $threads
+echo
+$pypef hybrid -l LS.fasl -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+echo
+$pypef hybrid -m HYBRIDplmc -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+echo
+
+# No training set given
+$pypef hybrid -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+echo
+$pypef ml -e dca -m MLplmc -t TS.fasl --params uref100_avgfp_jhmmer_119_plmc_42.6.params --label --threads $threads
+echo
+
+$pypef mkps -i avGFP.csv -w P42212_F64L.fasta
+echo
+$pypef hybrid -m HYBRIDplmc -p avGFP_prediction_set.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params --threads $threads
+echo
+$pypef mkps -i avGFP.csv -w P42212_F64L.fasta --drecomb
+#$pypef hybrid -m HYBRID --params uref100_avgfp_jhmmer_119_plmc_42.6.params --pmult --drecomb --threads $threads  # many single variants for recombination, takes too long
+echo
+
+$pypef hybrid directevo -m HYBRIDplmc -w P42212_F64L.fasta --params uref100_avgfp_jhmmer_119_plmc_42.6.params
+echo
+$pypef hybrid directevo -m HYBRIDplmc -w P42212_F64L.fasta --numiter 10 --numtraj 8 --params uref100_avgfp_jhmmer_119_plmc_42.6.params
+echo
+$pypef hybrid directevo -m HYBRIDplmc -i avGFP.csv -w P42212_F64L.fasta --temp 0.1 --usecsv --csvaa --params uref100_avgfp_jhmmer_119_plmc_42.6.params
+
+$pypef hybrid low_n -i avGFP_dca_encoded.csv
+echo
+$pypef hybrid extrapolation -i avGFP_dca_encoded.csv
+echo
+$pypef hybrid extrapolation -i avGFP_dca_encoded.csv --conc
+echo
+
+$pypef ml low_n -i avGFP_dca_encoded.csv --regressor ridge
+echo
+$pypef ml extrapolation -i avGFP_dca_encoded.csv --regressor ridge
+echo
+$pypef ml extrapolation -i avGFP_dca_encoded.csv --conc --regressor ridge
+echo
+
+$pypef ml low_n -i avGFP_onehot_encoded.csv --regressor pls
+echo
+$pypef ml extrapolation -i avGFP_onehot_encoded.csv --regressor pls
+echo
+$pypef ml extrapolation -i avGFP_onehot_encoded.csv --conc --regressor pls
+echo
+
+$pypef ml low_n -i avGFP_aaidx_encoded.csv --regressor ridge
+echo
+$pypef ml extrapolation -i avGFP_aaidx_encoded.csv --regressor ridge
+echo
+$pypef ml extrapolation -i avGFP_aaidx_encoded.csv --conc --regressor ridge
+echo
+
 echo 'All tests finished without error!'
\ No newline at end of file
diff --git a/workflow/run_cli_tests_win.ps1 b/workflow/run_cli_tests_win.ps1
index 3ff7453..fb3dab0 100644
--- a/workflow/run_cli_tests_win.ps1
+++ b/workflow/run_cli_tests_win.ps1
@@ -1,4 +1,6 @@
+### PowerShell script for testing some PyPEF CLI commands 
 ### based on the two datasets provided (ANEH and avGFP)
+### REQUIRES MINICONDA OR ANACONDA TO BE INSTALLED
 Write-Host "For successful running, following files are required:`n`nin test_dataset_aneh/
 `tSequence_WT_ANEH.fasta`n`t37_ANEH_variants.csv`n`tANEH_jhmmer.a2m
 `tANEH_72.6.params (generated using PLMC or dowloaded from https://github.com/niklases/PyPEF/blob/main/workflow/test_dataset_aneh/ANEH_72.6.params)
@@ -19,14 +21,20 @@ function ExitOnExitCode { if ($LastExitCode) { Write-Host "PyPEF command error;
 ### $ .\run_cli_tests_win.ps1                      # printing STDOUT and STDERR to terminal
 
 ### if using downloaded/locally stored pypef .py files:
-############### CHANGE THIS PATHS AND USED THREADS, REQUIRES PYTHON ENVIRONMENT WITH PRE-INSTALLED MODULES ###############
-$env:PYTHONPATH="C:\path\to\pypef-main"                                                                                  #
-function pypef { python C:\path\to\pypef-main\pypef\main.py @args }                                                      #
+##########################################################################################################################
+conda env remove -n pypef -y  # -y: never wait for an interactive confirmation                                           #
+conda create -n pypef python=3.10 -y                                                                                     #
+conda activate pypef                                                                                                     #
+$path=Get-Location                                                                                                       #
+$path=Split-Path -Path $path -Parent  # parent of the working directory                                                  #
+python -m pip install -r $path\requirements.txt                                                                          #
+$env:PYTHONPATH=$path                                                                                                    #
+function pypef { python $path\pypef\main.py @args }  # forwards all CLI arguments to main.py                             #
 ##########################################################################################################################
 ### else just use pip-installed pypef version (uncomment):                                                               #
 #pypef = pypef                                                                                                           #
 ##########################################################################################################################
-$threads = 16                                                                                                            #
+$threads = 12                                                                                                            #
 ##########################################################################################################################
 
 ### threads=1 shows progress bar where possible