Merge pull request #483 from DeepRank/479_scrapewhitespace_dbodor

style: auto-scrape trailing whitespace upon save in VS code
DeepRank · Sep 5, 2023 · 3d418ad · 3d418ad
2 parents 3467412 + 0ba6dd7
commit 3d418ad
Show file tree

Hide file tree

Showing 31 changed files with 209 additions and 204 deletions.
diff --git a/.github/actions/install-python-and-package/action.yml b/.github/actions/install-python-and-package/action.yml
@@ -64,4 +64,3 @@ runs:
       run: python3 -m pip install .[${{ inputs.extras-require }}]
       env:
         CONDA_PREFIX: /usr/share/miniconda
-
diff --git a/.github/workflows/fair-software.yml b/.github/workflows/fair-software.yml
@@ -16,6 +16,6 @@ jobs:
       - uses: fair-software/howfairis-github-action@0.2.1
         name: Measure compliance with fair-software.eu recommendations
         env:
-          PYCHARM_HOSTED: "Trick colorama into displaying colored output" 
+          PYCHARM_HOSTED: "Trick colorama into displaying colored output"
         with:
           MY_REPO_URL: "https://github.com/${{ github.repository }}"
diff --git a/.prospector.yml b/.prospector.yml
@@ -18,7 +18,7 @@ ignore-patterns:
 
 pyroma:
   run: true
-  # pyroma gives errors in the setup.py file, 
+  # pyroma gives errors in the setup.py file,
   # thus we disable here these errors.
   # This should not be happening, because
   # prospector should be ignoring the setup.py

diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -2,8 +2,14 @@
     "[python]": {
         "editor.codeActionsOnSave": {
             "source.organizeImports": true
-        }
+        },
+        "files.trimTrailingWhitespace": true,
     },
+
     "python.linting.prospectorEnabled": true,
     "notebook.lineNumbers": "on",
-}
+
+    "[*.yml]": {
+        "files.trimTrailingWhitespace": true,
+    },
+}
diff --git a/deeprank2/dataset.py b/deeprank2/dataset.py
diff --git a/deeprank2/domain/aminoacidlist.py b/deeprank2/domain/aminoacidlist.py
@@ -4,7 +4,7 @@
 
 # All info below sourced from above websites in December 2022 and summarized in deeprank2/domain/aminoacid_summary.xlsx
 
-# Charge is calculated from summing all atoms in the residue (from ./deeprank2/domain/forcefield/protein-allhdg5-5_new.top). 
+# Charge is calculated from summing all atoms in the residue (from ./deeprank2/domain/forcefield/protein-allhdg5-5_new.top).
 # This results in the expected charge of 0 for all POLAR and NONPOLAR residues, +1 for POSITIVE residues and -1 for NEGATIVE residues.
 # Note that SER, THR, and TYR lead to a charge of ~1e-16. A rounding error is assumed in these cases and they are set to 0.
 
@@ -19,7 +19,7 @@
 # The other sources have some minor discrepancies compared to this and are commented inline.
 
 # Source for size:
-#   https://www.shimadzu.co.jp/aboutus/ms_r/archive/files/AminoAcidTable.pdf 
+#   https://www.shimadzu.co.jp/aboutus/ms_r/archive/files/AminoAcidTable.pdf
 
 # Sources for mass and pI:
 #   1) https://www.sigmaaldrich.com/NL/en/technical-documents/technical-article/protein-biology/protein-structural-analysis/amino-acid-reference-chart
@@ -69,7 +69,7 @@
     "U",
     charge = 0,
     polarity = Polarity.POLAR, # source 3: "special case"
-    size = 2, # source: https://en.wikipedia.org/wiki/Selenocysteine 
+    size = 2, # source: https://en.wikipedia.org/wiki/Selenocysteine
     mass = 150.0, # only from source 3
     pI = 5.47, # only from source 3
     hydrogen_bond_donors = 1, # unconfirmed
@@ -174,7 +174,7 @@
     "PYL",
     "O",
     charge = 0, # unconfirmed
-    polarity = Polarity.POLAR, # based on having both H-bond donors and acceptors 
+    polarity = Polarity.POLAR, # based on having both H-bond donors and acceptors
     size = 13, # source: https://en.wikipedia.org/wiki/Pyrrolysine
     mass = 255.32, # from source 3
     pI = 7.394, # rough estimate from https://rstudio-pubs-static.s3.amazonaws.com/846259_7a9236df54e6410a972621590ecdcfcb.html
@@ -353,9 +353,9 @@
     ]
 
 def convert_aa_nomenclature(aa: str, output_type: Optional[int] = None):
-    
+
     # pylint: disable = raise-missing-from
-    try: 
+    try:
         if len(aa) == 1:
             aa: AminoAcid = [entry for entry in amino_acids if entry.one_letter_code.lower() == aa.lower()][0]
         elif len(aa) == 3:
@@ -367,7 +367,7 @@ def convert_aa_nomenclature(aa: str, output_type: Optional[int] = None):
 
     if not output_type:
         return aa.name
-    if output_type == 3: 
+    if output_type == 3:
         return aa.three_letter_code
     if output_type == 1:
         return aa.one_letter_code

diff --git a/deeprank2/domain/losstypes.py b/deeprank2/domain/losstypes.py
@@ -1,31 +1,31 @@
 from torch import nn
 
-regression_losses = (nn.L1Loss, 
-                    nn.SmoothL1Loss, 
-                    nn.MSELoss, 
+regression_losses = (nn.L1Loss,
+                    nn.SmoothL1Loss,
+                    nn.MSELoss,
                     nn.HuberLoss)
 
-binary_classification_losses = (nn.SoftMarginLoss, 
-                                nn.BCELoss, 
+binary_classification_losses = (nn.SoftMarginLoss,
+                                nn.BCELoss,
                                 nn.BCEWithLogitsLoss)
 
-multi_classification_losses = (nn.CrossEntropyLoss, 
-                                nn.NLLLoss, 
-                                nn.PoissonNLLLoss, 
-                                nn.GaussianNLLLoss, 
-                                nn.KLDivLoss, 
-                                nn.MultiLabelMarginLoss, 
+multi_classification_losses = (nn.CrossEntropyLoss,
+                                nn.NLLLoss,
+                                nn.PoissonNLLLoss,
+                                nn.GaussianNLLLoss,
+                                nn.KLDivLoss,
+                                nn.MultiLabelMarginLoss,
                                 nn.MultiLabelSoftMarginLoss)
 
-other_losses = (nn.HingeEmbeddingLoss, 
-                nn.CosineEmbeddingLoss, 
-                nn.MarginRankingLoss, 
-                nn.TripletMarginLoss, 
+other_losses = (nn.HingeEmbeddingLoss,
+                nn.CosineEmbeddingLoss,
+                nn.MarginRankingLoss,
+                nn.TripletMarginLoss,
                 nn.CTCLoss)
-                
+
 classification_losses = multi_classification_losses + binary_classification_losses
 
-classification_tested = (nn.CrossEntropyLoss, 
-                        nn.NLLLoss, 
-                        nn.BCELoss, 
+classification_tested = (nn.CrossEntropyLoss,
+                        nn.NLLLoss,
+                        nn.BCELoss,
                         nn.BCEWithLogitsLoss)
diff --git a/deeprank2/domain/nodestorage.py b/deeprank2/domain/nodestorage.py
@@ -13,7 +13,7 @@
 
 ## residue core features
 RESTYPE = "res_type" # AminoAcid object; former FEATURENAME_AMINOACID
-RESCHARGE = "res_charge" # float(<0); former FEATURENAME_CHARGE (was not assigned) 
+RESCHARGE = "res_charge" # float(<0); former FEATURENAME_CHARGE (was not assigned)
 POLARITY = "polarity" #  Polarity object; former FEATURENAME_POLARITY
 RESSIZE = "res_size" # int; former FEATURENAME_SIZE
 RESMASS = "res_mass"
@@ -39,7 +39,7 @@
 
 ## protein context features
 RESDEPTH = "res_depth" # float; former FEATURENAME_RESIDUEDEPTH
-HSE = "hse" # list[3xfloat]; former FEATURENAME_HALFSPHEREEXPOSURE 
+HSE = "hse" # list[3xfloat]; former FEATURENAME_HALFSPHEREEXPOSURE
 SASA = "sasa" # float; former FEATURENAME_SASA
 BSA = "bsa" # float; former FEATURENAME_BURIEDSURFACEAREA
 SECSTRUCT = "sec_struct" #secondary structure

diff --git a/deeprank2/features/components.py b/deeprank2/features/components.py
@@ -23,12 +23,12 @@ def add_features( # pylint: disable=unused-argument
         elif isinstance(node.id, Atom):
             atom = node.id
             residue = atom.residue
-            
+
             node.features[Nfeat.ATOMTYPE] = atom.element.onehot
             node.features[Nfeat.PDBOCCUPANCY] = atom.occupancy
             node.features[Nfeat.ATOMCHARGE] = atomic_forcefield.get_charge(atom)
         else:
-            raise TypeError(f"Unexpected node type: {type(node.id)}") 
+            raise TypeError(f"Unexpected node type: {type(node.id)}")
 
         node.features[Nfeat.RESTYPE] = residue.amino_acid.onehot
         node.features[Nfeat.RESCHARGE] = residue.amino_acid.charge

diff --git a/deeprank2/features/conservation.py b/deeprank2/features/conservation.py
@@ -31,7 +31,7 @@ def add_features( # pylint: disable=unused-argument
         node.features[Nfeat.PSSM] = profile
         node.features[Nfeat.INFOCONTENT] = pssm_row.information_content
 
-        if single_amino_acid_variant is not None:            
+        if single_amino_acid_variant is not None:
             if residue == single_amino_acid_variant.residue:
                 # only the variant residue can have a variant and wildtype amino acid
                 conservation_wildtype = pssm_row.get_conservation(single_amino_acid_variant.wildtype_amino_acid)

diff --git a/deeprank2/features/contact.py b/deeprank2/features/contact.py
@@ -21,7 +21,7 @@
 cutoff_14 = 4.2
 
 def _get_nonbonded_energy( #pylint: disable=too-many-locals
-    atoms: List[Atom], 
+    atoms: List[Atom],
     distances: npt.NDArray[np.float64],
     ) -> Tuple [npt.NDArray[np.float64], npt.NDArray[np.float64]]:
     """Calculates all pairwise electrostatic (Coulomb) and Van der Waals (Lennard Jones) potential energies between all atoms in the structure.
@@ -30,8 +30,8 @@ def _get_nonbonded_energy( #pylint: disable=too-many-locals
     However, the potential tends to 0 at large distance.
 
     Args:
-        atoms (List[Atom]): list of all atoms in the structure 
-        distances (npt.NDArray[np.float64]): matrix of pairwise distances between all atoms in the structure 
+        atoms (List[Atom]): list of all atoms in the structure
+        distances (npt.NDArray[np.float64]): matrix of pairwise distances between all atoms in the structure
             in the format that is the output of scipy.spatial's distance_matrix (i.e. a diagonally symmetric matrix)
 
     Returns:
@@ -70,18 +70,18 @@ def _get_nonbonded_energy( #pylint: disable=too-many-locals
     E_vdw[pair_14] = E_vdw_14pairs[pair_14]
     E_vdw[pair_13] = 0
     E_elec[pair_13] = 0
-    
-    
+
+
     return E_elec, E_vdw
 
 
 def add_features( # pylint: disable=unused-argument, too-many-locals
     pdb_path: str, graph: Graph,
     single_amino_acid_variant: Optional[SingleResidueVariant] = None
     ):
-    
+
     # assign each atom (from all edges) a unique index
-    all_atoms = set() 
+    all_atoms = set()
     if isinstance(graph.edges[0].id, AtomicContact):
         for edge in graph.edges:
             contact = edge.id
@@ -109,8 +109,8 @@ def add_features( # pylint: disable=unused-argument, too-many-locals
     # assign features
     for edge in graph.edges:
         contact = edge.id
-        
-        if isinstance(contact, AtomicContact):    
+
+        if isinstance(contact, AtomicContact):
             ## find the indices
             atom1_index = atom_dict[contact.atom1]
             atom2_index = atom_dict[contact.atom2]
@@ -130,6 +130,6 @@ def add_features( # pylint: disable=unused-argument, too-many-locals
             edge.features[Efeat.DISTANCE] = np.min([[interatomic_distances[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
             edge.features[Efeat.ELEC] = np.sum([[interatomic_electrostatic_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
             edge.features[Efeat.VDW] = np.sum([[interatomic_vanderwaals_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
-        
+
         # Calculate irrespective of node type
         edge.features[Efeat.COVALENT] = float(edge.features[Efeat.DISTANCE] < covalent_cutoff and edge.features[Efeat.SAMECHAIN])
diff --git a/deeprank2/features/irc.py b/deeprank2/features/irc.py
@@ -21,18 +21,18 @@ def _id_from_residue(residue: Tuple[str, int, str]) -> str:
     Args:
         residue (tuple): Input residue as rendered by pdb2sql: (str(<chain>), int(<residue_number>), str(<three_letter_code>))
             For example: ('A', 27, 'GLU').
-    
+
     Returns:
         str: Output id in form of '<chain><residue_number>'. For example: 'A27'.
     """
-    
+
     return residue[0] + str(residue[1])
 
 
 class _ContactDensity:
     """Internal class that holds contact density information for a given residue.
     """
-    
+
     def __init__(self, residue: Tuple[str, int, str], polarity: Polarity):
         self.res = residue
         self.polarity = polarity
@@ -52,7 +52,7 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,
         cutoff (float, optional): Cutoff distance (in Ångström) to be considered a close contact. Defaults to 5.5.
 
     Returns:
-        Dict[str, _ContactDensity]: 
+        Dict[str, _ContactDensity]:
             keys: ids of residues in form returned by id_from_residue.
             items: _ContactDensity objects, containing all contact density information for the residue.
     """
@@ -61,11 +61,11 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,
 
     sql = pdb2sql.interface(pdb_path)
     pdb2sql_contacts = sql.get_contact_residues(
-        cutoff=cutoff, 
+        cutoff=cutoff,
         chain1=chains[0], chain2=chains[1],
         return_contact_pairs=True
     )
-    
+
     for chain1_res, chain2_residues in pdb2sql_contacts.items():
         aa1_code = chain1_res[2]
         try:
@@ -76,20 +76,20 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,
         # add chain1_res to residue_contact dict
         contact1_id = _id_from_residue(chain1_res)
         residue_contacts[contact1_id] = _ContactDensity(chain1_res, aa1.polarity)
-        
+
         for chain2_res in chain2_residues:
             aa2_code = chain2_res[2]
             try:
                 aa2 = [amino_acid for amino_acid in amino_acids if amino_acid.three_letter_code == aa2_code][0]
             except IndexError:
                 continue  # skip keys that are not an amino acid
-            
+
             # populate densities and connections for chain1_res
             residue_contacts[contact1_id].densities['total'] += 1
             residue_contacts[contact1_id].densities[aa2.polarity] += 1
             residue_contacts[contact1_id].connections['all'].append(chain2_res)
             residue_contacts[contact1_id].connections[aa2.polarity].append(chain2_res)
-            
+
             # add chain2_res to residue_contact dict if it doesn't exist yet
             contact2_id = _id_from_residue(chain2_res)
             if contact2_id not in residue_contacts:
@@ -100,19 +100,19 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,
             residue_contacts[contact2_id].densities[aa1.polarity] += 1
             residue_contacts[contact2_id].connections['all'].append(chain1_res)
             residue_contacts[contact2_id].connections[aa1.polarity].append(chain1_res)
-    
+
     return residue_contacts
 
 
 def add_features(
     pdb_path: str, graph: Graph,
     single_amino_acid_variant: Optional[SingleResidueVariant] = None
     ):
-    
+
     if not single_amino_acid_variant: # VariantQueries do not use this feature
         polarity_pairs = list(combinations(Polarity, 2))
         polarity_pair_string = [f'irc_{x[0].name.lower()}_{x[1].name.lower()}' for x in polarity_pairs]
-        
+
         total_contacts = 0
         residue_contacts = get_IRCs(pdb_path, graph.get_all_chains())
 
@@ -126,11 +126,11 @@ def add_features(
                 raise TypeError(f"Unexpected node type: {type(node.id)}")
 
             contact_id = residue.chain.id + residue.number_string  # reformat id to be in line with residue_contacts keys
-            
+
             # initialize all IRC features to 0
             for IRC_type in Nfeat.IRC_FEATURES:
                 node.features[IRC_type] = 0
-            
+
             # load correct values to IRC features
             try:
                 node.features[Nfeat.IRCTOTAL] = residue_contacts[contact_id].densities['total']