Skip to content

Commit

Permalink
Merge pull request #483 from DeepRank/479_scrapewhitespace_dbodor
Browse files Browse the repository at this point in the history
style: auto-scrape trailing whitespace upon save in VS code
  • Loading branch information
DaniBodor authored Sep 5, 2023
2 parents 3467412 + 0ba6dd7 commit 3d418ad
Show file tree
Hide file tree
Showing 31 changed files with 209 additions and 204 deletions.
1 change: 0 additions & 1 deletion .github/actions/install-python-and-package/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,3 @@ runs:
run: python3 -m pip install .[${{ inputs.extras-require }}]
env:
CONDA_PREFIX: /usr/share/miniconda

2 changes: 1 addition & 1 deletion .github/workflows/fair-software.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ jobs:
- uses: fair-software/[email protected]
name: Measure compliance with fair-software.eu recommendations
env:
PYCHARM_HOSTED: "Trick colorama into displaying colored output"
PYCHARM_HOSTED: "Trick colorama into displaying colored output"
with:
MY_REPO_URL: "https://github.com/${{ github.repository }}"
2 changes: 1 addition & 1 deletion .prospector.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ignore-patterns:

pyroma:
run: true
# pyroma gives errors in the setup.py file,
# pyroma gives errors in the setup.py file,
# thus we disable here these errors.
# This should not be happening, because
# prospector should be ignoring the setup.py
Expand Down
10 changes: 8 additions & 2 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@
"[python]": {
"editor.codeActionsOnSave": {
"source.organizeImports": true
}
},
"files.trimTrailingWhitespace": true,
},

"python.linting.prospectorEnabled": true,
"notebook.lineNumbers": "on",
}

"[*.yml]": {
"files.trimTrailingWhitespace": true,
},
}
126 changes: 63 additions & 63 deletions deeprank2/dataset.py

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions deeprank2/domain/aminoacidlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# All info below sourced from above websites in December 2022 and summarized in deeprank2/domain/aminoacid_summary.xlsx

# Charge is calculated from summing all atoms in the residue (from ./deeprank2/domain/forcefield/protein-allhdg5-5_new.top).
# Charge is calculated from summing all atoms in the residue (from ./deeprank2/domain/forcefield/protein-allhdg5-5_new.top).
# This results in the expected charge of 0 for all POLAR and NONPOLAR residues, +1 for POSITIVE residues and -1 for NEGATIVE residues.
# Note that SER, THR, and TYR lead to a charge of ~1e-16. A rounding error is assumed in these cases and they are set to 0.

Expand All @@ -19,7 +19,7 @@
# The other sources have some minor discrepancies compared to this and are commented inline.

# Source for size:
# https://www.shimadzu.co.jp/aboutus/ms_r/archive/files/AminoAcidTable.pdf
# https://www.shimadzu.co.jp/aboutus/ms_r/archive/files/AminoAcidTable.pdf

# Sources for mass and pI:
# 1) https://www.sigmaaldrich.com/NL/en/technical-documents/technical-article/protein-biology/protein-structural-analysis/amino-acid-reference-chart
Expand Down Expand Up @@ -69,7 +69,7 @@
"U",
charge = 0,
polarity = Polarity.POLAR, # source 3: "special case"
size = 2, # source: https://en.wikipedia.org/wiki/Selenocysteine
size = 2, # source: https://en.wikipedia.org/wiki/Selenocysteine
mass = 150.0, # only from source 3
pI = 5.47, # only from source 3
hydrogen_bond_donors = 1, # unconfirmed
Expand Down Expand Up @@ -174,7 +174,7 @@
"PYL",
"O",
charge = 0, # unconfirmed
polarity = Polarity.POLAR, # based on having both H-bond donors and acceptors
polarity = Polarity.POLAR, # based on having both H-bond donors and acceptors
size = 13, # source: https://en.wikipedia.org/wiki/Pyrrolysine
mass = 255.32, # from source 3
pI = 7.394, # rough estimate from https://rstudio-pubs-static.s3.amazonaws.com/846259_7a9236df54e6410a972621590ecdcfcb.html
Expand Down Expand Up @@ -353,9 +353,9 @@
]

def convert_aa_nomenclature(aa: str, output_type: Optional[int] = None):

# pylint: disable = raise-missing-from
try:
try:
if len(aa) == 1:
aa: AminoAcid = [entry for entry in amino_acids if entry.one_letter_code.lower() == aa.lower()][0]
elif len(aa) == 3:
Expand All @@ -367,7 +367,7 @@ def convert_aa_nomenclature(aa: str, output_type: Optional[int] = None):

if not output_type:
return aa.name
if output_type == 3:
if output_type == 3:
return aa.three_letter_code
if output_type == 1:
return aa.one_letter_code
Expand Down
38 changes: 19 additions & 19 deletions deeprank2/domain/losstypes.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,31 @@
from torch import nn

regression_losses = (nn.L1Loss,
nn.SmoothL1Loss,
nn.MSELoss,
regression_losses = (nn.L1Loss,
nn.SmoothL1Loss,
nn.MSELoss,
nn.HuberLoss)

binary_classification_losses = (nn.SoftMarginLoss,
nn.BCELoss,
binary_classification_losses = (nn.SoftMarginLoss,
nn.BCELoss,
nn.BCEWithLogitsLoss)

multi_classification_losses = (nn.CrossEntropyLoss,
nn.NLLLoss,
nn.PoissonNLLLoss,
nn.GaussianNLLLoss,
nn.KLDivLoss,
nn.MultiLabelMarginLoss,
multi_classification_losses = (nn.CrossEntropyLoss,
nn.NLLLoss,
nn.PoissonNLLLoss,
nn.GaussianNLLLoss,
nn.KLDivLoss,
nn.MultiLabelMarginLoss,
nn.MultiLabelSoftMarginLoss)

other_losses = (nn.HingeEmbeddingLoss,
nn.CosineEmbeddingLoss,
nn.MarginRankingLoss,
nn.TripletMarginLoss,
other_losses = (nn.HingeEmbeddingLoss,
nn.CosineEmbeddingLoss,
nn.MarginRankingLoss,
nn.TripletMarginLoss,
nn.CTCLoss)

classification_losses = multi_classification_losses + binary_classification_losses

classification_tested = (nn.CrossEntropyLoss,
nn.NLLLoss,
nn.BCELoss,
classification_tested = (nn.CrossEntropyLoss,
nn.NLLLoss,
nn.BCELoss,
nn.BCEWithLogitsLoss)
4 changes: 2 additions & 2 deletions deeprank2/domain/nodestorage.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

## residue core features
RESTYPE = "res_type" # AminoAcid object; former FEATURENAME_AMINOACID
RESCHARGE = "res_charge" # float(<0); former FEATURENAME_CHARGE (was not assigned)
RESCHARGE = "res_charge" # float(<0); former FEATURENAME_CHARGE (was not assigned)
POLARITY = "polarity" # Polarity object; former FEATURENAME_POLARITY
RESSIZE = "res_size" # int; former FEATURENAME_SIZE
RESMASS = "res_mass"
Expand All @@ -39,7 +39,7 @@

## protein context features
RESDEPTH = "res_depth" # float; former FEATURENAME_RESIDUEDEPTH
HSE = "hse" # list[3xfloat]; former FEATURENAME_HALFSPHEREEXPOSURE
HSE = "hse" # list[3xfloat]; former FEATURENAME_HALFSPHEREEXPOSURE
SASA = "sasa" # float; former FEATURENAME_SASA
BSA = "bsa" # float; former FEATURENAME_BURIEDSURFACEAREA
SECSTRUCT = "sec_struct" #secondary structure
Expand Down
4 changes: 2 additions & 2 deletions deeprank2/features/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ def add_features( # pylint: disable=unused-argument
elif isinstance(node.id, Atom):
atom = node.id
residue = atom.residue

node.features[Nfeat.ATOMTYPE] = atom.element.onehot
node.features[Nfeat.PDBOCCUPANCY] = atom.occupancy
node.features[Nfeat.ATOMCHARGE] = atomic_forcefield.get_charge(atom)
else:
raise TypeError(f"Unexpected node type: {type(node.id)}")
raise TypeError(f"Unexpected node type: {type(node.id)}")

node.features[Nfeat.RESTYPE] = residue.amino_acid.onehot
node.features[Nfeat.RESCHARGE] = residue.amino_acid.charge
Expand Down
2 changes: 1 addition & 1 deletion deeprank2/features/conservation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def add_features( # pylint: disable=unused-argument
node.features[Nfeat.PSSM] = profile
node.features[Nfeat.INFOCONTENT] = pssm_row.information_content

if single_amino_acid_variant is not None:
if single_amino_acid_variant is not None:
if residue == single_amino_acid_variant.residue:
# only the variant residue can have a variant and wildtype amino acid
conservation_wildtype = pssm_row.get_conservation(single_amino_acid_variant.wildtype_amino_acid)
Expand Down
20 changes: 10 additions & 10 deletions deeprank2/features/contact.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
cutoff_14 = 4.2

def _get_nonbonded_energy( #pylint: disable=too-many-locals
atoms: List[Atom],
atoms: List[Atom],
distances: npt.NDArray[np.float64],
) -> Tuple [npt.NDArray[np.float64], npt.NDArray[np.float64]]:
"""Calculates all pairwise electrostatic (Coulomb) and Van der Waals (Lennard Jones) potential energies between all atoms in the structure.
Expand All @@ -30,8 +30,8 @@ def _get_nonbonded_energy( #pylint: disable=too-many-locals
However, the potential tends to 0 at large distance.
Args:
atoms (List[Atom]): list of all atoms in the structure
distances (npt.NDArray[np.float64]): matrix of pairwise distances between all atoms in the structure
atoms (List[Atom]): list of all atoms in the structure
distances (npt.NDArray[np.float64]): matrix of pairwise distances between all atoms in the structure
in the format that is the output of scipy.spatial's distance_matrix (i.e. a diagonally symmetric matrix)
Returns:
Expand Down Expand Up @@ -70,18 +70,18 @@ def _get_nonbonded_energy( #pylint: disable=too-many-locals
E_vdw[pair_14] = E_vdw_14pairs[pair_14]
E_vdw[pair_13] = 0
E_elec[pair_13] = 0


return E_elec, E_vdw


def add_features( # pylint: disable=unused-argument, too-many-locals
pdb_path: str, graph: Graph,
single_amino_acid_variant: Optional[SingleResidueVariant] = None
):

# assign each atom (from all edges) a unique index
all_atoms = set()
all_atoms = set()
if isinstance(graph.edges[0].id, AtomicContact):
for edge in graph.edges:
contact = edge.id
Expand Down Expand Up @@ -109,8 +109,8 @@ def add_features( # pylint: disable=unused-argument, too-many-locals
# assign features
for edge in graph.edges:
contact = edge.id
if isinstance(contact, AtomicContact):

if isinstance(contact, AtomicContact):
## find the indices
atom1_index = atom_dict[contact.atom1]
atom2_index = atom_dict[contact.atom2]
Expand All @@ -130,6 +130,6 @@ def add_features( # pylint: disable=unused-argument, too-many-locals
edge.features[Efeat.DISTANCE] = np.min([[interatomic_distances[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.ELEC] = np.sum([[interatomic_electrostatic_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])
edge.features[Efeat.VDW] = np.sum([[interatomic_vanderwaals_energy[a1, a2] for a1 in atom1_indices] for a2 in atom2_indices])

# Calculate irrespective of node type
edge.features[Efeat.COVALENT] = float(edge.features[Efeat.DISTANCE] < covalent_cutoff and edge.features[Efeat.SAMECHAIN])
28 changes: 14 additions & 14 deletions deeprank2/features/irc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,18 @@ def _id_from_residue(residue: Tuple[str, int, str]) -> str:
Args:
residue (tuple): Input residue as rendered by pdb2sql: (str(<chain>), int(<residue_number>), str(<three_letter_code>))
For example: ('A', 27, 'GLU').
Returns:
str: Output id in form of '<chain><residue_number>'. For example: 'A27'.
"""

return residue[0] + str(residue[1])


class _ContactDensity:
"""Internal class that holds contact density information for a given residue.
"""

def __init__(self, residue: Tuple[str, int, str], polarity: Polarity):
self.res = residue
self.polarity = polarity
Expand All @@ -52,7 +52,7 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,
cutoff (float, optional): Cutoff distance (in Ångström) to be considered a close contact. Defaults to 5.5.
Returns:
Dict[str, _ContactDensity]:
Dict[str, _ContactDensity]:
keys: ids of residues in form returned by id_from_residue.
items: _ContactDensity objects, containing all contact density information for the residue.
"""
Expand All @@ -61,11 +61,11 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,

sql = pdb2sql.interface(pdb_path)
pdb2sql_contacts = sql.get_contact_residues(
cutoff=cutoff,
cutoff=cutoff,
chain1=chains[0], chain2=chains[1],
return_contact_pairs=True
)

for chain1_res, chain2_residues in pdb2sql_contacts.items():
aa1_code = chain1_res[2]
try:
Expand All @@ -76,20 +76,20 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,
# add chain1_res to residue_contact dict
contact1_id = _id_from_residue(chain1_res)
residue_contacts[contact1_id] = _ContactDensity(chain1_res, aa1.polarity)

for chain2_res in chain2_residues:
aa2_code = chain2_res[2]
try:
aa2 = [amino_acid for amino_acid in amino_acids if amino_acid.three_letter_code == aa2_code][0]
except IndexError:
continue # skip keys that are not an amino acid

# populate densities and connections for chain1_res
residue_contacts[contact1_id].densities['total'] += 1
residue_contacts[contact1_id].densities[aa2.polarity] += 1
residue_contacts[contact1_id].connections['all'].append(chain2_res)
residue_contacts[contact1_id].connections[aa2.polarity].append(chain2_res)

# add chain2_res to residue_contact dict if it doesn't exist yet
contact2_id = _id_from_residue(chain2_res)
if contact2_id not in residue_contacts:
Expand All @@ -100,19 +100,19 @@ def get_IRCs(pdb_path: str, chains: List[str], cutoff: float = 5.5) -> Dict[str,
residue_contacts[contact2_id].densities[aa1.polarity] += 1
residue_contacts[contact2_id].connections['all'].append(chain1_res)
residue_contacts[contact2_id].connections[aa1.polarity].append(chain1_res)

return residue_contacts


def add_features(
pdb_path: str, graph: Graph,
single_amino_acid_variant: Optional[SingleResidueVariant] = None
):

if not single_amino_acid_variant: # VariantQueries do not use this feature
polarity_pairs = list(combinations(Polarity, 2))
polarity_pair_string = [f'irc_{x[0].name.lower()}_{x[1].name.lower()}' for x in polarity_pairs]

total_contacts = 0
residue_contacts = get_IRCs(pdb_path, graph.get_all_chains())

Expand All @@ -126,11 +126,11 @@ def add_features(
raise TypeError(f"Unexpected node type: {type(node.id)}")

contact_id = residue.chain.id + residue.number_string # reformat id to be in line with residue_contacts keys

# initialize all IRC features to 0
for IRC_type in Nfeat.IRC_FEATURES:
node.features[IRC_type] = 0

# load correct values to IRC features
try:
node.features[Nfeat.IRCTOTAL] = residue_contacts[contact_id].densities['total']
Expand Down
Loading

0 comments on commit 3d418ad

Please sign in to comment.