Skip to content

Commit

Permalink
updated models
Browse files Browse the repository at this point in the history
  • Loading branch information
bkb3 committed Nov 19, 2020
1 parent f145528 commit 76be99a
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 17 deletions.
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@ __pycache__/

# Test files
tests/
benchmark_set.fasta
final_df.csv
benchmark_seq.fasta
result.csv
test.fasta

#
razor.7z
Expand Down
Binary file modified libs/C.pkl.gz
Binary file not shown.
Binary file modified libs/Cleavage_weights.pkl.gz
Binary file not shown.
Binary file modified libs/Fungi_Classifier.pkl.gz
Binary file not shown.
Binary file modified libs/S.pkl.gz
Binary file not shown.
Binary file modified libs/Toxin_Classifier.pkl.gz
Binary file not shown.
29 changes: 15 additions & 14 deletions libs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,29 +89,28 @@ def features(seq):
if len(seq) != 30:
raise ValueError(
"Input sequence must be 30 residues long!"
"\nExpected length 30: Got {}".format(len(seq))
"\nExpected length 30: Got {}\r".format(len(seq))
)

aa_list = 'RKNDCEVIYFWL' + 'STG'
converted = np.array([hydrop_flex_swi[i] for i in seq])
hydro = converted[:, 0]
flex = converted[:, 1]
swi = converted[:, 2]
helix = np.array([seq.count(i) for i in "VIYFWL"])
other_aa = np.array([seq.count(i) for i in "RKNDCE"])
aa_counts = [seq.count(i) for i in aa_list]

return np.concatenate(
[savgol_filter(hydro, 15, 2), savgol_filter(swi, 15, 2), helix, flex, other_aa]
[savgol_filter(hydro, 15, 2), savgol_filter(swi, 15, 2), flex, aa_counts]
)

def s_score(feat):
"""
S score of sequence.
Input is an array of features (105)
"""
if len(feat) != 102:
if len(feat) != 105:
raise ValueError(
"Input features length is incorrect!"
"Expected length 102: Got {}".format(len(feat))
"Expected length 105: Got {}".format(len(feat))
)

if feat.dtype != np.float64:
Expand All @@ -129,14 +128,14 @@ def validate_scan(seq, max_scan):
warnings.warn(
"The minimum length to take for evaluating C score "
"must be greater than 16 but received {max_scan}."
" Correcting it to 45.".format(max_scan=max_scan)
" Correcting it to 45.\r".format(max_scan=max_scan)
)
max_scan = 45
if max_scan > len(seq):
warnings.warn(
"The given maximum length to take for evaluating C score {max_scan} "
"is greater than the input sequence length {len_seq}."
" Correcting it to sequence length {len_seq}.".format(
" Correcting it to sequence length {len_seq}.\r".format(
max_scan=max_scan, len_seq=len(seq)
)
)
Expand Down Expand Up @@ -198,12 +197,14 @@ def check_fungi(seq):
def check_toxin(seq):
'''
Check if a sequence has toxic peptide.
Features are hydrophobicity and SWI up to position 19
Features are hydrophobicity, SWI and flex up to position 23, plus counts of the residues 'NPGS'
'''
seq = validate(seq)[:19]
hydrop = np.array([hydrop_flex_swi[i] for i in seq])[:, 0]
swi = np.array([hydrop_flex_swi[i] for i in seq])[:, 2]
feat = np.concatenate([hydrop, swi])
seq = validate(seq)[:23]
hydrop = np.array([hydrop_flex_swi[i] for i in seq])[:,0]
swi = np.array([hydrop_flex_swi[i] for i in seq])[:,2]
flex = np.array([hydrop_flex_swi[i] for i in seq])[:,1]
turn = np.array([seq.count(i) for i in 'NPGS'])
feat = np.concatenate([hydrop, swi, flex, turn])

classifiers = TOXIN.Classifier
scores = np.array([clf.predict_proba([feat]) for clf in classifiers])[:, :, 1].flatten()
Expand Down

0 comments on commit 76be99a

Please sign in to comment.