Skip to content

Commit

Permalink
rename 'alias_protein' to 'mat_pep'
Browse files (browse the repository at this point in the history)
  • Loading branch information
miseminger committed Jul 30, 2024
1 parent ec976f8 commit bfd06d3
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions bin/gvf2indexandlog.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -49,7 +49,7 @@ def parse_args():

# create index from GVF
# make empty index df
index_cols=['pos', 'mutation', 'hgvs_aa_mutation', 'hgvs_nt_mutation', 'gene_name', 'gene_symbol', 'protein_name', 'protein_symbol', 'alias', 'hgvs_alias', 'alias_protein', 'Pokay_annotation', 'lineages']
index_cols=['pos', 'mutation', 'hgvs_aa_mutation', 'hgvs_nt_mutation', 'gene_name', 'gene_symbol', 'protein_name', 'protein_symbol', 'alias', 'hgvs_alias', 'mat_pep', 'Pokay_annotation', 'lineages']
index = pd.DataFrame(np.empty((gvf.shape[0], len(index_cols))), columns=index_cols)
# populate index df with gvf info
index['pos'] = gvf['#start']
Expand All @@ -58,8 +58,7 @@ def parse_args():
index['hgvs_nt_mutation'] = gvf['hgvs_nt']
index['alias'] = gvf['alias']
index['hgvs_alias'] = gvf['hgvs_alias']
index['alias_protein'] = 'n/a'
index.loc[index['alias']!='n/a', 'alias_protein'] = gvf['mat_pep']
index['mat_pep'] = gvf['mat_pep']
index['gene_name'] = gvf['gene_name']
index['gene_symbol'] = gvf['gene_symbol']
index['protein_name'] = gvf['protein_name']
Expand All @@ -77,7 +76,7 @@ def parse_args():
# fill in 'new_mutations' column like: "gene:mutation"
log['new_mutations'] = log["gene_symbol"] + ":" + log["mutation"]
# for orf1ab mutations, fill in 'new_mutations' column like: "gene:mutation / nsp:alias"
log.loc[log['alias']!='n/a', 'new_mutations'] = log['new_mutations'] + " / " + log["alias_protein"] + ":" + log["alias"]
log.loc[log['alias']!='n/a', 'new_mutations'] = log['new_mutations'] + " / " + log["mat_pep"] + ":" + log["alias"]
# drop duplicates (there shouldn't be any)
log = log[['pos', 'new_mutations', 'lineages']].drop_duplicates()
# drop any NaN rows
Expand Down

0 comments on commit bfd06d3

Please sign in to comment.