Skip to content

Commit

Permalink
Merge pull request #7 from t0mdavid-m/FDRdevelop
Browse files Browse the repository at this point in the history
FDR merging
  • Loading branch information
t0mdavid-m authored May 29, 2024
2 parents c656f46 + 644f4a9 commit 9a6cc4b
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 148 deletions.
2 changes: 1 addition & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def flashdeconvPages():
Page("pages/FLASHDeconvViewer.py", "Viewer", "👀"),
Page("pages/FLASHDeconvDownload.py", "Download", "⬇️"),
Page("pages/FLASHFDR.py", "ECDF Plot", "📈"),
Page("pages/FileUploadFDR.py", "FDR File Upload", "📊"),
#Page("pages/FileUploadFDR.py", "FDR File Upload", "📊"),
])


Expand Down
57 changes: 41 additions & 16 deletions pages/FLASHFDR.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,54 @@
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

def ecdf(data):
    """Compute the empirical cumulative distribution function (ECDF).

    Args:
        data: One-dimensional array-like of numeric values (list, NumPy
            array, pandas Series, ...).

    Returns:
        A tuple ``(x, y)`` of NumPy arrays of equal length. ``x`` holds the
        non-missing values in ascending order and ``y[i]`` equals
        ``(i + 1) / n``, the cumulative fraction of observations up to and
        including ``x[i]``. Both arrays are empty when there is no data.
    """
    values = np.asarray(data)
    if values.dtype.kind == "f":
        # np.sort places NaN at the end and counting NaN in the denominator
        # would stop the plotted curve from ever reaching 1, so missing
        # values are excluded before ranking.
        values = values[~np.isnan(values)]
    x = np.sort(values)
    n = len(x)
    # Explicit empty case avoids dividing by a zero sample count.
    y = np.arange(1, n + 1) / n if n else np.zeros(0)
    return x, y

def generate_and_display_ecdf_plot(df):
"""Generate and display ECDF plot."""
def generate_and_display_plots(df):
"""Generate and display ECDF and density plots."""
# Extract Qscore data
target_qscores = df[df['TargetDecoyType'] == 0]['QScore']
decoy_qscores = df[df['TargetDecoyType'] > 0]['QScore']
target_qscores = df[df['TargetDecoyType'] == 0]['Qscore']
decoy_qscores = df[df['TargetDecoyType'] > 0]['Qscore']

# Generate and display the ECDF plot
plt.figure(figsize=(10, 6))
# Generate ECDF data
x_target, y_target = ecdf(target_qscores)
x_decoy, y_decoy = ecdf(decoy_qscores)
plt.plot(x_target, y_target, marker='.', linestyle='none', color='green', label='Target QScores')
plt.plot(x_decoy, y_decoy, marker='.', linestyle='none', color='red', label='Decoy QScores')
plt.xlabel('qScore')
plt.ylabel('ECDF')
plt.title('ECDF of QScore Distribution')
plt.legend()
st.pyplot(plt)

# Create ECDF Plotly figure
fig_ecdf = px.line(title='ECDF of QScore Distribution')
fig_ecdf.add_scatter(x=x_target, y=y_target, mode='markers', name='Target QScores', marker=dict(color='green'))
fig_ecdf.add_scatter(x=x_decoy, y=y_decoy, mode='markers', name='Decoy QScores', marker=dict(color='red'))
fig_ecdf.update_layout(
xaxis_title='qScore',
yaxis_title='ECDF',
legend_title='QScore Type'
)

# Create Density Plotly figure without area fill
fig_density = go.Figure()
fig_density.add_trace(go.Histogram(x=target_qscores, histnorm='density', name='Targets', opacity=0.75, marker_color='green'))
fig_density.add_trace(go.Histogram(x=decoy_qscores, histnorm='density', name='Decoys', opacity=0.75, marker_color='red'))
fig_density.update_traces(opacity=0.75)
fig_density.update_layout(
title='Density Plot of QScore Distribution',
xaxis_title='qScore',
yaxis_title='Density',
barmode='overlay',
legend_title='QScore Type'
)

# Display plots
st.plotly_chart(fig_ecdf)
st.plotly_chart(fig_density)

def main():
st.title('ECDF Plot of QScore Distribution')
st.title('ECDF and Density Plot of QScore Distribution of Targets and Decoys')

if 'parsed_tsv_files' not in st.session_state or not st.session_state['parsed_tsv_files']:
st.warning("No TSV files uploaded. Please upload TSV files first.")
Expand All @@ -39,7 +59,12 @@ def main():

if tsv_file:
df = st.session_state['parsed_tsv_files'][tsv_file]
generate_and_display_ecdf_plot(df)
generate_and_display_plots(df)

if __name__ == "__main__":
main()





76 changes: 33 additions & 43 deletions pages/FileUpload.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
from src.masstable import parseFLASHDeconvOutput
from src.common import page_setup, v_space, save_params, reset_directory


input_file_types = ["deconv-mzMLs", "anno-mzMLs"]
parsed_df_types = ["deconv_dfs", "anno_dfs"]
# Define input and parsed file types for both mzML and TSV files
input_file_types = ["deconv-mzMLs", "anno-mzMLs", "tsv-files"]
parsed_df_types = ["deconv_dfs", "anno_dfs", "parsed_tsv_files"]
tool = 'FLASHDeconvViewer'


Expand Down Expand Up @@ -46,7 +46,6 @@ def getUploadedFileDF(deconv_files_, anno_files_):
'Annotated Files': anno_files_})
return df


def remove_selected_mzML_files(to_remove: list[str], params: dict) -> dict:
"""
Removes selected mzML files from the mzML directory. (From fileUpload.py)
Expand Down Expand Up @@ -80,39 +79,35 @@ def remove_selected_mzML_files(to_remove: list[str], params: dict) -> dict:
st.success("Selected mzML files removed!")
return params


def handleInputFiles(uploaded_files):
for file in uploaded_files:
if not file.name.endswith("mzML"):
if file.name.endswith("mzML"):
session_name = 'deconv-mzMLs' if file.name.endswith('_deconv.mzML') else 'anno-mzMLs'
elif file.name.endswith("tsv"):
session_name = 'tsv-files'
else:
continue

session_name = ''
if file.name.endswith('_deconv.mzML'):
session_name = 'deconv-mzMLs'
elif file.name.endswith('_annotated.mzML'):
session_name = 'anno-mzMLs'
if file.name not in st.session_state[session_name]:
with open(
Path(st.session_state.workspace, tool, session_name, file.name), "wb"
) as f:
f.write(file.getbuffer())
st.session_state[session_name].append(file.name)


def parseUploadedFiles():
# get newly uploaded files
deconv_files = st.session_state['deconv-mzMLs']
anno_files = st.session_state['anno-mzMLs']
# anno_files = Path(st.session_state['anno-mzMLs']).iterdir()
tsv_files = st.session_state['tsv-files']
new_deconv_files = [f for f in deconv_files if f not in st.session_state['deconv_dfs']]
new_anno_files = [f for f in anno_files if f not in st.session_state['anno_dfs']]
new_tsv_files = [f for f in tsv_files if f not in st.session_state['parsed_tsv_files']]

# if newly uploaded files are not as needed
if len(new_deconv_files) == 0 and len(new_anno_files) == 0: # if no newly uploaded files, move on
if len(new_deconv_files) == 0 and len(new_anno_files) == 0 and len(new_tsv_files) == 0:
return
elif len(new_deconv_files) != len(new_anno_files): # if newly uploaded files doesn't match, write message
elif len(new_deconv_files) != len(new_anno_files):
st.error('Added files are not in pair, so not parsed. \n Here are uploaded ones, but not parsed ones:')
# not_parsed = [f.name for f in new_deconv_files] + [f.name for f in new_anno_files]
not_parsed = new_deconv_files + new_anno_files
for i in not_parsed:
st.markdown("- " + i)
Expand All @@ -138,6 +133,11 @@ def parseUploadedFiles():
st.session_state['deconv_dfs'][deconv_f] = spec_df
st.success('Done parsing the experiment %s!' % exp_name)

for tsv_file in new_tsv_files:
df = pd.read_csv(Path(st.session_state.workspace, tool, "tsv-files", tsv_file), sep='\t')
if 'TargetDecoyType' not in df.columns:
continue
st.session_state['parsed_tsv_files'][tsv_file] = df

def showUploadedFilesTable() -> bool:
''' return: if showing without error '''
Expand Down Expand Up @@ -190,7 +190,7 @@ def postprocessingAfterUpload_FD(uploaded_files: list) -> None:
_, c2, _ = st.columns(3)
if c2.button("Load Example Data", type="primary"):
# loading and copying example files into default workspace
for filename_postfix, input_file_session_name in zip(['*deconv.mzML', '*annotated.mzML'],
for filename_postfix, input_file_session_name in zip(['*deconv.mzML', '*annotated.mzML', '*.tsv'],
input_file_types):
for file in Path("example-data/flashdeconv").glob(filename_postfix):
if file.name not in st.session_state[input_file_session_name]:
Expand All @@ -199,29 +199,26 @@ def postprocessingAfterUpload_FD(uploaded_files: list) -> None:
# parsing the example files is done in parseUploadedFiles later
st.success("Example mzML files loaded!")

# Upload files via upload widget
with tabs[0]:
st.subheader("**Upload FLASHDeconv output files (\*_annotated.mzML & \*_deconv.mzML)**")
# Display info how to upload files
st.subheader("**Upload FLASHDeconv output files (\*_annotated.mzML & \*_deconv.mzML) or TSV files (ECDF Plot only)**")
st.info(
"""
**💡 How to upload files**
1. Browse files on your computer or drag and drops files
2. Click the **Add the uploaded mzML files** button to use them in the workflows
Select data for analysis from the uploaded files shown below.
**💡 Make sure that the same number of deconvolved and annotated mzML files are uploaded!**
"""
**💡 How to upload files**
1. Browse files on your computer or drag and drops files
2. Click the **Add the uploaded files** button to use them in the workflows
Select data for analysis from the uploaded files shown below.
**💡 Make sure that the same number of deconvolved and annotated mzML files are uploaded!**
"""
)
with st.form('input_mzml', clear_on_submit=True):
with st.form('input_files', clear_on_submit=True):
uploaded_file = st.file_uploader(
"FLASHDeconv output mzML files", accept_multiple_files=True
"FLASHDeconv output mzML files or TSV files", accept_multiple_files=True, type=["mzML", "tsv"]
)
_, c2, _ = st.columns(3)
# User needs to click button to upload selected files
if c2.form_submit_button("Add mzML files to workspace", type="primary"):
if c2.form_submit_button("Add files to workspace", type="primary"):
# Copy uploaded mzML files to deconv-mzML-files directory
if 'selected_experiment0' in st.session_state:
del(st.session_state['selected_experiment0'])
Expand All @@ -242,22 +239,20 @@ def postprocessingAfterUpload_FD(uploaded_files: list) -> None:
else:
st.warning("Upload some files before adding them.")

# parse files if newly uploaded
st.session_state['progress_bar_space'] = st.container()
parseUploadedFiles()

if showUploadedFilesTable():
# Remove files
with st.expander("🗑️ Remove mzML files"):
to_remove = st.multiselect(
"select mzML files", options=st.session_state["experiment-df"]['Experiment Name']
"select files", options=st.session_state["experiment-df"]['Experiment Name']
)
c1, c2 = st.columns(2)
if c2.button(
"Remove **selected**", type="primary", disabled=not any(to_remove)
):
params = remove_selected_mzML_files(to_remove, params)
# save_params(params)
st.rerun()

if c1.button("⚠️ Remove **all**", disabled=not any(st.session_state["experiment-df"])):
Expand All @@ -267,13 +262,8 @@ def postprocessingAfterUpload_FD(uploaded_files: list) -> None:
st.session_state[file_option] = []
if df_option in st.session_state:
st.session_state[df_option] = {}

# for k, v in params.items():
# if df_option in k and isinstance(v, list):
# params[k] = []
st.success("All mzML files removed!")
st.success("All files removed!")
del st.session_state["experiment-df"] # reset the experiment df table
# save_params(params)
st.rerun()

save_params(params)
88 changes: 0 additions & 88 deletions pages/FileUploadFDR.py

This file was deleted.

5 changes: 5 additions & 0 deletions src/Workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ def execution(self) -> None:
folder_path = join(base_path, 'FLASHDeconvOutput', '%s_%s'%(file_name, current_time))
folder_path_anno = join(base_path, self.tool_name, 'anno-mzMLs')
folder_path_deconv = join(base_path, self.tool_name, 'deconv-mzMLs')
folder_path_tsv = join(base_path, self.tool_name, 'tsv-files')

if exists(folder_path):
rmtree(folder_path)
Expand All @@ -436,8 +437,11 @@ def execution(self) -> None:
makedirs(folder_path_anno)
if not exists(folder_path_deconv):
makedirs(folder_path_deconv)
if not exists(folder_path_tsv):
makedirs(folder_path_tsv)

out_tsv = join(folder_path, f'out.tsv')
out_tsv_fdr = join(folder_path_tsv, f'{file_name}_{current_time}.tsv')
out_spec1 = join(folder_path, f'spec1.tsv')
out_spec2 = join(folder_path, f'spec2.tsv')
out_spec3 = join(folder_path, f'spec3.tsv')
Expand Down Expand Up @@ -478,3 +482,4 @@ def execution(self) -> None:

copyfile(out_mzml, out_deconv_mzml_viewer)
copyfile(out_annotated_mzml, out_annotated_mzml_viewer)
copyfile(out_tsv, out_tsv_fdr)

0 comments on commit 9a6cc4b

Please sign in to comment.